diff --git a/CMakeLists.txt b/CMakeLists.txt
index 43a906d84493cb536bb43fcca0cd310ca4c86896..06184ac619b2843b9933b445f1acc38a04b4ffa4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,17 +55,17 @@ if (${RF_BOARD} STREQUAL "OAI_USRP")
   find_package(Boost REQUIRED)
 
 elseif (${RF_BOARD} STREQUAL "OAI_IRIS")
-    include_directories("${OPENAIR_TARGETS}/ARCH/IRIS/USERSPACE/LIB/")
+    include_directories("${OPENAIR_DIR}/sdr/IRIS/USERSPACE/LIB/")
 
     set(HW_SOURCE ${HW_SOURCE}
-            ${OPENAIR_TARGETS}/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp)
+            ${OPENAIR_DIR}/sdr/IRIS/USERSPACE/LIB/iris_lib.cpp)
     LINK_DIRECTORIES("/usr/local/lib")
     set(option_HW_lib "-lSoapySDR -rdynamic -ldl")
 
 elseif (${RF_BOARD} STREQUAL "OAI_AW2SORI")
-    include_directories("${OPENAIR_TARGETS}/ARCH/AW2SORI")
+    include_directories("${OPENAIR_DIR}/sdr/AW2SORI")
     set(HW_SOURCE ${HW_SOURCE}
-	    ${OPENAIR_TARGETS}/ARCH/AW2SORI/ARCH/AW2SORI/oaiori.c)
+	    ${OPENAIR_DIR}/sdr/AW2SORI/oaiori.c)
     LINK_DIRECTORIES("/usr/local/lib")
     set(openair_HW_lib "-shared -fPIC -msse4 -g -ggdb -lori")
 
@@ -178,7 +178,6 @@ if (CUDA_FOUND)
     # Disable warnings for CUDA
     SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-lpthread;-w;-O3;--default-stream;per-thread;-I/usr/local/cuda/inc;-L/usr/local/cuda/lib -lcutil;-rdc=true;-lcudadevrt")
     SET(CUDA_VERBOSE_BUILD ON)
-    SET(CUDA_HOST_COMPILER "/usr/bin/g++")
 
     SET(CUDA_SEPARABLE_COMPILATION ON)
 
@@ -198,44 +197,36 @@ message("CMAKE_BUILD_TYPE is ${CMAKE_BUILD_TYPE}")
 add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel." Debug Release RelWithDebInfo MinSizeRel)
 
 Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
-  set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
-else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
-  if(EXISTS  "/proc/cpuinfo")
-    file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)
-    message("NOAVX512 is ${NOAVX512}")
-    if (CPUINFO MATCHES "avx512bw" AND "${NOAVX512}" STREQUAL "False")
-      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw -march=skylake-avx512 -mtune=skylake-avx512 " )
-      set(COMPILATION_AVX2 "True")
-    else()
-      if (CPUINFO MATCHES "avx2")
-        set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
-        set(COMPILATION_AVX2 "True")
-      else()
-        set(COMPILATION_AVX2 "False")
-      endif()
-      if (CPUINFO MATCHES "sse4_1")
-        set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1 -mpclmul")
-      endif()
-      if (CPUINFO MATCHES "ssse3")
-        set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mssse3")
-      endif()
-    endif()
-  else()
-    Message("/proc/cpuinfo does not exit. We will use manual CPU flags")
-  endif()
+# When /proc/cpuinfo exists, read the CPU "flags" line and declare an instruction
+# set as SIMDE-native only if the build host reports it. AVX512 code generation
+# is enabled only when the CPU has avx512bw and AVX512 is not set to "False".
+if(EXISTS  "/proc/cpuinfo")
+   file(STRINGS "/proc/cpuinfo" CPUINFO REGEX flags LIMIT_COUNT 1)
+   message("AVX512 is ${AVX512}")
+   message("AVX2 is ${AVX2}")
+   if (CPUINFO MATCHES "avx512bw" AND NOT ("${AVX512}" STREQUAL "False"))
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx512bw -march=skylake-avx512 -mtune=skylake-avx512")
+   else()
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mno-avx512f -march=native")
+   endif()
+   if (CPUINFO MATCHES "avx2" AND "${AVX2}" STREQUAL "True")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_AVX2_NATIVE -DSIMDE_X86_VPCLMULQDQ_NATIVE")
+   endif()
+   if (CPUINFO MATCHES "sse4_1")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_SSE4_1_NATIVE")
+   endif()
+   if (CPUINFO MATCHES "sse4_2")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_SSE4_2_NATIVE")
+   endif()
+   if (CPUINFO MATCHES "ssse3")
+      set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -DSIMDE_X86_SSSE3_NATIVE")
+   endif()
 endif()
 
-set(C_FLAGS_PROCESSOR " ${C_FLAGS_PROCESSOR} ${CFLAGS_PROCESSOR_USER}")
+set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -fno-var-tracking-assignments -march=native")
 
 Message("C_FLAGS_PROCESSOR is ${C_FLAGS_PROCESSOR}")
 
-#if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86")
-#  if ( (NOT( C_FLAGS_PROCESSOR MATCHES "ssse3")) OR (NOT( C_FLAGS_PROCESSOR MATCHES "msse4.1")) )
-#    Message(FATAL_ERROR "For x86 Architecture, you must have following flags: -mssse3 -msse4.1. The current detected flags are: ${C_FLAGS_PROCESSOR}. You can pass the flags manually in build script, for example: ./build_oai --cflags_processor \"-mssse3 -msse4.1 -mavx2\" ")
-#  endif()
-#endif()
-
 #
 # add autotools definitions that were maybe used!
 
@@ -244,7 +235,7 @@ add_definitions("-DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAV
 set(commonOpts "-pipe -Wno-packed-bitfield-compat -fPIC -Wall -fno-strict-aliasing -rdynamic")
 
 set(CMAKE_C_FLAGS
-  "${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=gnu99 -funroll-loops")
+  "${CMAKE_C_FLAGS} ${C_FLAGS_PROCESSOR} ${commonOpts} -std=gnu11 -funroll-loops")
 set(CMAKE_CXX_FLAGS
   "${CMAKE_CXX_FLAGS} ${C_FLAGS_PROCESSOR}  ${commonOpts} -std=c++11")
 
@@ -713,6 +704,32 @@ file(GLOB F1AP_C_FILES ${F1AP_DIR}/*.c)
 add_library(F1AP ${F1AP_C_FILES} )
 
 
+# LPP
+##############
+set(LPP_DIR ${OPENAIR3_DIR}/LPP)
+set(LPP_ASN_DIR ${LPP_DIR}/MESSAGES)
+set(LPP_ASN_FILES
+  ${LPP_ASN_DIR}/37355-g60.asn
+  )
+
+set(LPP_ASN_GENERATED_C_DIR ${asn1_generated_dir}/LPP)
+
+set(lpp_cmd ${OPENAIR_CMAKE}/tools/make_asn1c_includes.sh "LPP_" "-findirect-choice -fno-include-deps" "${LPP_ASN_GENERATED_C_DIR}")
+
+compile_asn1("${LPP_ASN_FILES}" "${lpp_cmd}" lpp_flag)
+
+file(GLOB LPP_ASN_GENERATED_C_FILES ${LPP_ASN_GENERATED_C_DIR}/*.c)
+add_library(LPP_LIB
+  ${LPP_ASN_GENERATED_C_FILES}
+  )
+add_dependencies (LPP_LIB  lpp_flag)
+  
+include_directories ("${LPP_ASN_GENERATED_C_DIR}")
+include_directories ("${LPP_DIR}")
+  
+#file(GLOB LPP_C_FILES ${LPP_DIR}/*.c)
+#add_library(LPP ${LPP_C_FILES} )
+
 # Hardware dependant options
 ###################################
 add_list1_option(NB_ANTENNAS_RX "4" "Number of antennas in reception" "1" "2" "4")
@@ -735,53 +752,53 @@ set (SHLIB_LOADER_SOURCES
 ######################################################################
 
 set(HWLIB_USRP_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp
+  ${OPENAIR_DIR}/sdr/USRP/USERSPACE/LIB/usrp_lib.cpp
   )
 add_library(oai_usrpdevif MODULE ${HWLIB_USRP_SOURCE} )
 target_include_directories(oai_usrpdevif PRIVATE
-  "${OPENAIR_TARGETS}/ARCH/USRP/USERSPACE/LIB/"
+  "${OPENAIR_DIR}/sdr/USRP/USERSPACE/LIB/"
   ${Boost_INCLUDE_DIR}
 )
 target_link_libraries(oai_usrpdevif uhd)
 
-include_directories("${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/")
+include_directories("${OPENAIR_DIR}/sdr/BLADERF/USERSPACE/LIB/")
 set(HWLIB_BLADERF_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c
+  ${OPENAIR_DIR}/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.c
   )
 add_library(oai_bladerfdevif MODULE ${HWLIB_BLADERF_SOURCE} )
 target_link_libraries(oai_bladerfdevif bladeRF)
 
-include_directories("${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/")
+include_directories("${OPENAIR_DIR}/sdr/LMSSDR/USERSPACE/LIB/")
 
 set(HWLIB_LMSSDR_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp
+  ${OPENAIR_DIR}/sdr/LMSSDR/USERSPACE/LIB/lms_lib.cpp
   )
 add_library(oai_lmssdrdevif MODULE ${HWLIB_LMSSDR_SOURCE} )
 target_include_directories(oai_lmssdrdevif PRIVATE /usr/local/include/lime)
 target_link_libraries(oai_lmssdrdevif LimeSuite )
 
 
-include_directories("${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/")
+include_directories("${OPENAIR_DIR}/sdr/ETHERNET/USERSPACE/LIB/")
 set(TPLIB_ETHERNET_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/USERSPACE/LIB/eth_raw.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/USERSPACE/LIB/eth_udp.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/USERSPACE/LIB/eth_raw.c
   )
 add_library(oai_eth_transpro MODULE ${TPLIB_ETHERNET_SOURCE} )
 
-include_directories("${OPENAIR_TARGETS}/ARCH/AW2SORI/")
+include_directories("${OPENAIR_DIR}/sdr/AW2SORI/")
 link_directories("/usr/local/lib")
 set(HWLIB_AW2SORI_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/AW2SORI/oaiori.c
+  ${OPENAIR_DIR}/sdr/AW2SORI/oaiori.c
   )
 add_library(aw2sori_transpro MODULE ${HWLIB_AW2SORI_SOURCE})
 target_compile_options(aw2sori_transpro PRIVATE -shared -fPIC -msse4 -g -ggdb -DLITE_COMPILATION)
 target_link_libraries(aw2sori_transpro libori.so)
 
-include_directories("${OPENAIR_TARGETS}/ARCH/IRIS/USERSPACE/LIB/")
+include_directories("${OPENAIR_DIR}/sdr/IRIS/USERSPACE/LIB/")
 set(option_HWIRISLIB_lib "-l SoapySDR")
 set(HWLIB_IRIS_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp
+  ${OPENAIR_DIR}/sdr/IRIS/USERSPACE/LIB/iris_lib.cpp
   )
 add_library(oai_irisdevif MODULE ${HWLIB_IRIS_SOURCE})
 target_include_directories(oai_irisdevif PRIVATE /usr/local/lib/SoapySDR/modules0.7/)
@@ -794,11 +811,11 @@ target_link_libraries(oai_irisdevif SoapySDR)
 include_directories ("/usr/include/dpdk")
 
 set(HWLIB_BENETEL_4G_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/4g/benetel.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/4g/shared_buffers.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/4g/low.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/4g/low_dpdk.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/4g/dpdk_driver.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/4g/benetel.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/4g/shared_buffers.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/4g/low.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/4g/low_dpdk.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/4g/dpdk_driver.c
   )
 add_library(benetel_4g MODULE ${HWLIB_BENETEL_4G_SOURCE} )
 
@@ -812,11 +829,11 @@ TARGET_LINK_LIBRARIES(benetel_4g pthread dl rt m numa)
 ######################################################################
 
 set(HWLIB_BENETEL_5G_SOURCE
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/5g/benetel.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/5g/shared_buffers.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/5g/low.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/5g/low_dpdk.c
-  ${OPENAIR_TARGETS}/ARCH/ETHERNET/benetel/5g/dpdk_driver.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/5g/benetel.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/5g/shared_buffers.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/5g/low.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/5g/low_dpdk.c
+  ${OPENAIR_DIR}/sdr/ETHERNET/benetel/5g/dpdk_driver.c
   )
 add_library(benetel_5g MODULE ${HWLIB_BENETEL_5G_SOURCE} )
 
@@ -853,10 +870,7 @@ endif ()
 
 ##########################################################
 
-include_directories ("${OPENAIR_TARGETS}/ARCH/COMMON")
-
-Message("DEADLINE_SCHEDULER flag  is ${DEADLINE_SCHEDULER}")
-Message("CPU_Affinity flag is ${CPU_AFFINITY}")
+include_directories ("${OPENAIR_DIR}/sdr/COMMON")
 
 ##############################################################
 #    ???!!! TO BE DOCUMENTED OPTIONS !!!???
@@ -1025,7 +1039,7 @@ include_directories("${OPENAIR3_DIR}/ocp-gtpu")
 include_directories("${OPENAIR3_DIR}/M3AP")
 include_directories("${OPENAIR3_DIR}/MME_APP")
 include_directories("${OPENAIR_DIR}/targets/COMMON")
-include_directories("${OPENAIR_DIR}/targets/ARCH/COMMON")
+include_directories("${OPENAIR_DIR}/sdr/COMMON")
 include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/PHY")
 include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/MAC")
 include_directories("${OPENAIR2_DIR}/ENB_APP/CONTROL_MODULES/RRC")
@@ -1610,6 +1624,7 @@ set(PHY_SRC_UE
   ${OPENAIR1_DIR}/PHY/INIT/nr_init.c
   ${OPENAIR1_DIR}/PHY/INIT/nr_parms.c
   ${OPENAIR1_DIR}/PHY/MODULATION/nr_modulation.c
+  ${OPENAIR1_DIR}/PHY/NR_TRANSPORT/nr_prs.c
   ${OPENAIR1_DIR}/PHY/NR_TRANSPORT/nr_pss.c
   ${OPENAIR1_DIR}/PHY/NR_TRANSPORT/nr_sss.c
   ${OPENAIR1_DIR}/PHY/NR_TRANSPORT/nr_pbch.c
@@ -1711,14 +1726,8 @@ if (${SMBV})
   set(PHY_SRC "${PHY_SRC} ${OPENAIR1_DIR}/PHY/TOOLS/smbv.c")
 endif  (${SMBV})
 
-if (${COMPILATION_AVX2} STREQUAL "True")
-  #set(PHY_SRC ${PHY_SRC} ${OPENAIR1_DIR}/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c)
-  set(PHY_SRC_UE ${PHY_SRC_UE} ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c)
-endif ()
-
-if (${COMPILATION_AVX2} STREQUAL "True")
-  set(PHY_NR_UE_SRC ${PHY_NR_UE_SRC} ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c)
-endif ()
+set(PHY_SRC_UE ${PHY_SRC_UE} ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c)
+set(PHY_NR_UE_SRC ${PHY_NR_UE_SRC} ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c)
 
 add_library(PHY_COMMON ${PHY_SRC_COMMON})
 add_dependencies(PHY_COMMON rrc_flag)
@@ -1906,6 +1915,7 @@ set(L2_NR_SRC
   ${NR_RRC_DIR}/rrc_gNB_reconfig.c
   ${NR_RRC_DIR}/rrc_gNB_UE_context.c
   ${NR_RRC_DIR}/rrc_gNB_NGAP.c
+  ${NR_RRC_DIR}/rrc_gNB_radio_bearers.c
   )
 
 set(L2_SRC_UE
@@ -2532,15 +2542,15 @@ target_link_libraries(nrscope ${XFORMS_LIBRARIES})
 
 
 add_library(rfsimulator MODULE
-  ${OPENAIR_TARGETS}/ARCH/rfsimulator/simulator.c
-  ${OPENAIR_TARGETS}/ARCH/rfsimulator/apply_channelmod.c
-  ${OPENAIR_TARGETS}/ARCH/rfsimulator/new_channel_sim.c
+  ${OPENAIR_DIR}/sdr/rfsimulator/simulator.c
+  ${OPENAIR_DIR}/sdr/rfsimulator/apply_channelmod.c
+  ${OPENAIR_DIR}/sdr/rfsimulator/new_channel_sim.c
   ${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
 	)
 target_link_libraries(rfsimulator SIMU_COMMON ${ATLAS_LIBRARIES})
 
 add_library(oai_iqplayer MODULE
-	${OPENAIR_TARGETS}/ARCH/iqplayer/iqplayer_lib.c
+	${OPENAIR_DIR}/sdr/iqplayer/iqplayer_lib.c
 	)
 set(CMAKE_MODULE_PATH "${OPENAIR_DIR}/cmake_targets/tools/MODULES" "${CMAKE_MODULE_PATH}")
 
@@ -2601,7 +2611,7 @@ add_executable(nfapi_test
   )
 
 add_executable(replay_node
-  ${OPENAIR_TARGETS}/ARCH/rfsimulator/stored_node.c
+  ${OPENAIR_DIR}/sdr/rfsimulator/stored_node.c
   )
 target_link_libraries (replay_node minimal_lib)
 
@@ -2628,8 +2638,8 @@ add_executable(lte-softmodem
   ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
   ${OPENAIR_TARGETS}/COMMON/create_tasks.c
   ${OPENAIR_TARGETS}/COMMON/create_tasks_mbms.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
   ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
   ${OPENAIR2_DIR}/F1AP/dummy_enb.c
@@ -2668,8 +2678,8 @@ add_executable(ocp-enb
   ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
   ${OPENAIR_TARGETS}/COMMON/create_tasks.c
   ${OPENAIR_TARGETS}/COMMON/create_tasks_mbms.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
   ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
   ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
@@ -2697,8 +2707,8 @@ target_link_libraries (ocp-enb ${LIBXML2_LIBRARIES} pthread m CONFIG_LIB rt cryp
 add_executable(oairu
   ${OPENAIR_TARGETS}/RT/USER/lte-ru.c
   ${OPENAIR_TARGETS}/RT/USER/ru_control.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR_DIR}/executables/softmodem-common.c
   ${OPENAIR_DIR}/openair1/SCHED/phy_procedures_lte_common.c
   ${OPENAIR_DIR}/executables/main_ru.c
@@ -2723,8 +2733,8 @@ add_executable(lte-uesoftmodem
   ${OPENAIR_TARGETS}/RT/USER/lte-uesoftmodem.c
   ${OPENAIR_DIR}/executables/softmodem-common.c
   ${OPENAIR_TARGETS}/COMMON/create_tasks_ue.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
   ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
   ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
@@ -2769,8 +2779,8 @@ add_executable(nr-softmodem
   ${OPENAIR_DIR}/executables/nr-softmodem.c
   ${OPENAIR_DIR}/executables/softmodem-common.c
   ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
   ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
   ${OPENAIR1_DIR}/SIMULATION/ETH_TRANSPORT/netlink_init.c
@@ -2814,8 +2824,8 @@ add_executable(nr-uesoftmodem
   ${OPENAIR_DIR}/executables/nr-ue.c
   ${OPENAIR_DIR}/executables/softmodem-common.c
   ${OPENAIR1_DIR}/SIMULATION/TOOLS/taus.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/record_player.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/record_player.c
   ${OPENAIR2_DIR}/RRC/NAS/nas_config.c
   ${OPENAIR2_DIR}/LAYER2/NR_MAC_COMMON/nr_mac_common.c
   ${OPENAIR2_DIR}/RRC/NAS/rb_config.c
@@ -2829,16 +2839,14 @@ add_executable(nr-uesoftmodem
 
 target_link_libraries (nr-uesoftmodem
   -Wl,--start-group
-  RRC_LIB NR_RRC_LIB NGAP_LIB NGAP_GNB SECU_CN SECU_OSA UTIL HASHTABLE SCTP_CLIENT SCHED_RU_LIB SCHED_UE_LIB SCHED_NR_UE_LIB
-  PHY_COMMON PHY_NR_COMMON PHY_UE PHY_NR_UE PHY_RU NR_L2_UE L2_UE_LTE_NR MAC_NR_COMMON NFAPI_COMMON_LIB NFAPI_LIB NFAPI_PNF_LIB
-  NFAPI_USER_LIB MISC_NFAPI_NR_LIB S1AP_LIB S1AP_ENB
-  ${RAL_LIB} ${NAS_UE_LIB} ITTI ${FLPT_MSG_LIB} ${ATLAS_LIBRARIES}
-  NFAPI_USER_LIB S1AP_LIB S1AP_ENB
-  ${RAL_LIB} ${NAS_UE_LIB} ITTI ${FLPT_MSG_LIB} ${ATLAS_LIBRARIES} LIB_5GNAS_GNB LIB_NAS_SIMUE ${NAS_SIM_LIB}
+  NR_RRC_LIB SECU_CN SECU_OSA UTIL HASHTABLE SCHED_RU_LIB SCHED_NR_UE_LIB
+  PHY_COMMON PHY_NR_COMMON PHY_NR_UE NR_L2_UE L2_UE_LTE_NR MAC_NR_COMMON NFAPI_COMMON_LIB NFAPI_LIB NFAPI_PNF_LIB
+  NFAPI_USER_LIB MISC_NFAPI_NR_LIB
+  ${RAL_LIB} ITTI ${ATLAS_LIBRARIES} LIB_5GNAS_GNB LIB_NAS_SIMUE ${NAS_SIM_LIB}
   -Wl,--end-group z dl)
 
 target_link_libraries (nr-uesoftmodem ${LIBXML2_LIBRARIES})
-target_link_libraries (nr-uesoftmodem pthread m CONFIG_LIB rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} sctp  ${XFORMS_LIBRARIES} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES} ${ATLAS_LIBRARIES})
+target_link_libraries (nr-uesoftmodem pthread m CONFIG_LIB rt crypt ${CRYPTO_LIBRARIES} ${OPENSSL_LIBRARIES} ${XFORMS_LIBRARIES} ${CMAKE_DL_LIBS} ${LIBYAML_LIBRARIES} ${ATLAS_LIBRARIES})
 target_link_libraries (nr-uesoftmodem ${LIB_LMS_LIBRARIES})
 target_link_libraries (nr-uesoftmodem ${T_LIB})
 
@@ -2870,7 +2878,7 @@ target_link_libraries (dlsim_tm4
 add_executable(rftest
   ${OPENAIR_DIR}/openair1/PHY/TOOLS/calibration_test.c
   ${OPENAIR_DIR}/openair1/PHY/TOOLS/calibration_scope.c
-  ${OPENAIR_TARGETS}/ARCH/COMMON/common_lib.c
+  ${OPENAIR_DIR}/sdr/COMMON/common_lib.c
   ${OPENAIR_DIR}/executables/softmodem-common.c
   ${SHLIB_LOADER_SOURCES}
 )
@@ -3126,7 +3134,7 @@ function(make_driver name dir)
   endforeach()
   CONFIGURE_FILE(${OPENAIR_CMAKE}/tools/Kbuild.cmake ${OPENAIR_BIN_DIR}/${name}/Kbuild)
   add_custom_command(OUTPUT ${name}.ko
-    COMMAND make -j2 -C ${module_build_path} M=${OPENAIR_BIN_DIR}/${name}
+    COMMAND make -C ${module_build_path} M=${OPENAIR_BIN_DIR}/${name}
     WORKING_DIRECTORY ${OPENAIR_BIN_DIR}/${name}
     COMMENT "building ${module}.ko"
     VERBATIM
diff --git a/ci-scripts/Jenkinsfile-GitLab-Container b/ci-scripts/Jenkinsfile-GitLab-Container
index c8fb565488ff9c0ae41d4f727a69f9850f32f8fe..f9bb72ec47063f3e8473a029c1ffd9a613ac2f82 100644
--- a/ci-scripts/Jenkinsfile-GitLab-Container
+++ b/ci-scripts/Jenkinsfile-GitLab-Container
@@ -24,8 +24,9 @@
 def nodeExecutor = params.nodeExecutor
 
 // Tags to shorten pipeline duration
-def doMandatoryTests = false
-def doFullTestsuite = false
+def doBuild = true
+def do4Gtest = false
+def do5Gtest = false
 
 //
 def gitCommitAuthorEmailAddr
@@ -56,26 +57,30 @@ pipeline {
             LABEL_CHECK = sh returnStdout: true, script: 'ci-scripts/checkGitLabMergeRequestLabels.sh --mr-id ' + env.gitlabMergeRequestIid
             LABEL_CHECK = LABEL_CHECK.trim()
             if (LABEL_CHECK == 'NONE') {
-              def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): Your merge request has none of the mandatory labels:\n\n"
-              message += " - BUILD-ONLY\n"
-              message += " - 4G-LTE\n"
-              message += " - 5G-NR\n"
-              message += " - CI\n\n"
+              def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): Your merge request should have one of the mandatory labels:\n\n"
+              message += " - ~documentation (don't perform any stages)\n"
+              message += " - ~BUILD-ONLY (execute only build stages)\n"
+              message += " - ~4G-LTE (perform 4G tests)\n"
+              message += " - ~5G-NR (perform 5G tests)\n"
+              message += " - ~CI (perform both 4G and 5G tests)\n\n"
               message += "Not performing CI due to lack of labels"
               addGitLabMRComment comment: message
               error('Not performing CI due to lack of labels')
             } else if (LABEL_CHECK == 'FULL') {
-              doMandatoryTests = true
-              doFullTestsuite = true
+              do4Gtest = true
+              do5Gtest = true
+            } else if (LABEL_CHECK == "SHORTEN-4G") {
+              do4Gtest = true
             } else if (LABEL_CHECK == 'SHORTEN-5G') {
-              doMandatoryTests = true
+              do5Gtest = true
+            } else if (LABEL_CHECK == 'documentation') {
+              doBuild = false
             } else {
-              def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): We will perform only build stages on your Merge Request"
-              addGitLabMRComment comment: message
+              // remaining case is expected to be "BUILD-ONLY": only the build stages run
             }
           } else {
-            doMandatoryTests = true
-            doFullTestsuite = true
+            do4Gtest = true
+            do5Gtest = true
           }
         }
       }
@@ -116,11 +121,12 @@ pipeline {
     // Build Stages are Mandatory
     // Later we will add a Ubuntu20 build
     stage ("Image Building Processes") {
+      when { expression {doBuild} }
       parallel {
-        stage ("Ubuntu18 Build") {
+        stage ("Ubuntu18-Image-Builder") {
           steps {
             script {
-              triggerSlaveJob ('RAN-Ubuntu18-Image-Builder', 'Ubuntu18-Images-Build')
+              triggerSlaveJob ('RAN-Ubuntu18-Image-Builder', 'Ubuntu18-Image-Builder')
             }
           }
           post {
@@ -155,10 +161,10 @@ pipeline {
             }
           }
         }
-        stage ("CppCheck Analysis") {
+        stage ("cppcheck") {
           steps {
             script {
-              triggerSlaveJob ('RAN-cppcheck', 'CppCheck Analysis')
+              triggerSlaveJob ('RAN-cppcheck', 'cppcheck')
             }
           }
           post {
@@ -177,12 +183,13 @@ pipeline {
       }
     }
     stage ("Image Test Processes") {
+      when { expression {doBuild} }
       parallel {
-        stage ("Physical Simulators") {
-          when { expression {doMandatoryTests} }
+        stage ("PhySim-Cluster") {
+          when { expression {do4Gtest || do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-PhySim-Cluster', 'Test-Physim-Cluster')
+              triggerSlaveJob ('RAN-PhySim-Cluster', 'PhySim-Cluster')
             }
           }
           post {
@@ -198,11 +205,11 @@ pipeline {
             }
           }
         }
-        stage ("4G RF Simulators") {
-          when { expression {doMandatoryTests} }
+        stage ("RF-Sim-Test-4G") {
+          when { expression {do4Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-RF-Sim-Test-4G', 'Test-RF-Sim-Container-4G')
+              triggerSlaveJob ('RAN-RF-Sim-Test-4G', 'RF-Sim-Test-4G')
             }
           }
           post {
@@ -218,11 +225,11 @@ pipeline {
             }
           }
         }
-        stage ("5G RF Simulators") {
-          when { expression {doMandatoryTests} }
+        stage ("RF-Sim-Test-5G") {
+          when { expression {do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-RF-Sim-Test-5G', 'Test-RF-Sim-Container-5G')
+              triggerSlaveJob ('RAN-RF-Sim-Test-5G', 'RF-Sim-Test-5G')
             }
           }
           post {
@@ -238,11 +245,11 @@ pipeline {
             }
           }
         }
-        stage ("4G L2 Simulators") {
-          when { expression {doMandatoryTests} }
+        stage ("L2-Sim-Test-4G") {
+          when { expression {do4Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-L2-Sim-Test-4G', 'Test-L2-Sim-Container-4G')
+              triggerSlaveJob ('RAN-L2-Sim-Test-4G', 'L2-Sim-Test-4G')
             }
           }
           post {
@@ -258,11 +265,11 @@ pipeline {
             }
           }
         }
-        stage ("5G L2 Simulators") {
-          when { expression {doMandatoryTests} }
+        stage ("L2-Sim-Test-5G") {
+          when { expression {do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-L2-Sim-Test-5G', 'Test-L2-Sim-Container-5G')
+              triggerSlaveJob ('RAN-L2-Sim-Test-5G', 'L2-Sim-Test-5G')
             }
           }
           post {
@@ -278,11 +285,11 @@ pipeline {
             }
           }
         }
-        stage ("NSA B200 Sanity Check") {
-          when { expression {doMandatoryTests} }
+        stage ("NSA-B200-Module-LTEBOX-Container") {
+          when { expression {do4Gtest || do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-NSA-B200-Module-LTEBOX-Container', 'Test-NSA-B200')
+              triggerSlaveJob ('RAN-NSA-B200-Module-LTEBOX-Container', 'NSA-B200-Module-LTEBOX-Container')
             }
           }
           post {
@@ -298,11 +305,11 @@ pipeline {
             }
           }
         }
-        stage ("SA B200 Sanity Check") {
-          when { expression {doMandatoryTests} }
+        stage ("SA-B200-Module-SABOX-Container") {
+          when { expression {do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-SA-B200-Module-SABOX-Container', 'Test-SA-B200')
+              triggerSlaveJob ('RAN-SA-B200-Module-SABOX-Container', 'SA-B200-Module-SABOX-Container')
             }
           }
           post {
@@ -318,17 +325,17 @@ pipeline {
             }
           }
         }
-        stage ("Test OAI NR UE - OAI gNB - TDD - Band 78 - N300") {
-          when { expression {doMandatoryTests} }
+        stage ("gNB-N300-Timing-Phytest-LDPC") {
+          when { expression {do5Gtest} }
           steps {
             script {
-              triggerSlaveJob ('RAN-gNB-nrUE-MONO-TDD-Band78-N300', 'Test-TDD-Band78-gNB-NR-UE')
+              triggerSlaveJob ('RAN-gNB-N300-Timing-Phytest-LDPC', 'gNB-N300-Timing-Phytest-LDPC')
             }
           }
           post {
             always {
               script {
-                finalizeSlaveJob('RAN-gNB-nrUE-MONO-TDD-Band78-N300')
+                finalizeSlaveJob('RAN-gNB-N300-Timing-Phytest-LDPC')
               }
             }
             failure {
@@ -339,11 +346,11 @@ pipeline {
           }
         }
         //avra is offline, re-enable once it is available
-        //stage ("Test T1 Offload") {
-        //  when { expression {doMandatoryTests} }
+        //stage ("T1-Offload-Test") {
+        //  when { expression {do5Gtest} }
         //  steps {
         //    script {
-        //      triggerSlaveJob ('RAN-T1-Offload-Test', 'Test-T1-Offload')
+        //      triggerSlaveJob ('RAN-T1-Offload-Test', 'T1-Offload-Test')
         //    }
         //  }
         //  post {
@@ -361,11 +368,11 @@ pipeline {
         //}
       }
     }
-    stage ("Images Push to Registries") {
-      when { expression {"PUSH".equals(env.gitlabActionType)} }
+    stage ("DockerHub-Push") {
+      when { expression {doBuild && "PUSH".equals(env.gitlabActionType)} }
       steps {
         script {
-          triggerSlaveJob ('RAN-DockerHub-Push', 'Push-to-Docker-Hub')
+          triggerSlaveJob ('RAN-DockerHub-Push', 'DockerHub-Push')
         }
       }
       post {
@@ -401,7 +408,6 @@ pipeline {
       script {
         def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): passed (" + BUILD_URL + ")"
         if ("MERGE".equals(env.gitlabActionType)) {
-          echo "This is a MERGE event"
           addGitLabMRComment comment: message
         }
         echo "Pipeline is SUCCESSFUL"
@@ -411,7 +417,6 @@ pipeline {
       script {
         def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): failed (" + BUILD_URL + ")"
         if ("MERGE".equals(env.gitlabActionType)) {
-          echo "This is a MERGE event"
           addGitLabMRComment comment: message
         }
         echo "Pipeline FAILED"
diff --git a/ci-scripts/Jenkinsfile-gitlab b/ci-scripts/Jenkinsfile-gitlab
index a5912f4609bce5197005c02d9ccaecf166ecb79c..405bd0f4d65c0de26053584954a6da215b02a24d 100644
--- a/ci-scripts/Jenkinsfile-gitlab
+++ b/ci-scripts/Jenkinsfile-gitlab
@@ -81,7 +81,7 @@ pipeline {
                             message += "Not performing CI due to lack of labels"
                             addGitLabMRComment comment: message
                             error('Not performing CI due to lack of labels')
-                        } else if (LABEL_CHECK == 'FULL') {
+                        } else if (LABEL_CHECK == 'FULL' || LABEL_CHECK == 'SHORTEN-4G') {
                             doMandatoryTests = true
                             doFullTestsuite = true
                         } else if (LABEL_CHECK == 'SHORTEN-5G') {
diff --git a/ci-scripts/Jenkinsfile-inria-r2lab b/ci-scripts/Jenkinsfile-inria-r2lab
deleted file mode 100644
index ada52277f4ea702b21edecdc1f18ec9b541560e3..0000000000000000000000000000000000000000
--- a/ci-scripts/Jenkinsfile-inria-r2lab
+++ /dev/null
@@ -1,457 +0,0 @@
-#!/bin/groovy
-/*
- * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The OpenAirInterface Software Alliance licenses this file to You under
- * the OAI Public License, Version 1.1  (the "License"); you may not use this file
- * except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.openairinterface.org/?page_id=698
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *-------------------------------------------------------------------------------
- * For more information about the OpenAirInterface (OAI) Software Alliance:
- *      contact@openairinterface.org
- */
-
-// Abstraction function to send social media messages:
-// like on Slack or Mattermost
-def sendSocialMediaMessage(pipeChannel, pipeColor, pipeMessage) {
-    if (params.pipelineUsesSlack != null) {
-        if (params.pipelineUsesSlack) {
-            slackSend channel: pipeChannel, color: pipeColor, message: pipeMessage
-        }
-    }
-}
-
-// Location of the test XML file to be run
-def testXMLFile = params.pythonTestXmlFile
-def mainPythonAllXmlFiles = ""
-def buildStageStatus = true
-
-// Name of the test stage
-def testStageName = params.pipelineTestStageName
-
-// Name of the branch to work on
-def ranRepoBranch = params.pythonWorkingBranch
-
-// Lease booking parameters
-def r2labStartTime = params.R2LAB_LeaseBookStartTime
-def r2labDuration = params.R2LAB_LeaseBookDuration
-
-
-// Fixed deployment
-def r2labBaseIpAddr = '192.168.3.'
-def r2labPythonExeIdx = '14'
-def r2labPythonExe = 'fit' + r2labPythonExeIdx
-def r2labENB0Idx = '23'
-def r2labENB0 = 'fit' + r2labENB0Idx
-def r2labENB0IpAddr = r2labBaseIpAddr + r2labENB0Idx
-def r2labEPC0Idx = '17'
-def r2labEPC0 = 'fit' + r2labEPC0Idx
-def r2labEPC0IpAddr = r2labBaseIpAddr + r2labEPC0Idx
-def r2labUE0Idx = '6'
-def r2labUE0 = 'fit0' + r2labUE0Idx
-def r2labUE0IpAddr = r2labBaseIpAddr + r2labUE0Idx
-def r2labENB1Idx = '16'
-def r2labENB1 = 'fit' + r2labENB1Idx
-def r2labENB1IpAddr = r2labBaseIpAddr + r2labENB1Idx
-def r2labDoAllOff = true
-
-pipeline {
-    agent {
-        label 'master'
-    }
-
-    options {
-        disableConcurrentBuilds()
-        timestamps()
-        ansiColor('xterm')
-    }
-
-    stages {
-        stage ("Book session") {
-            steps {
-                script {
-                    echo '\u2705 \u001B[32mVerify Parameters\u001B[0m'
-
-                    def allParametersPresent = true
-                    if (params.R2LAB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.R2LAB_FitNode_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.R2LAB_LeaseBookStartTime == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.R2LAB_LeaseBookDuration == null) {
-                        allParametersPresent = false
-                    }
-
-                    // If not present picking a default Stage Name
-                    if (params.pipelineTestStageName == null) {
-                        // picking default
-                        testStageName = 'Tests at Inria R2LAB'
-                    }
-                    // If not present picking a default branch name
-                    if (params.pythonWorkingBranch == null) {
-                        ranRepoBranch = 'develop'
-                    }
-
-                    if (params.pythonTestXmlFile == null) {
-                        // picking default
-                        testXMLFile = 'xml_files/inria/enb_usrp210_band7_build.xml'
-                        echo "Test XML file(default):   ${testXMLFile}"
-                        mainPythonAllXmlFiles += "--XMLTestFile=" + testXMLFile + " "
-                    } else {
-                        String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                        for (xmlFile in myXmlTestSuite) {
-                            mainPythonAllXmlFiles += "--XMLTestFile=" + xmlFile + " "
-                            echo "Test XML file         :   ${xmlFile}"
-                        }
-                    }
-
-                    if (!allParametersPresent) {
-                        currentBuild.result = 'ABORTED'
-                        error('Stopping early because no R2LAB credentials')
-                    }
-
-                    JOB_TIMESTAMP = sh returnStdout: true, script: 'date --rfc-3339=seconds | sed -e "s#+00:00##"'
-                    JOB_TIMESTAMP = JOB_TIMESTAMP.trim()
-
-                    echo '\u2705 \u001B[32mBook a Session\u001B[0m'
-
-                    BOOK_TIMESTAMP = sh returnStdout: true, script: 'date --rfc-3339=date'
-                    BOOK_TIMESTAMP = BOOK_TIMESTAMP.trim()
-
-                    withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_Credentials}", usernameVariable: 'r2labuser', passwordVariable: 'r2labpassword']
-                        ]) {
-                        sh "python3 /home/eurecom/inria-scripts/booking-lease.py --book-lease --from ${BOOK_TIMESTAMP}T${r2labStartTime} --duration ${r2labDuration} --slice inria_oaici ${r2labuser} ${r2labpassword}"
-                        sh "python3 /home/eurecom/inria-scripts/booking-lease.py --get-leases ${r2labuser} ${r2labpassword} | grep -v nightly"
-                    }
-
-                    // Adding a tempo after booking leases
-                    sh "sleep 10"
-                    LEASE_STATUS = sh returnStdout: true, script: "ssh -t inria_oaici@faraday.inria.fr 'rleases --check'"
-                    LEASE_STATUS = LEASE_STATUS.trim()
-                    if (LEASE_STATUS ==~ /.*Access currently denied to inria_oaici.*/) {
-                        r2labDoAllOff = false
-                        currentBuild.result = 'ABORTED'
-                        error('Stopping early because R2LAB not available')
-                    }
-                }
-            }
-        }
-        stage ("Load Images") {
-            steps {
-                script {
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'all-off'"
-                    sh "sleep 10"
-
-                    echo '\u2705 \u001B[32mLoad Image for Python Executor\u001B[0m'
-                    try {
-                       sh "ssh -t inria_oaici@faraday.inria.fr 'rload -i oai-ci-cd-u18-lowlatency-enb-ue ${r2labPythonExeIdx} > /dev/null 2>&1'"
-                    } catch (Exception e) {
-                       echo "Why is it wrong?"
-                    }
-                    try {
-                       //sh "ssh -t inria_oaici@faraday.inria.fr 'rwait --silent ${r2labPythonExeIdx}'"
-                       sh "ssh -t inria_oaici@faraday.inria.fr 'rwait ${r2labPythonExeIdx}'"
-                    } catch (Exception e) {
-                       echo "Why is it wrong?"
-                    }
-
-                    echo '\u2705 \u001B[32mLoad Image for two (2) eNBs\u001B[0m'
-                    try {
-                      sh "ssh -t inria_oaici@faraday.inria.fr 'rload -i oai-ci-cd-u18-lowlatency-enb-ue ${r2labENB0Idx},${r2labENB1Idx} > /dev/null 2>&1'"
-                    } catch (Exception e) {
-                       echo "Why is it wrong?"
-                    }
-                    try {
-                       sh "ssh -t inria_oaici@faraday.inria.fr 'rwait --silent ${r2labENB0Idx},${r2labENB1Idx}'"
-                    } catch (Exception e) {
-                       echo "Why is it wrong?"
-                    }
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'uon ${r2labENB0Idx},${r2labENB1Idx}'"
-                    sh "sleep 5"
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'uon ${r2labENB0Idx},${r2labENB1Idx}'"
-
-                    echo '\u2705 \u001B[32mLoad Image for one OAI UE\u001B[0m'
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'rload -i oai-ci-cd-u18-lowlatency-enb-ue ${r2labUE0Idx} > /dev/null 2>&1'"
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'rwait --silent ${r2labUE0Idx}'"
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'uon ${r2labUE0Idx}'"
-                    sh "sleep 5"
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'uon ${r2labUE0Idx}'"
-
-                    echo '\u2705 \u001B[32mLoad Image for one EPC\u001B[0m'
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'rload -i oai-ci-cd-u18-lowlatency-epc ${r2labEPC0Idx} > /dev/null 2>&1'"
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'rwait --silent ${r2labEPC0Idx}'"
-
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'nodes ${r2labUE0Idx},${r2labPythonExeIdx},${r2labEPC0Idx},${r2labENB0Idx},${r2labENB1Idx} && st'"
-                }
-            }
-        }
-        stage ("Prepare Python Executor") {
-            steps {
-                script {
-                    withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g && git fetch --all --prune --quiet\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g && git checkout --quiet ${ranRepoBranch}\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g && git pull --quiet origin ${ranRepoBranch}\"'"
-                        GIT_COMMIT_TO_RUN = sh returnStdout: true, script: "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g && git log -n1 --pretty=format:%H\"' | grep -v fit"
-                        GIT_COMMIT_TO_RUN = GIT_COMMIT_TO_RUN.trim()
-                        echo "Latest commit to use is ${GIT_COMMIT_TO_RUN}"
-
-                        // Putting the adaptation parameters for the OAI UE
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'scp /home/inria_oaici/for-ci/adapt_usim_parameters_${r2labUE0}.sed ${fituser}@${r2labUE0}:/tmp/adapt_usim_parameters.sed'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'scp /home/inria_oaici/for-ci/phones_list.txt ${fituser}@${r2labEPC0}:/tmp'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S cp /tmp/phones_list.txt /etc/ci\"'"
-
-                        // Out of rload, the sub-network-interfaces are not up
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:m11 172.16.1.102 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:m10 192.168.10.110 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:sxu 172.55.55.102 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:sxc 172.55.55.101 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:s5c 172.58.58.102 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:p5c 172.58.58.101 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ifconfig control:s11 172.16.1.104 up\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"ifconfig\"'"
-
-                        // Adding routes on the EPC
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ip route add default via 192.168.3.100 dev control table lte\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ip rule add from 12.0.0.0/8 table lte\"'"
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labEPC0} \"echo ${fitpasswd} | sudo -S ip rule add from 12.1.1.0/8 table lte\"'"
-
-                        // For the moment, simple way to route traffic from python executor to any UEs
-                        sh "ssh -t inria_oaici@faraday.inria.fr 'ssh -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"echo ${fitpasswd} | sudo -S ip route add 12.1.1.0/24 via ${r2labEPC0IpAddr} dev control\"'"
-                    }
-                }
-            }
-        }
-        stage ("Build and Test") {
-            steps {
-                script {
-                    withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                        sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=InitiateHtml --ranRepository=https://gitlab.eurecom.fr/oai/openairinterface5g --ranBranch=${ranRepoBranch} --ranCommitID=${GIT_COMMIT_TO_RUN} --ranAllowMerge=false --ADBIPAddress=${r2labEPC0IpAddr} --ADBUserName=${fituser} --ADBPassword=${fitpasswd} --ADBType=distributed ${mainPythonAllXmlFiles}\"'"
-                        String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                        for (xmlFile in myXmlTestSuite) {
-                            try {
-                                sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=TesteNB --ranRepository=https://gitlab.eurecom.fr/oai/openairinterface5g --ranBranch=${ranRepoBranch} --ranCommitID=${GIT_COMMIT_TO_RUN} --ranAllowMerge=false --eNBIPAddress=${r2labENB0IpAddr} --eNBUserName=${fituser} --eNBPassword=${fitpasswd} --eNBSourceCodePath=/home/${fituser}/openairinterface5g --eNB1IPAddress=${r2labENB1IpAddr} --eNB1UserName=${fituser} --eNB1Password=${fitpasswd} --eNB1SourceCodePath=/home/${fituser}/openairinterface5g --UEIPAddress=${r2labUE0IpAddr} --UEUserName=${fituser} --UEPassword=${fitpasswd} --UESourceCodePath=/home/${fituser}/openairinterface5g --EPCIPAddress=${r2labEPC0IpAddr} --EPCType=OAI-Rel14-CUPS --EPCUserName=${fituser}  --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --ADBIPAddress=${r2labEPC0IpAddr} --ADBUserName=${fituser} --ADBPassword=${fitpasswd} --ADBType=distributed --XMLTestFile=${xmlFile}\"'"
-                            } catch (Exception e) {
-                                currentBuild.result = 'FAILURE'
-                                buildStageStatus = false
-                            }
-                        }
-                        sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=FinalizeHtml --finalStatus=${buildStageStatus} --eNBIPAddress=${r2labENB0IpAddr} --eNBUserName=${fituser} --eNBPassword=${fitpasswd}\"'"
-                    }
-                }
-            }
-        }
-        stage ("Prepare Log Collection") {
-            steps {
-                sh "ssh -t inria_oaici@faraday.inria.fr 'mkdir -p /home/inria_oaici/archives'"
-                sh "ssh -t inria_oaici@faraday.inria.fr 'touch /home/inria_oaici/archives/no_error.txt'"
-                sh "ssh -t inria_oaici@faraday.inria.fr 'rm -f /home/inria_oaici/archives/*.*'"
-                withCredentials([
-                    [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                    ]) {
-                    sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labPythonExe}:/home/${fituser}/openairinterface5g/ci-scripts/test_results.html /home/inria_oaici/archives'"
-                }
-                sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/test_results.html ."
-                script {
-                    if(fileExists("./test_results.html")) {
-                        sh "mv ./test_results.html test_results-${JOB_NAME}.html"
-                        sh "sed -i -e 's#TEMPLATE_JOB_NAME#${JOB_NAME}#' -e 's@build #TEMPLATE_BUILD_ID@build #${BUILD_ID}@' -e 's#Build-ID: TEMPLATE_BUILD_ID#Build-ID: <a href=\"${BUILD_URL}\">${BUILD_ID}</a>#' -e 's#TEMPLATE_STAGE_NAME#${testStageName}#' -e 's#TEMPLATE_BUILD_TIME#${JOB_TIMESTAMP}#' test_results-${JOB_NAME}.html"
-                        archiveArtifacts "test_results-${JOB_NAME}.html"
-                    }
-                }
-            }
-        }
-        stage ("Log Collection") {
-            parallel {
-                stage('Log Collection (OAI eNB - Build)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (OAI eNB - Build)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectBuild --eNBIPAddress=${r2labENB0IpAddr} --eNBUserName=${fituser} --eNBPassword=${fitpasswd} --eNBSourceCodePath=/home/${fituser}/openairinterface5g\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labENB0}:/home/${fituser}/openairinterface5g/cmake_targets/build.log.zip /home/inria_oaici/archives/enb.build.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/enb.build.log.zip enb.build.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("enb.build.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "enb.build.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (OAI UE - Build)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (OAI UE - Build)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectBuild --UEIPAddress=${r2labUE0IpAddr} --UEUserName=${fituser} --UEPassword=${fitpasswd} --UESourceCodePath=/home/${fituser}/openairinterface5g\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labUE0}:/home/${fituser}/openairinterface5g/cmake_targets/build.log.zip /home/inria_oaici/archives/ue.build.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/ue.build.log.zip ue.build.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("ue.build.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "ue.build.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (OAI eNB - Runs)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (OAI eNB - Runs)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollecteNB --eNBIPAddress=${r2labENB0IpAddr} --eNBUserName=${fituser} --eNBPassword=${fitpasswd} --eNBSourceCodePath=/home/${fituser}/openairinterface5g\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labENB0}:/home/${fituser}/openairinterface5g/cmake_targets/enb.log.zip /home/inria_oaici/archives/enb.run.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/enb.run.log.zip enb.run.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("enb.run.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "enb.run.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (OAI UE - Runs)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (OAI UE - Runs)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectOAIUE --UEIPAddress=${r2labUE0IpAddr} --UEUserName=${fituser} --UEPassword=${fitpasswd} --UESourceCodePath=/home/${fituser}/openairinterface5g\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labUE0}:/home/${fituser}/openairinterface5g/cmake_targets/ue.log.zip /home/inria_oaici/archives/ue.run.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/ue.run.log.zip ue.run.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("ue.run.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "ue.run.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (MME)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (MME)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectMME --EPCIPAddress=${r2labEPC0IpAddr} --EPCUserName=${fituser} --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --EPCType=OAI-Rel14-CUPS\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labEPC0}:/home/${fituser}/openair-cn/scripts/mme.log.zip /home/inria_oaici/archives/mme.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/mme.log.zip mme.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("mme.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "mme.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (HSS)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (HSS)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectHSS --EPCIPAddress=${r2labEPC0IpAddr} --EPCUserName=${fituser} --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --EPCType=OAI-Rel14-CUPS\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labEPC0}:/home/${fituser}/openair-cn/scripts/hss.log.zip /home/inria_oaici/archives/hss.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/hss.log.zip hss.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("hss.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "hss.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (SPGW)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (SPGW)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectSPGW --EPCIPAddress=${r2labEPC0IpAddr} --EPCUserName=${fituser} --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --EPCType=OAI-Rel14-CUPS\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labEPC0}:/home/${fituser}/openair-cn/scripts/spgw.log.zip /home/inria_oaici/archives/spgw.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/spgw.log.zip spgw.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("spgw.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "spgw.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (Ping)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (Ping)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectPing --EPCIPAddress=${r2labEPC0IpAddr} --EPCUserName=${fituser} --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --EPCType=OAI-Rel14-CUPS\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labEPC0}:/home/${fituser}/openair-cn/scripts/ping.log.zip /home/inria_oaici/archives/ping.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/ping.log.zip ping.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("ping.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "ping.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (Iperf)') {
-                    steps {
-                        withCredentials([
-                        [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.R2LAB_FitNode_Credentials}", usernameVariable: 'fituser', passwordVariable: 'fitpasswd']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (Iperf)\u001B[0m'
-                            sh "ssh -t -t inria_oaici@faraday.inria.fr 'ssh -t -t -b 192.168.3.100 ${fituser}@${r2labPythonExe} \"cd openairinterface5g/ci-scripts && python3 main.py --mode=LogCollectIperf --EPCIPAddress=${r2labEPC0IpAddr} --EPCUserName=${fituser} --EPCPassword=${fitpasswd} --EPCSourceCodePath=/home/${fituser}/openair-cn --EPCType=OAI-Rel14-CUPS\"'"
-                            sh "ssh -t inria_oaici@faraday.inria.fr 'scp ${fituser}@${r2labEPC0}:/home/${fituser}/openair-cn/scripts/iperf.log.zip /home/inria_oaici/archives/iperf.log.zip'"
-                        }
-                        sh "scp inria_oaici@faraday.inria.fr:/home/inria_oaici/archives/iperf.log.zip iperf.log.${env.BUILD_ID}.zip"
-                        script {
-                            if(fileExists("iperf.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "iperf.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        stage ("Clean-up Log Collection") {
-            steps {
-                sh "ssh -t inria_oaici@faraday.inria.fr 'rm -Rf /home/inria_oaici/archives'"
-            }
-        }
-    }
-
-    post {
-        always {
-            script {
-                if (r2labDoAllOff) {
-                    echo '\u2705 \u001B[32mShutdown every node\u001B[0m'
-                    sh 'ssh -t inria_oaici@faraday.inria.fr "all-off"'
-                    sh 'ssh -t inria_oaici@faraday.inria.fr "all-off"'
-                }
-            }
-        }
-    }
-}
diff --git a/ci-scripts/Jenkinsfile-physim-deploy b/ci-scripts/Jenkinsfile-physim-deploy
index eac572e6cdcb12bc06af68e7905d8b97218445af..a0ed1a2f80c73cd9ff31e746fe546ddf9ed667d3 100644
--- a/ci-scripts/Jenkinsfile-physim-deploy
+++ b/ci-scripts/Jenkinsfile-physim-deploy
@@ -32,7 +32,9 @@ def buildStageStatus = true
 def testStageName = params.pipelineTestStageName
 
 // Name of the resource
-def ciEpcResource = params.epcResource
+def lockResources = [] // list of [resource: <name>] maps handed to lock(extra: ...) in the pipeline options
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{ if (it.trim()) lockResources += [resource: it.trim()] } // skip blank entries such as "a,,b"
 
 // Global Parameters. Normally they should be populated when the master job
 // triggers the slave job with parameters
@@ -56,7 +58,7 @@ pipeline {
     disableConcurrentBuilds()
     timestamps()
     ansiColor('xterm')
-    lock(ciEpcResource)
+    lock(extra: lockResources)
   }
 
   stages {
@@ -80,7 +82,8 @@ pipeline {
             testStageName = 'Template Test Stage'
           }
 
-          if (params.smartphonesResource == null) {
+          if (params.LockResources == null) {
+            echo "no LockResources given"
             allParametersPresent = false
           }
           if (params.eNB_IPAddress == null) {
diff --git a/ci-scripts/Jenkinsfile-poll-gNB-UE b/ci-scripts/Jenkinsfile-poll-gNB-UE
deleted file mode 100644
index 88dff2b102d95c2bb71c57da087a946e28d38c24..0000000000000000000000000000000000000000
--- a/ci-scripts/Jenkinsfile-poll-gNB-UE
+++ /dev/null
@@ -1,376 +0,0 @@
-#!/bin/groovy
-/*
-* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The OpenAirInterface Software Alliance licenses this file to You under
-* the OAI Public License, Version 1.1  (the "License"); you may not use this file
-* except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*      http://www.openairinterface.org/?page_id=698
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*-------------------------------------------------------------------------------
-* For more information about the OpenAirInterface (OAI) Software Alliance:
-*      contact@openairinterface.org
-*/
-
-
-// Abstraction function to send social media messages:
-// like on Slack or Mattermost
-def sendSocialMediaMessage(pipeChannel, pipeColor, pipeMessage) {
-    if (params.pipelineUsesSlack != null) {
-        if (params.pipelineUsesSlack) {
-            slackSend channel: pipeChannel, color: pipeColor, message: pipeMessage
-        }
-    }
-}
-
-// Location of the test XML file to be run
-def testXMLFile = params.pythonTestXmlFile
-def mainPythonAllXmlFiles = ""
-def buildStageStatus = true
-
-// Name of the phone resource
-def ciUSRPsResource = params.USRPsResource
-
-// Terminate Status
-def termUE = 0
-def termENB = 1
-def termStatusArray = new Boolean[2]
-termStatusArray[termUE] = false
-termStatusArray[termENB] = false
-
-// Global Parameters.
-def eNB_Repository
-def eNB_Branch
-def eNB_CommitID
-def eNB_AllowMergeRequestProcess = false
-def eNB_TargetBranch = "develop-nr"
-def GIT_COMMIT_AUTHOR
-def GIT_COMMIT_EMAIL
-def testStageName
-// Global Parameters not to break the main.py command line and code.
-def ADB_IPAddress = "none"
-def ADB_Username = "none"
-def ADB_Password = "none"
-def EPC_IPAddress = "none"
-def EPC_Username = "none"
-def EPC_Password = "none"
-
-
-
-pipeline {
-    agent {
-        label pythonExecutor 
-    }
-    options {
-        disableConcurrentBuilds()
-        gitLabConnection('OAI GitLab')
-        ansiColor('xterm')
-	lock (ciUSRPsResource)
-    }
-
-    stages {
-        stage ('Retrieve latest from branch') {
-            steps {
-	        script {
-	            checkout([$class: 'GitSCM', branches: [[name: "${params.Branch}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[url: 'https://gitlab.eurecom.fr/oai/openairinterface5g.git']]])
-	            sh "git clean -x -d -ff"
-	            sh "git log -n1"
-	        }
-            }
-        }
-        stage ("print latest commit info") {
-            steps {
-        	    script {
-        	        echo "Building on: "
-        	        echo "  Repository -- ${GIT_URL}"
-        	        echo "  Branch -- ${GIT_BRANCH}"
-        	        echo "  Commit -- ${GIT_COMMIT}"
-        	    }
-            }
-        }
-        stage ("Verify Parameters") {
-            steps {
-                script {
-                    JOB_TIMESTAMP = sh returnStdout: true, script: 'date --utc --rfc-3339=seconds | sed -e "s#+00:00##"'
-                    JOB_TIMESTAMP = JOB_TIMESTAMP.trim()
-
-                    echo '\u2705 \u001B[32mVerify Parameters\u001B[0m'
-                    def allParametersPresent = true
-
-                    // It is already too late to check it
-                    if (params.pythonExecutor != null) {
-                       echo "eNB CI executor node  :   ${pythonExecutor}"
-                    }
-                    if (params.eNB_Repository == null) {
-                        eNB_Repository = GIT_URL 
-                    } else {
-                        eNB_Repository = params.eNB_Repository
-                    }
-                    echo "eNB_Repository = ${eNB_Repository}"
-                    if (params.eNB_Branch== null) {
-                        eNB_Branch = GIT_BRANCH
-                    } else {
-                        eNB_Branch = params.eNB_Branch
-                    }
-                    echo "eNB_Branch = ${eNB_Branch}"
-                    if (params.eNB_CommitID == null) {
-                        eNB_CommitID = GIT_COMMIT 
-                    } else {
-                        eNB_CommitID = params.eNB_CommitID
-                    }
-                    echo "eNB_CommitID = ${eNB_CommitID}"
-                    // If not present picking a default Stage Name
-                    if (params.pipelineTestStageName == null) {
-                        // picking default
-                        testStageName = 'Template Test Stage'
-                    } else {
-			testStageName = params.pipelineTestStageName
-	            }
-
-                    if (params.USRPsResource == null) {
-                        allParametersPresent = false
-                    }
-	
-                    if (params.eNB_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.UE_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.UE_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    GIT_COMMIT_AUTHOR = sh (
-                        script: 'git show -s --pretty=%an',
-                        returnStdout: true
-                    ).trim()
-                    echo "The author of the commit is: ${GIT_COMMIT_AUTHOR}"
-                    GIT_COMMIT_EMAIL = sh (
-                        script: 'git show -s --pretty=%ae',
-                        returnStdout: true
-                    ).trim()
-                    echo "The email of the author is: ${GIT_COMMIT_EMAIL}"
-                    if (allParametersPresent) {
-                        echo "All parameters are present"
-                    } else {
-                        echo "Some parameters are missing"
-                        sh "./ci-scripts/fail.sh"
-                    }
-                }
-            }
-        }
-
-        stage ("Build and Test") {
-            steps {
-        	    script {
-        	        dir ('ci-scripts') {
-                        // If not present picking a default XML file
-                        if (params.pythonTestXmlFile == null) {
-                            // picking default
-                            testXMLFile = 'xml_files/gnb_usrp_build.xml'
-                            echo "Test XML file(default):   ${testXMLFile}"
-                            mainPythonAllXmlFiles += "--XMLTestFile=" + testXMLFile + " "
-                        } else {
-                            String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                            for (xmlFile in myXmlTestSuite) {
-                                if (fileExists(xmlFile)) {
-                            	mainPythonAllXmlFiles += "--XMLTestFile=" + xmlFile + " "
-                            	echo "Test XML file         :   ${xmlFile}"
-                                }
-                            }
-                        }
-        	    	withCredentials([
-        	    	    [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password'],
-        	    	    [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.UE_Credentials}", usernameVariable: 'UE_Username', passwordVariable: 'UE_Password']
-        	    	]) {
-        	    	    sh "python3 main.py --mode=InitiateHtml --eNBRepository=${eNB_Repository} --eNBBranch=${eNB_Branch} --eNBCommitID=${eNB_CommitID} --eNB_AllowMerge=${eNB_AllowMergeRequestProcess} --eNBTargetBranch=${eNB_TargetBranch} --ADBIPAddress=${ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} ${mainPythonAllXmlFiles}"
-        	    	    String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-        	    	    for (xmlFile in myXmlTestSuite) {
-        	    	    if (fileExists(xmlFile)) {
-        	    	        try {
-        	    	            sh "python3 main.py --mode=TesteNB --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath} --UEIPAddress=${params.eNB_IPAddress} --UEUserName=${eNB_Username} --UEPassword=${eNB_Password} --UESourceCodePath=${params.eNB_SourceCodePath} --eNBRepository=${eNB_Repository} --eNBBranch=${eNB_Branch} --eNBCommitID=${eNB_CommitID} --eNB_AllowMerge=${eNB_AllowMergeRequestProcess} --eNBTargetBranch=${eNB_TargetBranch} --ADBIPAddress=${ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} --EPCIPAddress=${EPC_IPAddress} --EPCUserName=${EPC_Username} --EPCPassword=${EPC_Password} --EPCSourceCodePath=/tmp/${EPC_Username} --EPCType=ltebox --XMLTestFile=${xmlFile}"
-        	    	        } catch (Exception e) {
-        	    	            currentBuild.result = 'FAILURE'
-        	    	            buildStageStatus = false
-        	    	        }
-        	    	    }
-        	    	    }
-                            sh "python3 main.py --mode=FinalizeHtml --finalStatus=${buildStageStatus} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password}"
-        	        }
-        	    }
-                }
-            }
-	}	
-	
-        stage ("Terminate") {
-            parallel {
-        	stage('Terminate NR UE') {
-        	    steps {
-        		echo '\u2705 \u001B[32mTerminate NR UE\u001B[0m'
-                        withCredentials([
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'UE_Username', passwordVariable: 'UE_Password']
-                        ]) {
-                            sh "python3 ci-scripts/main.py --mode=TerminateOAIUE --UEIPAddress=${params.eNB_IPAddress} --UEUserName=${UE_Username} --UEPassword=${UE_Password}"
-                        }
-        	    }
-        	    post {
-        		success {
-        		    script {
-        			termStatusArray[termUE] = true
-        		    }
-        		}
-        	    }
-        	}
-       	    stage('Terminate NR eNB') {
-       	        steps {
-		    echo '\u2705 \u001B[32mTerminate NR eNB\u001B[0m'
-                        withCredentials([
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-                        ]) {
-                            sh "python3 ci-scripts/main.py --mode=TerminateeNB --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password}"
-                        }
-        	      }
-                    post {
-        	        success {
-                            script {
-        		        termStatusArray[termENB] = true
-        		    }
-                        }
-        	    }
-                }
-            }
-        }
-
-        stage('Log Collection') {
-            parallel {
-        	stage('Log Collection (gNB and NR UE - Build)') {
-        	    steps {
-        		withCredentials([
-        		     [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-        		]) {
-        		    echo '\u2705 \u001B[32mLog Collection (gNB and NR UE - Build)\u001B[0m'
-        		    sh "python3 ci-scripts/main.py --mode=LogCollectBuild --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-        
-        		    echo '\u2705 \u001B[32mLog Transfer (gNB and NR UE - Build)\u001B[0m'
-        		    sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/build.log.zip ./build.log.${env.BUILD_ID}.zip || true"
-        		}
-        		script {
-        		    if(fileExists("build.log.${env.BUILD_ID}.zip")) {
-        			archiveArtifacts "build.log.${env.BUILD_ID}.zip"
-        		    }
-        		}
-        	    }
-        	}
-        	stage('Log Collection (gNB - Run)') {
-        	    steps {
-        		withCredentials([
-        		     [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-        		]) {
-        		    echo '\u2705 \u001B[32mLog Collection (gNB - Run)\u001B[0m'
-        		    sh "python3 ci-scripts/main.py --mode=LogCollecteNB --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-        
-        		    echo '\u2705 \u001B[32mLog Transfer (gNB - Run)\u001B[0m'
-        		    sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/enb.log.zip ./enb.log.${env.BUILD_ID}.zip || true"
-        		}
-        		script {
-        		    if(fileExists("enb.log.${env.BUILD_ID}.zip")) {
-        			archiveArtifacts "enb.log.${env.BUILD_ID}.zip"
-        		    }
-        		    if(fileExists("ci-scripts/test_results.html")) {
-        			sh "mv ci-scripts/test_results.html test_results-${JOB_NAME}.html"
-        			sh "sed -i -e 's#TEMPLATE_BUILD_TIME#${JOB_TIMESTAMP}#' -e 's#TEMPLATE_JOB_NAME#${JOB_NAME}#' -e 's@build #TEMPLATE_BUILD_ID@build #${BUILD_ID}@' -e 's#Build-ID: TEMPLATE_BUILD_ID#Build-ID: <a href=\"${BUILD_URL}\">${BUILD_ID}</a>#' -e 's#TEMPLATE_STAGE_NAME#${testStageName}#' test_results-${JOB_NAME}.html"
-        			archiveArtifacts "test_results-${JOB_NAME}.html"
-        		    }
-        		}
-        	    }
-        	}
-        	stage('Log Collection (NR UE - Run)') {
-        	    steps {
-        		withCredentials([
-        		     [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-        		]) {
-        		    echo '\u2705 \u001B[32mLog Collection (gNB - Run)\u001B[0m'
-        		    sh "python3 ci-scripts/main.py --mode=LogCollectOAIUE --UEIPAddress=${params.eNB_IPAddress} --UEUserName=${eNB_Username} --UEPassword=${eNB_Password} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --UESourceCodePath=${params.eNB_SourceCodePath}"
-        
-        		    echo '\u2705 \u001B[32mLog Transfer (gNB - Run)\u001B[0m'
-        		    sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/ue.log.zip ./ue.log.${env.BUILD_ID}.zip || true"
-        		}
-        		script {
-        		    if(fileExists("ue.log.${env.BUILD_ID}.zip")) {
-        			archiveArtifacts "ue.log.${env.BUILD_ID}.zip"
-        		    }
-        		}
-        	    }
-        	}
-            }
-        }
-    }
-
-    post {
-        always {
-            script {
-                if ("MERGE".equals(env.gitlabActionType)) {
-                    echo "This is a MERGE event"
-                } else {
-                    gitlabCommitStatus(name: "Test-gNB-nrUE") {
-                        if ((currentBuild.result == null) || (currentBuild.result == 'SUCCESS')) {
-                            echo "Setting the gitlab commit status to pass"
-                        } else {
-                            sh "./ci-scripts/fail.sh"
-                        }
-                    }
-                }
-                emailext attachmentsPattern: '*results*.html',
-                     body: '''Hi,
-Here are attached HTML report files for $PROJECT_NAME - Build # $BUILD_NUMBER - $BUILD_STATUS!
-
-Regards,
-OAI CI Team''',
-                     replyTo: 'no-reply@openairinterface.org',
-                     subject: '$PROJECT_NAME - Build # $BUILD_NUMBER - $BUILD_STATUS!',
-                     to: GIT_COMMIT_EMAIL
-	    }
-        }
-        success {
-            script {
-                def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): passed (" + BUILD_URL + ")"
-                if ("MERGE".equals(env.gitlabActionType)) {
-                    echo "This is a MERGE event"
-                    //addGitLabMRComment comment: message
-                    def message2 = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): passed (" + BUILD_URL + ") -- MergeRequest #" + env.gitlabMergeRequestIid + " (" + env.gitlabMergeRequestTitle + ")"
-                    sendSocialMediaMessage('ci-test', 'good', message2)
-                } else {
-                    sendSocialMediaMessage('ci-test', 'good', message)
-                }
-            }
-        }
-        failure {
-            script {
-                def message = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): failed (" + BUILD_URL + ")"
-                if ("MERGE".equals(env.gitlabActionType)) {
-                    echo "This is a MERGE event"
-                    //addGitLabMRComment comment: message
-                    def message2 = "OAI " + JOB_NAME + " build (" + BUILD_ID + "): failed (" + BUILD_URL + ") -- MergeRequest #" + env.gitlabMergeRequestIid + " (" + env.gitlabMergeRequestTitle + ")"
-                    sendSocialMediaMessage('ci-test', 'danger', message2)
-                } else {
-                    sendSocialMediaMessage('ci-test', 'danger', message)
-                }
-            }
-        }
-    }
-}
-
diff --git a/ci-scripts/Jenkinsfile-push-registry b/ci-scripts/Jenkinsfile-push-registry
index feb1807c9122fd96565a63e9ab60ebd157086d66..9442e9c78adc29816bd548f6623c61240e80bd47 100644
--- a/ci-scripts/Jenkinsfile-push-registry
+++ b/ci-scripts/Jenkinsfile-push-registry
@@ -23,8 +23,10 @@
 // Location of the python executor node shall be in the same subnet as the others servers
 def nodeExecutor = params.nodeExecutor
 
-// Name of the phone resource
-def ciServerResource = params.serverResource
+// Resources to lock: LockResources is a comma-separated list of resource names
+def lockResources = [] // list of [resource: <name>] maps handed to lock(extra: ...) in the pipeline options
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{ if (it.trim()) lockResources += [resource: it.trim()] } // skip blank entries such as "a,,b"
 
 // Docker Hub account to push to
 def DH_Account = "oaisoftwarealliance"
@@ -36,7 +38,7 @@ pipeline {
   options {
     disableConcurrentBuilds()
     ansiColor('xterm')
-    lock (ciServerResource)
+    lock(extra: lockResources)
   }
   stages {
     stage ("Verify Parameters") {
@@ -49,7 +51,8 @@ pipeline {
           if (params.nodeExecutor != null) {
             echo "Docker Push executor node  :   ${nodeExecutor}"
           }
-          if (params.serverResource == null) {
+          if (params.LockResources == null) {
+            echo "no LockResources given"
             allParametersPresent = false
           }
         }
diff --git a/ci-scripts/Jenkinsfile-tmp-full-ran b/ci-scripts/Jenkinsfile-tmp-full-ran
index c3323f792dc5d969dd18920bf393899364afec38..e4564d777ed6bdfecb0f6a4377a2862a876a7eda 100644
--- a/ci-scripts/Jenkinsfile-tmp-full-ran
+++ b/ci-scripts/Jenkinsfile-tmp-full-ran
@@ -33,11 +33,9 @@ def buildStageStatus = true
 // Name of the test stage
 def testStageName = params.pipelineTestStageName
 
-// Name of the phone resource
-def ciSmartPhoneResource = params.smartphonesResource
-
-// Name of the phone resource
-def oaiUEResource = params.oaiUEResource
+def lockResources = [] // list of [resource: <name>] maps handed to lock(extra: ...) in the pipeline options
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{ if (it.trim()) lockResources += [resource: it.trim()] } // skip blank entries such as "a,,b"
 
 // Terminate Status
 def termENB = 0
@@ -65,7 +63,7 @@ pipeline {
     options {
         disableConcurrentBuilds()
         ansiColor('xterm')
-        lock(extra: [[resource: oaiUEResource]], resource: ciSmartPhoneResource)
+        lock(extra: lockResources)
     }
 
     stages {
@@ -85,11 +83,9 @@ pipeline {
                         testStageName = 'Template Test Stage'
                     }
 
-                    if (params.smartphonesResource == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.oaiUEResource == null) {
-                        allParametersPresent = false
+                    if (params.LockResources == null) {
+                      echo "no LockResources given"
+                      allParametersPresent = false
                     }
                     if (params.eNB_IPAddress == null) {
                         allParametersPresent = false
diff --git a/ci-scripts/Jenkinsfile-tmp-multi-enb b/ci-scripts/Jenkinsfile-tmp-multi-enb
index fb523169201794ead2598b7911999913c7665421..e0c756befb8a15cd213c100e78b33969c1b12056 100644
--- a/ci-scripts/Jenkinsfile-tmp-multi-enb
+++ b/ci-scripts/Jenkinsfile-tmp-multi-enb
@@ -33,11 +33,9 @@ def buildStageStatus = true
 // Name of the test stage
 def testStageName = params.pipelineTestStageName
 
-// Name of the phone resource
-def ciSmartPhoneResource = params.smartphonesResource
-
-// Name of the phone resource
-def ciEpcResource = params.epcResource
+def lockResources = [] // list of [resource: <name>] maps handed to lock(extra: ...) in the pipeline options
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{ if (it.trim()) lockResources += [resource: it.trim()] } // skip blank entries such as "a,,b"
 
 // Global Parameters. Normally they should be populated when the master job
 // triggers the slave job with parameters
@@ -54,7 +52,7 @@ pipeline {
     options {
         disableConcurrentBuilds()
         ansiColor('xterm')
-        lock(extra: [[resource: ciEpcResource]], resource: ciSmartPhoneResource)
+        lock(extra: lockResources)
     }
     stages {
         stage ("Verify Parameters") {
@@ -73,8 +71,9 @@ pipeline {
                         testStageName = 'Template Test Stage'
                     }
 
-                    if (params.smartphonesResource == null) {
-                        allParametersPresent = false
+                    if (params.LockResources == null) {
+                      echo "no LockResources given"
+                      allParametersPresent = false
                     }
                     // 1st eNB parameters
                     if (params.eNB_IPAddress == null) {
diff --git a/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel b/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel
deleted file mode 100644
index 195456da11c6971792dd30ed499f5fb072837214..0000000000000000000000000000000000000000
--- a/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel
+++ /dev/null
@@ -1,293 +0,0 @@
-#!/bin/groovy
-/*
- * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The OpenAirInterface Software Alliance licenses this file to You under
- * the OAI Public License, Version 1.1  (the "License"); you may not use this file
- * except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.openairinterface.org/?page_id=698
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *-------------------------------------------------------------------------------
- * For more information about the OpenAirInterface (OAI) Software Alliance:
- *      contact@openairinterface.org
- */
-
-// Template Jenkins Declarative Pipeline script to run Test w/ RF HW
-
-// Location of the python executor node shall be in the same subnet as the others servers
-def pythonExecutor = params.pythonExecutor
-
-// Location of the test XML file to be run
-def testXMLFile = params.pythonTestXmlFile
-def mainPythonAllXmlFiles = ""
-def buildStageStatus = true
-
-// Name of the test stage
-def testStageName = params.pipelineTestStageName
-
-// Name of the phone resource
-def ciSmartPhonesResource1 = params.SmartPhonesResource1
-def ciSmartPhonesResource2 = params.SmartPhonesResource2
-
-// Global Parameters. Normally they should be populated when the master job
-// triggers the slave job with parameters
-def eNB_Repository
-def eNB_Branch
-def eNB_CommitID
-def eNB_AllowMergeRequestProcess
-def eNB_TargetBranch
-
-pipeline {
-    agent {
-        label pythonExecutor
-    }
-    options {
-        disableConcurrentBuilds()
-        ansiColor('xterm')
-        lock(extra: [[resource: ciSmartPhonesResource2]], resource: ciSmartPhonesResource1)
-    }
-    stages {
-        stage("Build Init") {
-            steps {
-                // update the build name and description
-                buildName "${params.eNB_MR}"
-                buildDescription "Branch : ${params.eNB_Branch}"
-            }
-        }
-        stage ("Verify Parameters") {
-            steps {
-                script {
-                    echo '\u2705 \u001B[32mVerify Parameters\u001B[0m'
-                    def allParametersPresent = true
-
-                    // It is already to late to check it
-                    if (params.pythonExecutor != null) {
-                        echo "eNB CI executor node  :   ${pythonExecutor}"
-                    }
-                    // If not present picking a default Stage Name
-                    if (params.pipelineTestStageName == null) {
-                        // picking default
-                        testStageName = 'Template Test Stage'
-                    }
-
-                    if (params.SmartPhonesResource1 == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.SmartPhonesResource2 == null) {
-                        allParametersPresent = false
-                    }
-                    // 1st eNB parameters
-                    if (params.eNB_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // 2nd eNB parameters
-                    if (params.eNB1_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB1_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB1_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // 3rd eNB parameters
-                    if (params.eNB2_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB2_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB2_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // the following 4 parameters should be pushed by the master trigger
-                    // if not present, take the job GIT variables (used for developing)
-                    if (params.eNB_Repository == null) {
-                        eNB_Repository = env.GIT_URL
-                    } else {
-                        eNB_Repository = params.eNB_Repository
-                    }
-                    echo "eNB_Repository        :   ${eNB_Repository}"
-                    if (params.eNB_Branch == null) {
-                        eNB_Branch = env.GIT_BRANCH
-                    } else {
-                        eNB_Branch = params.eNB_Branch
-                    }
-                    echo "eNB_Branch            :   ${eNB_Branch}"
-                    if (params.eNB_CommitID == null) {
-                        eNB_CommitID = env.GIT_COMMIT
-                    } else {
-                        eNB_CommitID = params.eNB_CommitID
-                    }
-                    echo "eNB_CommitID          :   ${eNB_CommitID}"
-                    if (params.eNB_mergeRequest!= null) {
-                        eNB_AllowMergeRequestProcess = params.eNB_mergeRequest
-                        if (eNB_AllowMergeRequestProcess) {
-                            if (params.eNB_TargetBranch != null) {
-                                eNB_TargetBranch = params.eNB_TargetBranch
-                            } else {
-                                eNB_TargetBranch = 'develop'
-                            }
-                            echo "eNB_TargetBranch      :   ${eNB_TargetBranch}"
-                        }
-                    }
-
-                    if (params.EPC_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_Type == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_Credentials == null) {
-                        allParametersPresent = false
-                    }
-
-                    if (params.ADB_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.ADB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-
-                    if (allParametersPresent) {
-                        echo "All parameters are present"
-                        if (eNB_AllowMergeRequestProcess) {
-                            sh "git fetch"
-                            sh "./ci-scripts/doGitLabMerge.sh --src-branch ${eNB_Branch} --src-commit ${eNB_CommitID} --target-branch ${eNB_TargetBranch} --target-commit latest"
-                        } else {
-                            sh "git fetch"
-                            sh "git checkout -f ${eNB_CommitID}"
-                        }
-                    } else {
-                        echo "Some parameters are missing"
-                        sh "./ci-scripts/fail.sh"
-                    }
-                }
-            }
-        }
-        stage ("Build and Test") {
-            steps {
-                script {
-                    dir ('ci-scripts') {
-                        echo "\u2705 \u001B[32m${testStageName}\u001B[0m"
-                        // If not present picking a default XML file
-                        if (params.pythonTestXmlFile == null) {
-                            // picking default
-                            testXMLFile = 'xml_files/enb_usrpB210_band7_50PRB.xml'
-                            echo "Test XML file(default):   ${testXMLFile}"
-                            mainPythonAllXmlFiles += "--XMLTestFile=" + testXMLFile + " "
-                        } else {
-                            String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                            for (xmlFile in myXmlTestSuite) {
-                                if (fileExists(xmlFile)) {
-                                    mainPythonAllXmlFiles += "--XMLTestFile=" + xmlFile + " "
-                                    echo "Test XML file         :   ${xmlFile}"
-                                }
-                            }
-                        }
-                        withCredentials([
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB1_Credentials}", usernameVariable: 'eNB1_Username', passwordVariable: 'eNB1_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB2_Credentials}", usernameVariable: 'eNB2_Username', passwordVariable: 'eNB2_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.EPC_Credentials}", usernameVariable: 'EPC_Username', passwordVariable: 'EPC_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.ADB_Credentials}", usernameVariable: 'ADB_Username', passwordVariable: 'ADB_Password']
-                        ]) {
-                            sh "python3 main.py --mode=InitiateHtml --ranRepository=${eNB_Repository} --ranBranch=${eNB_Branch} --ranCommitID=${eNB_CommitID} --ranAllowMerge=${eNB_AllowMergeRequestProcess} --ranTargetBranch=${eNB_TargetBranch} --ADBIPAddress=${params.ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} ${mainPythonAllXmlFiles}"
-                            String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                            for (xmlFile in myXmlTestSuite) {
-                                if (fileExists(xmlFile)) {
-                                    try {
-                                        sh "python3 main.py --mode=TesteNB --ranRepository=${eNB_Repository} --ranBranch=${eNB_Branch} --ranCommitID=${eNB_CommitID} --ranAllowMerge=${eNB_AllowMergeRequestProcess} --ranTargetBranch=${eNB_TargetBranch} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath} --eNB1IPAddress=${params.eNB1_IPAddress} --eNB1UserName=${eNB1_Username} --eNB1Password=${eNB1_Password} --eNB1SourceCodePath=${params.eNB1_SourceCodePath} --eNB2IPAddress=${params.eNB2_IPAddress} --eNB2UserName=${eNB2_Username} --eNB2Password=${eNB2_Password} --eNB2SourceCodePath=${params.eNB2_SourceCodePath} --EPCIPAddress=${params.EPC_IPAddress} --EPCType=${params.EPC_Type} --EPCUserName=${EPC_Username} --EPCPassword=${EPC_Password} --EPCSourceCodePath=${params.EPC_SourceCodePath} --ADBIPAddress=${params.ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} --XMLTestFile=${xmlFile}"
-                                    } catch (Exception e) {
-                                        currentBuild.result = 'FAILURE'
-                                        buildStageStatus = false
-                                    }
-                                }
-                            }
-                            sh "python3 main.py --mode=FinalizeHtml --finalStatus=${buildStageStatus} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password}"
-                        }
-                    }
-                }
-            }
-        }
-        stage('Log Collection') {
-            parallel {
-                stage('Log Collection (eNB - Build)') {
-                    steps {
-                        withCredentials([
-                             [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (eNB - Build)\u001B[0m'
-                            sh "python3 ci-scripts/main.py --mode=LogCollectBuild --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-
-                            echo '\u2705 \u001B[32mLog Transfer (eNB - Build)\u001B[0m'
-                            sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/build.log.zip ./build.log.${env.BUILD_ID}.zip || true"
-                        }
-                        script {
-                            if(fileExists("build.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "build.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (eNB - Run)') {
-                    steps {
-                        withCredentials([
-                             [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (eNB - Run)\u001B[0m'
-                            sh "python3 ci-scripts/main.py --mode=LogCollecteNB --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-
-                            echo '\u2705 \u001B[32mLog Transfer (eNB - Run)\u001B[0m'
-                            sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/enb.log.zip ./enb.log.${env.BUILD_ID}.zip || true"
-                        }
-                        script {
-                            if(fileExists("enb.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "enb.log.${env.BUILD_ID}.zip"
-                            }
-                            if(fileExists("ci-scripts/test_results.html")) {
-                                sh "mv ci-scripts/test_results.html test_results-${JOB_NAME}.html"
-                                sh "sed -i -e 's#TEMPLATE_JOB_NAME#${JOB_NAME}#' -e 's@build #TEMPLATE_BUILD_ID@build #${BUILD_ID}@' -e 's#Build-ID: TEMPLATE_BUILD_ID#Build-ID: <a href=\"${BUILD_URL}\">${BUILD_ID}</a>#' -e 's#TEMPLATE_STAGE_NAME#${testStageName}#' test_results-${JOB_NAME}.html"
-                                archiveArtifacts "test_results-${JOB_NAME}.html"
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    post {
-        always {
-            script {
-                if (params.pipelineZipsConsoleLog != null) {
-                    if (params.pipelineZipsConsoleLog) {
-                        echo "Archiving Jenkins console log"
-                        sh "wget --no-check-certificate --no-proxy ${env.JENKINS_URL}/job/${env.JOB_NAME}/${env.BUILD_ID}/consoleText -O consoleText.log || true"
-                        sh "zip -m consoleText.log.${env.BUILD_ID}.zip consoleText.log || true"
-                        if(fileExists("consoleText.log.${env.BUILD_ID}.zip")) {
-                            archiveArtifacts "consoleText.log.${env.BUILD_ID}.zip"
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel-long b/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel-long
deleted file mode 100644
index 3a002b374811e41bc7ad4b834b4fe24cb299cb0c..0000000000000000000000000000000000000000
--- a/ci-scripts/Jenkinsfile-tmp-multi-enb-benetel-long
+++ /dev/null
@@ -1,304 +0,0 @@
-#!/bin/groovy
-/*
- * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The OpenAirInterface Software Alliance licenses this file to You under
- * the OAI Public License, Version 1.1  (the "License"); you may not use this file
- * except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.openairinterface.org/?page_id=698
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *-------------------------------------------------------------------------------
- * For more information about the OpenAirInterface (OAI) Software Alliance:
- *      contact@openairinterface.org
- */
-
-// Template Jenkins Declarative Pipeline script to run Test w/ RF HW
-
-// Location of the python executor node shall be in the same subnet as the others servers
-def pythonExecutor = params.pythonExecutor
-
-// Location of the test XML file to be run
-def testXMLFile = params.pythonTestXmlFile
-def mainPythonAllXmlFiles = ""
-def buildStageStatus = true
-
-// Name of the test stage
-def testStageName = params.pipelineTestStageName
-
-// Name of the phone resource
-def ciSmartPhonesResource1 = params.SmartPhonesResource1
-def ciSmartPhonesResource2 = params.SmartPhonesResource2
-
-// Global Parameters. Populated when the master job
-// triggers the slave job with parameters
-def eNB_Repository = "https://gitlab.eurecom.fr/oai/openairinterface5g.git"
-def eNB_Branch = "develop"
-def eNB_CommitID
-def eNB_AllowMergeRequestProcess = false
-def eNB_TargetBranch = "develop"
-def eNB_MR = "develop"
-
-pipeline {
-    agent {
-        label pythonExecutor
-    }
-    options {
-        disableConcurrentBuilds()
-        ansiColor('xterm')
-        lock(extra: [[resource: ciSmartPhonesResource2]], resource: ciSmartPhonesResource1)
-    }
-    stages {
-        stage("Build Init") {
-            steps {
-                script {
-                    //retrieve latest commit ID from branch
-                    latest_commit_from_branch= sh returnStdout: true, script: 'git log -1 origin/${eNB_Branch} | grep commit'
-                    echo "Branch ${eNB_Branch}"
-                    echo "${latest_commit_from_branch}"
-                    tmp=latest_commit_from_branch.split()
-                    echo "${tmp}"
-                    eNB_CommitID = tmp[1]
-                    echo "eNB_CommitID ${eNB_CommitID}"
-                    // update the build name and description
-                    buildName "${params.eNB_MR}"
-                    buildDescription "Commit : ${eNB_CommitID}"
-                }
-            }
-        }
-        stage ("Verify Parameters") {
-            steps {
-                script {
-                    echo '\u2705 \u001B[32mVerify Parameters\u001B[0m'
-                    def allParametersPresent = true
-
-                    // It is already to late to check it
-                    if (params.pythonExecutor != null) {
-                        echo "eNB CI executor node  :   ${pythonExecutor}"
-                    }
-                    // If not present picking a default Stage Name
-                    if (params.pipelineTestStageName == null) {
-                        // picking default
-                        testStageName = 'Template Test Stage'
-                    }
-
-                    if (params.SmartPhonesResource1 == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.SmartPhonesResource2 == null) {
-                        allParametersPresent = false
-                    }
-                    // 1st eNB parameters
-                    if (params.eNB_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // 2nd eNB parameters
-                    if (params.eNB1_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB1_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB1_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // 3rd eNB parameters
-                    if (params.eNB2_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB2_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.eNB2_Credentials == null) {
-                        allParametersPresent = false
-                    }
-                    // the following 4 parameters should be pushed by the master trigger
-                    // if not present, take the job GIT variables (used for developing)
-                    if (params.eNB_Repository == null) {
-                        eNB_Repository = env.GIT_URL
-                    } else {
-                        eNB_Repository = params.eNB_Repository
-                    }
-                    echo "eNB_Repository        :   ${eNB_Repository}"
-                    if (params.eNB_Branch == null) {
-                        eNB_Branch = env.GIT_BRANCH
-                    } else {
-                        eNB_Branch = params.eNB_Branch
-                    }
-                    echo "eNB_Branch            :   ${eNB_Branch}"
-                    //if (params.eNB_CommitID == null) {
-                    //    eNB_CommitID = env.GIT_COMMIT
-                    //} else {
-                    //    eNB_CommitID = params.eNB_CommitID
-                    //}
-                    echo "eNB_CommitID          :   ${eNB_CommitID}"
-                    if (params.eNB_mergeRequest!= null) {
-                        eNB_AllowMergeRequestProcess = params.eNB_mergeRequest
-                        if (eNB_AllowMergeRequestProcess) {
-                            if (params.eNB_TargetBranch != null) {
-                                eNB_TargetBranch = params.eNB_TargetBranch
-                            } else {
-                                eNB_TargetBranch = 'develop'
-                            }
-                            echo "eNB_TargetBranch      :   ${eNB_TargetBranch}"
-                        }
-                    }
-
-                    if (params.EPC_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_Type == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_SourceCodePath == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.EPC_Credentials == null) {
-                        allParametersPresent = false
-                    }
-
-                    if (params.ADB_IPAddress == null) {
-                        allParametersPresent = false
-                    }
-                    if (params.ADB_Credentials == null) {
-                        allParametersPresent = false
-                    }
-
-                    if (allParametersPresent) {
-                        echo "All parameters are present"
-                        if (eNB_AllowMergeRequestProcess) {
-                            sh "git fetch"
-                            sh "./ci-scripts/doGitLabMerge.sh --src-branch ${eNB_Branch} --src-commit ${eNB_CommitID} --target-branch ${eNB_TargetBranch} --target-commit latest"
-                        } else {
-                            sh "git fetch"
-                            sh "git checkout -f ${eNB_CommitID}"
-                        }
-                    } else {
-                        echo "Some parameters are missing"
-                        sh "./ci-scripts/fail.sh"
-                    }
-                }
-            }
-        }
-        stage ("Build and Test") {
-            steps {
-                script {
-                    dir ('ci-scripts') {
-                        echo "\u2705 \u001B[32m${testStageName}\u001B[0m"
-                        // If not present picking a default XML file
-                        if (params.pythonTestXmlFile == null) {
-                            // picking default
-                            testXMLFile = 'xml_files/enb_usrpB210_band7_50PRB.xml'
-                            echo "Test XML file(default):   ${testXMLFile}"
-                            mainPythonAllXmlFiles += "--XMLTestFile=" + testXMLFile + " "
-                        } else {
-                            String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                            for (xmlFile in myXmlTestSuite) {
-                                if (fileExists(xmlFile)) {
-                                    mainPythonAllXmlFiles += "--XMLTestFile=" + xmlFile + " "
-                                    echo "Test XML file         :   ${xmlFile}"
-                                }
-                            }
-                        }
-                        withCredentials([
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB1_Credentials}", usernameVariable: 'eNB1_Username', passwordVariable: 'eNB1_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB2_Credentials}", usernameVariable: 'eNB2_Username', passwordVariable: 'eNB2_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.EPC_Credentials}", usernameVariable: 'EPC_Username', passwordVariable: 'EPC_Password'],
-                            [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.ADB_Credentials}", usernameVariable: 'ADB_Username', passwordVariable: 'ADB_Password']
-                        ]) {
-                            sh "python3 main.py --mode=InitiateHtml --ranRepository=${eNB_Repository} --ranBranch=${eNB_Branch} --ranCommitID=${eNB_CommitID} --ranAllowMerge=${eNB_AllowMergeRequestProcess} --ranTargetBranch=${eNB_TargetBranch} --ADBIPAddress=${params.ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} ${mainPythonAllXmlFiles}"
-                            String[] myXmlTestSuite = testXMLFile.split("\\r?\\n")
-                            for (xmlFile in myXmlTestSuite) {
-                                if (fileExists(xmlFile)) {
-                                    try {
-                                        sh "python3 main.py --mode=TesteNB --ranRepository=${eNB_Repository} --ranBranch=${eNB_Branch} --ranCommitID=${eNB_CommitID} --ranAllowMerge=${eNB_AllowMergeRequestProcess} --ranTargetBranch=${eNB_TargetBranch} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath} --eNB1IPAddress=${params.eNB1_IPAddress} --eNB1UserName=${eNB1_Username} --eNB1Password=${eNB1_Password} --eNB1SourceCodePath=${params.eNB1_SourceCodePath} --eNB2IPAddress=${params.eNB2_IPAddress} --eNB2UserName=${eNB2_Username} --eNB2Password=${eNB2_Password} --eNB2SourceCodePath=${params.eNB2_SourceCodePath} --EPCIPAddress=${params.EPC_IPAddress} --EPCType=${params.EPC_Type} --EPCUserName=${EPC_Username} --EPCPassword=${EPC_Password} --EPCSourceCodePath=${params.EPC_SourceCodePath} --ADBIPAddress=${params.ADB_IPAddress} --ADBUserName=${ADB_Username} --ADBPassword=${ADB_Password} --XMLTestFile=${xmlFile}"
-                                    } catch (Exception e) {
-                                        currentBuild.result = 'FAILURE'
-                                        buildStageStatus = false
-                                    }
-                                }
-                            }
-                            sh "python3 main.py --mode=FinalizeHtml --finalStatus=${buildStageStatus} --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password}"
-                        }
-                    }
-                }
-            }
-        }
-        stage('Log Collection') {
-            parallel {
-                stage('Log Collection (eNB - Build)') {
-                    steps {
-                        withCredentials([
-                             [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (eNB - Build)\u001B[0m'
-                            sh "python3 ci-scripts/main.py --mode=LogCollectBuild --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-
-                            echo '\u2705 \u001B[32mLog Transfer (eNB - Build)\u001B[0m'
-                            sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/build.log.zip ./build.log.${env.BUILD_ID}.zip || true"
-                        }
-                        script {
-                            if(fileExists("build.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "build.log.${env.BUILD_ID}.zip"
-                            }
-                        }
-                    }
-                }
-                stage('Log Collection (eNB - Run)') {
-                    steps {
-                        withCredentials([
-                             [$class: 'UsernamePasswordMultiBinding', credentialsId: "${params.eNB_Credentials}", usernameVariable: 'eNB_Username', passwordVariable: 'eNB_Password']
-                        ]) {
-                            echo '\u2705 \u001B[32mLog Collection (eNB - Run)\u001B[0m'
-                            sh "python3 ci-scripts/main.py --mode=LogCollecteNB --eNBIPAddress=${params.eNB_IPAddress} --eNBUserName=${eNB_Username} --eNBPassword=${eNB_Password} --eNBSourceCodePath=${params.eNB_SourceCodePath}"
-
-                            echo '\u2705 \u001B[32mLog Transfer (eNB - Run)\u001B[0m'
-                            sh "sshpass -p \'${eNB_Password}\' scp -o 'StrictHostKeyChecking no' -o 'ConnectTimeout 10' ${eNB_Username}@${params.eNB_IPAddress}:${eNB_SourceCodePath}/cmake_targets/enb.log.zip ./enb.log.${env.BUILD_ID}.zip || true"
-                        }
-                        script {
-                            if(fileExists("enb.log.${env.BUILD_ID}.zip")) {
-                                archiveArtifacts "enb.log.${env.BUILD_ID}.zip"
-                            }
-                            if(fileExists("ci-scripts/test_results.html")) {
-                                sh "mv ci-scripts/test_results.html test_results-${JOB_NAME}.html"
-                                sh "sed -i -e 's#TEMPLATE_JOB_NAME#${JOB_NAME}#' -e 's@build #TEMPLATE_BUILD_ID@build #${BUILD_ID}@' -e 's#Build-ID: TEMPLATE_BUILD_ID#Build-ID: <a href=\"${BUILD_URL}\">${BUILD_ID}</a>#' -e 's#TEMPLATE_STAGE_NAME#${testStageName}#' test_results-${JOB_NAME}.html"
-                                archiveArtifacts "test_results-${JOB_NAME}.html"
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    post {
-        always {
-            script {
-                if (params.pipelineZipsConsoleLog != null) {
-                    if (params.pipelineZipsConsoleLog) {
-                        echo "Archiving Jenkins console log"
-                        sh "wget --no-check-certificate --no-proxy ${env.JENKINS_URL}/job/${env.JOB_NAME}/${env.BUILD_ID}/consoleText -O consoleText.log || true"
-                        sh "zip -m consoleText.log.${env.BUILD_ID}.zip consoleText.log || true"
-                        if(fileExists("consoleText.log.${env.BUILD_ID}.zip")) {
-                            archiveArtifacts "consoleText.log.${env.BUILD_ID}.zip"
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/ci-scripts/Jenkinsfile-tmp-multi-enb-nsa b/ci-scripts/Jenkinsfile-tmp-multi-enb-nsa
index 6e987a5e38547155cd37ff4d52d27e40dc2f0a60..1d977f00d35c809100b8a665a0ee9bcfc59fbdff 100644
--- a/ci-scripts/Jenkinsfile-tmp-multi-enb-nsa
+++ b/ci-scripts/Jenkinsfile-tmp-multi-enb-nsa
@@ -34,10 +34,9 @@ def buildStageStatus = true
 // Name of the test stage
 def testStageName = params.pipelineTestStageName
 
-// Name of the phone/server resource
-def ciSmartPhonesResource1 = params.SmartPhonesResource1
-def ciSmartPhonesResource2 = params.SmartPhonesResource2
-def ciSmartPhonesResource3 = params.SmartPhonesResource3
+def lockResources = []
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{lockResources += [resource: it.trim()]}
 
 // Global Parameters. Normally they should be populated when the master job
 // triggers the slave job with parameters
@@ -56,7 +55,7 @@ pipeline {
   options {
     disableConcurrentBuilds()
     ansiColor('xterm')
-    lock(extra: [[resource: ciSmartPhonesResource2],[resource: ciSmartPhonesResource1]],resource: ciSmartPhonesResource3)
+    lock(extra: lockResources)
   }
   stages {
     stage("Build Init") {
@@ -82,15 +81,10 @@ pipeline {
             testStageName = 'Template Test Stage'
           }
 
-          if (params.SmartPhonesResource1 == null) {
+          if (params.LockResources == null) {
+            echo "no LockResources given"
             allParametersPresent = false
           }
-          if (params.SmartPhonesResource2 == null) {
-            allParametersPresent = false
-          }
-          if (params.SmartPhonesResource3 == null) {
-            allParametersPresent = false
-          }          
           // 1st eNB parameters
           if (params.eNB_IPAddress == null) {
             allParametersPresent = false
diff --git a/ci-scripts/Jenkinsfile-tmp-ran b/ci-scripts/Jenkinsfile-tmp-ran
index a0d0f13871ac21770dd9f971d6e1c28c989aecd0..271b9d042a545515f5da88d5c31f8ef2c45d41fc 100644
--- a/ci-scripts/Jenkinsfile-tmp-ran
+++ b/ci-scripts/Jenkinsfile-tmp-ran
@@ -33,11 +33,9 @@ def buildStageStatus = true
 // Name of the test stage
 def testStageName = params.pipelineTestStageName
 
-// Name of the phone resource
-def ciSmartPhoneResource = params.smartphonesResource
-
-// Name of the phone resource
-def ciEpcResource = params.epcResource
+def lockResources = []
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{lockResources += [resource: it.trim()]}
 
 // Terminate Status
 def termUE = 0
@@ -67,7 +65,7 @@ pipeline {
     options {
         disableConcurrentBuilds()
         ansiColor('xterm')
-        lock(extra: [[resource: ciEpcResource]], resource: ciSmartPhoneResource)
+        lock(extra: lockResources)
     }
     // the following parameter options are commented out so it shows the ones
     // that you SHALL have to run the job.
@@ -79,7 +77,7 @@ pipeline {
         string(name: 'pythonTestXmlFile', defaultValue: 'enb_usrpB210_band7_50PRB.xml', description: 'Location of the Test XML to be run')
         string(name: 'pipelineTestStageName', defaultValue: 'Test COTS-UE - OAI eNB - LTEBOX EPC', description: 'Naming of the Test Stage')
         booleanParam(name: 'pipelineZipsConsoleLog', defaultValue: 'True', description: 'If true, the pipeline script retrieves the job console log, zips it and archives it as artifact')
-        string(name: 'smartphonesResource', defaultValue: 'CI-Bench-1-Phones', description: 'Lockeable Resource to prevent multiple jobs to run simultaneously with the same resource')
+        string(name: 'LockResources', defaultValue: 'CI-Bench-1-Phones', description: 'Comma-separated list of Lockable Resources to prevent multiple jobs from running simultaneously with the same resources')
 
         //eNB parameters
         string(name: 'eNB_IPAddress', defaultValue: '192.168.XX.XX', description: 'IP Address of eNB')
@@ -115,8 +113,9 @@ pipeline {
                         testStageName = 'Template Test Stage'
                     }
 
-                    if (params.smartphonesResource == null) {
-                        allParametersPresent = false
+                    if (params.LockResources == null) {
+                      echo "no LockResources given"
+                      allParametersPresent = false
                     }
                     if (params.eNB_IPAddress == null) {
                         allParametersPresent = false
diff --git a/ci-scripts/Jenkinsfile-tmp-ue b/ci-scripts/Jenkinsfile-tmp-ue
index e5e456cf7fd1ebb4efeaec07e8ad9a0c4dcd9b21..5a1e28d9f9cd8ae6f66a5c69ba60372f1878b458 100644
--- a/ci-scripts/Jenkinsfile-tmp-ue
+++ b/ci-scripts/Jenkinsfile-tmp-ue
@@ -33,8 +33,9 @@ def buildStageStatus = true
 // Name of the test stage
 def testStageName = params.pipelineTestStageName
 
-// Name of the phone resource
-def ciSmartPhoneResource = params.smartphonesResource
+def lockResources = []
+if (params.LockResources != null && params.LockResources.trim().length() > 0)
+  params.LockResources.trim().split(",").each{lockResources += [resource: it.trim()]}
 
 // Terminate Status
 def termUE = 0
@@ -64,7 +65,7 @@ pipeline {
     options {
         disableConcurrentBuilds()
         ansiColor('xterm')
-        lock (ciSmartPhoneResource)
+        lock(extra: lockResources)
     }
     stages {
         stage ("Verify Parameters") {
@@ -83,8 +84,9 @@ pipeline {
                         testStageName = 'Template Test Stage'
                     }
 
-                    if (params.smartphonesResource == null) {
-                        allParametersPresent = false
+                    if (params.LockResources == null) {
+                      echo "no LockResources given"
+                      allParametersPresent = false
                     }
                     if (params.UE_IPAddress == null) {
                         allParametersPresent = false
diff --git a/ci-scripts/checkGitLabMergeRequestLabels.sh b/ci-scripts/checkGitLabMergeRequestLabels.sh
index 7e4ea1c7de51f2c8b07865d9274401476493e77e..41220ab770015b42f4b2a60a79fe217e1e9a47be 100755
--- a/ci-scripts/checkGitLabMergeRequestLabels.sh
+++ b/ci-scripts/checkGitLabMergeRequestLabels.sh
@@ -73,36 +73,50 @@ done
 
 LABELS=`curl --silent "https://gitlab.eurecom.fr/api/v4/projects/oai%2Fopenairinterface5g/merge_requests/$MERGE_REQUEST_ID" | jq '.labels' || true`
 
-IS_MR_BUILD_ONLY=`echo $LABELS | grep -c BUILD-ONLY || true`
-IS_MR_CI=`echo $LABELS | grep -c CI || true`
-IS_MR_4G=`echo $LABELS | grep -c 4G-LTE || true`
-IS_MR_5G=`echo $LABELS | grep -c 5G-NR || true`
+IS_MR_DOCUMENTATION=`echo $LABELS | grep -ic documentation`
+IS_MR_BUILD_ONLY=`echo $LABELS | grep -c BUILD-ONLY`
+IS_MR_CI=`echo $LABELS | grep -c CI`
+IS_MR_4G=`echo $LABELS | grep -c 4G-LTE`
+IS_MR_5G=`echo $LABELS | grep -c 5G-NR`
 
-# First case: none is present! No CI
-if [ $IS_MR_BUILD_ONLY -eq 0 ] && [ $IS_MR_CI -eq 0 ] && [ $IS_MR_4G -eq 0 ] && [ $IS_MR_5G -eq 0 ]
+# none is present! No CI
+if [ $IS_MR_BUILD_ONLY -eq 0 ] && [ $IS_MR_CI -eq 0 ] && [ $IS_MR_4G -eq 0 ] && [ $IS_MR_5G -eq 0 ] && [ $IS_MR_DOCUMENTATION -eq 0 ]
 then
     echo "NONE"
     exit 0
 fi
 
-# Second case: Build-Only
-if [ $IS_MR_BUILD_ONLY -eq 1 ]
+# 4G and 5G or CI labels: run everything (4G, 5G)
+if [ $IS_MR_4G -eq 1 ] && [ $IS_MR_5G -eq 1 ] || [ $IS_MR_CI -eq 1 ]
 then
-    echo "BUILD-ONLY"
+    echo "FULL"
     exit 0
 fi
 
-# Third case: CI or 4G label --> Full CI run
-if [ $IS_MR_4G -eq 1 ] || [ $IS_MR_CI -eq 1 ] 
+# 4G is present: run only 4G
+if [ $IS_MR_4G -eq 1 ]
 then
-    echo "FULL"
-    exit 0
+    echo "SHORTEN-4G"
+    exit 1
 fi
 
-# Fourth case: 5G label
-if [ $IS_MR_BUILD_ONLY -eq 0 ] && [ $IS_MR_CI -eq 0 ] && [ $IS_MR_4G -eq 0 ] && [ $IS_MR_5G -eq 1 ]
+# 5G is present: run only 5G
+if [ $IS_MR_5G -eq 1 ]
 then
     echo "SHORTEN-5G"
     exit 0
 fi
 
+# BUILD-ONLY is present: only build stages
+if [ $IS_MR_BUILD_ONLY -eq 1 ]
+then
+    echo "BUILD-ONLY"
+    exit 0
+fi
+
+# Documentation is present: don't do anything
+if [ $IS_MR_DOCUMENTATION -eq 1 ]
+then
+    echo "documentation"
+    exit 1
+fi
diff --git a/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf b/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
index 2911e0f515e62047c862f237b9ddeb9df8915ed5..d9e4ed0f3add783394e5834b642f9a3466af256b 100644
--- a/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
+++ b/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
@@ -15,7 +15,7 @@ gNBs =
     // Tracking area code, 0x0000 and 0xfffe are reserved values
     tracking_area_code  =  1;
 
-    plmn_list = ({ mcc = 208; mnc = 97; mnc_length = 2; snssaiList = ({ sst = 1 }) });
+    plmn_list = ({ mcc = 208; mnc = 97; mnc_length = 2; snssaiList = ({ sst = 1, sd = 0x010203 }) });
 
     nr_cellid = 12345678L
 
diff --git a/ci-scripts/datalog_rt_stats.1x1.60.yaml b/ci-scripts/datalog_rt_stats.1x1.60.yaml
index 264b38775a966ed745eb27d1940211a8e3e62c57..492a3bd8bf7aefb78e1241dc2d8ef28a5dbc4c95 100644
--- a/ci-scripts/datalog_rt_stats.1x1.60.yaml
+++ b/ci-scripts/datalog_rt_stats.1x1.60.yaml
@@ -13,12 +13,12 @@ Ref :
   feptx_total : 59.0
   L1 Tx processing : 210.0
   DLSCH encoding : 129.0
-  L1 Rx processing : 287.0
+  L1 Rx processing : 359.0
   PUSCH inner-receiver : 166.0
-  PUSCH decoding : 176.0
-  Schedule Response : 42.0
+  PUSCH decoding : 270.0
+  Schedule Response : 52.0
   DL & UL scheduling timing : 13.0
-  UL Indication : 55.0
+  UL Indication : 65.0
 Threshold :
   feprx : 1.25
   feptx_prec : 1.25
diff --git a/ci-scripts/datalog_rt_stats.default.yaml b/ci-scripts/datalog_rt_stats.default.yaml
index cddb26d5312498e0b8f765e8614263b7a2fd9284..079da93fab3c92a071e60cc0ea3f42d69ed0160b 100644
--- a/ci-scripts/datalog_rt_stats.default.yaml
+++ b/ci-scripts/datalog_rt_stats.default.yaml
@@ -13,12 +13,12 @@ Ref :
   feptx_total : 62.0
   L1 Tx processing : 170.0
   DLSCH encoding : 118.0
-  L1 Rx processing : 223.0
+  L1 Rx processing : 275.0
   PUSCH inner-receiver : 107.0
-  PUSCH decoding : 170.0
-  Schedule Response : 15.0
+  PUSCH decoding : 246.0
+  Schedule Response : 22.0
   DL & UL scheduling timing : 10.0
-  UL Indication : 27.0
+  UL Indication : 34.0
 Threshold :
   feprx : 1.25
   feptx_prec : 1.25
diff --git a/ci-scripts/main.py b/ci-scripts/main.py
index b5d0ef2d97e79bc67796fb9a05f859ab23712d03..eb9e60a83e8161e98f2c6343af343b87152559ee 100644
--- a/ci-scripts/main.py
+++ b/ci-scripts/main.py
@@ -414,10 +414,10 @@ def GetParametersFromXML(action):
 			CONTAINERS.nb_healthy[0] = int(string_field)
 		string_field=test.findtext('d_retx_th')
 		if (string_field is not None):
-			CONTAINERS.ran_checkers['d_retx_th']= string_field
+			CONTAINERS.ran_checkers['d_retx_th'] = [float(x) for x in string_field.split(',')]
 		string_field=test.findtext('u_retx_th')
 		if (string_field is not None):
-			CONTAINERS.ran_checkers['u_retx_th']= string_field
+			CONTAINERS.ran_checkers['u_retx_th'] = [float(x) for x in string_field.split(',')]
 
 	elif action == 'PingFromContainer':
 		string_field = test.findtext('container_name')
diff --git a/ci-scripts/oai-ci-vm-tool b/ci-scripts/oai-ci-vm-tool
index 73d95aadb9ee28f899d8968e4ea7bceda6a39b56..7fec088c1a959f1b69847454673bdb516d6acaf2 100755
--- a/ci-scripts/oai-ci-vm-tool
+++ b/ci-scripts/oai-ci-vm-tool
@@ -177,7 +177,7 @@ function variant__v1__enb_usrp {
 
 function variant__v2__basic_sim {
     NB_PATTERN_FILES=11
-    BUILD_OPTIONS="--eNB --UE"
+    BUILD_OPTIONS="--eNB --UE --noavx512"
     VM_MEMORY=8192
     RUN_OPTIONS="complex"
 }
@@ -201,28 +201,28 @@ function variant__v5__gnb_usrp {
     VM_MEMORY=10240
     VM_CPU=8
     NB_PATTERN_FILES=6
-    BUILD_OPTIONS="--gNB -w USRP"
+    BUILD_OPTIONS="--gNB -w USRP --noavx512"
 }
 
 function variant__v6__nr_ue_usrp {
     VM_MEMORY=4096
     VM_CPU=4
     NB_PATTERN_FILES=6
-    BUILD_OPTIONS="--nrUE -w USRP"
+    BUILD_OPTIONS="--nrUE -w USRP --noavx512"
 }
 
 function variant__v7__enb_ethernet {
     VM_MEMORY=4096
     ARCHIVES_LOC=enb_eth
     NB_PATTERN_FILES=6
-    BUILD_OPTIONS="--eNB -w USRP"
+    BUILD_OPTIONS="--eNB -w USRP --noavx512"
 }
 
 function variant__v8__ue_ethernet {
     VM_MEMORY=4096
     ARCHIVES_LOC=ue_eth
     NB_PATTERN_FILES=10
-    BUILD_OPTIONS="--UE -w USRP"
+    BUILD_OPTIONS="--UE -w USRP --noavx512"
 }
 
 function variant__v10__flexran_rtc {
diff --git a/ci-scripts/ran.py b/ci-scripts/ran.py
index b9e7eb476efa5035be86df4fc8bc682217371c75..1d8186264751e0092b63fc334103d5cbc79ad72a 100644
--- a/ci-scripts/ran.py
+++ b/ci-scripts/ran.py
@@ -975,29 +975,24 @@ class RANManagement():
 				if result is not None:
 					mbmsRequestMsg += 1
 			#FR1 NSA test : add new markers to make sure gNB is used
-			result = re.search('\[gNB [0-9]+\]\[RAPROC\] PUSCH with TC_RNTI 0x[0-9a-fA-F]+ received correctly, adding UE MAC Context UE_id [0-9]+\/RNTI 0x[0-9a-fA-F]+', str(line))
+			result = re.search('\[gNB [0-9]+\]\[RAPROC\] PUSCH with TC_RNTI 0x[0-9a-fA-F]+ received correctly, adding UE MAC Context RNTI 0x[0-9a-fA-F]+', str(line))
 			if result is not None:
 				NSA_RAPROC_PUSCH_check = 1
-			#dlsch and ulsch statistics
-			#keys below are the markers we are loooking for, loop over this keys list
-			#everytime these markers are found in the log file, the previous ones are overwritten in the dict
-			#eventually we record and print only the last occurence 
-			keys = {'UE ID','dlsch_rounds','dlsch_total_bytes','ulsch_rounds','ulsch_total_bytes_scheduled'}
+
+			# Collect information on UE DLSCH and ULSCH statistics
+			keys = {'dlsch_rounds','dlsch_total_bytes','ulsch_rounds','ulsch_total_bytes_scheduled'}
 			for k in keys:
 				result = re.search(k, line)
-				if result is not None:
-					ue_prefix = 'ue0'
-					ue_res = re.search('UE ID 1|UE 1:', line)
-					if ue_res is not None:
-						ue_prefix = 'ue1'
-					ue_res = re.search('UE ID 2|UE 2:', line)
-					if ue_res is not None:
-						ue_prefix = 'ue2'
-					ue_res = re.search('UE ID 3|UE 3:', line)
-					if ue_res is not None:
-						ue_prefix = 'ue3'
-					#remove 1- all useless char before relevant info (ulsch or dlsch) 2- trailing char
-					dlsch_ulsch_stats[ue_prefix+k]=re.sub(r'^.*\]\s+', r'' , line.rstrip())
+				if result is None:
+					continue
+				result = re.search('UE (?:RNTI )?([0-9a-f]{4})', line)
+				if result is None:
+					logging.error(f'did not find RNTI while matching key {k}')
+					continue
+				rnti = result.group(1)
+
+				#remove 1- all useless char before relevant info (ulsch or dlsch) 2- trailing char
+				dlsch_ulsch_stats[rnti+k]=re.sub(r'^.*\]\s+', r'' , line.rstrip())
 
 			result = re.search('Received NR_RRCReconfigurationComplete from UE', str(line))
 			if result is not None:
@@ -1155,12 +1150,10 @@ class RANManagement():
 			#checker
 			if (len(dlsch_ulsch_stats)!=0) and (len(checkers)!=0):
 				if 'd_retx_th' in checkers:
-					checkers['d_retx_th'] = [float(x) for x in checkers['d_retx_th'].split(',')]
 					dlsch_checker_status = list(0 for i in checkers['d_retx_th'])#status 0 / -1
 					d_perc_retx = list(0 for i in checkers['d_retx_th'])#results in %
 
 				if 'u_retx_th' in checkers:
-					checkers['u_retx_th'] = [float(x) for x in checkers['u_retx_th'].split(',')]
 					ulsch_checker_status = list(0 for i in checkers['u_retx_th'])
 					u_perc_retx = list(0 for i in checkers['u_retx_th'])
 
diff --git a/ci-scripts/xml_files/fr1_sa_amarisoft_ue_1x.xml b/ci-scripts/xml_files/fr1_sa_amarisoft_ue_1x.xml
index ea031a0174ef631c6a063fc9b276b133ec9d3a9e..c5bbf97a1eab1cb23411e271e68ae9d662128563 100644
--- a/ci-scripts/xml_files/fr1_sa_amarisoft_ue_1x.xml
+++ b/ci-scripts/xml_files/fr1_sa_amarisoft_ue_1x.xml
@@ -45,7 +45,7 @@
 	<testCase id="040000">
 		<class>Initialize_eNB</class>
 		<desc>Initialize gNB</desc>
-		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.asue.conf --sa -q --usrp-tx-thread-config 1 --T_stdout 2 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
+		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.asue.conf --sa -q --usrp-tx-thread-config 1 --T_stdout 2 --tune-offset 30000000 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
 		<eNB_instance>0</eNB_instance>
 		<eNB_serverId>0</eNB_serverId>
 		<air_interface>nr</air_interface>
diff --git a/ci-scripts/xml_files/fr1_sa_quectel.xml b/ci-scripts/xml_files/fr1_sa_quectel.xml
index fd983d8835b15c63e1d64a1c2113e893334eb559..9f2661f5c68e87749a555db564c56c579050f11f 100644
--- a/ci-scripts/xml_files/fr1_sa_quectel.xml
+++ b/ci-scripts/xml_files/fr1_sa_quectel.xml
@@ -64,7 +64,7 @@
 	<testCase id="040000">
 		<class>Initialize_eNB</class>
 		<desc>Initialize gNB</desc>
-		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.ddsuu.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --T_stdout 2 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
+		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.ddsuu.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --T_stdout 2 --tune-offset 30000000 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
 		<eNB_instance>0</eNB_instance>
 		<eNB_serverId>0</eNB_serverId>
 		<air_interface>nr</air_interface>
diff --git a/ci-scripts/xml_files/fr1_sa_quectel_162prb.xml b/ci-scripts/xml_files/fr1_sa_quectel_162prb.xml
index f426bd6035416ac5aa7ae32feb663710d5b940db..f43347d7b677d6abc7095e47f765bc677f4951b4 100644
--- a/ci-scripts/xml_files/fr1_sa_quectel_162prb.xml
+++ b/ci-scripts/xml_files/fr1_sa_quectel_162prb.xml
@@ -62,7 +62,7 @@
 	<testCase id="042000">
 		<class>Initialize_eNB</class>
 		<desc>Initialize gNB</desc>
-		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.162PRB.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
+		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.162PRB.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --tune-offset 30000000 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
 		<eNB_instance>0</eNB_instance>
 		<eNB_serverId>0</eNB_serverId>
 		<air_interface>nr</air_interface>
diff --git a/ci-scripts/xml_files/fr1_sa_quectel_stages.xml b/ci-scripts/xml_files/fr1_sa_quectel_stages.xml
index 48a9b60e7dbe6512245e1627ab952323c70965cb..f5a07d7589995bad82e0d11b1742edc22f25935d 100644
--- a/ci-scripts/xml_files/fr1_sa_quectel_stages.xml
+++ b/ci-scripts/xml_files/fr1_sa_quectel_stages.xml
@@ -68,7 +68,7 @@
 	<testCase id="041000">
 		<class>Initialize_eNB</class>
 		<desc>Initialize gNB</desc>
-		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
+		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf --sa -q --usrp-tx-thread-config 1 --tune-offset 30000000 --log_config.global_log_options level,nocolor,time</Initialize_eNB_args>
 		<eNB_instance>0</eNB_instance>
 		<eNB_serverId>0</eNB_serverId>
 		<air_interface>nr</air_interface>
diff --git a/ci-scripts/xml_files/gnb_usrp_build.xml b/ci-scripts/xml_files/gnb_usrp_build.xml
index f8ec79764639342c525431c55bf04099b264de68..a2b14f740c5ffda9d5d8e1a0d6a3ca33f68f4fba 100644
--- a/ci-scripts/xml_files/gnb_usrp_build.xml
+++ b/ci-scripts/xml_files/gnb_usrp_build.xml
@@ -34,7 +34,7 @@
 		<mode>TesteNB</mode>	
 		<class>Build_eNB</class>
 		<desc>Build gNB (USRP)</desc>
-		<Build_eNB_args>--gNB -w USRP --ninja --cmake-opt -DBoost_INCLUDE_DIR=/usr/include/boost169 --noavx512</Build_eNB_args>
+		<Build_eNB_args>--gNB -w USRP --ninja --cmake-opt -DBoost_INCLUDE_DIR=/usr/include/boost169 --cmake-opt -DCUDA_HOST_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/gcc -c -P</Build_eNB_args>
 		<forced_workspace_cleanup>True</forced_workspace_cleanup>
 	</testCase>
 
diff --git a/ci-scripts/yaml_files/5g_l2sim_tdd/README.md b/ci-scripts/yaml_files/5g_l2sim_tdd/README.md
index c3f631cf7682620a603ab2dba67ef0476bb43e5f..9578d037ca1dba23901000979a2bba77ad75eb6c 100644
--- a/ci-scripts/yaml_files/5g_l2sim_tdd/README.md
+++ b/ci-scripts/yaml_files/5g_l2sim_tdd/README.md
@@ -33,10 +33,10 @@ Now pull images.
 
 ```bash
 $ docker pull mysql:5.7
-$ docker pull oaisoftwarealliance/oai-amf:latest
-$ docker pull oaisoftwarealliance/oai-nrf:latest
-$ docker pull oaisoftwarealliance/oai-smf:latest
-$ docker pull oaisoftwarealliance/oai-spgwu-tiny:latest
+$ docker pull oaisoftwarealliance/oai-amf:develop
+$ docker pull oaisoftwarealliance/oai-nrf:develop
+$ docker pull oaisoftwarealliance/oai-smf:develop
+$ docker pull oaisoftwarealliance/oai-spgwu-tiny:develop
 
 $ docker pull oaisoftwarealliance/oai-gnb:develop
 $ docker pull oaisoftwarealliance/oai-nr-ue:develop
@@ -46,10 +46,10 @@ $ docker pull oaisoftwarealliance/proxy:latest
 And **re-tag** them for tutorials' docker-compose file to work.
 
 ```bash
-$ docker image tag oaisoftwarealliance/oai-amf:latest oai-amf:latest
-$ docker image tag oaisoftwarealliance/oai-nrf:latest oai-nrf:latest
-$ docker image tag oaisoftwarealliance/oai-smf:latest oai-smf:latest
-$ docker image tag oaisoftwarealliance/oai-spgwu-tiny:latest oai-spgwu-tiny:latest
+$ docker image tag oaisoftwarealliance/oai-amf:develop oai-amf:august-dev
+$ docker image tag oaisoftwarealliance/oai-nrf:develop oai-nrf:august-dev
+$ docker image tag oaisoftwarealliance/oai-smf:develop oai-smf:august-dev
+$ docker image tag oaisoftwarealliance/oai-spgwu-tiny:develop oai-spgwu-tiny:august-dev
 
 $ docker image tag oaisoftwarealliance/oai-gnb:develop oai-gnb:develop
 $ docker image tag oaisoftwarealliance/oai-nr-ue:develop oai-nr-ue:develop
diff --git a/ci-scripts/yaml_files/5g_rfsimulator/README.md b/ci-scripts/yaml_files/5g_rfsimulator/README.md
index 36169dca8b57c5328891fde71e0d0d6906cd0d8d..f457d4c3f65a97616de57b7ce3570953ee3a8ba9 100644
--- a/ci-scripts/yaml_files/5g_rfsimulator/README.md
+++ b/ci-scripts/yaml_files/5g_rfsimulator/README.md
@@ -50,10 +50,10 @@ Now pull images.
 
 ```bash
 $ docker pull mysql:5.7
-$ docker pull oaisoftwarealliance/oai-amf:latest
-$ docker pull oaisoftwarealliance/oai-nrf:latest
-$ docker pull oaisoftwarealliance/oai-smf:latest
-$ docker pull oaisoftwarealliance/oai-spgwu-tiny:latest
+$ docker pull oaisoftwarealliance/oai-amf:develop
+$ docker pull oaisoftwarealliance/oai-nrf:develop
+$ docker pull oaisoftwarealliance/oai-smf:develop
+$ docker pull oaisoftwarealliance/oai-spgwu-tiny:develop
 
 $ docker pull oaisoftwarealliance/oai-gnb:develop
 $ docker pull oaisoftwarealliance/oai-nr-ue:develop
@@ -62,10 +62,10 @@ $ docker pull oaisoftwarealliance/oai-nr-ue:develop
 And **re-tag** them for tutorials' docker-compose file to work.
 
 ```bash
-$ docker image tag oaisoftwarealliance/oai-amf:latest oai-amf:latest
-$ docker image tag oaisoftwarealliance/oai-nrf:latest oai-nrf:latest
-$ docker image tag oaisoftwarealliance/oai-smf:latest oai-smf:latest
-$ docker image tag oaisoftwarealliance/oai-spgwu-tiny:latest oai-spgwu-tiny:latest
+$ docker image tag oaisoftwarealliance/oai-amf:develop oai-amf:august-dev
+$ docker image tag oaisoftwarealliance/oai-nrf:develop oai-nrf:august-dev
+$ docker image tag oaisoftwarealliance/oai-smf:develop oai-smf:august-dev
+$ docker image tag oaisoftwarealliance/oai-spgwu-tiny:develop oai-spgwu-tiny:august-dev
 
 $ docker image tag oaisoftwarealliance/oai-gnb:develop oai-gnb:develop
 $ docker image tag oaisoftwarealliance/oai-nr-ue:develop oai-nr-ue:develop
diff --git a/cmake_targets/autotests/test_case_list.xml b/cmake_targets/autotests/test_case_list.xml
index 88654ac9b54dc53054038be6f5336141396502f4..8158cd4e9ec3b58793d9f48f590e6bfc96754a16 100755
--- a/cmake_targets/autotests/test_case_list.xml
+++ b/cmake_targets/autotests/test_case_list.xml
@@ -7936,7 +7936,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf --rf-config-file $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_5MHz_rx19dB_txfull.ini   </eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf --rf-config-file $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_5MHz_rx19dB_txfull.ini   </eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
@@ -8012,7 +8012,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf  --rf-config-file    $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_10MHz_rx19dB_txfull.ini   </eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf  --rf-config-file    $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_10MHz_rx19dB_txfull.ini   </eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
@@ -8087,7 +8087,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf     --rf-config-file $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_20MHz_rx19dB_txfull.ini </eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf     --rf-config-file $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_20MHz_rx19dB_txfull.ini </eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
@@ -8162,7 +8162,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf    --rf-config-file $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_5MHz_rx19dB_txfull.ini </eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf    --rf-config-file $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_5MHz_rx19dB_txfull.ini </eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
@@ -8236,7 +8236,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf    --rf-config-file $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_10MHz_rx19dB_txfull.ini</eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf    --rf-config-file $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_10MHz_rx19dB_txfull.ini</eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
@@ -8310,7 +8310,7 @@ c
     <eNB_pre_exec>$OPENAIR_DIR/cmake_targets/autotests/tools/free_mem.bash</eNB_pre_exec>
     <eNB_pre_exec_args></eNB_pre_exec_args>
     <eNB_main_exec>sudo -E chrt -f 99 $OPENAIR_DIR/cmake_targets/ran_build/build/lte-softmodem</eNB_main_exec>
-    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf     --rf-config-file $OPENAIR_DIR/targets/ARCH/LMSSDR/enb_sodera_highband_20MHz_rx19dB_txfull.ini</eNB_main_exec_args>
+    <eNB_main_exec_args> -O $OPENAIR_DIR/targets/PROJECTS/GENERIC-LTE-EPC/CONF/enb.band7.tm1.lmssdr.conf     --rf-config-file $OPENAIR_DIR/sdr/LMSSDR/enb_sodera_highband_20MHz_rx19dB_txfull.ini</eNB_main_exec_args>
     <eNB_traffic_exec></eNB_traffic_exec>
     <eNB_traffic_exec_args></eNB_traffic_exec_args>
     <eNB_search_expr_true></eNB_search_expr_true>
diff --git a/cmake_targets/build_oai b/cmake_targets/build_oai
index ac15f5866295d9bcde9824719011fedea1730027..104c83de23dd6aaa0a5dab3e9a310f26f81a9cd4 100755
--- a/cmake_targets/build_oai
+++ b/cmake_targets/build_oai
@@ -55,7 +55,8 @@ BUILD_COVERITY_SCAN=0
 DISABLE_HARDWARE_DEPENDENCY="False"
 CMAKE_BUILD_TYPE="RelWithDebInfo"
 CMAKE_CMD="$CMAKE"
-NOAVX512="False"
+AVX512="True"
+AVX2="True"
 BUILD_ECLIPSE=0
 NR="False"
 OPTIONAL_LIBRARIES="telnetsrv enbscope uescope nrscope"
@@ -127,11 +128,9 @@ Options:
 -x | --xforms
    Will compile with software oscilloscope features
 --verbose-ci
-  Compile with verbose instructions in CI Docker env
+   Compile with verbose instructions in CI Docker env
 --verbose-compile
    Shows detailed compilation instructions in makefile
---cflags_processor
-   Manually Add CFLAGS of processor if they are not detected correctly by script. Only add these flags if you know your processor supports them. Example flags: -msse3 -msse4.1 -msse4.2 -mavx2
 --build-doxygen
    Builds doxygen based documentation.
 --build-coverity-scan
@@ -159,10 +158,10 @@ Options:
    Build eclipse project files.
 --build-lib <libraries>
    Build optional shared library, <libraries> can be one or several of $OPTIONAL_LIBRARIES or \"all\"
---usrp-recplay
-   Build for I/Q record-playback modes
 --noavx512
-   Build without AVX512 if it is present on CPU
+   Disable AVX512 intrinsics whatever processor capability is
+--noavx2
+   Disable AVX2 intrinsics if processor supports it or use SIMDE emulation
 -k | --skip-shared-libraries
    Skip build for shared libraries to reduce compilation time when building frequently for debugging purposes
 --ninja
@@ -349,10 +348,6 @@ function main() {
             VERBOSE_COMPILE=1
             echo_info "Will compile with verbose instructions"
             shift;;
-       --cflags_processor)
-            CMAKE_CMD="$CMAKE_CMD -DCFLAGS_PROCESSOR_USER=\"$2\""
-            echo_info "Setting CPU FLAGS from USER to: $2"
-            shift 2;;
        --build-doxygen)
 	        BUILD_DOXYGEN=1
             echo_info "Will build doxygen support"
@@ -430,8 +425,12 @@ function main() {
             fi
             shift 2;;		
         --noavx512)
-            NOAVX512="True"
-            echo_info "Disabling AVX512"
+            AVX512="False"
+            echo_info "Disabling AVX512 instructions"
+            shift 1;;
+        --noavx2)
+            AVX2="False"
+            echo_info "Disabling AVX2 instructions"
             shift 1;;
         -k | --skip-shared-libraries)
             SKIP_SHARED_LIB_FLAG="True"
@@ -614,9 +613,24 @@ function main() {
   cd  $DIR/$BUILD_DIR/build
   if [[ ${#CMAKE_C_FLAGS[@]} > 0 ]]; then CMAKE_CMD="$CMAKE_CMD -DCMAKE_C_FLAGS=\"${CMAKE_C_FLAGS[*]}\""; fi
   if [[ ${#CMAKE_CXX_FLAGS[@]} > 0 ]]; then CMAKE_CMD="$CMAKE_CMD -DCMAKE_CXX_FLAGS=\"${CMAKE_CXX_FLAGS[*]}\""; fi
+
+  # if --noavx512 is not specified the AVX512 equals "True" by default
+  # override AVX512 to "False" if processor does not have avx512
+  # this avoids having to specify --noavx512 systematically
+  if [ -f /proc/cpuinfo ]; then
+      if grep -q avx512 /proc/cpuinfo
+      then
+        echo_info "Processor has avx512 intrinsics"
+      else
+        echo_info "Processor does not have avx512 intrinsics"
+        echo_info "Forcing AVX512 to False"
+	AVX512="False"
+      fi
+  fi
+
   # for historical reasons we build in a subdirectory cmake_targets/XYZ/build,
   # e.g., cmake_targets/ran_build/build, hence the ../../..
-  CMAKE_CMD="$CMAKE_CMD -DNOAVX512=\"${NOAVX512[*]}\" ../../.."
+  CMAKE_CMD="$CMAKE_CMD -DAVX512=\"${AVX512[*]}\" -DAVX2=\"${AVX2[*]}\" ../../.."
   echo_info "running $CMAKE_CMD"
   eval $CMAKE_CMD
   
diff --git a/cmake_targets/tools/build_helper b/cmake_targets/tools/build_helper
index 7ab868fb09728278b4950a3ffcbab1bd0bbc8c10..8aef6224a9a9a204b4ffc9c177a0ce92491c4551 100755
--- a/cmake_targets/tools/build_helper
+++ b/cmake_targets/tools/build_helper
@@ -123,6 +123,7 @@ check_supported_distribution() {
         "rhel8.7")     return 0 ;;
         "rhel9.0")     return 0 ;;
         "centos7")     return 0 ;;
+        "centos8")     return 0 ;;
     esac
     return 1
 }
@@ -898,6 +899,7 @@ check_install_oai_software() {
   fi
 
     install_asn1c_from_source $1
+    install_simde_from_source $1
 }
 
 install_asn1c_from_source(){
@@ -924,6 +926,37 @@ install_asn1c_from_source(){
     ) > $asn1_install_log 2>&1
 }
 
+install_simde_from_source(){
+    echo_info "\nInstalling SIMDE from source without test cases (header files only)"
+    # absolute paths only: the caller's working directory is left untouched
+    $SUDO rm -rf /tmp/simde
+    git clone https://github.com/simd-everywhere/simde-no-tests.git /tmp/simde || return 1
+    # brute force copy into /usr/include
+    $SUDO \cp -rv /tmp/simde /usr/include
+    # quoted delimiter: the patch text below is passed to patch verbatim, with no shell expansion
+    $SUDO patch /usr/include/simde/x86/avx.h << 'FIN'
+--- /usr/include/simde/x86/avx.h.old	2022-10-03 19:17:39.828223432 +0200
++++ /usr/include/simde/x86/avx.h	2022-10-05 16:19:55.086019445 +0200
+@@ -3636,12 +3636,16 @@
+ SIMDE_FUNCTION_ATTRIBUTES
+ simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8)
+     SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
++#if defined(SIMDE_X86_AVX_NATIVE) 
++   return _mm256_insertf128_si256(a, b, imm8);
++#else
+   simde__m256i_private a_ = simde__m256i_to_private(a);
+   simde__m128i_private b_ = simde__m128i_to_private(b);
+ 
+   a_.m128i_private[imm8] = b_;
+ 
+   return simde__m256i_from_private(a_);
++#endif
+ }
+ #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
+   #undef _mm256_insertf128_si256
+FIN
+}
+
 #################################################
 # 2. compile
 ################################################
diff --git a/common/utils/DOC/loader/devusage.md b/common/utils/DOC/loader/devusage.md
index bd45c88a85223ce1123877f339c4bfbf19663e94..47c4806eac0148c2ab69c946f94500f46ad2d3fa 100644
--- a/common/utils/DOC/loader/devusage.md
+++ b/common/utils/DOC/loader/devusage.md
@@ -10,7 +10,7 @@ As a developer you may need to look at these sections:
 
 Loader usage examples can be found in oai sources:
 
-*  device and transport initialization code: [function `load_lib` in *targets/ARCH/COMMON/__common_lib.c__* ](https://gitlab.eurecom.fr/oai/openairinterface5g/blob/develop/targets/ARCH/COMMON/common_lib.c#L91)
+*  device and transport initialization code: [function `load_lib` in *sdr/COMMON/__common_lib.c__* ](https://gitlab.eurecom.fr/oai/openairinterface5g/blob/develop/sdr/COMMON/common_lib.c#L91)
 *  turbo encoder and decoder initialization: [function `load_codinglib`in *openair1/PHY/CODING/__coding_load.c__*](https://gitlab.eurecom.fr/oai/openairinterface5g/blob/develop/openair1/PHY/CODING/coding_load.c#L113)
 
 [loader home page](../loader.md)
diff --git a/common/utils/T/T_messages.txt b/common/utils/T/T_messages.txt
index 716c77823d58dc3ec79f787a97534086df09c1df..4d18bea3843c0392e7cb7767094d41846cd24b39 100644
--- a/common/utils/T/T_messages.txt
+++ b/common/utils/T/T_messages.txt
@@ -105,6 +105,10 @@ ID = GNB_PHY_PRACH_INPUT_SIGNAL
     DESC = gNodeB input data in the time domain for slots with PRACH detection
     GROUP = ALL:PHY:GRAPHIC:HEAVY:GNB
     FORMAT = int,frame : int,slot : int,antenna : buffer,rxdata
+ID = GNB_PHY_DL_OUTPUT_SIGNAL
+    DESC = gNodeB output data in the freq domain for slots
+    GROUP = ALL:PHY:GRAPHIC:HEAVY:GNB
+    FORMAT = int,gNB_ID : int,frame : int,slot : int,antenna : buffer,txdata
 
 #MAC logs
 ID = ENB_MAC_UE_DL_SDU
@@ -1305,7 +1309,11 @@ ID = UE_PHY_INPUT_SIGNAL
 ID = UE_PHY_DL_CHANNEL_ESTIMATE
     DESC = UE channel estimation in the time domain
     GROUP = ALL:PHY:GRAPHIC:HEAVY:UE
-    FORMAT = int,eNB_ID : int,frame : int,subframe : int,antenna : buffer,chest_t
+    FORMAT = int,eNB_ID : int,rsc_id : int,frame : int,subframe : int,antenna : buffer,chest_t
+ID = UE_PHY_DL_CHANNEL_ESTIMATE_FREQ
+    DESC = UE channel estimation in the frequency domain
+    GROUP = ALL:PHY:GRAPHIC:HEAVY:UE
+    FORMAT = int,eNB_ID : int,rsc_id : int,frame : int,subframe : int,antenna : buffer,chestF_t
 ID = UE_PHY_PDCCH_IQ
     DESC = UE PDCCH received IQ data
     GROUP = ALL:PHY:GRAPHIC:HEAVY:UE
diff --git a/common/utils/T/tracer/extract_prs_dumps.sh b/common/utils/T/tracer/extract_prs_dumps.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dd56940e4165d1c109225009527e1d32aa37a255
--- /dev/null
+++ b/common/utils/T/tracer/extract_prs_dumps.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+: "${1?Usage: $0 [-g num_gnb] [-n num_rsc] [-f filename] [-c count]}"   # abort with the usage line when no argument is given
+
+# Print the option summary to stdout, then exit with status 0 (invoked for -h).
+show_help()
+{
+echo "
+    Usage of script with arguments: [-g num_gnb] [-n num_rsc] [-f filename] [-c count]
+
+    -g num_gnb    Number of active gNBs
+    -n num_rsc    Number of PRS resources
+    -f filename   T tracer recorded .raw filename
+    -c count      Number of dump instances to be extracted
+
+    For Example:  ./extract_prs_dumps.sh -g 1 -n 8 -f fr2_64_prbs.raw -c 100
+
+    -h            Help
+"
+exit 0
+}
+
+while getopts g:n:f:c:h flag   # -g/-n/-f/-c each take a value, -h takes none
+do
+   case "${flag}" in
+      g) num_gnb=${OPTARG};;
+      n) num_rsc=${OPTARG};;
+      f) file=${OPTARG};;
+      c) count=${OPTARG};;
+      h) show_help;;
+      *) exit 1;;   # getopts has already printed the diagnostic; exit codes must be in 0-255
+   esac
+done
+echo "num_gnb: ${num_gnb:?missing required option -g num_gnb}";   # ':?' aborts the script if the option was not given
+echo "num_rsc: ${num_rsc:?missing required option -n num_rsc}";
+echo "filename: ${file:?missing required option -f filename}";
+echo "count: ${count:?missing required option -c count}";
+
+for (( i = 0; i < $num_gnb; i++ ))   # i is passed to ./extract as the eNB_ID filter value
+do
+   for (( j = 0; j < $num_rsc; j++ ))   # j is passed to ./extract as the rsc_id filter value
+   do
+      name=chF_gnb${i}_${j}.raw   # output file for the chestF_t buffer (frequency-domain estimate)
+      echo "Extracting $name"
+      ./extract -d ../T_messages.txt "$file" UE_PHY_DL_CHANNEL_ESTIMATE_FREQ chestF_t -f eNB_ID "$i" -f rsc_id "$j" -o "$name" -count "$count"
+      name=chT_gnb${i}_${j}.raw   # output file for the chest_t buffer (time-domain estimate)
+      echo "Extracting $name"
+      ./extract -d ../T_messages.txt "$file" UE_PHY_DL_CHANNEL_ESTIMATE chest_t -f eNB_ID "$i" -f rsc_id "$j" -o "$name" -count "$count"
+   done
+done
+
+# Pack the extracted dumps into a gzip-compressed tarball; skip the success message if tar fails
+name=prs_dumps.tgz
+tar cvzf "$name" chF_gnb* chT_gnb* || exit 1
+echo "created a zip file $name"
diff --git a/common/utils/nr/nr_common.c b/common/utils/nr/nr_common.c
index 465679d002a145aac27ea070c428a89ba6e35688..5a5ab834206974c99a3928a284e0afe18da2e945 100644
--- a/common/utils/nr/nr_common.c
+++ b/common/utils/nr/nr_common.c
@@ -198,8 +198,8 @@ int cce_to_reg_interleaving(const int R, int k, int n_shift, const int C, int L,
     f = k;
   else {
     int c = k/R;
-     int r = k%R;
-     f = (r*C + c + n_shift)%(N_regs/L);
+    int r = k % R;
+    f = (r * C + c + n_shift) % (N_regs / L);
   }
   return f;
 }
@@ -664,10 +664,26 @@ void get_samplerate_and_bw(int mu,
     }
   } else if (mu == 3) {
     switch(n_rb) {
-      case 66:
+      case 132:
+      case 128:
         if (threequarter_fs) {
           *sample_rate=184.32e6;
           *samples_per_frame = 1843200;
+          *tx_bw = 200e6;
+          *rx_bw = 200e6;
+        } else {
+          *sample_rate = 245.76e6;
+          *samples_per_frame = 2457600;
+          *tx_bw = 200e6;
+          *rx_bw = 200e6;
+        }
+        break;
+
+      case 66:
+      case 64:
+        if (threequarter_fs) {
+          *sample_rate=92.16e6;
+          *samples_per_frame = 921600;
           *tx_bw = 100e6;
           *rx_bw = 100e6;
         } else {
@@ -676,7 +692,6 @@ void get_samplerate_and_bw(int mu,
           *tx_bw = 100e6;
           *rx_bw = 100e6;
         }
-
         break;
 
       case 32:
@@ -691,7 +706,6 @@ void get_samplerate_and_bw(int mu,
           *tx_bw = 50e6;
           *rx_bw = 50e6;
         }
-
         break;
 
       default:
diff --git a/common/utils/telnetsrv/telnetsrv_cpumeasur_def.h b/common/utils/telnetsrv/telnetsrv_cpumeasur_def.h
index 7ba4444abf038139e0d67c3ba1570da5209734c2..5905732df854acca82e65d6c1bb776508d2bcaea 100644
--- a/common/utils/telnetsrv/telnetsrv_cpumeasur_def.h
+++ b/common/utils/telnetsrv/telnetsrv_cpumeasur_def.h
@@ -98,8 +98,8 @@
 /* from openair1/PHY/defs_nr_UE.h */
 #define CPU_PHYNRUE_MEASURE \
 { \
-    {"phy_proc",          &(UE->phy_proc[0]),0,RX_NB_TH},\
-    {"phy_proc_rx",       &(UE-> phy_proc_rx[0]),0,RX_NB_TH},\
+    {"phy_proc",          &(UE->phy_proc),0,1},\
+    {"phy_proc_rx",       &(UE-> phy_proc_rx),0,1},\
     {"phy_proc_tx",       &(UE->phy_proc_tx),0,1},\
     {"ue_ul_indication_stats",       &(UE->ue_ul_indication_stats),0,1},\
     {"ofdm_mod_stats",       &(UE->ofdm_mod_stats),0,1},\
@@ -111,7 +111,7 @@
     {"ulsch_interleaving_stats",       &(UE->ulsch_interleaving_stats),0,1},\
     {"ulsch_multiplexing_stats",       &(UE->ulsch_multiplexing_stats),0,1},\
     {"generic_stat",       &(UE->generic_stat),0,1},\
-    {"generic_stat_bis",       &(UE->generic_stat_bis[0][0]),0,RX_NB_TH,LTE_SLOTS_PER_SUBFRAME},\
+    {"generic_stat_bis",       &(UE->generic_stat_bis[0]),0,LTE_SLOTS_PER_SUBFRAME},\
     {"ofdm_demod_stats",       &(UE->ofdm_demod_stats),0,1},\
     {"dlsch_rx_pdcch_stats",       &(UE->dlsch_rx_pdcch_stats),0,1},\
     {"rx_dft_stats",       &(UE->rx_dft_stats),0,1},\
@@ -139,14 +139,14 @@
     {"dlsch_modulation_SIC_stats",       &(UE->dlsch_modulation_SIC_stats),0,1},\
     {"dlsch...ping_unit_SIC_stats",       &(UE->dlsch_llr_stripping_unit_SIC_stats),0,1},\
     {"dlsch_unscrambling_SIC_stats",       &(UE->dlsch_unscrambling_SIC_stats),0,1},\
-    {"ue_front_end_stat",       &(UE->ue_front_end_stat[0]),0,RX_NB_TH},\
-    {"ue_front_end_per_slot_stat",      &(UE->ue_front_end_per_slot_stat[0][0]),0,RX_NB_TH,LTE_SLOTS_PER_SUBFRAME},\
-    {"pdcch_procedures_stat",       &(UE->pdcch_procedures_stat[0]),0,RX_NB_TH},\
+    {"ue_front_end_stat",       &(UE->ue_front_end_stat),0,1},\
+    {"ue_front_end_per_slot_stat",      &(UE->ue_front_end_per_slot_stat[0]),0,LTE_SLOTS_PER_SUBFRAME},\
+    {"pdcch_procedures_stat",       &(UE->pdcch_procedures_stat),0,1},\
     {"rx_pdsch_stats",              &(UE->rx_pdsch_stats), 0, 1}, \
-    {"pdsch_procedures_stat",       &(UE->pdsch_procedures_stat[0]),0,RX_NB_TH},\
-    {"pdsch_procedures_per_slot_stat",  &(UE->pdsch_procedures_per_slot_stat[0][0]),0,RX_NB_TH,LTE_SLOTS_PER_SUBFRAME},\
-    {"dlsch_procedures_stat",       &(UE->dlsch_procedures_stat[0]),0,RX_NB_TH},\
-    {"dlsch_decoding_stats",       &(UE->dlsch_decoding_stats[0]),0,RX_NB_TH},\
-    {"dlsch_llr_stats_para", &(UE->dlsch_llr_stats_parallelization[0][0]),0,RX_NB_TH,LTE_SLOTS_PER_SUBFRAME},\
+    {"pdsch_procedures_stat",       &(UE->pdsch_procedures_stat),0,1},\
+    {"pdsch_procedures_per_slot_stat",  &(UE->pdsch_procedures_per_slot_stat[0]),0,LTE_SLOTS_PER_SUBFRAME},\
+    {"dlsch_procedures_stat",       &(UE->dlsch_procedures_stat),0,1},\
+    {"dlsch_decoding_stats",       &(UE->dlsch_decoding_stats),0,1},\
+    {"dlsch_llr_stats_para", &(UE->dlsch_llr_stats_parallelization[0]),0,LTE_SLOTS_PER_SUBFRAME},\
 }
 #endif
diff --git a/common/utils/time_meas.h b/common/utils/time_meas.h
index eb81d09434cefab1879c28aad1c286eaee15d45a..943856fa1fdf9de4f86d5a5b939b9af4d6a09337 100644
--- a/common/utils/time_meas.h
+++ b/common/utils/time_meas.h
@@ -37,7 +37,7 @@ extern double cpu_freq_GHz  __attribute__ ((aligned(32)));;
 // structure to store data to compute cpu measurment
 #if defined(__x86_64__) || defined(__i386__)
   typedef long long oai_cputime_t;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   typedef uint32_t oai_cputime_t;
 #else
   #error "building on unsupported CPU architecture"
@@ -107,7 +107,7 @@ static inline unsigned long long rdtsc_oai(void) {
   return (d<<32) | a;
 }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline uint32_t rdtsc_oai(void) __attribute__((always_inline));
 static inline uint32_t rdtsc_oai(void) {
   uint32_t r = 0;
diff --git a/common/utils/utils.h b/common/utils/utils.h
index 5963ae0c8e03b0e9652c5954fc053123078fa90a..af90adc8678ab488fd26d5aa49572e736f788830 100644
--- a/common/utils/utils.h
+++ b/common/utils/utils.h
@@ -34,11 +34,7 @@ extern "C" {
 #endif
 
 #ifndef malloc16
-#  ifdef __AVX2__
 #    define malloc16(x) memalign(32,x+32)
-#  else
-#    define malloc16(x) memalign(16,x+16)
-#  endif
 #endif
 #define free16(y,x) free(y)
 #define bigmalloc malloc
@@ -54,11 +50,7 @@ extern "C" {
   } while (0)
 
 static inline void *malloc16_clear( size_t size ) {
-#ifdef __AVX2__
   void *ptr = memalign(32, size+32);
-#else
-  void *ptr = memalign(16, size+16);
-#endif
   DevAssert(ptr);
   memset( ptr, 0, size );
   return ptr;
@@ -91,11 +83,7 @@ static inline void *malloc_or_fail(size_t size) {
 # define msg(aRGS...) LOG_D(PHY, ##aRGS)
 #endif
 #ifndef malloc16
-#  ifdef __AVX2__
 #    define malloc16(x) memalign(32,x)
-#  else
-#    define malloc16(x) memalign(16,x)
-#  endif
 #endif
 
 #define free16(y,x) free(y)
diff --git a/doc/BUILD.md b/doc/BUILD.md
index c4272140f2fefc29b21c3b02e35cae25e17409c0..dee55599cc50d2e47ea0931f36d66ff82472741c 100644
--- a/doc/BUILD.md
+++ b/doc/BUILD.md
@@ -78,7 +78,7 @@ cd cmake_targets/
 ```
 
 - The `-I` option is to install pre-requisites, you only need it the first time you build the softmodem or when some oai dependencies have changed.
-- The `-w` option is to select the radio head support you want to include in your build. Radio head support is provided via a shared library, which is called the "oai device" The build script creates a soft link from `liboai_device.so` to the true device which will be used at run-time (here the USRP one,`liboai_usrpdevif.so` . USRP is the only hardware tested today in the Continuous Integration process. The RF simulator[RF simulator](../targets/ARCH/rfsimulator/README.md) is implemented as a specific device replacing RF hardware, it can be specifically built using `-w SIMU` option, but is also built during any softmodem build.
+- The `-w` option is to select the radio head support you want to include in your build. Radio head support is provided via a shared library, which is called the "oai device". The build script creates a soft link from `liboai_device.so` to the true device which will be used at run-time (here the USRP one, `liboai_usrpdevif.so`). USRP is the only hardware tested today in the Continuous Integration process. The [RF simulator](../sdr/rfsimulator/README.md) is implemented as a specific device replacing RF hardware; it can be specifically built using the `-w SIMU` option, but is also built during any softmodem build.
 - `--eNB` is to build the `lte-softmodem` executable and all required shared libraries
 - `--gNB` is to build the `nr-softmodem` executable and all required shared libraries
 - `--UE` is to build the `lte-uesoftmodem` executable and all required shared libraries
diff --git a/doc/FEATURE_SET.md b/doc/FEATURE_SET.md
index 5db6e44d8d2f788375f05649f033a0de2d95be02..d660a875a7a1ae532922a229e13ee4e0b7840552 100644
--- a/doc/FEATURE_SET.md
+++ b/doc/FEATURE_SET.md
@@ -298,6 +298,10 @@ The following features are valid for the gNB and the 5G-NR UE.
     - SRS signal reception
     - Channel estimation (with T tracer real time monitoring)
     - Power noise estimation
+*  NR-PRS
+    - Rel16 Positioning reference signal(PRS) generation and modulation
+    - Multiple PRS resources, one per beam is supported in FR2 TDD mode
+    - FR1 and FR2 support with config file
 *  NR-PRACH
    - Formats 0,1,2,3, A1-A3, B1-B3
 *  Highly efficient 3GPP compliant LDPC encoder and decoder (BG1 and BG2 are supported)
@@ -328,6 +332,7 @@ The following features are valid for the gNB and the 5G-NR UE.
   - HARQ procedures for uplink
 - Scheduler procedures for SRS reception
   - Periodic SRS reception
+  - TPMI computation based on SRS up to 4 antenna ports and 1 layer
 - MAC procedures to handle CSI measurement report
   - evalution of RSRP report
   - evaluation of CQI report
@@ -458,6 +463,11 @@ The following features are valid for the gNB and the 5G-NR UE.
 * NR-SRS
     - Generation of sequence at PHY
     - SRS signal transmission
+* NR-PRS
+    - PRS based Channel estimation with T tracer dumps
+    - Time of arrival(ToA) estimation based on channel impulse response(CIR)
+    - Finer ToA estimation by 16x oversampled IDFT for CIR
+    - Support for multiple gNB reception with gNBs synced via GPSDO
 * NR-PRACH
    - Formats 0,1,2,3, A1-A3, B1-B3
 * SS-RSRP
diff --git a/doc/RUNMODEM.md b/doc/RUNMODEM.md
index e8537d0d03197785fbb18dc0a46065927f70f70b..7e899455bac7ad364a102a0628240adf0fdd936d 100644
--- a/doc/RUNMODEM.md
+++ b/doc/RUNMODEM.md
@@ -23,7 +23,7 @@ It is planned to enhance this simulator with the following functionalities:
 - Support for multiple UE connections,each UE being a `lte-uesoftmodem` or `nr_uesoftmodem` instance.
 - Support for multiple eNodeB's or gNodeB's for hand-over tests
 
-   This is an easy use-case to setup and test, as no specific hardware is required. The [rfsimulator page](../targets/ARCH/rfsimulator/README.md ) contains the detailed documentation.
+   This is an easy use-case to setup and test, as no specific hardware is required. The [rfsimulator page](../sdr/rfsimulator/README.md ) contains the detailed documentation.
 
 # L2 nFAPI Simulator
 
@@ -102,7 +102,7 @@ Some other useful paramters of the UE are
 
 Instead of randomly generated payload, in the phy-test mode we can also inject/receive user-plane traffic over a TUN interface. This is the so-called noS1 mode. 
 
-This setup is described in the [rfsimulator page](../targets/ARCH/rfsimulator/README.md#5g-case). In theory this should also work with the real hardware target although this has yet to be tested.
+This setup is described in the [rfsimulator page](../sdr/rfsimulator/README.md#5g-case). In theory this should also work with the real hardware target although this has yet to be tested.
 
 ## do-ra setup with OAI
 
@@ -181,6 +181,17 @@ where `-r` sets the transmission bandwidth configuration in terms of RBs, `-C` s
 
 Additionally, at UE side `--uecap_file` option can be used to pass the UE Capabilities input file (path location + filename), e.g. `--uecap_file ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/uecap.xml`
 
+### Run OAI with SDAP & Custom DRBs
+
+To run OAI gNB with SDAP, simply include `--gNBs.[0].enable_sdap 1` to the binary's arguments.
+
+The DRB creation is dependent on the 5QI. 
+If the 5QI corresponds to a GBR Flow it assigns a dedicated data radio bearer.
+The Non-GBR flows use a shared data radio bearer.
+
+To hardcode the DRBs for testing purposes, simply add `--gNBs.[0].drbs x` to the binary's arguments, where `x` is the number of DRBs, along with SDAP.
+The hardcoded DRBs will be treated like GBR Flows. Due to code limitations at this point the max. number of DRBs is 4. 
+
 ## IF setup with OAI
 
 OAI is also compatible with Intermediate Frequency (IF) equipment. This allows to use RF front-end that with arbitrary frequencies bands that do not comply with the standardised 3GPP NR bands. 
diff --git a/doc/RUN_NR_PRS.md b/doc/RUN_NR_PRS.md
new file mode 100644
index 0000000000000000000000000000000000000000..919796742e232d569029e389f5f8a4022c00fa6f
--- /dev/null
+++ b/doc/RUN_NR_PRS.md
@@ -0,0 +1,190 @@
+<table style="border-collapse: collapse; border: none;">
+  <tr style="border-collapse: collapse; border: none;">
+    <td style="border-collapse: collapse; border: none;">
+      <a href="http://www.openairinterface.org/">
+         <img src="./images/oai_final_logo.png" alt="" border=3 height=50 width=150>
+         </img>
+      </a>
+    </td>
+    <td style="border-collapse: collapse; border: none; vertical-align: center;">
+      <b><font size = "8">Running NR PRS with OAI gNB and nrUE</font></b>
+    </td>
+  </tr>
+</table>
+
+This page is valid on tags starting from **`2022.w37`**.
+
+
+After you have [built the softmodem executables](BUILD.md), go to the build directory `openairinterface5g/cmake_targets/ran_build/build/` and start testing the Rel16 PRS usecases.
+
+# PRS parameters and config files
+
+| **Mode** 	                    | **gNB config**                                                                          	  | **nrUE config**           	|
+|-----------------------------	|-------------------------------------------------------------------------------------------  |---------------------------	|
+| **FR1 40MHz<br>30kHz SCS**  	| gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf<br>gnb1.sa.band78.fr1.106PRB.prs.usrpx310.conf 	| ue.nr.prs.fr1.106prb.conf 	|
+| **FR2 100MHz<br>120kHz SCS**  | gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf<br>gnb1.sa.band261.fr2.64PRB.prs.usrpx310.conf 	| ue.nr.prs.fr2.64prb.conf  	|
+
+In the gNB and nrUE config files, the PRS parameters are configured in the `prs_config` section. nrUE is capable of receiving downlink PRS signals from multiple gNBs simultaneously, and therefore the nrUE config file contains multiple `prs_config` sections, one for each gNB. These parameters can be changed as per the test scenario.
+
+As of now, PRS **Comb size of 2 and 4** is supported and validated with R&S Spectrum analyzer. Also Muting is NOT supported yet. Sample configuration as below:
+
+```
+prs_config = (
+{
+  NumPRSResources       = 1;
+  PRSResourceSetPeriod  = [20, 2];
+  SymbolStart           = [7];
+  NumPRSSymbols         = [6];
+  NumRB                 = 106;
+  RBOffset              = 0;
+  CombSize              = 4;
+  REOffset              = [0];
+  PRSResourceOffset     = [0];
+  PRSResourceRepetition = 1;
+  PRSResourceTimeGap    = 1;
+  NPRS_ID               = [0];
+  MutingPattern1        = [];
+  MutingPattern2        = [];
+  MutingBitRepetition   = 1;
+}
+);
+```
+To TURN OFF PRS, set `NumPRSResources=0` in gNB `prs_config` section. nrUE config has `Active_gNBs` to specify number of active gNBs transmitting PRS signal simultaneously. Find the help string for PRS parameters in `openair2/COMMON/prs_nr_paramdef.h` <br><br>
+
+# gNB in `phy-test` mode
+Note that `numactl` is only needed if you run on a NUMA architecture with more than 1 CPU. In this case it should be installed on Linux using command `sudo apt-get install -y numactl`
+
+Also check the numa nodes USRP’s are connected to, using the following command:
+
+```cat /sys/class/net/eth_if/device/numa_node```
+
+Where `eth_if` has to be replaced with the name of the network interface the USRP is connected to. 
+In our case the output is 0 and hence we use `numactl --cpunodebind=0 --membind=0`
+
+## FR1 test
+Open a terminal on the host machine, and execute below command to launch gNB with **X310 USRPs**
+
+```sudo numactl --cpunodebind=0 --membind=0 ./nr-softmodem -E -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf --phy-test```
+
+If **N310 USRPs** are used, then run above command `without -E option` i.e without 3/4 sampling rate.<br><br>
+
+
+To run using **rfsimulator**, execute following command:  
+
+```sudo RFSIMULATOR=server ./nr-softmodem -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf --parallel-config PARALLEL_SINGLE_THREAD --noS1 --nokrnmod 1 --rfsim --phy-test --lowmem```
+
+## FR2 test
+In FR2 mode, we need an RF beamforming module to transmit the signal in the mmWave frequency range. **X310 USRPs** can be used with a BasicTx daughtercard to transmit the baseband signal at an **Intermediate Frequency (IF)**, and then the RF beamforming module performs beamforming and the upconversion to FR2 frequencies. The IF can be specified using `if_freq` in the RU section of the gNB config.
+
+If RF beamforming module is NOT present, gNB can still be launched with USRP alone; to transmit at supported `if_freq`.
+
+```sudo numactl --cpunodebind=0 --membind=0 ./nr-softmodem -E -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf --phy-test```<br><br>
+
+
+To run using **rfsimulator**, execute following command:  
+
+```sudo RFSIMULATOR=server ./nr-softmodem -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf --parallel-config PARALLEL_SINGLE_THREAD --noS1 --nokrnmod 1 --rfsim --phy-test --lowmem```
+
+## Multiple gNB scenario
+PRS is primarily used for positioning and localization of the UE with multiple gNBs transmitting simultaneously. The OAI PRS implementation supports multiple gNB transmission provided all the gNBs are tightly synchronized using a GPSDO clock. Therefore, before running this scenario, make sure the USRPs have a built-in GPSDO and the GPS antennas are connected with good satellite visibility. Also, every time a gNB is launched, wait until `GPS LOCKED` is printed on the terminal during gNB startup. If the USRP fails to lock with the GPSDO, try again until it is locked.
+
+To use GPSDO, make sure to change `clock_source` and `time_source` to `gpsdo` in RU section of gNB config.
+
+```sudo numactl --cpunodebind=0 --membind=0 ./nr-softmodem -E -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf --phy-test```
+
+```sudo numactl --cpunodebind=1 --membind=1 ./nr-softmodem -E -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band78.fr1.106PRB.prs.usrpx310.conf --phy-test```<br><br>
+
+
+# nrUE in `phy-test` mode
+While running gNB and nrUE on the same host machine, the `reconfig.raw` and `rbconfig.raw` files are generated when the gNB is launched, and nrUE then automatically sources them from the build directory. However, if gNB and nrUE are running on two different host machines, then run gNB first with the corresponding config and exit after a few seconds. This generates the `reconfig.raw` and `rbconfig.raw` files.
+
+After this, nrUE can be launched using one of the below commands depending on the test scenario. If UE is NOT able to connect to the gNB, then check the USRP connections or try increasing `--ue-rxgain` in steps of 10dB.  
+
+Also check the instructions on `numactl` in gNB test section as it applies for nrUE execution as well.
+
+## FR1 test
+Once gNB is up and running, open another terminal and execute below command to launch nrUE with **X310 USRPs**. Make sure to specify `IP_ADDR1` and `IP_ADDR2`(optional) correctly as per USRPs IP address
+
+```sudo numactl --cpunodebind=0 --membind=0 ./nr-uesoftmodem -E --phy-test --usrp-args "addr=IP_ADDR1,second_addr=IP_ADDR2,time_source=internal,clock_source=internal" -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr1.106prb.conf --ue-rxgain 80 --ue-fo-compensation --non-stop```  
+
+If **N310 USRPs** are used, then run above command `without -E option` i.e without 3/4 sampling rate.<br><br>
+
+
+To run using **rfsimulator** with local ETH IF `127.0.0.1`, execute following command:  
+
+```sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem --nokrnmod 1 --rfsim --phy-test --noS1 -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr1.106prb.conf```  
+
+## FR2 test
+As for the gNB, the RF beamforming module receives at mmWave frequencies, and **X310 USRPs** with a BasicRx daughtercard are then used to receive the signal at the intermediate frequency (IF) from the RF beamforming module. The IF can be specified using the `--if_freq` option in the nrUE command line.
+
+If RF beamforming module is NOT present, nrUE can still be launched with USRP alone; to receive at `if_freq` and validation can be done. Make sure to specify `if_freq` in the range supported by USRP nrUE is running with.
+
+```sudo numactl --cpunodebind=0 --membind=0 ./nr-uesoftmodem -E --phy-test -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr2.64prb.conf --usrp-args "addr=IP_ADDR1,second_addr=IP_ADDR2,time_source=internal,clock_source=internal" --ue-rxgain 80 --ue-fo-compensation --if_freq 50000000 --non-stop```<br><br>
+
+
+To run using **rfsimulator** with local ETH IF `127.0.0.1`, execute following command:  
+
+```sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem --nokrnmod 1 --rfsim --phy-test --noS1 -O ../../../targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr2.64prb.conf```
+
+## Multiple gNB scenario
+In the nrUE PRS config file, change `Active_gNBs` to the actual number of gNBs launched. Also verify that the parameters in the `prs_config` sections of the nrUE config match those of the gNB configs used. Then launch nrUE using one of the above commands, depending on the FR1/FR2 test scenario.
+
+After successful connection, UE starts estimating channel based on the downlink PRS pilots using Least-Squares(LS) method. In the frequency domain, linear interpolation is used to reconstruct the channel over entire PRS bandwidth using LS estimates at pilot locations. UE also measures Time of Arrival(ToA) based on the time domain impulse response. On the console, ToA measurement is printed for each PRS resource.
+
+```
+[PHY]   [gNB 0][rsc 0][Rx 0][sfn 314][slot 2] DL PRS ToA ==> 1 / 1024 samples, peak channel power 15.6 dBm, SNR +4 dB
+[PHY]   [gNB 0][rsc 1][Rx 0][sfn 314][slot 12] DL PRS ToA ==> 1 / 1024 samples, peak channel power 15.6 dBm, SNR +4 dB
+[PHY]   [gNB 0][rsc 2][Rx 0][sfn 314][slot 22] DL PRS ToA ==> 1 / 1024 samples, peak channel power 15.6 dBm, SNR +4 dB
+[PHY]   [gNB 0][rsc 3][Rx 0][sfn 314][slot 32] DL PRS ToA ==> 1 / 1024 samples, peak channel power 15.6 dBm, SNR +4 dB
+```
+
+At UE side, T tracer is used to dump PRS channel estimates, both in time and frequency domain using `UE_PHY_DL_CHANNEL_ESTIMATE` and `UE_PHY_DL_CHANNEL_ESTIMATE_FREQ` respectively. These dumps can be enabled using options `--T_stdout 0` without console prints or `--T_stdout 2` with console prints; in above nrUE launch command.<br><br>
+
+# Recording T tracer dumps
+Once nrUE is launched with `--T_stdout 0 or 2` option, open another terminal. Navigate to T tracer directory ```common/utils/T/tracer/``` and build the T tracer binary using ```make```
+
+Once the build is successful, execute following command to start recording the PRS channel estimates dumps
+
+```./record -d ../T_messages.txt -on LEGACY_PHY_INFO -on UE_PHY_DL_CHANNEL_ESTIMATE -on UE_PHY_DL_CHANNEL_ESTIMATE_FREQ -o prs_dumps.raw```  
+
+Exit using `Ctrl+C` to stop recording, else it will keep running and take a lot of disk space. Generally, running it for 1-2 minutes should collect sufficient dumps.<br><br>
+
+
+To check the contents of recorded .raw file, replay it by executing:
+
+```./replay -i prs_dumps.raw```
+
+and textlog it on another terminal with following command:
+
+```./textlog -d ../T_messages.txt -ON -no-gui```<br><br>
+
+# Extracting PRS channel estimates
+Once T tracer dumps are recorded, PRS channel estimates can be extracted from .raw file using bash script `extract_prs_dumps.sh` in T tracer directory ```common/utils/T/tracer/```
+
+```./extract_prs_dumps.sh -g <num_gnb> -n <num_resources> -f <recorded .raw file> -c <count>```
+
+In the end, the script will zip all the extracted dumps to `prs_dumps.tgz`. Make sure to check help in running script using -h option: 
+```./extract_prs_dumps.sh -h```<br><br>
+
+# Using Matlab/Octave script to visualize PRS channel estimates
+We have developed `plot_prs_Ttracer_dumps.m` script to visualize the extracted PRS dumps offline in Matlab/Octave. Location of the script is `openair1/PHY/NR_UE_ESTIMATION/plot_prs_Ttracer_dumps.m`
+
+Make sure to enter the parameters the script asks for, as shown below:
+
+```
+Enter the directory path to T tracer dumps: '<workspace>/openairinterface5g/common/utils/T/tracer'  
+Enter the OFDM FFT size used for file parsing: <frame_parms->ofdm_symbol_size>  
+Enter number of PRS resources: <NumPRSResources>  
+Enter number of active gNBs: <Active_gNBs>
+```
+This script will read the IQ data from extracted PRS dumps(chF_gnbX_Y.raw and chT_gnbX_Y.raw) and plot them like below
+<table style="border-collapse: collapse; border: none;">
+  <tr style="border-collapse: collapse; border: none;">
+    <td style="border-collapse: collapse; border: none;">
+         <img src="./images/PRS_CFR_FR2_64PRB_8rsc.PNG" alt="" border=1 height=400 width=500>
+         <img src="./images/PRS_CIR_FR2_64PRB_8rsc.PNG" alt="" border=1 height=400 width=500>
+         </img>
+         <figcaption align = "center"><b>Fig.1 - FR2 100MHz test</b></figcaption>
+    </td>
+  </tr>
+</table>
\ No newline at end of file
diff --git a/doc/TESTBenches.md b/doc/TESTBenches.md
index dd14817863cef96cee9856473a8ebbe5ba6e7a48..7bad43df65cb88315a5a181c45b39acdb83838ee 100644
--- a/doc/TESTBenches.md
+++ b/doc/TESTBenches.md
@@ -18,10 +18,10 @@
 | idefix        | 172.21.16.135   | CI-NSA-MiniBench      | Quectel            | Quectel module                                        |
 | amariue       | 172.21.16.144   | CI-Amarisoft-UE-Usage | nrUE               | Amarisoft UE simulator                                |
 | bellatrix     | 192.168.117.115 | CI-RAN-VM-Deployment  | Executor           | --                                                    |
-| nano          | 192.168.12.62   | CI-Bench-1-Phones     | EPC, adb           | 2x COTS (adb)                                         |
-| hutch         | 192.168.12.19   | CI-Bench-1-Phones     | eNB (B7)           | B200mini (30C5239)                                    |
-| starsky       | 192.168.12.18   | CI-Bench-1-Phones     | eNB (B40)          | b200mini (30A3E3C)                                    |
-| carabe        | 192.168.12.211  | CI-Bench-2-OAI-Phone  | UE (B7UE)          | B200mini (30AE8C9)                                    |
+| nano          | 172.21.18.48    | CI-Bench-1-Phones     | EPC, adb           | 2x COTS (adb)                                         |
+| hutch         | 172.21.18.46    | CI-Bench-1-Phones     | eNB (B7)           | B200mini (30C5239)                                    |
+| starsky       | 172.21.18.45    | CI-Bench-1-Phones     | eNB (B40)          | b200mini (30A3E3C)                                    |
+| carabe        | 172.21.18.47    | CI-Bench-2-OAI-Phone  | UE (B7UE)          | B200mini (30AE8C9)                                    |
 
 Note: The available resources, and their current usage, is indicated here:
 - [Lockable resources of jenkins-oai](https://jenkins-oai.eurecom.fr/lockable-resources/):
@@ -97,7 +97,8 @@ Webhook
   - obelix + B200, nepes + B200, idefix + Quectel, porcepix w/ sabox
   - basic SA test (40 MHz TDD)
 - [RAN-PhySim-Cluster](https://jenkins-oai.eurecom.fr/job/RAN-PhySim-Cluster/)
-  - asterix (`Asterix-OC-oaicicd-session` resource), tests in OpenShift Cluster
+  - poseidon (jumphost, `Asterix-OC-oaicicd-session` resource), tests in
+    OpenShift Cluster
   - unitary simulators (`nr_dlsim`, etc.)
 - [RAN-RF-Sim-Test-4G](https://jenkins-oai.eurecom.fr/job/RAN-RF-Sim-Test-4G/)
   - obelix (eNB, 1x UE, OAI EPC)
@@ -106,8 +107,8 @@ Webhook
   - obelix (gNB, 2x UE, OAI 5GC)
   - uses RFsimulator, TDD 40MHz, FDD 40MHz, F1 split
 - [RAN-RHEL8-Cluster-Image-Builder](https://jenkins-oai.eurecom.fr/job/RAN-RHEL8-Cluster-Image-Builder/)
-  - asterix (`Asterix-OC-oaicicd-session` resource): RHEL 8 image build using
-    the OpenShift in Eurecom
+  - poseidon (jumphost, `Asterix-OC-oaicicd-session` resource): RHEL 8 image
+    build using the OpenShift Cluster
 - [RAN-Ubuntu18-Image-Builder](https://jenkins-oai.eurecom.fr/job/RAN-Ubuntu18-Image-Builder/)
   - obelix: Ubuntu 18 image build using docker
 
@@ -144,10 +145,6 @@ runs tests:
 - bellatrix: runs 4G/5G simulators directly (eNB + 1x UE + (opt.) OAI EPC, gNB + 1x UE in "noS1")
 
 triggers pipelines:
-- [eNB-CI-F1-FDD-Band7-B210](https://open5glab.eurecom.fr:8083/jenkins/job/eNB-CI-F1-FDD-Band7-B210/)
-  - hutch + B210, nano w/ ltebox + 2x UE
-  - tests 4G FDD with F1 split, 5MHz, 10MHz, 20MHz. 5MHz stable, rest known to
-    be unstable
 - [eNB-CI-FDD-Band7-B210](https://open5glab.eurecom.fr:8083/jenkins/job/eNB-CI-FDD-Band7-B210/)
   - hutch + B210, nano w/ ltebox + 2x UE
   - tests T tracer, information through FlexRAN, RRC inactivity timers,
diff --git a/doc/TESTING_5GSA_setup.md b/doc/TESTING_5GSA_setup.md
index 1e27e1616d4e57167a54c89bac8a83d1001d30ac..b12b3478e86c104c8d4f1624b648cbd26f253492 100644
--- a/doc/TESTING_5GSA_setup.md
+++ b/doc/TESTING_5GSA_setup.md
@@ -214,4 +214,21 @@ sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem -r 106 --numerology 1 --band 78 -C 3
     --rfsim --sa --nokrnmod -O <PATH_TO_UE_CONF_FILE>
 ```
 
+If you get the following error:
+
+```bash
+Assertion (k2 >= ((5))) failed!
+In get_k2() /home/mir/workspace/openairinterface5g/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c:147
+Slot offset K2 (2) cannot be less than DURATION_RX_TO_TX (5). K2 set according to min_rxtxtime in config file.
+```
+
+Add the following parameter (i.e., min_rxtxtime) in the gNB configuration file, just after nr_cellid.
+
+```bash
+nr_cellid = 12345678L;
+min_rxtxtime=6;
+```
+or --gNBs.[0].min_rxtxtime 6 to the gNB command line
+
+
 The IP address at the execution command of the OAI UE corresponds to the target IP of the gNB host that the RFSIMULATOR at the UE will connect to. In the above example, we assume that the gNB and UE are running on the same host so the specified address (127.0.0.1) is the one of the loopback interface.  
diff --git a/doc/images/PRS_CFR_FR2_64PRB_8rsc.PNG b/doc/images/PRS_CFR_FR2_64PRB_8rsc.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..4df6443696a270aae7739f50fc5849c1a428a9bd
Binary files /dev/null and b/doc/images/PRS_CFR_FR2_64PRB_8rsc.PNG differ
diff --git a/doc/images/PRS_CIR_FR2_64PRB_8rsc.PNG b/doc/images/PRS_CIR_FR2_64PRB_8rsc.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..fca786e0ab04547de32c466e1e675d6d403aae90
Binary files /dev/null and b/doc/images/PRS_CIR_FR2_64PRB_8rsc.PNG differ
diff --git a/doc/tutorial_resources/docker-compose-basic-nrf.yaml b/doc/tutorial_resources/docker-compose-basic-nrf.yaml
index 1c78bf37f9333257d482b924a61cff03eb764c73..b88020e6b02307678d27886d43467712690b84a6 100644
--- a/doc/tutorial_resources/docker-compose-basic-nrf.yaml
+++ b/doc/tutorial_resources/docker-compose-basic-nrf.yaml
@@ -51,13 +51,6 @@ services:
         networks:
             public_net:
                 ipv4_address: 192.168.70.136
-        volumes:
-            - ./healthscripts/udr-healthcheck.sh:/openair-udr/bin/udr-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-udr/bin/udr-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
     oai-udm:
         container_name: "oai-udm"
         image: oai-udm:develop
@@ -86,13 +79,6 @@ services:
         networks:
             public_net:
                 ipv4_address: 192.168.70.137
-        volumes:
-            - ./healthscripts/udm-healthcheck.sh:/openair-udm/bin/udm-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-udm/bin/udm-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
     oai-ausf:
         container_name: "oai-ausf"
         image: oai-ausf:develop
@@ -120,13 +106,6 @@ services:
         networks:
             public_net:
                 ipv4_address: 192.168.70.138
-        volumes:
-            - ./healthscripts/ausf-healthcheck.sh:/openair-ausf/bin/ausf-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-ausf/bin/ausf-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
     oai-nrf:
         container_name: "oai-nrf"
         image: oai-nrf:develop
@@ -141,13 +120,6 @@ services:
         networks:
             public_net:
                 ipv4_address: 192.168.70.130
-        volumes:
-            - ./healthscripts/nrf-healthcheck.sh:/openair-nrf/bin/nrf-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-nrf/bin/nrf-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
     oai-amf:
         container_name: "oai-amf"
         image: oai-amf:develop
@@ -213,13 +185,6 @@ services:
             - mysql
             - oai-nrf
             - oai-ausf
-        volumes:
-            - ./healthscripts/amf-healthcheck.sh:/openair-amf/bin/amf-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-amf/bin/amf-healthcheck.sh"
-            interval: 10s
-            timeout: 15s
-            retries: 5
         networks:
             public_net:
                 ipv4_address: 192.168.70.132
@@ -270,13 +235,6 @@ services:
         depends_on:
             - oai-nrf
             - oai-amf
-        volumes:
-            - ./healthscripts/smf-healthcheck.sh:/openair-smf/bin/smf-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-smf/bin/smf-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
         networks:
             public_net:
                 ipv4_address: 192.168.70.133
@@ -325,13 +283,6 @@ services:
         cap_drop:
             - ALL
         privileged: true
-        volumes:
-            - ./healthscripts/spgwu-healthcheck.sh:/openair-spgwu-tiny/bin/spgwu-healthcheck.sh
-        healthcheck:
-            test: /bin/bash -c "/openair-spgwu-tiny/bin/spgwu-healthcheck.sh"
-            interval: 10s
-            timeout: 5s
-            retries: 5
         networks:
             public_net:
                 ipv4_address: 192.168.70.134
diff --git a/executables/main-fs6.c b/executables/main-fs6.c
index 01dfc526e976822ee58c4d72bafa016be929e2e0..acae580512fed96a9c8586618c0673cd631415b8 100644
--- a/executables/main-fs6.c
+++ b/executables/main-fs6.c
@@ -41,7 +41,7 @@
 #include <executables/split_headers.h>
 #include <openair1/PHY/CODING/coding_extern.h>
 #include <threadPool/thread-pool.h>
-#include <emmintrin.h>
+#include "PHY/sse_intrin.h"
 
 #define FS6_BUF_SIZE 1000*1000
 static UDPsock_t sockFS6;
diff --git a/executables/main-ocp.c b/executables/main-ocp.c
index d05b683f3931003f8d3903405133365d89812620..54ae31f4aa2e9ef2ea2212d638477e2d7f230816 100644
--- a/executables/main-ocp.c
+++ b/executables/main-ocp.c
@@ -75,7 +75,8 @@ int oai_exit = 0;
 double cpuf;
 THREAD_STRUCT thread_struct;
 
-uint16_t sf_ahead=4;
+extern uint16_t sf_ahead; // Bell Labs
+//uint16_t sf_ahead=4;
 //uint16_t slot_ahead=6;
 int otg_enabled;
 uint64_t  downlink_frequency[MAX_NUM_CCs][4];
@@ -725,12 +726,8 @@ void ocp_tx_rf(RU_t *ru, L1_rxtx_proc_t *proc) {
     }
 
 #if defined(__x86_64) || defined(__i386__)
-#ifdef __AVX2__
     sf_extension = (sf_extension)&0xfffffff8;
-#else
-    sf_extension = (sf_extension)&0xfffffffc;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     sf_extension = (sf_extension)&0xfffffffc;
 #endif
 
@@ -1141,6 +1138,7 @@ int main ( int argc, char **argv ) {
   int i;
   int CC_id = 0;
   int node_type = ngran_eNB;
+  sf_ahead=4; // Bell Labs
   AssertFatal(load_configmodule(argc,argv,0), "[SOFTMODEM] Error, configuration module init failed\n");
   logInit();
   printf("Reading in command-line options\n");
diff --git a/executables/main_ru.c b/executables/main_ru.c
index cec4ecc9f3dc24b5f633b706b521cc73be64ef83..9db2e95576ee33c58cbad6600cc6d163e5c940ed 100644
--- a/executables/main_ru.c
+++ b/executables/main_ru.c
@@ -45,8 +45,8 @@
 #include "common/utils/load_module_shlib.h"
 
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 
 
 #include "PHY/phy_vars.h"
diff --git a/executables/nr-gnb.c b/executables/nr-gnb.c
index 176bd640bc843c6491868027e8b66f683d87d663..e5f24c98d841773aebd37f2945e3dbfa64e8d7fc 100644
--- a/executables/nr-gnb.c
+++ b/executables/nr-gnb.c
@@ -55,7 +55,7 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
-#include "../../ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
diff --git a/executables/nr-ru.c b/executables/nr-ru.c
index 63982c161dad8dfd04a93494304ab2887f6510ef..71e7a4f4b1f18b6ded46eac09f32978ff2cbacb8 100644
--- a/executables/nr-ru.c
+++ b/executables/nr-ru.c
@@ -37,8 +37,8 @@
 #include "common/ran_context.h"
 #include "rt_profiling.h"
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 
 #include "PHY/LTE_TRANSPORT/if4_tools.h"
 
diff --git a/executables/nr-softmodem-common.h b/executables/nr-softmodem-common.h
index e0114b8d14d86c89bd191d0d0a189cfbed1d4c7e..36eedd2d3b4ea722bf8456ccfbf9308fbc44bef2 100644
--- a/executables/nr-softmodem-common.h
+++ b/executables/nr-softmodem-common.h
@@ -24,7 +24,7 @@
 #include <unistd.h>
 
 #include <sys/sysinfo.h>
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 #undef MALLOC
 #include "assertions.h"
 #include "PHY/types.h"
@@ -99,6 +99,8 @@
 #define CONFIG_HLP_DISABLNBIOT   "disable nb-iot, even if defined in config\n"
 #define CONFIG_HLP_LDPC_OFFLOAD  "enable LDPC offload\n"
 #define CONFIG_HLP_USRP_ARGS     "set the arguments to identify USRP (same syntax as in UHD)\n"
+#define CONFIG_HLP_TX_SUBDEV     "set the arguments to select tx_subdev (same syntax as in UHD)\n"
+#define CONFIG_HLP_RX_SUBDEV     "set the arguments to select rx_subdev (same syntax as in UHD)\n"
 
 #define CONFIG_HLP_FLOG          "Enable online log \n"
 #define CONFIG_HLP_LOGL          "Set the global log level, valid options: (4:trace, 3:debug, 2:info, 1:warn, (0:error))\n"
diff --git a/executables/nr-softmodem.c b/executables/nr-softmodem.c
index 5da5fe42f5269b52e9d2b9f558a72f2c0df23906..e3998975e85d744ebc0be2e8a224973e6e318f5f 100644
--- a/executables/nr-softmodem.c
+++ b/executables/nr-softmodem.c
@@ -39,8 +39,8 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
@@ -313,12 +313,12 @@ int create_gNB_tasks(uint32_t gnb_nb) {
     }
   }
 
-  if (AMF_MODE_ENABLED) {
+  if (get_softmodem_params()->sa) {
 
-   char*             gnb_ipv4_address_for_NGU      = NULL;
-   uint32_t          gnb_port_for_NGU              = 0;
-   char*             gnb_ipv4_address_for_S1U      = NULL;
-   uint32_t          gnb_port_for_S1U              = 0;
+    char*             gnb_ipv4_address_for_NGU      = NULL;
+    uint32_t          gnb_port_for_NGU              = 0;
+    char*             gnb_ipv4_address_for_S1U      = NULL;
+    uint32_t          gnb_port_for_S1U              = 0;
     paramdef_t NETParams[]  =  GNBNETPARAMS_DESC;
     char aprefix[MAX_OPTNAME_SIZE*2 + 8];
     sprintf(aprefix,"%s.[%i].%s",GNB_CONFIG_STRING_GNB_LIST,0,GNB_CONFIG_STRING_NETWORK_INTERFACES_CONFIG);
@@ -326,23 +326,17 @@ int create_gNB_tasks(uint32_t gnb_nb) {
     
     for(int i = GNB_INTERFACE_NAME_FOR_NG_AMF_IDX; i <= GNB_IPV4_ADDRESS_FOR_NG_AMF_IDX; i++) {
       if( NETParams[i].strptr == NULL) {
-	LOG_E(NGAP, "No configuration in the file.\n");
-	NGAP_CONF_MODE = 0;
+	LOG_E(NGAP, "No AMF configuration in the file.\n");
       } else {
 	LOG_D(NGAP, "Configuration in the file: %s.\n",*NETParams[i].strptr);
       }
     }
-
+    
     if (gnb_nb > 0) {
-      if(NGAP_CONF_MODE) {
-        if (itti_create_task (TASK_NGAP, ngap_gNB_task, NULL) < 0) {
-          LOG_E(NGAP, "Create task for NGAP failed\n");
-          return -1;
-        }
-      } else {
-        LOG_I(NGAP, "Ngap task not created\n");
+      if (itti_create_task (TASK_NGAP, ngap_gNB_task, NULL) < 0) {
+        LOG_E(NGAP, "Create task for NGAP failed\n");
+        return -1;
       }
-
     }
   }
 
@@ -359,8 +353,8 @@ int create_gNB_tasks(uint32_t gnb_nb) {
       return -1;
     }
 
-    //Use check on x2ap to consider the NSA scenario and check on AMF_MODE_ENABLED for the SA scenario
-    if(is_x2ap_enabled() || AMF_MODE_ENABLED) {
+    //Use check on x2ap to consider the NSA scenario and check for SA scenario
+    if(is_x2ap_enabled() || get_softmodem_params()->sa) {
       if (itti_create_task (TASK_GTPV1_U, &gtpv1uTask, NULL) < 0) {
         LOG_E(GTPU, "Create task for GTPV1U failed\n");
         return -1;
@@ -630,8 +624,6 @@ int main( int argc, char **argv ) {
   }
 
   openair0_cfg[0].threequarter_fs = threequarter_fs;
-  AMF_MODE_ENABLED = get_softmodem_params()->sa;
-  NGAP_CONF_MODE   = get_softmodem_params()->sa;
 
   if (get_softmodem_params()->do_ra)
     AssertFatal(get_softmodem_params()->phy_test == 0,"RA and phy_test are mutually exclusive\n");
diff --git a/executables/nr-ue.c b/executables/nr-ue.c
index 08cb7de7e5f043f51b05834baca42a0a2d2cbcb9..1781ea3d91db49cebfad916f3ce1028179ff978b 100644
--- a/executables/nr-ue.c
+++ b/executables/nr-ue.c
@@ -34,6 +34,7 @@
 #include "LAYER2/nr_pdcp/nr_pdcp_entity.h"
 #include "SCHED_NR_UE/pucch_uci_ue_nr.h"
 #include "openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.h"
+#include "PHY/NR_REFSIG/refsig_defs_ue.h"
 
 /*
  *  NR SLOT PROCESSING SEQUENCE
@@ -112,12 +113,10 @@ static size_t dump_L1_UE_meas_stats(PHY_VARS_NR_UE *ue, char *output, size_t max
   const char *end = output + max_len;
   output += print_meas_log(&ue->phy_proc_tx, "L1 TX processing", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->ulsch_encoding_stats, "ULSCH encoding", NULL, NULL, output, end - output);
-  output += print_meas_log(&ue->phy_proc_rx[0], "L1 RX processing t0", NULL, NULL, output, end - output);
-  output += print_meas_log(&ue->phy_proc_rx[1], "L1 RX processing t1", NULL, NULL, output, end - output);
+  output += print_meas_log(&ue->phy_proc_rx, "L1 RX processing", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->ue_ul_indication_stats, "UL Indication", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->rx_pdsch_stats, "PDSCH receiver", NULL, NULL, output, end - output);
-  output += print_meas_log(&ue->dlsch_decoding_stats[0], "PDSCH decoding t0", NULL, NULL, output, end - output);
-  output += print_meas_log(&ue->dlsch_decoding_stats[1], "PDSCH decoding t1", NULL, NULL, output, end - output);
+  output += print_meas_log(&ue->dlsch_decoding_stats, "PDSCH decoding", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->dlsch_deinterleaving_stats, " -> Deinterleive", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->dlsch_rate_unmatching_stats, " -> Rate Unmatch", NULL, NULL, output, end - output);
   output += print_meas_log(&ue->dlsch_ldpc_decoding_stats, " ->  LDPC Decode", NULL, NULL, output, end - output);
@@ -326,7 +325,7 @@ static void check_nr_prach(NR_UE_MAC_INST_t *mac, nr_uplink_indication_t *ul_inf
     {
       L1_nsa_prach_procedures(ul_info->frame_tx, ul_info->slot_tx, prach_pdu);
       ul_config->number_pdus = 0;
-      ul_info->ue_sched_mode = SCHED_ALL;
+      ul_info->ue_sched_mode = SCHED_PUSCH;
     }
     else if (nr_prach == 2)
     {
@@ -426,7 +425,7 @@ static void *NRUE_phy_stub_standalone_pnf_task(void *arg)
     ul_info.slot_rx = slot;
     ul_info.slot_tx = (slot + slot_ahead) % slots_per_frame;
     ul_info.frame_tx = (ul_info.slot_rx + slot_ahead >= slots_per_frame) ? ul_info.frame_rx + 1 : ul_info.frame_rx;
-    ul_info.ue_sched_mode = SCHED_ALL;
+    ul_info.ue_sched_mode = SCHED_PUSCH;
 
     if (pthread_mutex_lock(&mac->mutex_dl_info)) abort();
 
@@ -436,7 +435,6 @@ static void *NRUE_phy_stub_standalone_pnf_task(void *arg)
     mac->dl_info.module_id = mod_id;
     mac->dl_info.frame = frame;
     mac->dl_info.slot = slot;
-    mac->dl_info.thread_id = 0;
     mac->dl_info.dci_ind = NULL;
     mac->dl_info.rx_ind = NULL;
 
@@ -457,8 +455,8 @@ static void *NRUE_phy_stub_standalone_pnf_task(void *arg)
     {
       LOG_D(NR_MAC, "Slot %d. calling nr_ue_ul_ind() and nr_ue_pucch_scheduler() from %s\n", ul_info.slot_tx, __FUNCTION__);
       nr_ue_scheduler(NULL, &ul_info);
-      nr_ue_prach_scheduler(mod_id, ul_info.frame_tx, ul_info.slot_tx, ul_info.thread_id);
-      nr_ue_pucch_scheduler(mod_id, ul_info.frame_tx, ul_info.slot_tx, ul_info.thread_id);
+      nr_ue_prach_scheduler(mod_id, ul_info.frame_tx, ul_info.slot_tx);
+      nr_ue_pucch_scheduler(mod_id, ul_info.frame_tx, ul_info.slot_tx, NULL);
       check_nr_prach(mac, &ul_info, &prach_resources);
     }
     if (!IS_SOFTMODEM_NOS1 && get_softmodem_params()->sa) {
@@ -595,13 +593,13 @@ static void UE_synch(void *arg) {
 
           UE->rfdevice.trx_set_freq_func(&UE->rfdevice,&openair0_cfg[0],0);
         }
+      }
+      break;
 
-        break;
+    case si:
+    default:
+      break;
 
-      case si:
-      default:
-        break;
-      }
   }
 }
 
@@ -610,12 +608,9 @@ void processSlotTX(void *arg) {
   nr_rxtx_thread_data_t *rxtxD = (nr_rxtx_thread_data_t *) arg;
   UE_nr_rxtx_proc_t *proc = &rxtxD->proc;
   PHY_VARS_NR_UE    *UE   = rxtxD->UE;
-  fapi_nr_config_request_t *cfg = &UE->nrUE_config;
-  int tx_slot_type = nr_ue_slot_select(cfg, proc->frame_tx, proc->nr_slot_tx);
-  uint8_t gNB_id = 0;
 
-  LOG_D(PHY,"processSlotTX %d.%d => slot type %d\n",proc->frame_tx,proc->nr_slot_tx,tx_slot_type);
-  if (tx_slot_type == NR_UPLINK_SLOT || tx_slot_type == NR_MIXED_SLOT){
+  LOG_D(PHY,"%d.%d => slot type %d\n", proc->frame_tx, proc->nr_slot_tx, proc->tx_slot_type);
+  if (proc->tx_slot_type == NR_UPLINK_SLOT || proc->tx_slot_type == NR_MIXED_SLOT){
 
     // trigger L2 to run ue_scheduler thru IF module
     // [TODO] mapping right after NR initial sync
@@ -625,33 +620,26 @@ void processSlotTX(void *arg) {
       memset((void*)&ul_indication, 0, sizeof(ul_indication));
 
       ul_indication.module_id = UE->Mod_id;
-      ul_indication.gNB_index = gNB_id;
+      ul_indication.gNB_index = proc->gNB_id;
       ul_indication.cc_id     = UE->CC_id;
       ul_indication.frame_rx  = proc->frame_rx;
       ul_indication.slot_rx   = proc->nr_slot_rx;
       ul_indication.frame_tx  = proc->frame_tx;
       ul_indication.slot_tx   = proc->nr_slot_tx;
-      ul_indication.thread_id = proc->thread_id;
       ul_indication.ue_sched_mode = rxtxD->ue_sched_mode;
 
       UE->if_inst->ul_indication(&ul_indication);
       stop_meas(&UE->ue_ul_indication_stats);
     }
 
-    if (rxtxD->ue_sched_mode != NOT_PUSCH) {
-      phy_procedures_nrUE_TX(UE,proc,0);
-    }
+    phy_procedures_nrUE_TX(UE,proc,proc->gNB_id);
   }
 }
 
-void processSlotRX(void *arg) {
+void UE_processing(nr_rxtx_thread_data_t *rxtxD) {
 
-  nr_rxtx_thread_data_t *rxtxD = (nr_rxtx_thread_data_t *) arg;
   UE_nr_rxtx_proc_t *proc = &rxtxD->proc;
   PHY_VARS_NR_UE    *UE   = rxtxD->UE;
-  fapi_nr_config_request_t *cfg = &UE->nrUE_config;
-  int rx_slot_type = nr_ue_slot_select(cfg, proc->frame_rx, proc->nr_slot_rx);
-  int tx_slot_type = nr_ue_slot_select(cfg, proc->frame_tx, proc->nr_slot_tx);
   uint8_t gNB_id = 0;
   NR_UE_PDCCH_CONFIG phy_pdcch_config={0};
 
@@ -665,7 +653,7 @@ void processSlotRX(void *arg) {
     }
   }
 
-  if (rx_slot_type == NR_DOWNLINK_SLOT || rx_slot_type == NR_MIXED_SLOT){
+  if (proc->rx_slot_type == NR_DOWNLINK_SLOT || proc->rx_slot_type == NR_MIXED_SLOT){
 
     if(UE->if_inst != NULL && UE->if_inst->dl_indication != NULL) {
       nr_downlink_indication_t dl_indication;
@@ -695,35 +683,39 @@ void processSlotRX(void *arg) {
     if (res == NULL)
       return; // Tpool has been stopped
     delNotifiedFIFO_elt(res);
-
-    // calling UL_indication to schedule things other than PUSCH (eg, PUCCH)
-    rxtxD->ue_sched_mode = NOT_PUSCH;
-    processSlotTX(rxtxD);
-
-  } else {
-    rxtxD->ue_sched_mode = SCHED_ALL;
-    processSlotTX(rxtxD);
   }
 
-  if (tx_slot_type == NR_UPLINK_SLOT || tx_slot_type == NR_MIXED_SLOT){
+  if (proc->tx_slot_type == NR_UPLINK_SLOT || proc->tx_slot_type == NR_MIXED_SLOT) {
+    nr_phy_data_t phy_data = {0};
+    if (UE->if_inst != NULL && UE->if_inst->ul_indication != NULL) {
+      nr_uplink_indication_t ul_indication;
+      memset((void*)&ul_indication, 0, sizeof(ul_indication));
+      ul_indication.module_id     = UE->Mod_id;
+      ul_indication.gNB_index     = gNB_id;
+      ul_indication.cc_id         = UE->CC_id;
+      ul_indication.frame_rx      = proc->frame_rx;
+      ul_indication.slot_rx       = proc->nr_slot_rx;
+      ul_indication.frame_tx      = proc->frame_tx;
+      ul_indication.slot_tx       = proc->nr_slot_tx;
+      ul_indication.ue_sched_mode = SCHED_PUCCH;
+      ul_indication.phy_data      = &phy_data;
+      UE->if_inst->ul_indication(&ul_indication);
+    }
     if (UE->UE_mode[gNB_id] <= PUSCH) {
-      if (get_softmodem_params()->usim_test==0) {
-        pucch_procedures_ue_nr(UE,
-                               gNB_id,
-                               proc);
-      }
-
-      LOG_D(PHY, "Sending Uplink data \n");
-      nr_ue_pusch_common_procedures(UE,
-                                    proc->nr_slot_tx,
-                                    &UE->frame_parms,
-                                    UE->frame_parms.nb_antennas_tx);
+      pucch_procedures_ue_nr(UE,
+                             gNB_id,
+                             proc,
+                             &phy_data);
     }
 
-    if (UE->UE_mode[gNB_id] > NOT_SYNCHED && UE->UE_mode[gNB_id] < PUSCH) {
-      nr_ue_prach_procedures(UE, proc, gNB_id);
-    }
-    LOG_D(PHY,"****** end TX-Chain for AbsSubframe %d.%d ******\n", proc->frame_tx, proc->nr_slot_tx);
+    LOG_D(PHY, "Sending Uplink data \n");
+    nr_ue_pusch_common_procedures(UE,
+                                  proc->nr_slot_tx,
+                                  &UE->frame_parms,
+                                  UE->frame_parms.nb_antennas_tx);
+
+    if (UE->UE_mode[gNB_id] > NOT_SYNCHED && UE->UE_mode[gNB_id] < PUSCH)
+      nr_ue_prach_procedures(UE, proc, proc->gNB_id);
   }
 
   ue_ta_procedures(UE, proc->nr_slot_tx, proc->frame_tx);
@@ -832,6 +824,7 @@ void *UE_thread(void *arg) {
   openair0_timestamp timestamp, writeTimestamp;
   void *rxp[NB_ANTENNAS_RX], *txp[NB_ANTENNAS_TX];
   int start_rx_stream = 0;
+  fapi_nr_config_request_t *cfg = &UE->nrUE_config;
   AssertFatal(0== openair0_device_load(&(UE->rfdevice), &openair0_cfg[0]), "");
   UE->rfdevice.host_type = RAU_HOST;
   UE->lost_sync = 0;
@@ -844,8 +837,6 @@ void *UE_thread(void *arg) {
   notifiedFIFO_t freeBlocks;
   initNotifiedFIFO_nothreadSafe(&freeBlocks);
 
-  int nbSlotProcessing=0;
-  int thread_idx=0;
   NR_UE_MAC_INST_t *mac = get_mac_inst(0);
   int timing_advance = UE->timing_advance;
 
@@ -853,21 +844,8 @@ void *UE_thread(void *arg) {
   const int nb_slot_frame = UE->frame_parms.slots_per_frame;
   int absolute_slot=0, decoded_frame_rx=INT_MAX, trashed_frames=0;
 
-  for (int i=0; i<NR_RX_NB_TH+1; i++) {// NR_RX_NB_TH working + 1 we are making to be pushed
-    notifiedFIFO_elt_t *newElt = newNotifiedFIFO_elt(sizeof(nr_rxtx_thread_data_t), RX_JOB_ID,&nf,processSlotRX);
-    nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(newElt);
-    initNotifiedFIFO(&curMsg->txFifo);
-    pushNotifiedFIFO_nothreadSafe(&freeBlocks, newElt);
-  }
-
   while (!oai_exit) {
     if (UE->lost_sync) {
-      int nb = abortTpoolJob(&(get_nrUE_params()->Tpool),RX_JOB_ID);
-      nb += abortNotifiedFIFOJob(&nf, RX_JOB_ID);
-      LOG_I(PHY,"Number of aborted slots %d\n",nb);
-      for (int i=0; i<nb; i++)
-        pushNotifiedFIFO_nothreadSafe(&freeBlocks, newNotifiedFIFO_elt(sizeof(nr_rxtx_thread_data_t), RX_JOB_ID,&nf,processSlotRX));
-      nbSlotProcessing = 0;
       UE->is_synchronized = 0;
       UE->lost_sync = 0;
     }
@@ -929,25 +907,19 @@ void *UE_thread(void *arg) {
 
     absolute_slot++;
 
-    // whatever means thread_idx
-    // Fix me: will be wrong when slot 1 is slow, as slot 2 finishes
-    // Slot 3 will overlap if NR_RX_NB_TH is 2
-    // this is general failure in UE !!!
-    thread_idx = absolute_slot % NR_RX_NB_TH;
     int slot_nr = absolute_slot % nb_slot_frame;
-    notifiedFIFO_elt_t *msgToPush;
-    AssertFatal((msgToPush=pullNotifiedFIFO_nothreadSafe(&freeBlocks)) != NULL,"chained list failure");
-    nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(msgToPush);
-    curMsg->UE=UE;
+    nr_rxtx_thread_data_t curMsg = {0};
+    curMsg.UE=UE;
     // update thread index for received subframe
-    curMsg->proc.thread_id   = thread_idx;
-    curMsg->proc.CC_id       = UE->CC_id;
-    curMsg->proc.nr_slot_rx  = slot_nr;
-    curMsg->proc.nr_slot_tx  = (absolute_slot + DURATION_RX_TO_TX) % nb_slot_frame;
-    curMsg->proc.frame_rx    = (absolute_slot/nb_slot_frame) % MAX_FRAME_NUMBER;
-    curMsg->proc.frame_tx    = ((absolute_slot+DURATION_RX_TO_TX)/nb_slot_frame) % MAX_FRAME_NUMBER;
-    curMsg->proc.decoded_frame_rx=-1;
-    //LOG_I(PHY,"Process slot %d thread Idx %d total gain %d\n", slot_nr, thread_idx, UE->rx_total_gain_dB);
+    curMsg.proc.CC_id       = UE->CC_id;
+    curMsg.proc.nr_slot_rx  = slot_nr;
+    curMsg.proc.nr_slot_tx  = (absolute_slot + DURATION_RX_TO_TX) % nb_slot_frame;
+    curMsg.proc.frame_rx    = (absolute_slot/nb_slot_frame) % MAX_FRAME_NUMBER;
+    curMsg.proc.frame_tx    = ((absolute_slot+DURATION_RX_TO_TX)/nb_slot_frame) % MAX_FRAME_NUMBER;
+    curMsg.proc.rx_slot_type = nr_ue_slot_select(cfg, curMsg.proc.frame_rx, curMsg.proc.nr_slot_rx);
+    curMsg.proc.tx_slot_type = nr_ue_slot_select(cfg, curMsg.proc.frame_tx, curMsg.proc.nr_slot_tx);
+    curMsg.proc.decoded_frame_rx=-1;
+    //LOG_I(PHY,"Process slot %d total gain %d\n", slot_nr, UE->rx_total_gain_dB);
 
 #ifdef OAI_ADRV9371_ZC706
     /*uint32_t total_gain_dB_prev = 0;
@@ -1002,30 +974,20 @@ void *UE_thread(void *arg) {
         LOG_E(PHY,"can't compensate: diff =%d\n", first_symbols);
     }
 
-    curMsg->proc.timestamp_tx = timestamp+
+    curMsg.proc.timestamp_tx = timestamp+
       UE->frame_parms.get_samples_slot_timestamp(slot_nr,&UE->frame_parms,DURATION_RX_TO_TX) 
       - firstSymSamp;
 
-    notifiedFIFO_elt_t *res;
-
-    while (nbSlotProcessing >= NR_RX_NB_TH) {
-      res=pullTpool(&nf, &(get_nrUE_params()->Tpool));
-      if (res == NULL)
-        break; // Tpool has been stopped
-      nbSlotProcessing--;
-      nr_rxtx_thread_data_t *tmp=(nr_rxtx_thread_data_t *)res->msgData;
-
-      if (tmp->proc.decoded_frame_rx != -1)
-        decoded_frame_rx=(((mac->mib->systemFrameNumber.buf[0] >> mac->mib->systemFrameNumber.bits_unused)<<4) | tmp->proc.decoded_frame_rx);
-      else
-         decoded_frame_rx=-1;
+    UE_processing(&curMsg);
 
-      pushNotifiedFIFO_nothreadSafe(&freeBlocks,res);
-    }
+    if (curMsg.proc.decoded_frame_rx != -1)
+      decoded_frame_rx=(((mac->mib->systemFrameNumber.buf[0] >> mac->mib->systemFrameNumber.bits_unused)<<4) | curMsg.proc.decoded_frame_rx);
+    else
+       decoded_frame_rx=-1;
 
-    if (decoded_frame_rx>0 && decoded_frame_rx != curMsg->proc.frame_rx)
+    if (decoded_frame_rx>0 && decoded_frame_rx != curMsg.proc.frame_rx)
       LOG_E(PHY,"Decoded frame index (%d) is not compatible with current context (%d), UE should go back to synch mode\n",
-            decoded_frame_rx, curMsg->proc.frame_rx);
+            decoded_frame_rx, curMsg.proc.frame_rx);
 
     // use previous timing_advance value to compute writeTimestamp
     writeTimestamp = timestamp+
@@ -1080,10 +1042,6 @@ void *UE_thread(void *arg) {
     for (int i=0; i<UE->frame_parms.nb_antennas_tx; i++)
       memset(txp[i], 0, writeBlockSize);
 
-    nbSlotProcessing++;
-    LOG_D(PHY,"Number of slots being processed at the moment: %d\n",nbSlotProcessing);
-    pushTpool(&(get_nrUE_params()->Tpool), msgToPush);
-
   } // while !oai_exit
 
   return NULL;
diff --git a/executables/nr-uesoftmodem.c b/executables/nr-uesoftmodem.c
index d4b0156843b863c27b4522fb0ae583c57a9bc1b9..a3a3fac12bd7b2613cc657f3b6deabc9803f9911 100644
--- a/executables/nr-uesoftmodem.c
+++ b/executables/nr-uesoftmodem.c
@@ -35,8 +35,8 @@
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 #include "common/utils/nr/nr_common.h"
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 #include "openair1/PHY/MODULATION/nr_modulation.h"
@@ -117,6 +117,8 @@ int                 vcdflag = 0;
 
 double          rx_gain_off = 0.0;
 char             *usrp_args = NULL;
+char             *tx_subdev = NULL;
+char             *rx_subdev = NULL;
 char       *rrc_config_path = NULL;
 char            *uecap_file = NULL;
 int               dumpframe = 0;
@@ -359,6 +361,8 @@ void init_openair0(void) {
     openair0_cfg[card].configFilename = get_softmodem_params()->rf_config_file;
 
     if (usrp_args) openair0_cfg[card].sdr_addrs = usrp_args;
+    if (tx_subdev) openair0_cfg[card].tx_subdev = tx_subdev;
+    if (rx_subdev) openair0_cfg[card].rx_subdev = rx_subdev;
 
   }
 }
diff --git a/executables/nr-uesoftmodem.h b/executables/nr-uesoftmodem.h
index e4bbae562c30aadfa6c10df5a1424402876b997f..f8ce6c1f27163b65c4f8548c95a7e544a486d4f2 100644
--- a/executables/nr-uesoftmodem.h
+++ b/executables/nr-uesoftmodem.h
@@ -30,6 +30,8 @@
 /*------------------------------------------------------------------------------------------------------------------------------------------*/
 #define CMDLINE_NRUEPARAMS_DESC {  \
     {"usrp-args",                CONFIG_HLP_USRP_ARGS,   0,               strptr:&usrp_args,         defstrval:"type=b200", TYPE_STRING,   0},    \
+    {"tx_subdev",                CONFIG_HLP_TX_SUBDEV,   0,               strptr:&tx_subdev,         defstrval:NULL, TYPE_STRING,   0},    \
+    {"rx_subdev",                CONFIG_HLP_RX_SUBDEV,   0,               strptr:&rx_subdev,         defstrval:NULL, TYPE_STRING,   0},    \
     {"single-thread-disable",    CONFIG_HLP_NOSNGLT,     PARAMFLAG_BOOL,  iptr:&single_thread_flag,           defintval:1,           TYPE_INT,    0}, \
     {"dlsch-parallel",           CONFIG_HLP_DLSCH_PARA,  0,               u8ptr:NULL,       defintval:0,           TYPE_UINT8,  0}, \
     {"offset-divisor",           CONFIG_HLP_OFFSET_DIV,  0,               uptr:&nrUE_params.ofdm_offset_divisor,    defuintval:8,           TYPE_UINT32,  0}, \
@@ -97,5 +99,4 @@ extern void start_oai_nrue_threads(void);
 void *UE_thread(void *arg);
 void init_nr_ue_vars(PHY_VARS_NR_UE *ue, uint8_t UE_id, uint8_t abstraction_flag);
 void init_nrUE_standalone_thread(int ue_idx);
-
 #endif
diff --git a/executables/softmodem-common.c b/executables/softmodem-common.c
index d42c46a8953169f4756ec9de16967b73c1d9fa55..5f06f85fa9adc628dd60773c6893e502c26884c9 100644
--- a/executables/softmodem-common.c
+++ b/executables/softmodem-common.c
@@ -210,10 +210,11 @@ void set_softmodem_sighandler(void) {
   act.sa_handler=signal_handler;
   sigaction(SOFTMODEM_RTSIGNAL,&act,&oldact);
   // Disabled in order generate a core dump for analysis with gdb
+  // SIGINT is handled so that CTRL-C exits cleanly (e.g. record player, USRP...)
+  signal(SIGINT,  signal_handler);
   # if 0
   printf("Send signal %d to display resource usage...\n",SIGRTMIN+1);
   signal(SIGSEGV, signal_handler);
-  signal(SIGINT,  signal_handler);
   signal(SIGTERM, signal_handler);
   signal(SIGABRT, signal_handler);
   #endif
diff --git a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_interface.h b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_interface.h
index e1cfc2633607f4a8c95b81c4a469aa878bcd0b41..f14d5220bd852d7a706ae3b20e954cfe9552e160 100644
--- a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_interface.h
+++ b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_interface.h
@@ -4060,4 +4060,42 @@ int nfapi_p7_update_checksum(uint8_t* buffer, uint32_t len);
  */
 int nfapi_p7_update_transmit_timestamp(uint8_t* buffer, uint32_t timestamp);
 
+/*! \brief Encodes a nfapi_nr_srs_normalized_channel_iq_matrix_t to a buffer
+ *
+ *  \param pMessageBuf A pointer to a nfapi_nr_srs_normalized_channel_iq_matrix_t structure
+ *  \param pPackedBuf A pointer to the buffer that the nfapi_nr_srs_normalized_channel_iq_matrix_t will be packed into
+ *  \param packedBufLen The size of the buffer
+ *  \return number of bytes written to the buffer
+ */
+int pack_nr_srs_normalized_channel_iq_matrix(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen);
+
+/*! \brief Decodes a nfapi_nr_srs_normalized_channel_iq_matrix_t from a buffer
+ *
+ *  \param pMessageBuf A pointer to an encoded nfapi_nr_srs_normalized_channel_iq_matrix_t
+ *  \param messageBufLen The size of the encoded nfapi_nr_srs_normalized_channel_iq_matrix_t
+ *  \param pUnpackedBuf A pointer to the nfapi_nr_srs_normalized_channel_iq_matrix_t
+ *  \param unpackedBufLen The size of nfapi_nr_srs_normalized_channel_iq_matrix_t structure.
+ *  \return 0 means success, -1 means failure.
+ */
+int unpack_nr_srs_normalized_channel_iq_matrix(void *pMessageBuf, uint32_t messageBufLen, void *pUnpackedBuf, uint32_t unpackedBufLen);
+
+/*! \brief Encodes a nfapi_nr_srs_beamforming_report_t to a buffer
+ *
+ *  \param pMessageBuf A pointer to a nfapi_nr_srs_beamforming_report_t structure
+ *  \param pPackedBuf A pointer to the buffer that the nfapi_nr_srs_beamforming_report_t will be packed into
+ *  \param packedBufLen The size of the buffer
+ *  \return number of bytes written to the buffer
+ */
+int pack_nr_srs_beamforming_report(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen);
+
+/*! \brief Decodes a nfapi_nr_srs_beamforming_report_t from a buffer
+ *
+ *  \param pMessageBuf A pointer to an encoded nfapi_nr_srs_beamforming_report_t
+ *  \param messageBufLen The size of the encoded nfapi_nr_srs_beamforming_report_t
+ *  \param pUnpackedBuf A pointer to the nfapi_nr_srs_beamforming_report_t
+ *  \param unpackedBufLen The size of nfapi_nr_srs_beamforming_report_t structure.
+ *  \return 0 means success, -1 means failure.
+ */
+int unpack_nr_srs_beamforming_report(void *pMessageBuf, uint32_t messageBufLen, void *pUnpackedBuf, uint32_t unpackedBufLen);
+
 #endif /* _NFAPI_INTERFACE_H_ */
diff --git a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
index 205e107bf9f65a0a372cb2931d5f5876505c71ca..cf17d6cdd48f63ce5fcdc13fe5580d7aaad789b1 100644
--- a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
+++ b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
@@ -1107,17 +1107,15 @@ typedef struct {
 typedef struct
 {
   nfapi_nr_dig_bf_interface_t* dig_bf_interface_list;
-
 } nfapi_nr_ul_beamforming_number_of_prgs_t;
 
 typedef struct
 {
-  uint16_t num_prgs;
-  uint16_t prg_size;
-  //watchout: dig_bf_interface here, in table 3-43 it's dig_bf_interfaces
-  uint8_t  dig_bf_interface;
-  nfapi_nr_ul_beamforming_number_of_prgs_t* prgs_list;//
-
+  uint8_t trp_scheme;         // This field shall be set to 0, to identify that this table is used.
+  uint16_t num_prgs;          // Number of PRGs spanning this allocation. Value : 1->275
+  uint16_t prg_size;          // Size in RBs of a precoding resource block group (PRG) – to which the same digital beamforming gets applied. Value: 1->275
+  uint8_t dig_bf_interface;   // Number of logical antenna ports (parallel streams) resulting from the Rx combining. Value: 0->255
+  nfapi_nr_ul_beamforming_number_of_prgs_t *prgs_list;
 } nfapi_nr_ul_beamforming_t;
 
 typedef struct
@@ -1292,6 +1290,28 @@ typedef struct
 
 } nfapi_nr_pucch_pdu_t;
 
+typedef struct {
+  uint16_t srs_bandwidth_start;                   // PRB index for the start of SRS signal transmission. The PRB index is relative to the CRB0 or reference Point A. 3GPP TS 38.211, section 6.4.1.4.3. Value: 0->268
+  uint8_t sequence_group;                         // Sequence group (u) as defined in 3GPP TS 38.211, section 6.4.1.4.2. Value: 0->29
+  uint8_t sequence_number;                        // Sequence number (v) as defined in 3GPP TS 38.211, section 6.4.1.4.2. Value: 0->1
+} nfapi_v4_srs_parameters_symbols_t;
+
+typedef struct {
+  uint16_t srs_bandwidth_size;                    // mSRS,b: Number of PRBs that are sounded for each SRS symbol, per 3GPP TS 38.211, section 6.4.1.4.3. Value: 4->272
+  nfapi_v4_srs_parameters_symbols_t *symbol_list;
+  uint32_t usage;                                 // Bitmap indicating the type of report(s) expected at L2 from the SRS signaled by this PDU. Bit positions: 0 – beamManagement; 1 – codebook; 2 – nonCodebook; 3 – antennaSwitching; 4 – 255: reserved. For each of these bit positions: 1 = requested; 0 = not requested. nUsage = sum(all bits in usage)
+  uint8_t report_type[4];                         // Interpretation of each Report Type depends on usage: beamManagement (1 = PRG SNR, 2-255 reserved); codebook (1 = PRG I and Q channel estimate, per srs Tx port and gNB antenna element, 2-255 reserved); nonCodebook (1 = PRG I and Q channel estimate, per SRI and gNB antenna element, 2-255 reserved); antennaSwitching (1 = SVD representation UE Rx and gNB sets of antenna element, 2-255 reserved); all (0 – no report required).
+  uint8_t singular_Value_representation;          // 0 – 8-bit dB; 1 – 16-bit linear; 255 – not applicable
+  uint8_t iq_representation;                      // 0 – 16 bit; 1 – 32-bit; 255 - not applicable
+  uint16_t prg_size;                              // 1-272; 0 – reserved
+  uint8_t num_total_ue_antennas;                  // 1 … 16 in this release. This is the total number of UE antennas for the usage.
+  uint32_t ue_antennas_in_this_srs_resource_set;  // Bitmap of UE antenna indices for the SRS Resource set, to which the SRS Resource for this PDU corresponds.
+  uint32_t sampled_ue_antennas;                   // Bitmap of UE antenna indices sampled by the SRS waveform corresponding to this PDU’s SRS Resource. Codebook: corresponds to antenna ports in SRS Resources; non-overlapping indices; Non-codebook: corresponds to SRIs; Antennas-switch: indices of UE Rx antennas in the total number of antennas.
+  uint8_t report_scope;                           // Which antennas Report (in ReportType) should account for: Value: 0: ports in sampledUeAntennas (i.e. SRS Resource); 1: ports in ueAntennasInSrsResourceSet (i.e. SRS Resource set). For antSwitch reports of SVD type, value 0 is only allowed if sampledUeAntennas = ueAntennasInSrsResourceSet.
+  uint8_t num_ul_spatial_streams_ports;           // In this release, L2 may set this number to 0 to leave spatial stream index assignment to L1 (e.g. L1 uses spatial streams reserved for SRS), regardless of the information in capability maxNumberUlSpatialStreams.
+  uint8_t Ul_spatial_stream_ports[256];           // Number of ports used for signaling this SRS allocation. Value: 0 --> (max # spatial streams - 1, per TLV)
+} nfapi_v4_srs_parameters_t;
+
 typedef struct {
   uint16_t rnti;                      // UE RNTI, Value: 1->65535
   uint32_t handle;                    // An opaque handling returned in the SRS.indication
@@ -1317,6 +1337,7 @@ typedef struct {
   uint16_t t_srs;                     // SRS-Periodicity in slots [3GPP TS 38.211, Sec 6.4.1.4.4], Value: 1,2,3,4,5,8,10,16,20,32,40,64,80,160,320,640,1280,2560
   uint16_t t_offset;                  // Slot offset value [3GPP TS 38.211, Sec 6.4.1.4.3], Value:0->2559
   nfapi_nr_ul_beamforming_t beamforming;
+  nfapi_v4_srs_parameters_t srs_parameters_v4;
 } nfapi_nr_srs_pdu_t;
 
 typedef enum {
@@ -1660,33 +1681,63 @@ typedef struct
 } nfapi_nr_uci_indication_t;
 
 
-/// 5G PHY FAPI Specification: SRS indication - Section 3.4.10, Table 3-73
+/// 5G PHY FAPI Specification: SRS indication - Section 3.4.10
+
+// Normalized channel I/Q matrix
 
 typedef struct {
-  uint8_t rb_snr;                 // SNR value in dB. Value: 0 -> 255 representing -64 dB to 63 dB with a step size 0.5 dB, 0xff will be set if this field is invalid.
-} nfapi_nr_srs_indication_reported_symbol_resource_block_t;
+  uint8_t normalized_iq_representation; // 0: 16-bit normalized complex number (iqSize = 2); 1: 32-bit normalized complex number (iqSize = 4)
+  uint16_t num_gnb_antenna_elements;    // Ng: Number of gNB antenna elements. Value: 0->511
+  uint16_t num_ue_srs_ports;            // Nu: Number of sampled UE SRS ports. Value: 0->7
+  uint16_t prg_size;                    // Size in RBs of a precoding resource block group (PRG) – to which the same digital beamforming gets applied. Value: 1->272
+  uint16_t num_prgs;                    // Number of PRGs Np to be reported for this SRS PDU. Value: 0-> 272
+  uint8_t channel_matrix[272*512*8*4];  // Array of (numPRGs*Nu*Ng) entries of the type denoted by iqRepresentation H{PRG pI} [ueAntenna uI, gNB antenna gI] = array[uI*Ng*Np + gI*Np + pI]; uI: 0…Nu-1 (UE antenna index); gI: 0…Ng-1 (gNB antenna index); pI: 0…Np-1 (PRG index)
+} nfapi_nr_srs_normalized_channel_iq_matrix_t;
+
+// Beamforming report
+
+typedef struct {
+  uint8_t rb_snr;                       // SNR value in dB. Value: 0 -> 255 representing -64 dB to 63 dB with a step size 0.5 dB, 0xff will be set if this field is invalid.
+} nfapi_nr_srs_reported_symbol_prgs_t;
+
+typedef struct {
+  uint16_t num_prgs;                    // Number of PRGs to be reported for this SRS PDU. Value: 0 -> 272.
+  nfapi_nr_srs_reported_symbol_prgs_t *prg_list;
+} nfapi_nr_srs_reported_symbol_t;
+
+typedef struct {
+  uint16_t prg_size;                    // Size in RBs of a precoding resource block group (PRG) – to which the same digital beamforming gets applied. Value: 1->275
+  uint8_t num_symbols;                  // Number of symbols for SRS. Value: 1 -> 4. If a PHY does not report for individual symbols then this parameter should be set to 1.
+  uint8_t wide_band_snr;                // SNR value in dB measured within configured SRS bandwidth on each symbol. Value: 0 -> 255 representing -64 dB to 63 dB with a step size 0.5 dB. 0xff will be set if this field is invalid.
+  uint8_t num_reported_symbols;         // Number of symbols reported in this message. This allows PHY to report individual symbols or aggregated symbols where this field will be set to 1. Value: 1 -> 4.
+  nfapi_nr_srs_reported_symbol_t *prgs;
+} nfapi_nr_srs_beamforming_report_t;
+
+// SRS indication
 
 typedef struct {
-  uint16_t num_rbs;               // Number of PRBs to be reported for this SRS PDU. Value: 0 -> 272.
-  nfapi_nr_srs_indication_reported_symbol_resource_block_t* rb_list;
-} nfapi_nr_srs_indication_reported_symbol_t;
+  uint16_t tag;                         // 0: Report is carried directly in the value field; 3: The offset from the end of the control portion of the message to the beginning of the report. Other values are reserved.
+  uint32_t length;                      // Length of the actual report in bytes, without the padding bytes.
+  uint32_t value[16384];                // tag=0: Only the most significant bytes of the size indicated by ‘length’ field are valid. Remaining bytes are zero padded to the nearest 32-bit boundary; Tag=2 Offset from the end of the control portion of the message to the payload is in the value field. Occupies 32-bits. NOTE(review): the tag comment lists 3, not 2, for the offset case - confirm against the FAPI specification.
+} nfapi_srs_report_tlv_t;
 
 typedef struct {
-  uint32_t handle;                // The handle passed to the PHY in the the UL_TTI.request SRS PDU.
-  uint16_t rnti;                  // The RNTI passed to the PHY in the UL_TTI.request SRS PDU. Value: 1 -> 65535.
-  uint16_t timing_advance;        // Timing advance TA measured for the UE [TS 38.213, Section 4.2]. NTA_new = NTA_old + (TA − 31) * 16 * 64 / (2^u). Value: 0 -> 63. 0xffff should be set if this field is invalid.
-  uint8_t num_symbols;            // Number of symbols for SRS. Value: 1 -> 4. If a PHY does not report for individual symbols then this parameter should be set to 1.
-  uint8_t wide_band_snr;          // SNR value in dB measured within configured SRS bandwidth on each symbol. Value: 0 -> 255 representing -64 dB to 63 dB with a step size 0.5 dB. 0xff will be set if this field is invalid.
-  uint8_t num_reported_symbols;   // Number of symbols reported in this message. This allows PHY to report individual symbols or aggregated symbols where this field will be set to 1. Value: 1 -> 4.
-  nfapi_nr_srs_indication_reported_symbol_t* reported_symbol_list;
+  uint32_t handle;                      // The handle passed to the PHY in the UL_TTI.request SRS PDU.
+  uint16_t rnti;                        // The RNTI passed to the PHY in the UL_TTI.request SRS PDU. Value: 1 -> 65535.
+  uint16_t timing_advance_offset;       // Timing advance TA measured for the UE in multiples of 16 * 64 * Tc / (2^u) [TS 38.213, Section 4.2]. Value: 0 -> 63. 0xffff will be set if this field is invalid.
+  int16_t timing_advance_offset_nsec;   // Timing advance measured for the UE between the reference uplink time and the observed arrival time for the UE. Value: -16800 … +16800 nanoseconds. 0xffff should be set if this field is invalid.
+  uint8_t srs_usage;                    // 0 – beamManagement; 1 – codebook; 2 – nonCodebook; 3 – antennaSwitching; 4 – 255: reserved; Note: This field matches the SRS usage field of the SRS PDU to which this report is linked.
+  uint8_t report_type;                  // The type of report included in or pointed to by Report TLV depends on the SRS usage: Beam management (1: Beamforming report); Codebook (1: Normalized Channel I/Q Matrix); nonCodebook (1: Normalized Channel I/Q Matrix); antennaSwitch (1: Channel SVD Representation); all (0: null report)
+  nfapi_srs_report_tlv_t *report_tlv;
 } nfapi_nr_srs_indication_pdu_t;
 
 typedef struct {
   nfapi_p7_message_header_t header;
-  uint16_t sfn;                   // SFN. Value: 0 -> 1023
-  uint16_t slot;                  // Slot. Value: 0 -> 159
-  uint8_t number_of_pdus;         // Number of PDUs included in this message. Value: 0 -> 255
-  nfapi_nr_srs_indication_pdu_t* pdu_list;
+  uint16_t sfn;                         // SFN. Value: 0 -> 1023
+  uint16_t slot;                        // Slot. Value: 0 -> 159
+  uint16_t control_length;              // Size of control portion of SRS indication. 0 if reports are included inline; >0 if reports are concatenated to the end of the message.
+  uint8_t number_of_pdus;               // Number of PDUs included in this message. Value: 0 -> 255
+  nfapi_nr_srs_indication_pdu_t *pdu_list;
 } nfapi_nr_srs_indication_t;
 
 
diff --git a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
index 98f89f599c83955496c1d8503e32d739f3d4aba3..e183736578c3ec86fb739ca6bd67cc5e0e5c71db 100644
--- a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
+++ b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
@@ -3099,42 +3099,131 @@ return 1;
 
 //SRS INDICATION
 
-static uint8_t pack_nr_srs_indication_body(nfapi_nr_srs_indication_pdu_t *value, uint8_t **ppWritePackedMsg, uint8_t *end)
-{
-	if(!(push32(value->handle, ppWritePackedMsg, end) &&
-	 	 push16(value->rnti, ppWritePackedMsg, end) &&
-		 push16(value->timing_advance, ppWritePackedMsg, end) &&
-		 push8(value->num_symbols, ppWritePackedMsg, end) &&
-		 push8(value->wide_band_snr, ppWritePackedMsg, end) &&
-		 push8(value->num_reported_symbols, ppWritePackedMsg, end) &&
-		 push8(value->reported_symbol_list->num_rbs, ppWritePackedMsg, end)
-		 ))
-		  return 0;
-	for(int i = 0; i < value->reported_symbol_list->num_rbs; i++)
-	{
-		if (!push8(value->reported_symbol_list->rb_list[i].rb_snr, ppWritePackedMsg, end))
-			return 0;
-	}
-	return 1;
+int pack_nr_srs_normalized_channel_iq_matrix(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen) {
+  // Packs the five header fields, then the channel matrix byte by byte. Returns the number of bytes written, 0 on failure.
+  nfapi_nr_srs_normalized_channel_iq_matrix_t *nr_srs_normalized_channel_iq_matrix = (nfapi_nr_srs_normalized_channel_iq_matrix_t*)pMessageBuf;
+
+  uint8_t *pWritePackedMessage = pPackedBuf;
+  uint8_t *end = pWritePackedMessage + packedBufLen; // arithmetic on the typed pointer; void* arithmetic is a GNU extension
+
+  if(!(push8(nr_srs_normalized_channel_iq_matrix->normalized_iq_representation, &pWritePackedMessage, end) &&
+       push16(nr_srs_normalized_channel_iq_matrix->num_gnb_antenna_elements, &pWritePackedMessage, end) &&
+       push16(nr_srs_normalized_channel_iq_matrix->num_ue_srs_ports, &pWritePackedMessage, end) &&
+       push16(nr_srs_normalized_channel_iq_matrix->prg_size, &pWritePackedMessage, end) &&
+       push16(nr_srs_normalized_channel_iq_matrix->num_prgs, &pWritePackedMessage, end))) {
+    return 0;
+  }
+
+  // 64-bit size: the product of three 16-bit counts overflows uint16_t (and can overflow 32 bits).
+  uint64_t channel_matrix_size = (uint64_t)nr_srs_normalized_channel_iq_matrix->num_prgs*nr_srs_normalized_channel_iq_matrix->num_ue_srs_ports*nr_srs_normalized_channel_iq_matrix->num_gnb_antenna_elements;
+  channel_matrix_size <<= (nr_srs_normalized_channel_iq_matrix->normalized_iq_representation == 0) ? 1 : 2; // 2 bytes per entry for representation 0, 4 bytes otherwise
+  if (channel_matrix_size > sizeof(nr_srs_normalized_channel_iq_matrix->channel_matrix)) {
+    return 0; // the counts describe more bytes than channel_matrix holds
+  }
+
+  for(uint64_t i = 0; i < channel_matrix_size; i++) {
+    if (!push8(nr_srs_normalized_channel_iq_matrix->channel_matrix[i], &pWritePackedMessage, end)) {
+      return 0;
+    }
+  }
+
+  // Message length
+  uintptr_t msgHead = (uintptr_t)pPackedBuf;
+  uintptr_t msgEnd = (uintptr_t)pWritePackedMessage;
+  return (msgEnd-msgHead);
 }
 
-static uint8_t pack_nr_srs_indication(void *msg, uint8_t **ppWritePackedMsg, uint8_t *end, nfapi_p7_codec_config_t* config)
-{
-	nfapi_nr_srs_indication_t *pNfapiMsg = (nfapi_nr_srs_indication_t*)msg;
+static uint8_t pack_nr_srs_reported_symbol(nfapi_nr_srs_reported_symbol_t *prgs, uint8_t **ppWritePackedMsg, uint8_t *end) {
 
-	if (!(push16(pNfapiMsg->sfn , ppWritePackedMsg, end) &&
-		push16(pNfapiMsg->slot , ppWritePackedMsg, end) &&
-		push16(pNfapiMsg->number_of_pdus, ppWritePackedMsg, end)
-		))
-			return 0;
+  if(!push16(prgs->num_prgs, ppWritePackedMsg, end)) {
+    return 0;
+  }
 
-	for(int i=0; i<pNfapiMsg->number_of_pdus;i++)
-	{
-		if(!pack_nr_srs_indication_body(&(pNfapiMsg->pdu_list[i]),ppWritePackedMsg,end))
-		return 0;
-	}
+  for(int i = 0; i < prgs->num_prgs; i++) {
+    if (!push8(prgs->prg_list[i].rb_snr, ppWritePackedMsg, end)) {
+      return 0;
+    }
+  }
 
-return 1;
+  return 1;
+}
+
+int pack_nr_srs_beamforming_report(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen) {
+  // Packs the report header, then the reported-symbol entry that 'prgs' points to. Returns the number of bytes written, 0 on failure.
+  nfapi_nr_srs_beamforming_report_t *nr_srs_beamforming_report = (nfapi_nr_srs_beamforming_report_t*)pMessageBuf;
+
+  uint8_t *pWritePackedMessage = pPackedBuf;
+  uint8_t *end = pWritePackedMessage + packedBufLen; // arithmetic on the typed pointer; void* arithmetic is a GNU extension
+
+  if(!(push16(nr_srs_beamforming_report->prg_size, &pWritePackedMessage, end) &&
+       push8(nr_srs_beamforming_report->num_symbols, &pWritePackedMessage, end) &&
+       push8(nr_srs_beamforming_report->wide_band_snr, &pWritePackedMessage, end) &&
+       push8(nr_srs_beamforming_report->num_reported_symbols, &pWritePackedMessage, end))) {
+    return 0;
+  }
+  // NOTE(review): a single entry is packed whatever num_reported_symbols says - confirm this is intended. A NULL 'prgs' is a failure.
+  if(!nr_srs_beamforming_report->prgs || !pack_nr_srs_reported_symbol(nr_srs_beamforming_report->prgs, &pWritePackedMessage, end)) {
+    return 0;
+  }
+
+  // Message length
+  uintptr_t msgHead = (uintptr_t)pPackedBuf;
+  uintptr_t msgEnd = (uintptr_t)pWritePackedMessage;
+  return (msgEnd-msgHead);
+}
+
+static uint8_t pack_nr_srs_report_tlv(nfapi_srs_report_tlv_t *report_tlv, uint8_t **ppWritePackedMsg, uint8_t *end) {
+  // Packs tag and length, then 'length' 32-bit words of value. Returns 1 on success, 0 on failure.
+  if(!(push16(report_tlv->tag, ppWritePackedMsg, end) &&
+       push32(report_tlv->length, ppWritePackedMsg, end))) {
+    return 0;
+  }
+  if(report_tlv->length > sizeof(report_tlv->value)/sizeof(report_tlv->value[0])) return 0; // refuse to read past the end of value[]
+  for(uint32_t i = 0; i < report_tlv->length; i++) { // NOTE(review): the struct documents length in bytes, yet it is used here as a word count - confirm
+    if (!push32(report_tlv->value[i], ppWritePackedMsg, end)) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+static uint8_t pack_nr_srs_indication_body(nfapi_nr_srs_indication_pdu_t *value, uint8_t **ppWritePackedMsg, uint8_t *end) {
+  // Packs one SRS indication PDU: the fixed fields first, then its report TLV. Returns 1 on success, 0 if any step fails.
+  if(!(push32(value->handle, ppWritePackedMsg, end) &&
+       push16(value->rnti, ppWritePackedMsg, end) &&
+       push16(value->timing_advance_offset, ppWritePackedMsg, end) &&
+       pushs16(value->timing_advance_offset_nsec, ppWritePackedMsg, end) &&
+       push8(value->srs_usage, ppWritePackedMsg, end) &&
+       push8(value->report_type, ppWritePackedMsg, end))) {
+    return 0;
+  }
+  // report_tlv is dereferenced without a NULL check - assumes the caller always provides it (TODO confirm).
+  if(!pack_nr_srs_report_tlv(value->report_tlv,ppWritePackedMsg, end)) {
+    return 0;
+  }
+
+  return 1;
+}
+
+static uint8_t pack_nr_srs_indication(void *msg, uint8_t **ppWritePackedMsg, uint8_t *end, nfapi_p7_codec_config_t* config) {
+  // Packs an SRS indication: sfn, slot, control_length and PDU count, then each PDU in pdu_list. Returns 1 on success, 0 on failure.
+  nfapi_nr_srs_indication_t *pNfapiMsg = (nfapi_nr_srs_indication_t*)msg;
+
+  if (!(push16(pNfapiMsg->sfn , ppWritePackedMsg, end) &&
+        push16(pNfapiMsg->slot , ppWritePackedMsg, end) &&
+        push16(pNfapiMsg->control_length , ppWritePackedMsg, end) &&
+        push8(pNfapiMsg->number_of_pdus, ppWritePackedMsg, end))) {
+    return 0;
+  }
+  // The PDU count is written as 8 bits, matching the pull8 in unpack_nr_srs_indication.
+  for(int i=0; i<pNfapiMsg->number_of_pdus;i++) {
+    if(!pack_nr_srs_indication_body(&(pNfapiMsg->pdu_list[i]),ppWritePackedMsg, end)) {
+      return 0;
+    }
+  }
+
+  return 1;
 }
 
 //RACH INDICATION
@@ -5793,40 +5882,133 @@ return 1;
 
 //SRS INDICATION
 
-static uint8_t unpack_nr_srs_indication_body(nfapi_nr_srs_indication_pdu_t* value, uint8_t **ppReadPackedMsg, uint8_t *end)
-{
-	if(!(pull32(ppReadPackedMsg, &value->handle, end) &&
-	 	 pull16(ppReadPackedMsg, &value->rnti, end) &&
-		 pull16(ppReadPackedMsg, &value->timing_advance, end) &&
-		 pull8(ppReadPackedMsg, &value->num_symbols, end) &&
-		 pull8(ppReadPackedMsg, &value->wide_band_snr, end) &&
-		 pull8(ppReadPackedMsg, &value->num_reported_symbols, end) &&
-		 pull16(ppReadPackedMsg, &value->reported_symbol_list->num_rbs, end)
-		 ))
-		  return 0;
-	for(int i = 0; i < value->reported_symbol_list->num_rbs; i++)
-	{
-		if (!pull8(ppReadPackedMsg, &value->reported_symbol_list->rb_list[i].rb_snr, end))
-			return 0;
-	}
-	return 1;
+int unpack_nr_srs_normalized_channel_iq_matrix(void *pMessageBuf, uint32_t messageBufLen, void *pUnpackedBuf, uint32_t unpackedBufLen) {
+  // Clears the output structure, then unpacks the five header fields and the channel matrix bytes. Returns 0 on success, -1 on failure.
+  nfapi_nr_srs_normalized_channel_iq_matrix_t *nr_srs_normalized_channel_iq_matrix = (nfapi_nr_srs_normalized_channel_iq_matrix_t*)pUnpackedBuf;
+  uint8_t *pReadPackedMessage = pMessageBuf;
+  uint8_t *end = pReadPackedMessage + messageBufLen; // arithmetic on the typed pointer; void* arithmetic is a GNU extension
+
+  memset(pUnpackedBuf, 0, unpackedBufLen);
+
+  if(!(pull8(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->normalized_iq_representation, end) &&
+       pull16(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->num_gnb_antenna_elements, end) &&
+       pull16(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->num_ue_srs_ports, end) &&
+       pull16(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->prg_size, end) &&
+       pull16(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->num_prgs, end))) {
+    return -1;
+  }
+
+  // 64-bit size: the product of three received 16-bit counts overflows uint16_t (and can overflow 32 bits).
+  uint64_t channel_matrix_size = (uint64_t)nr_srs_normalized_channel_iq_matrix->num_prgs*nr_srs_normalized_channel_iq_matrix->num_ue_srs_ports*nr_srs_normalized_channel_iq_matrix->num_gnb_antenna_elements;
+  channel_matrix_size <<= (nr_srs_normalized_channel_iq_matrix->normalized_iq_representation == 0) ? 1 : 2; // 2 bytes per entry for representation 0, 4 bytes otherwise
+  if (channel_matrix_size > sizeof(nr_srs_normalized_channel_iq_matrix->channel_matrix)) {
+    return -1; // received counts would write past the end of channel_matrix
+  }
+
+  for(uint64_t i = 0; i < channel_matrix_size; i++) {
+    if (!pull8(&pReadPackedMessage, &nr_srs_normalized_channel_iq_matrix->channel_matrix[i], end)) {
+      return -1; // truncated message: report failure (0 means success for this function)
+    }
+  }
+
+  return 0;
 }
 
-static uint8_t unpack_nr_srs_indication(uint8_t **ppReadPackedMsg, uint8_t *end, nfapi_nr_srs_indication_t *pNfapiMsg, nfapi_p7_codec_config_t* config)
-{
-	if (!(pull16(ppReadPackedMsg,&pNfapiMsg->sfn , end) &&
-		pull16(ppReadPackedMsg,&pNfapiMsg->slot , end) &&
-		pull8(ppReadPackedMsg,&pNfapiMsg->number_of_pdus, end)
-		))
-			return 0;
+static uint8_t unpack_nr_srs_reported_symbol(nfapi_nr_srs_reported_symbol_t *prgs, uint8_t **ppReadPackedMsg, uint8_t *end) {
 
-	for(int i=0; i<pNfapiMsg->number_of_pdus;i++)
-	{
-		if (!unpack_nr_srs_indication_body(&pNfapiMsg->pdu_list[i], ppReadPackedMsg, end))
-		return 0;
-	}
+  if(!pull16(ppReadPackedMsg, &prgs->num_prgs, end)) {
+    return 0;
+  }
 
-return 1;
+  if(!prgs->prg_list) {
+    prgs->prg_list = (nfapi_nr_srs_reported_symbol_prgs_t*) calloc(1, prgs->num_prgs*sizeof(nfapi_nr_srs_reported_symbol_prgs_t));
+  }
+  if(prgs->num_prgs > 0 && !prgs->prg_list) return 0; // allocation failed: fail instead of writing through a NULL prg_list below
+  for(int i = 0; i < prgs->num_prgs; i++) {
+    if (!pull8(ppReadPackedMsg, &prgs->prg_list[i].rb_snr, end)) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+int unpack_nr_srs_beamforming_report(void *pMessageBuf, uint32_t messageBufLen, void *pUnpackedBuf, uint32_t unpackedBufLen) {
+  // Clears the output structure, then unpacks the report header and one reported-symbol entry. Returns 0 on success, -1 on failure.
+  nfapi_nr_srs_beamforming_report_t *nr_srs_beamforming_report = (nfapi_nr_srs_beamforming_report_t*)pUnpackedBuf;
+  uint8_t *pReadPackedMessage = pMessageBuf;
+  uint8_t *end = pReadPackedMessage + messageBufLen; // arithmetic on the typed pointer; void* arithmetic is a GNU extension
+
+  memset(pUnpackedBuf, 0, unpackedBufLen);
+
+  if(!(pull16(&pReadPackedMessage, &nr_srs_beamforming_report->prg_size, end) &&
+       pull8(&pReadPackedMessage, &nr_srs_beamforming_report->num_symbols, end) &&
+       pull8(&pReadPackedMessage, &nr_srs_beamforming_report->wide_band_snr, end) &&
+       pull8(&pReadPackedMessage, &nr_srs_beamforming_report->num_reported_symbols, end))) {
+    return -1;
+  }
+
+  if(!nr_srs_beamforming_report->prgs) {
+    nr_srs_beamforming_report->prgs = (nfapi_nr_srs_reported_symbol_t*) calloc(1, sizeof(nfapi_nr_srs_reported_symbol_t));
+  }
+  // A failed allocation is reported as an unpack failure instead of being dereferenced.
+  if(!nr_srs_beamforming_report->prgs || !unpack_nr_srs_reported_symbol(nr_srs_beamforming_report->prgs, &pReadPackedMessage, end)) {
+    return -1;
+  }
+
+  return 0;
+}
+
+static uint8_t unpack_nr_srs_report_tlv(nfapi_srs_report_tlv_t *report_tlv, uint8_t **ppReadPackedMsg, uint8_t *end) {
+  // Unpacks tag and length, then 'length' 32-bit words into value. Returns 1 on success, 0 on failure.
+  if(!(pull16(ppReadPackedMsg, &report_tlv->tag, end) &&
+       pull32(ppReadPackedMsg, &report_tlv->length, end))) {
+    return 0;
+  }
+  if(report_tlv->length > sizeof(report_tlv->value)/sizeof(report_tlv->value[0])) return 0; // received length would write past the end of value[]
+  for(uint32_t i = 0; i < report_tlv->length; i++) { // NOTE(review): the struct documents length in bytes, yet it is used here as a word count - confirm
+    if (!pull32(ppReadPackedMsg, &report_tlv->value[i], end)) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+static uint8_t unpack_nr_srs_indication_body(nfapi_nr_srs_indication_pdu_t *value, uint8_t **ppReadPackedMsg, uint8_t *end) {
+  // Unpacks one SRS indication PDU: the fixed fields first, then its report TLV. Returns 1 on success, 0 if any step fails.
+  if(!(pull32(ppReadPackedMsg, &value->handle, end) &&
+       pull16(ppReadPackedMsg, &value->rnti, end) &&
+       pull16(ppReadPackedMsg, &value->timing_advance_offset, end) &&
+       pulls16(ppReadPackedMsg, &value->timing_advance_offset_nsec, end) &&
+       pull8(ppReadPackedMsg, &value->srs_usage, end) &&
+       pull8(ppReadPackedMsg, &value->report_type, end))) {
+    return 0;
+  }
+  // report_tlv is written through without being allocated here - assumes the caller already provides storage (TODO confirm).
+  if(!unpack_nr_srs_report_tlv(value->report_tlv, ppReadPackedMsg, end)) {
+    return 0;
+  }
+
+  return 1;
+}
+
+static uint8_t unpack_nr_srs_indication(uint8_t **ppReadPackedMsg, uint8_t *end, nfapi_nr_srs_indication_t *pNfapiMsg, nfapi_p7_codec_config_t* config) {
+  // Unpacks an SRS indication: sfn, slot, control_length and PDU count, then each PDU into pdu_list. Returns 1 on success, 0 on failure.
+  if (!(pull16(ppReadPackedMsg,&pNfapiMsg->sfn, end) &&
+        pull16(ppReadPackedMsg,&pNfapiMsg->slot, end) &&
+        pull16(ppReadPackedMsg,&pNfapiMsg->control_length, end) &&
+        pull8(ppReadPackedMsg,&pNfapiMsg->number_of_pdus, end))) {
+    return 0;
+  }
+  // pdu_list is not allocated here - assumes it already holds room for number_of_pdus entries (TODO confirm against the caller).
+  for(int i=0; i<pNfapiMsg->number_of_pdus; i++) {
+    if (!unpack_nr_srs_indication_body(&pNfapiMsg->pdu_list[i], ppReadPackedMsg, end)) {
+      return 0;
+    }
+  }
+
+  return 1;
 }
 
 //NR RACH
diff --git a/openair1/PHY/CODING/3gpplte_sse.c b/openair1/PHY/CODING/3gpplte_sse.c
index 393b0f4a5d2ae5af14bfaf6c620bea95ac2e72ca..731f3710b459daec501e7326ff1ababe1531123e 100644
--- a/openair1/PHY/CODING/3gpplte_sse.c
+++ b/openair1/PHY/CODING/3gpplte_sse.c
@@ -58,7 +58,7 @@ struct treillis {
   int exit_state;
 }  __attribute__ ((aligned(64)));
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 struct treillis {
   union {
@@ -127,29 +127,9 @@ char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsig
   char expandInput[768*8] __attribute__((aligned(32)));
   int i,loop=n>>4;
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
-  __m128i *i_128=(__m128i *)input, *o_128=(__m128i *)expandInput;
-  __m128i tmp1, tmp2, tmp3, tmp4;
-  __m128i BIT_MASK = _mm_set_epi8(  0b00000001,
-                                    0b00000010,
-                                    0b00000100,
-                                    0b00001000,
-                                    0b00010000,
-                                    0b00100000,
-                                    0b01000000,
-                                    0b10000000,
-                                    0b00000001,
-                                    0b00000010,
-                                    0b00000100,
-                                    0b00001000,
-                                    0b00010000,
-                                    0b00100000,
-                                    0b01000000,
-                                    0b10000000);
-#else
   __m256i *i_256=(__m256i *)input, *o_256=(__m256i *)expandInput;
   __m256i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  __m256i BIT_MASK = _mm256_set_epi8(  0b00000001,
+  __m256i BIT_MASK = simde_mm256_set_epi8(  0b00000001,
                                        0b00000010,
                                        0b00000100,
                                        0b00001000,
@@ -181,8 +161,7 @@ char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsig
                                        0b00100000,
                                        0b01000000,
                                        0b10000000);
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint8x16_t *i_128=(uint8x16_t *)input, *o_128=(uint8x16_t *)expandInput;
   uint8x16_t tmp1,tmp2;
   uint16x8_t tmp3;
@@ -206,116 +185,82 @@ char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsig
                                 0b00000001
                         };
 #endif
-#ifndef __AVX2__
-
-  if ((n&15) > 0)
-    loop++;
-
-#else
   loop=n>>5;
 
   if ((n&31) > 0)
     loop++;
 
-#endif
-
   for (i=0; i<loop ; i++ ) {
     // int cur_byte=i<<3;
     // for (b=0;b<8;b++)
     //   expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b);
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
-    tmp1=_mm_load_si128(i_128++);       // tmp1 = B0,B1,...,B15
-    tmp2=_mm_unpacklo_epi8(tmp1,tmp1);  // tmp2 = B0,B0,B1,B1,...,B7,B7
-    tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp2=_mm_unpackhi_epi8(tmp1,tmp1);  // tmp2 = B8,B8,B9,B9,...,B15,B15
-    tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15
-    tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-    tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15
-    *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);;
-#else
-    tmp1=_mm256_load_si256(i_256++);       // tmp1 = B0,B1,...,B15,...,B31
+    tmp1=simde_mm256_load_si256(i_256++);       // tmp1 = B0,B1,...,B15,...,B31
     //print_bytes2("in",(uint8_t*)&tmp1);
-    tmp2=_mm256_unpacklo_epi8(tmp1,tmp1);  // tmp2 = B0,B0,B1,B1,...,B7,B7,B16,B16,B17,B17,...,B23,B23
-    tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3,B16,B16,B16,B16,...,B19,B19,B19,B19
-    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1,B16,B16...,B17..,B17
-    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3,B18...,B18,B19,...,B19
-    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B0 B1 B2 B3
-    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B16 B17 B18 B19
+    tmp2=simde_mm256_unpacklo_epi8(tmp1,tmp1);  // tmp2 = B0,B0,B1,B1,...,B7,B7,B16,B16,B17,B17,...,B23,B23
+    tmp3=simde_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3,B16,B16,B16,B16,...,B19,B19,B19,B19
+    tmp4=simde_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1,B16,B16...,B17..,B17
+    tmp5=simde_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3,B18...,B18,B19,...,B19
+    tmp6=simde_mm256_insertf128_si256(tmp4,simde_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B0 B1 B2 B3
+    tmp7=simde_mm256_insertf128_si256(tmp5,simde_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B16 B17 B18 B19
     //print_bytes2("tmp2",(uint8_t*)&tmp2);
     //print_bytes2("tmp3",(uint8_t*)&tmp3);
     //print_bytes2("tmp4",(uint8_t*)&tmp4);
     //print_bytes2("tmp5",(uint8_t*)&tmp4);
     //print_bytes2("tmp6",(uint8_t*)&tmp6);
     //print_bytes2("tmp7",(uint8_t*)&tmp7);
-    o_256[0]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    o_256[0]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
     //print_bytes2("out",(uint8_t*)o_256);
-    o_256[4]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    o_256[4]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
     //print_bytes2("out",(uint8_t*)(o_256+4));
-    tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23
-    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21
-    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23
-    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B4 B5 B6 B7
-    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B20 B21 B22 B23
+    tmp3=simde_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23
+    tmp4=simde_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21
+    tmp5=simde_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23
+    tmp6=simde_mm256_insertf128_si256(tmp4,simde_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B4 B5 B6 B7
+    tmp7=simde_mm256_insertf128_si256(tmp5,simde_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B20 B21 B22 B23
     //print_bytes2("tmp2",(uint8_t*)&tmp2);
     //print_bytes2("tmp3",(uint8_t*)&tmp3);
     //print_bytes2("tmp4",(uint8_t*)&tmp4);
     //print_bytes2("tmp5",(uint8_t*)&tmp4);
     //print_bytes2("tmp6",(uint8_t*)&tmp6);
     //print_bytes2("tmp7",(uint8_t*)&tmp7);
-    o_256[1]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    o_256[1]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
     //print_bytes2("out",(uint8_t*)(o_256+1));
-    o_256[5]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    o_256[5]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
     //print_bytes2("out",(uint8_t*)(o_256+4));
-    tmp2=_mm256_unpackhi_epi8(tmp1,tmp1);  // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31
-    tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B26,B27,B28,B29
-    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B26,B27
-    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B10,B11,B28,B29
-    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B8 B9 B10 B11
-    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B26 B27 B28 B29
+    tmp2=simde_mm256_unpackhi_epi8(tmp1,tmp1);  // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B24 B25 B26 B27 B28 B29 B30 B31
+    tmp3=simde_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B24,B25,B26,B27
+    tmp4=simde_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B24,B25
+    tmp5=simde_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B10,B11,B26,B27
+    tmp6=simde_mm256_insertf128_si256(tmp4,simde_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B8 B9 B10 B11
+    tmp7=simde_mm256_insertf128_si256(tmp5,simde_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B24 B25 B26 B27
     //print_bytes2("tmp2",(uint8_t*)&tmp2);
     //print_bytes2("tmp3",(uint8_t*)&tmp3);
     //print_bytes2("tmp4",(uint8_t*)&tmp4);
     //print_bytes2("tmp5",(uint8_t*)&tmp4);
     //print_bytes2("tmp6",(uint8_t*)&tmp6);
     //print_bytes2("tmp7",(uint8_t*)&tmp7);
-    o_256[2]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    o_256[2]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
     //print_bytes2("out",(uint8_t*)(o_256+2));
-    o_256[6]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    o_256[6]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
     //print_bytes2("out",(uint8_t*)(o_256+4));
-    tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31
-    tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29
-    tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31
-    tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B12 B13 B14 B15
-    tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B28 B29 B30 B31
+    tmp3=simde_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31
+    tmp4=simde_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29
+    tmp5=simde_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31
+    tmp6=simde_mm256_insertf128_si256(tmp4,simde_mm256_extracti128_si256(tmp5,0),1);  // tmp6 = B12 B13 B14 B15
+    tmp7=simde_mm256_insertf128_si256(tmp5,simde_mm256_extracti128_si256(tmp4,1),0);  // tmp7 = B28 B29 B30 B31
     //print_bytes2("tmp2",(uint8_t*)&tmp2);
     //print_bytes2("tmp3",(uint8_t*)&tmp3);
     //print_bytes2("tmp4",(uint8_t*)&tmp4);
     //print_bytes2("tmp5",(uint8_t*)&tmp4);
     //print_bytes2("tmp6",(uint8_t*)&tmp6);
     //print_bytes2("tmp7",(uint8_t*)&tmp7);
-    o_256[3]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
+    o_256[3]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp6,BIT_MASK),BIT_MASK);
     //print_bytes2("out",(uint8_t*)(o_256+3));
-    o_256[7]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
+    o_256[7]=simde_mm256_cmpeq_epi8(simde_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);;
     //print_bytes2("out",(uint8_t*)(o_256+7));
     o_256+=8;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     tmp1=vld1q_u8((uint8_t *)i_128);
     //print_bytes("tmp1:",(uint8_t*)&tmp1);
     uint8x16x2_t temp1 =  vzipq_u8(tmp1,tmp1);
@@ -372,14 +317,9 @@ char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsig
 
   short *ptr_intl=base_interleaver;
 #if defined(__x86_64) || defined(__i386__)
-#ifndef __AVX2__
-  __m128i tmp={0};
-  uint16_t *systematic2_ptr=(uint16_t *) output;
-#else
   __m256i tmp={0};
   uint32_t *systematic2_ptr=(uint32_t *) output;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint8x16_t tmp;
   const uint8_t __attribute__ ((aligned (16))) _Powers[16]=
   { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
@@ -387,68 +327,44 @@ char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsig
   uint8x16_t Powers= vld1q_u8(_Powers);
   uint8_t *systematic2_ptr=(uint8_t *) output;
 #endif
-#ifndef __AVX2__
-  int input_length_words=1+((n-1)>>1);
-#else
   int input_length_words=1+((n-1)>>2);
-#endif
 
   for ( i=0; i<  input_length_words ; i ++ ) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],7);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],6);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],5);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],4);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],3);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],2);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],1);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],0);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+7);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+6);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+5);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+4);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+3);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+2);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
-    tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
-    *systematic2_ptr++=(unsigned short)_mm_movemask_epi8(tmp);
-#else
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],7);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],6);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],5);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],4);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],3);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+4);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+3);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+4);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+3);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+0);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+4);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+3);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1);
-    tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0);
-    *systematic2_ptr++=(unsigned int)_mm256_movemask_epi8(tmp);
-#endif
-#elif defined(__arm__)
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],7);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],6);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],5);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],4);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],3);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+4);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+3);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+4);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+3);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+0);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+4);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+3);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1);
+    tmp=simde_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0);
+    *systematic2_ptr++=(unsigned int)simde_mm256_movemask_epi8(tmp);
+#elif defined(__arm__) || defined(__aarch64__)
     tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,7);
     tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,6);
     tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,5);
@@ -514,7 +430,7 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
   interleave_compact_byte(base_interleaver,input,systematic2,input_length_bytes);
 #if defined(__x86_64__) || defined(__i386__)
   __m64 *ptr_output=(__m64 *) output;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint8x8_t *ptr_output=(uint8x8_t *)output;
 #endif
   unsigned char cur_s1, cur_s2;
@@ -533,7 +449,7 @@ void threegpplte_turbo_encoder_sse(unsigned char *input,
       */
       *ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate],
                                   all_treillis[state1][cur_s2].parity2_64[code_rate]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       *ptr_output++ = vadd_u8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate],
                               all_treillis[state0][cur_s1].parity2_64[code_rate]);
 #endif
diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
index 619a8f07b9ac7211d97f915157641e7282f155f3..082dfb94c647fa096d0dc38520b310353a8afa3b 100644
--- a/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
+++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
@@ -58,7 +58,6 @@
 #include "mex.h"
 #endif
 
-#ifdef __AVX2__
 #include "PHY/sse_intrin.h"
 
 //#define DEBUG_LOGMAP
@@ -66,10 +65,6 @@
 #ifdef DEBUG_LOGMAP
 #define print_shorts(s,x) fprintf(fdavx2,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7]);fprintf(fdavx2b,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[8],(x)[9],(x)[10],(x)[11],(x)[12],(x)[13],(x)[14],(x)[15])
 FILE *fdavx2,*fdavx2b;
-#else
-
-#endif
-
 
 #define print_bytes(s,x) printf("%s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7],(x)[8],(x)[9],(x)[10],(x)[11],(x)[12],(x)[13],(x)[14],(x)[15],(x)[16],(x)[17],(x)[18],(x)[19],(x)[20],(x)[21],(x)[22],(x)[23],(x)[24],(x)[25],(x)[26],(x)[27],(x)[28],(x)[29],(x)[30],(x)[31])
 
@@ -143,8 +138,8 @@ void compute_gamma16avx2(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_pa
   K1=frame_length>>3;
 
   for (k=0; k<K1; k++) {
-    m11_128[k] = _mm256_srai_epi16(_mm256_adds_epi16(systematic128[k],y_parity128[k]),1);
-    m10_128[k] = _mm256_srai_epi16(_mm256_subs_epi16(systematic128[k],y_parity128[k]),1);
+    m11_128[k] = simde_mm256_srai_epi16(simde_mm256_adds_epi16(systematic128[k],y_parity128[k]),1);
+    m10_128[k] = simde_mm256_srai_epi16(simde_mm256_subs_epi16(systematic128[k],y_parity128[k]),1);
 #ifdef DEBUG_LOGMAP
     fprintf(fdavx2,"Loop index k %d\n",k);
     fprintf(fdavx2b,"Loop index k %d\n",k);
@@ -156,8 +151,8 @@ void compute_gamma16avx2(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_pa
   }
 
   // Termination
-  m11_128[k] = _mm256_srai_epi16(_mm256_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1);
-  m10_128[k] = _mm256_srai_epi16(_mm256_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1);
+  m11_128[k] = simde_mm256_srai_epi16(simde_mm256_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1);
+  m10_128[k] = simde_mm256_srai_epi16(simde_mm256_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1);
 
 #ifdef DEBUG_LOGMAP
   fprintf(fdavx2,"Loop index k %d (term flag %d)\n",k,term_flag);
@@ -196,14 +191,14 @@ void compute_alpha16avx2(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,uint16
 
     if (rerun_flag == 0) {
 
-      alpha128[0] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0);
-      alpha128[1] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[2] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[3] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[4] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[5] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[6] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-      alpha128[7] = _mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[0] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0);
+      alpha128[1] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[2] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[3] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[4] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[5] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[6] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
+      alpha128[7] = simde_mm256_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
 #ifdef DEBUG_LOGMAP
       fprintf(fdavx2,"Initial alpha\n");
       fprintf(fdavx2b,"Initial alpha\n");
@@ -218,14 +213,14 @@ void compute_alpha16avx2(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,uint16
 #endif
     } else {
       //set initial alpha in columns 1-7 from final alpha from last run in columns 0-6
-      alpha128[0] = _mm256_slli_si256(alpha128[frame_length],2);
-      alpha128[1] = _mm256_slli_si256(alpha128[1+frame_length],2);
-      alpha128[2] = _mm256_slli_si256(alpha128[2+frame_length],2);
-      alpha128[3] = _mm256_slli_si256(alpha128[3+frame_length],2);
-      alpha128[4] = _mm256_slli_si256(alpha128[4+frame_length],2);
-      alpha128[5] = _mm256_slli_si256(alpha128[5+frame_length],2);
-      alpha128[6] = _mm256_slli_si256(alpha128[6+frame_length],2);
-      alpha128[7] = _mm256_slli_si256(alpha128[7+frame_length],2);
+      alpha128[0] = simde_mm256_slli_si256(alpha128[frame_length],2);
+      alpha128[1] = simde_mm256_slli_si256(alpha128[1+frame_length],2);
+      alpha128[2] = simde_mm256_slli_si256(alpha128[2+frame_length],2);
+      alpha128[3] = simde_mm256_slli_si256(alpha128[3+frame_length],2);
+      alpha128[4] = simde_mm256_slli_si256(alpha128[4+frame_length],2);
+      alpha128[5] = simde_mm256_slli_si256(alpha128[5+frame_length],2);
+      alpha128[6] = simde_mm256_slli_si256(alpha128[6+frame_length],2);
+      alpha128[7] = simde_mm256_slli_si256(alpha128[7+frame_length],2);
       // set initial alpha in column 0 to (0,-MAX/2,...,-MAX/2)
       alpha[16] = -MAX/2;
       alpha[32] = -MAX/2;
@@ -266,63 +261,63 @@ void compute_alpha16avx2(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,uint16
          k++) {
 
 
-      a1=_mm256_load_si256(&alpha_ptr[1]);
-      a3=_mm256_load_si256(&alpha_ptr[3]);
-      a5=_mm256_load_si256(&alpha_ptr[5]);
-      a7=_mm256_load_si256(&alpha_ptr[7]);
-
-      m_b0 = _mm256_adds_epi16(a1,*m11p);  // m11
-      m_b4 = _mm256_subs_epi16(a1,*m11p);  // m00=-m11
-      m_b1 = _mm256_subs_epi16(a3,*m10p);  // m01=-m10
-      m_b5 = _mm256_adds_epi16(a3,*m10p);  // m10
-      m_b2 = _mm256_adds_epi16(a5,*m10p);  // m10
-      m_b6 = _mm256_subs_epi16(a5,*m10p);  // m01=-m10
-      m_b3 = _mm256_subs_epi16(a7,*m11p);  // m00=-m11
-      m_b7 = _mm256_adds_epi16(a7,*m11p);  // m11
-
-      a0=_mm256_load_si256(&alpha_ptr[0]);
-      a2=_mm256_load_si256(&alpha_ptr[2]);
-      a4=_mm256_load_si256(&alpha_ptr[4]);
-      a6=_mm256_load_si256(&alpha_ptr[6]);
-
-      new0 = _mm256_subs_epi16(a0,*m11p);  // m00=-m11
-      new4 = _mm256_adds_epi16(a0,*m11p);  // m11
-      new1 = _mm256_adds_epi16(a2,*m10p);  // m10
-      new5 = _mm256_subs_epi16(a2,*m10p);  // m01=-m10
-      new2 = _mm256_subs_epi16(a4,*m10p);  // m01=-m10
-      new6 = _mm256_adds_epi16(a4,*m10p);  // m10
-      new3 = _mm256_adds_epi16(a6,*m11p);  // m11
-      new7 = _mm256_subs_epi16(a6,*m11p);  // m00=-m11
-
-      a0 = _mm256_max_epi16(m_b0,new0);
-      a1 = _mm256_max_epi16(m_b1,new1);
-      a2 = _mm256_max_epi16(m_b2,new2);
-      a3 = _mm256_max_epi16(m_b3,new3);
-      a4 = _mm256_max_epi16(m_b4,new4);
-      a5 = _mm256_max_epi16(m_b5,new5);
-      a6 = _mm256_max_epi16(m_b6,new6);
-      a7 = _mm256_max_epi16(m_b7,new7);
-
-      alpha_max = _mm256_max_epi16(a0,a1);
-      alpha_max = _mm256_max_epi16(alpha_max,a2);
-      alpha_max = _mm256_max_epi16(alpha_max,a3);
-      alpha_max = _mm256_max_epi16(alpha_max,a4);
-      alpha_max = _mm256_max_epi16(alpha_max,a5);
-      alpha_max = _mm256_max_epi16(alpha_max,a6);
-      alpha_max = _mm256_max_epi16(alpha_max,a7);
+      a1=simde_mm256_load_si256(&alpha_ptr[1]);
+      a3=simde_mm256_load_si256(&alpha_ptr[3]);
+      a5=simde_mm256_load_si256(&alpha_ptr[5]);
+      a7=simde_mm256_load_si256(&alpha_ptr[7]);
+
+      m_b0 = simde_mm256_adds_epi16(a1,*m11p);  // m11
+      m_b4 = simde_mm256_subs_epi16(a1,*m11p);  // m00=-m11
+      m_b1 = simde_mm256_subs_epi16(a3,*m10p);  // m01=-m10
+      m_b5 = simde_mm256_adds_epi16(a3,*m10p);  // m10
+      m_b2 = simde_mm256_adds_epi16(a5,*m10p);  // m10
+      m_b6 = simde_mm256_subs_epi16(a5,*m10p);  // m01=-m10
+      m_b3 = simde_mm256_subs_epi16(a7,*m11p);  // m00=-m11
+      m_b7 = simde_mm256_adds_epi16(a7,*m11p);  // m11
+
+      a0=simde_mm256_load_si256(&alpha_ptr[0]);
+      a2=simde_mm256_load_si256(&alpha_ptr[2]);
+      a4=simde_mm256_load_si256(&alpha_ptr[4]);
+      a6=simde_mm256_load_si256(&alpha_ptr[6]);
+
+      new0 = simde_mm256_subs_epi16(a0,*m11p);  // m00=-m11
+      new4 = simde_mm256_adds_epi16(a0,*m11p);  // m11
+      new1 = simde_mm256_adds_epi16(a2,*m10p);  // m10
+      new5 = simde_mm256_subs_epi16(a2,*m10p);  // m01=-m10
+      new2 = simde_mm256_subs_epi16(a4,*m10p);  // m01=-m10
+      new6 = simde_mm256_adds_epi16(a4,*m10p);  // m10
+      new3 = simde_mm256_adds_epi16(a6,*m11p);  // m11
+      new7 = simde_mm256_subs_epi16(a6,*m11p);  // m00=-m11
+
+      a0 = simde_mm256_max_epi16(m_b0,new0);
+      a1 = simde_mm256_max_epi16(m_b1,new1);
+      a2 = simde_mm256_max_epi16(m_b2,new2);
+      a3 = simde_mm256_max_epi16(m_b3,new3);
+      a4 = simde_mm256_max_epi16(m_b4,new4);
+      a5 = simde_mm256_max_epi16(m_b5,new5);
+      a6 = simde_mm256_max_epi16(m_b6,new6);
+      a7 = simde_mm256_max_epi16(m_b7,new7);
+
+      alpha_max = simde_mm256_max_epi16(a0,a1);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a2);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a3);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a4);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a5);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a6);
+      alpha_max = simde_mm256_max_epi16(alpha_max,a7);
 
       alpha_ptr+=8;
       m11p++;
       m10p++;
 
-      alpha_ptr[0] = _mm256_subs_epi16(a0,alpha_max);
-      alpha_ptr[1] = _mm256_subs_epi16(a1,alpha_max);
-      alpha_ptr[2] = _mm256_subs_epi16(a2,alpha_max);
-      alpha_ptr[3] = _mm256_subs_epi16(a3,alpha_max);
-      alpha_ptr[4] = _mm256_subs_epi16(a4,alpha_max);
-      alpha_ptr[5] = _mm256_subs_epi16(a5,alpha_max);
-      alpha_ptr[6] = _mm256_subs_epi16(a6,alpha_max);
-      alpha_ptr[7] = _mm256_subs_epi16(a7,alpha_max);
+      alpha_ptr[0] = simde_mm256_subs_epi16(a0,alpha_max);
+      alpha_ptr[1] = simde_mm256_subs_epi16(a1,alpha_max);
+      alpha_ptr[2] = simde_mm256_subs_epi16(a2,alpha_max);
+      alpha_ptr[3] = simde_mm256_subs_epi16(a3,alpha_max);
+      alpha_ptr[4] = simde_mm256_subs_epi16(a4,alpha_max);
+      alpha_ptr[5] = simde_mm256_subs_epi16(a5,alpha_max);
+      alpha_ptr[6] = simde_mm256_subs_epi16(a6,alpha_max);
+      alpha_ptr[7] = simde_mm256_subs_epi16(a7,alpha_max);
 
 #ifdef DEBUG_LOGMAP
       fprintf(fdavx2,"Loop index %d\n",k);
@@ -540,14 +535,14 @@ void compute_beta16avx2(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,uint16_
     } else {
 
       beta128 = (__m256i*)&beta[0];
-      beta_ptr[0] = _mm256_srli_si256(beta128[0],2);
-      beta_ptr[1] = _mm256_srli_si256(beta128[1],2);
-      beta_ptr[2] = _mm256_srli_si256(beta128[2],2);
-      beta_ptr[3] = _mm256_srli_si256(beta128[3],2);
-      beta_ptr[4] = _mm256_srli_si256(beta128[4],2);
-      beta_ptr[5] = _mm256_srli_si256(beta128[5],2);
-      beta_ptr[6] = _mm256_srli_si256(beta128[6],2);
-      beta_ptr[7] = _mm256_srli_si256(beta128[7],2);
+      beta_ptr[0] = simde_mm256_srli_si256(beta128[0],2);
+      beta_ptr[1] = simde_mm256_srli_si256(beta128[1],2);
+      beta_ptr[2] = simde_mm256_srli_si256(beta128[2],2);
+      beta_ptr[3] = simde_mm256_srli_si256(beta128[3],2);
+      beta_ptr[4] = simde_mm256_srli_si256(beta128[4],2);
+      beta_ptr[5] = simde_mm256_srli_si256(beta128[5],2);
+      beta_ptr[6] = simde_mm256_srli_si256(beta128[6],2);
+      beta_ptr[7] = simde_mm256_srli_si256(beta128[7],2);
 #ifdef DEBUG_LOGMAP
       fprintf(fdavx2,"beta init (second run)\n");
       fprintf(fdavx2b,"beta init (second run)\n");
@@ -563,23 +558,23 @@ void compute_beta16avx2(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,uint16_
     }
 
 
-    beta_ptr[0] = _mm256_insert_epi16(beta_ptr[0],beta0_16,7);
-    beta_ptr[1] = _mm256_insert_epi16(beta_ptr[1],beta1_16,7);
-    beta_ptr[2] = _mm256_insert_epi16(beta_ptr[2],beta2_16,7);
-    beta_ptr[3] = _mm256_insert_epi16(beta_ptr[3],beta3_16,7);
-    beta_ptr[4] = _mm256_insert_epi16(beta_ptr[4],beta4_16,7);
-    beta_ptr[5] = _mm256_insert_epi16(beta_ptr[5],beta5_16,7);
-    beta_ptr[6] = _mm256_insert_epi16(beta_ptr[6],beta6_16,7);
-    beta_ptr[7] = _mm256_insert_epi16(beta_ptr[7],beta7_16,7);
-
-    beta_ptr[0] = _mm256_insert_epi16(beta_ptr[0],beta0_cw2_16,15);
-    beta_ptr[1] = _mm256_insert_epi16(beta_ptr[1],beta1_cw2_16,15);
-    beta_ptr[2] = _mm256_insert_epi16(beta_ptr[2],beta2_cw2_16,15);
-    beta_ptr[3] = _mm256_insert_epi16(beta_ptr[3],beta3_cw2_16,15);
-    beta_ptr[4] = _mm256_insert_epi16(beta_ptr[4],beta4_cw2_16,15);
-    beta_ptr[5] = _mm256_insert_epi16(beta_ptr[5],beta5_cw2_16,15);
-    beta_ptr[6] = _mm256_insert_epi16(beta_ptr[6],beta6_cw2_16,15);
-    beta_ptr[7] = _mm256_insert_epi16(beta_ptr[7],beta7_cw2_16,15);
+    beta_ptr[0] = simde_mm256_insert_epi16(beta_ptr[0],beta0_16,7);
+    beta_ptr[1] = simde_mm256_insert_epi16(beta_ptr[1],beta1_16,7);
+    beta_ptr[2] = simde_mm256_insert_epi16(beta_ptr[2],beta2_16,7);
+    beta_ptr[3] = simde_mm256_insert_epi16(beta_ptr[3],beta3_16,7);
+    beta_ptr[4] = simde_mm256_insert_epi16(beta_ptr[4],beta4_16,7);
+    beta_ptr[5] = simde_mm256_insert_epi16(beta_ptr[5],beta5_16,7);
+    beta_ptr[6] = simde_mm256_insert_epi16(beta_ptr[6],beta6_16,7);
+    beta_ptr[7] = simde_mm256_insert_epi16(beta_ptr[7],beta7_16,7);
+
+    beta_ptr[0] = simde_mm256_insert_epi16(beta_ptr[0],beta0_cw2_16,15);
+    beta_ptr[1] = simde_mm256_insert_epi16(beta_ptr[1],beta1_cw2_16,15);
+    beta_ptr[2] = simde_mm256_insert_epi16(beta_ptr[2],beta2_cw2_16,15);
+    beta_ptr[3] = simde_mm256_insert_epi16(beta_ptr[3],beta3_cw2_16,15);
+    beta_ptr[4] = simde_mm256_insert_epi16(beta_ptr[4],beta4_cw2_16,15);
+    beta_ptr[5] = simde_mm256_insert_epi16(beta_ptr[5],beta5_cw2_16,15);
+    beta_ptr[6] = simde_mm256_insert_epi16(beta_ptr[6],beta6_cw2_16,15);
+    beta_ptr[7] = simde_mm256_insert_epi16(beta_ptr[7],beta7_cw2_16,15);
 
 #ifdef DEBUG_LOGMAP
       fprintf(fdavx2,"beta init (after insert) \n");
@@ -605,64 +600,64 @@ void compute_beta16avx2(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,uint16_
     for (k=(frame_length>>3)-1; k>=loopval; k--) {
 
       
-      b4 = _mm256_load_si256(&beta_ptr[4]);
-      b5 = _mm256_load_si256(&beta_ptr[5]);
-      b6 = _mm256_load_si256(&beta_ptr[6]);
-      b7 = _mm256_load_si256(&beta_ptr[7]);
-
-      m_b0 = _mm256_adds_epi16(b4,*m11p);  //m11
-      m_b1 = _mm256_subs_epi16(b4,*m11p);  //m00
-      m_b2 = _mm256_subs_epi16(b5,*m10p);  //m01
-      m_b3 = _mm256_adds_epi16(b5,*m10p);  //m10
-      m_b4 = _mm256_adds_epi16(b6,*m10p);  //m10
-      m_b5 = _mm256_subs_epi16(b6,*m10p);  //m01
-      m_b6 = _mm256_subs_epi16(b7,*m11p);  //m00
-      m_b7 = _mm256_adds_epi16(b7,*m11p);  //m11
-
-      b0 = _mm256_load_si256(&beta_ptr[0]);
-      b1 = _mm256_load_si256(&beta_ptr[1]);
-      b2 = _mm256_load_si256(&beta_ptr[2]);
-      b3 = _mm256_load_si256(&beta_ptr[3]);
-
-      new0 = _mm256_subs_epi16(b0,*m11p);  //m00
-      new1 = _mm256_adds_epi16(b0,*m11p);  //m11
-      new2 = _mm256_adds_epi16(b1,*m10p);  //m10
-      new3 = _mm256_subs_epi16(b1,*m10p);  //m01
-      new4 = _mm256_subs_epi16(b2,*m10p);  //m01
-      new5 = _mm256_adds_epi16(b2,*m10p);  //m10
-      new6 = _mm256_adds_epi16(b3,*m11p);  //m11
-      new7 = _mm256_subs_epi16(b3,*m11p);  //m00
-
-
-      b0 = _mm256_max_epi16(m_b0,new0);
-      b1 = _mm256_max_epi16(m_b1,new1);
-      b2 = _mm256_max_epi16(m_b2,new2);
-      b3 = _mm256_max_epi16(m_b3,new3);
-      b4 = _mm256_max_epi16(m_b4,new4);
-      b5 = _mm256_max_epi16(m_b5,new5);
-      b6 = _mm256_max_epi16(m_b6,new6);
-      b7 = _mm256_max_epi16(m_b7,new7);
-
-      beta_max = _mm256_max_epi16(b0,b1);
-      beta_max = _mm256_max_epi16(beta_max   ,b2);
-      beta_max = _mm256_max_epi16(beta_max   ,b3);
-      beta_max = _mm256_max_epi16(beta_max   ,b4);
-      beta_max = _mm256_max_epi16(beta_max   ,b5);
-      beta_max = _mm256_max_epi16(beta_max   ,b6);
-      beta_max = _mm256_max_epi16(beta_max   ,b7);
+      b4 = simde_mm256_load_si256(&beta_ptr[4]);
+      b5 = simde_mm256_load_si256(&beta_ptr[5]);
+      b6 = simde_mm256_load_si256(&beta_ptr[6]);
+      b7 = simde_mm256_load_si256(&beta_ptr[7]);
+
+      m_b0 = simde_mm256_adds_epi16(b4,*m11p);  //m11
+      m_b1 = simde_mm256_subs_epi16(b4,*m11p);  //m00
+      m_b2 = simde_mm256_subs_epi16(b5,*m10p);  //m01
+      m_b3 = simde_mm256_adds_epi16(b5,*m10p);  //m10
+      m_b4 = simde_mm256_adds_epi16(b6,*m10p);  //m10
+      m_b5 = simde_mm256_subs_epi16(b6,*m10p);  //m01
+      m_b6 = simde_mm256_subs_epi16(b7,*m11p);  //m00
+      m_b7 = simde_mm256_adds_epi16(b7,*m11p);  //m11
+
+      b0 = simde_mm256_load_si256(&beta_ptr[0]);
+      b1 = simde_mm256_load_si256(&beta_ptr[1]);
+      b2 = simde_mm256_load_si256(&beta_ptr[2]);
+      b3 = simde_mm256_load_si256(&beta_ptr[3]);
+
+      new0 = simde_mm256_subs_epi16(b0,*m11p);  //m00
+      new1 = simde_mm256_adds_epi16(b0,*m11p);  //m11
+      new2 = simde_mm256_adds_epi16(b1,*m10p);  //m10
+      new3 = simde_mm256_subs_epi16(b1,*m10p);  //m01
+      new4 = simde_mm256_subs_epi16(b2,*m10p);  //m01
+      new5 = simde_mm256_adds_epi16(b2,*m10p);  //m10
+      new6 = simde_mm256_adds_epi16(b3,*m11p);  //m11
+      new7 = simde_mm256_subs_epi16(b3,*m11p);  //m00
+
+
+      b0 = simde_mm256_max_epi16(m_b0,new0);
+      b1 = simde_mm256_max_epi16(m_b1,new1);
+      b2 = simde_mm256_max_epi16(m_b2,new2);
+      b3 = simde_mm256_max_epi16(m_b3,new3);
+      b4 = simde_mm256_max_epi16(m_b4,new4);
+      b5 = simde_mm256_max_epi16(m_b5,new5);
+      b6 = simde_mm256_max_epi16(m_b6,new6);
+      b7 = simde_mm256_max_epi16(m_b7,new7);
+
+      beta_max = simde_mm256_max_epi16(b0,b1);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b2);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b3);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b4);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b5);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b6);
+      beta_max = simde_mm256_max_epi16(beta_max   ,b7);
 
       beta_ptr-=8;
       m11p--;
       m10p--;
 
-      beta_ptr[0] = _mm256_subs_epi16(b0,beta_max);
-      beta_ptr[1] = _mm256_subs_epi16(b1,beta_max);
-      beta_ptr[2] = _mm256_subs_epi16(b2,beta_max);
-      beta_ptr[3] = _mm256_subs_epi16(b3,beta_max);
-      beta_ptr[4] = _mm256_subs_epi16(b4,beta_max);
-      beta_ptr[5] = _mm256_subs_epi16(b5,beta_max);
-      beta_ptr[6] = _mm256_subs_epi16(b6,beta_max);
-      beta_ptr[7] = _mm256_subs_epi16(b7,beta_max);
+      beta_ptr[0] = simde_mm256_subs_epi16(b0,beta_max);
+      beta_ptr[1] = simde_mm256_subs_epi16(b1,beta_max);
+      beta_ptr[2] = simde_mm256_subs_epi16(b2,beta_max);
+      beta_ptr[3] = simde_mm256_subs_epi16(b3,beta_max);
+      beta_ptr[4] = simde_mm256_subs_epi16(b4,beta_max);
+      beta_ptr[5] = simde_mm256_subs_epi16(b5,beta_max);
+      beta_ptr[6] = simde_mm256_subs_epi16(b6,beta_max);
+      beta_ptr[7] = simde_mm256_subs_epi16(b7,beta_max);
 
 #ifdef DEBUG_LOGMAP
       fprintf(fdavx2,"Loop index %d, mb\n",k);
@@ -742,22 +737,22 @@ void compute_ext16avx2(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* e
       print_shorts("b6:",&beta_ptr[6]);
       print_shorts("b7:",&beta_ptr[7]);
     */
-    m00_4 = _mm256_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
-    m11_4 = _mm256_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
-    m00_3 = _mm256_adds_epi16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
-    m11_3 = _mm256_adds_epi16(alpha_ptr[6],beta_ptr[3]); //ALPHA_BETA_3m11;
-    m00_2 = _mm256_adds_epi16(alpha_ptr[1],beta_ptr[4]); //ALPHA_BETA_2m00;
-    m11_2 = _mm256_adds_epi16(alpha_ptr[1],beta_ptr[0]); //ALPHA_BETA_2m11;
-    m11_1 = _mm256_adds_epi16(alpha_ptr[0],beta_ptr[4]); //ALPHA_BETA_1m11;
-    m00_1 = _mm256_adds_epi16(alpha_ptr[0],beta_ptr[0]); //ALPHA_BETA_1m00;
-    m01_4 = _mm256_adds_epi16(alpha_ptr[5],beta_ptr[6]); //ALPHA_BETA_4m01;
-    m10_4 = _mm256_adds_epi16(alpha_ptr[5],beta_ptr[2]); //ALPHA_BETA_4m10;
-    m01_3 = _mm256_adds_epi16(alpha_ptr[4],beta_ptr[2]); //ALPHA_BETA_3m01;
-    m10_3 = _mm256_adds_epi16(alpha_ptr[4],beta_ptr[6]); //ALPHA_BETA_3m10;
-    m01_2 = _mm256_adds_epi16(alpha_ptr[3],beta_ptr[1]); //ALPHA_BETA_2m01;
-    m10_2 = _mm256_adds_epi16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
-    m10_1 = _mm256_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
-    m01_1 = _mm256_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
+    m00_4 = simde_mm256_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
+    m11_4 = simde_mm256_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
+    m00_3 = simde_mm256_adds_epi16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00;
+    m11_3 = simde_mm256_adds_epi16(alpha_ptr[6],beta_ptr[3]); //ALPHA_BETA_3m11;
+    m00_2 = simde_mm256_adds_epi16(alpha_ptr[1],beta_ptr[4]); //ALPHA_BETA_2m00;
+    m11_2 = simde_mm256_adds_epi16(alpha_ptr[1],beta_ptr[0]); //ALPHA_BETA_2m11;
+    m11_1 = simde_mm256_adds_epi16(alpha_ptr[0],beta_ptr[4]); //ALPHA_BETA_1m11;
+    m00_1 = simde_mm256_adds_epi16(alpha_ptr[0],beta_ptr[0]); //ALPHA_BETA_1m00;
+    m01_4 = simde_mm256_adds_epi16(alpha_ptr[5],beta_ptr[6]); //ALPHA_BETA_4m01;
+    m10_4 = simde_mm256_adds_epi16(alpha_ptr[5],beta_ptr[2]); //ALPHA_BETA_4m10;
+    m01_3 = simde_mm256_adds_epi16(alpha_ptr[4],beta_ptr[2]); //ALPHA_BETA_3m01;
+    m10_3 = simde_mm256_adds_epi16(alpha_ptr[4],beta_ptr[6]); //ALPHA_BETA_3m10;
+    m01_2 = simde_mm256_adds_epi16(alpha_ptr[3],beta_ptr[1]); //ALPHA_BETA_2m01;
+    m10_2 = simde_mm256_adds_epi16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10;
+    m10_1 = simde_mm256_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10;
+    m01_1 = simde_mm256_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01;
     /*
       print_shorts("m11_1:",&m11_1);
       print_shorts("m11_2:",&m11_2);
@@ -776,34 +771,34 @@ void compute_ext16avx2(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* e
       print_shorts("m01_3:",&m01_3);
       print_shorts("m01_4:",&m01_4);
     */
-    m01_1 = _mm256_max_epi16(m01_1,m01_2);
-    m01_1 = _mm256_max_epi16(m01_1,m01_3);
-    m01_1 = _mm256_max_epi16(m01_1,m01_4);
-    m00_1 = _mm256_max_epi16(m00_1,m00_2);
-    m00_1 = _mm256_max_epi16(m00_1,m00_3);
-    m00_1 = _mm256_max_epi16(m00_1,m00_4);
-    m10_1 = _mm256_max_epi16(m10_1,m10_2);
-    m10_1 = _mm256_max_epi16(m10_1,m10_3);
-    m10_1 = _mm256_max_epi16(m10_1,m10_4);
-    m11_1 = _mm256_max_epi16(m11_1,m11_2);
-    m11_1 = _mm256_max_epi16(m11_1,m11_3);
-    m11_1 = _mm256_max_epi16(m11_1,m11_4);
+    m01_1 = simde_mm256_max_epi16(m01_1,m01_2);
+    m01_1 = simde_mm256_max_epi16(m01_1,m01_3);
+    m01_1 = simde_mm256_max_epi16(m01_1,m01_4);
+    m00_1 = simde_mm256_max_epi16(m00_1,m00_2);
+    m00_1 = simde_mm256_max_epi16(m00_1,m00_3);
+    m00_1 = simde_mm256_max_epi16(m00_1,m00_4);
+    m10_1 = simde_mm256_max_epi16(m10_1,m10_2);
+    m10_1 = simde_mm256_max_epi16(m10_1,m10_3);
+    m10_1 = simde_mm256_max_epi16(m10_1,m10_4);
+    m11_1 = simde_mm256_max_epi16(m11_1,m11_2);
+    m11_1 = simde_mm256_max_epi16(m11_1,m11_3);
+    m11_1 = simde_mm256_max_epi16(m11_1,m11_4);
 
     //      print_shorts("m11_1:",&m11_1);
 
-    m01_1 = _mm256_subs_epi16(m01_1,*m10_128);
-    m00_1 = _mm256_subs_epi16(m00_1,*m11_128);
-    m10_1 = _mm256_adds_epi16(m10_1,*m10_128);
-    m11_1 = _mm256_adds_epi16(m11_1,*m11_128);
+    m01_1 = simde_mm256_subs_epi16(m01_1,*m10_128);
+    m00_1 = simde_mm256_subs_epi16(m00_1,*m11_128);
+    m10_1 = simde_mm256_adds_epi16(m10_1,*m10_128);
+    m11_1 = simde_mm256_adds_epi16(m11_1,*m11_128);
 
     //      print_shorts("m10_1:",&m10_1);
     //      print_shorts("m11_1:",&m11_1);
-    m01_1 = _mm256_max_epi16(m01_1,m00_1);
-    m10_1 = _mm256_max_epi16(m10_1,m11_1);
+    m01_1 = simde_mm256_max_epi16(m01_1,m00_1);
+    m10_1 = simde_mm256_max_epi16(m10_1,m11_1);
     //      print_shorts("m01_1:",&m01_1);
     //      print_shorts("m10_1:",&m10_1);
 
-    *ext_128 = _mm256_subs_epi16(m10_1,m01_1);
+    *ext_128 = simde_mm256_subs_epi16(m10_1,m01_1);
 
 #ifdef DEBUG_LOGMAP
     fprintf(fdavx2,"ext %p\n",ext_128);
@@ -933,7 +928,7 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
   uint32_t db;
 
 
-  __m256i tmp={0}, zeros=_mm256_setzero_si256();
+  __m256i tmp={0}, zeros=simde_mm256_setzero_si256();
 
 
   int offset8_flag=0;
@@ -1063,22 +1058,22 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 
     for (i=0; i<(n>>3); i++) { // steady-state portion
 
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],0);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],8);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],1);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],9);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],2);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],10);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],3);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],11);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],4);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],12);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],5);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],13);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],6);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],14);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],7);
-      ((__m256i *)systematic2)[i]=_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],15);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],0);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],8);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],1);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],9);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],2);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],10);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],3);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],11);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],4);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],12);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],5);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],13);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],6);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],14);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[*pi4_p],7);
+      ((__m256i *)systematic2)[i]=simde_mm256_insert_epi16(((__m256i *)systematic2)[i],ext[8+*pi4_p++],15);
 #ifdef DEBUG_LOGMAP
       print_shorts("syst2",(int16_t*)&((__m256i *)systematic2)[i]);
 #endif
@@ -1096,23 +1091,23 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 
     for (i=0; i<(n>>3); i++) {
 
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],0);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],8);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],1);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],9);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],2);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],10);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],3);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],11);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],4);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],12);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],5);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],13);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],6);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],14);
-      tmp=_mm256_insert_epi16(tmp,ext2[*pi5_p],7);
-      tmp=_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],15);
-      ((__m256i *)systematic1)[i] = _mm256_adds_epi16(_mm256_subs_epi16(tmp,((__m256i*)ext)[i]),((__m256i *)systematic0)[i]);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],0);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],8);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],1);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],9);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],2);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],10);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],3);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],11);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],4);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],12);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],5);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],13);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],6);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],14);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[*pi5_p],7);
+      tmp=simde_mm256_insert_epi16(tmp,ext2[8+*pi5_p++],15);
+      ((__m256i *)systematic1)[i] = simde_mm256_adds_epi16(simde_mm256_subs_epi16(tmp,((__m256i*)ext)[i]),((__m256i *)systematic0)[i]);
 #ifdef DEBUG_LOGMAP
       print_shorts("syst1",(int16_t*)&((__m256i *)systematic1)[i]);
 #endif
@@ -1124,27 +1119,27 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 
       for (i=0; i<(n>>3); i++) {
 
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],7);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],15);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],6);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],14);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],5);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],13);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],4);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],12);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],3);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],11);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],2);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],10);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],1);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],9);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],0);
-        tmp=_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],8);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],7);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],15);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],6);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],14);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],5);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],13);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],4);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],12);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],3);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],11);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],2);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],10);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],1);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],9);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p],0);
+        tmp=simde_mm256_insert_epi16(tmp, ((llr_t*)ext2)[8+*pi6_p++],8);
 #ifdef DEBUG_LOGMAP
 	print_shorts("tmp",(int16_t*)&tmp);
 #endif
-        tmp=_mm256_cmpgt_epi8(_mm256_packs_epi16(tmp,zeros),zeros);
-        db=(uint32_t)_mm256_movemask_epi8(tmp);
+        tmp=simde_mm256_cmpgt_epi8(simde_mm256_packs_epi16(tmp,zeros),zeros);
+        db=(uint32_t)simde_mm256_movemask_epi8(tmp);
 	decoded_bytes[i]=db&0xff;
 	decoded_bytes2[i]=(uint8_t)(db>>16)&0xff;
 #ifdef DEBUG_LOGMAP
@@ -1261,7 +1256,7 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 
       for (i=0; i<myloop; i++) {
 
-        *ext_128=_mm256_adds_epi16(_mm256_subs_epi16(*ext_128,*s1_128++),*s0_128++);
+        *ext_128=simde_mm256_adds_epi16(simde_mm256_subs_epi16(*ext_128,*s1_128++),*s0_128++);
         ext_128++;
       }
     }
@@ -1278,7 +1273,7 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 #endif
   return(iteration_cnt);
 }
-#else  //__AVX2__
+#else
 unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y,
 						  int16_t *y2,
 						  uint8_t *decoded_bytes,
@@ -1307,6 +1302,6 @@ void init_td16avx2(void)
     
 }
 
-#endif //__AVX2__
+#endif
 
 
diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
index c9d2579314f32a8f3c674ebde6470e700acfe9a9..a91519d010b1605f47c3affa9e0c3640fabfcadd 100644
--- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
+++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
@@ -37,7 +37,6 @@
 ///
 ///
 
-#undef __AVX2__
 #include "PHY/sse_intrin.h"
 
 #ifndef TEST_DEBUG
@@ -63,8 +62,6 @@
   #define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
 #endif
 
-#undef __AVX2__
-
 #ifdef DEBUG_LOGMAP
   FILE *fdsse4;
 #endif
@@ -121,7 +118,7 @@ void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity
   __m128i *y_parity128   = (__m128i *)y_parity;
   __m128i *m10_128        = (__m128i *)m10;
   __m128i *m11_128        = (__m128i *)m11;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *systematic128  = (int16x8_t *)systematic;
   int16x8_t *y_parity128    = (int16x8_t *)y_parity;
   int16x8_t *m10_128        = (int16x8_t *)m10;
@@ -130,28 +127,13 @@ void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity
 #ifdef DEBUG_LOGMAP
   fprintf(fdsse4,"compute_gamma (sse_16bit), %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length);
 #endif
-#ifndef __AVX2__
   K1=frame_length>>3;
-#else
-
-  if ((frame_length&15) > 0)
-    K1=(frame_length+1)>>4;
-  else
-    K1=frame_length>>4;
-
-#endif
 
   for (k=0; k<K1; k++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
     m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1);
     m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1);
-#else
-    ((__m256i *)m11_128)[k] = _mm256_srai_epi16(_mm256_adds_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1);
-    //    ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)y_parity128)[k],((__m256i*)systematic128)[k]),1);
-    ((__m256i *)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1);
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m11_128[k] = vhaddq_s16(systematic128[k],y_parity128[k]);
     m10_128[k] = vhsubq_s16(systematic128[k],y_parity128[k]);
 #endif
@@ -168,13 +150,12 @@ void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity
   // Termination
 #if defined(__x86_64__) || defined(__i386__)
   m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1);
-  //#ifndef __AVX2__
 #if 1
   m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1);
 #else
   m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(y_parity128[k],systematic128[k+term_flag]),1);
 #endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   m11_128[k] = vhaddq_s16(systematic128[k+term_flag],y_parity128[k]);
   m10_128[k] = vhsubq_s16(systematic128[k+term_flag],y_parity128[k]);
 #endif
@@ -193,7 +174,6 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
   int k,l,l2,K1,rerun_flag=0;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *alpha128=(__m128i *)alpha,*alpha_ptr,*m11p,*m10p;
-  //#ifndef __AVX2__
 #if 1
   __m128i a0,a1,a2,a3,a4,a5,a6,a7;
   __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
@@ -206,7 +186,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
   __m256i m11m10_256;
   __m256i alpha_max;
 #endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *alpha128=(int16x8_t *)alpha,*alpha_ptr;
   int16x8_t a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p;
   int16x8_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
@@ -222,8 +202,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
   for (l=K1;; l=l2,rerun_flag=1) {
 #if defined(__x86_64__) || defined(__i386__)
     alpha128 = (__m128i *)alpha;
-    //#ifdef __AVX2__
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     alpha128 = (int16x8_t *)alpha;
 #endif
 
@@ -237,7 +216,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
       alpha128[5] = _mm_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
       alpha128[6] = _mm_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
       alpha128[7] = _mm_set_epi16(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       alpha128[0] = vdupq_n_s16(-MAX/2);
       alpha128[0] = vsetq_lane_s16(0,alpha128[0],0);
       alpha128[1] = vdupq_n_s16(-MAX/2);
@@ -270,7 +249,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
       alpha128[5] = _mm_slli_si128(alpha128[5+frame_length],2);
       alpha128[6] = _mm_slli_si128(alpha128[6+frame_length],2);
       alpha128[7] = _mm_slli_si128(alpha128[7+frame_length],2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       alpha128[0] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[frame_length],16);
       alpha128[0] = vsetq_lane_s16(alpha[8],alpha128[0],3);
       alpha128[1] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[1+frame_length],16);
@@ -310,11 +289,10 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
     }
 
     alpha_ptr = &alpha128[0];
-    //#ifdef __AVX2__
 #if defined(__x86_64__) || defined(__i386__)
     m11p = (__m128i *)m_11;
     m10p = (__m128i *)m_10;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m11p = (int16x8_t *)m_11;
     m10p = (int16x8_t *)m_10;
 #endif
@@ -323,8 +301,6 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
          k<l;
          k++) {
 #if defined(__x86_64__) || defined(__i386__)
-      //#ifndef __AVX2__
-#if 1
       a1=_mm_load_si128(&alpha_ptr[1]);
       a3=_mm_load_si128(&alpha_ptr[3]);
       a5=_mm_load_si128(&alpha_ptr[5]);
@@ -364,31 +340,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
       alpha_max = _mm_max_epi16(alpha_max,a5);
       alpha_max = _mm_max_epi16(alpha_max,a6);
       alpha_max = _mm_max_epi16(alpha_max,a7);
-#else
-      a02=_mm256_load_si256(&alpha_ptr256[0]);
-      a13=_mm256_load_si256(&alpha_ptr256[1]);
-      a64=_mm256_load_si256(&alpha_ptr256[2]);
-      a75=_mm256_load_si256(&alpha_ptr256[3]);
-      m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m11p,0);
-      m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m10p,1);
-      m_b01 = _mm256_adds_epi16(a13,m11m10_256); //negative m10
-      m_b23 = _mm256_subs_epi16(a75,m11m10_256); //negative m10
-      m_b45 = _mm256_subs_epi16(a13,m11m10_256); //negative m10
-      m_b67 = _mm256_adds_epi16(a75,m11m10_256); //negative m10
-      new01 = _mm256_subs_epi16(a02,m11m10_256);  //negative m10
-      new23 = _mm256_adds_epi16(a64,m11m10_256);  //negative m10
-      new45 = _mm256_adds_epi16(a02,m11m10_256);  //negative m10
-      new67 = _mm256_subs_epi16(a64,m11m10_256);  //negative m10
-      a01   = _mm256_max_epi16(m_b01,new01);
-      a23   = _mm256_max_epi16(m_b23,new23);
-      a45   = _mm256_max_epi16(m_b45,new45);
-      a67   = _mm256_max_epi16(m_b67,new67);
-      alpha_max = _mm256_max_epi16(a01,a23);
-      alpha_max = _mm256_max_epi16(alpha_max,a45);
-      alpha_max = _mm256_max_epi16(alpha_max,a67);
-      alpha_max = _mm256_max_epi16(alpha_max,_mm256_permutevar8x32_epi32(alpha_max,_mm256_set_epi32(3,2,1,0,7,6,5,4)));
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       m_b0 = vqaddq_s16(alpha_ptr[1],*m11p);  // m11
       m_b4 = vqsubq_s16(alpha_ptr[1],*m11p);  // m00=-m11
       m_b1 = vqsubq_s16(alpha_ptr[3],*m10p);  // m01=-m10
@@ -423,12 +375,9 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
       alpha_max = vmaxq_s16(alpha_max,a7);
 #endif
       alpha_ptr+=8;
-      //#ifdef __AVX2__
       m11p++;
       m10p++;
 #if defined(__x86_64__) || defined(__i386__)
-      //#ifndef __AVX2__
-#if 1
       alpha_ptr[0] = _mm_subs_epi16(a0,alpha_max);
       alpha_ptr[1] = _mm_subs_epi16(a1,alpha_max);
       alpha_ptr[2] = _mm_subs_epi16(a2,alpha_max);
@@ -437,17 +386,7 @@ void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned s
       alpha_ptr[5] = _mm_subs_epi16(a5,alpha_max);
       alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max);
       alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max);
-#else
-      a01   = _mm256_subs_epi16(a01,alpha_max);
-      a23   = _mm256_subs_epi16(a23,alpha_max);
-      a45   = _mm256_subs_epi16(a45,alpha_max);
-      a67   = _mm256_subs_epi16(a67,alpha_max);
-      alpha_ptr256[0] = _mm256_permute2x128_si256(a01,a23,0x20);  //a02
-      alpha_ptr256[1] = _mm256_permute2x128_si256(a01,a23,0x13);  //a13
-      alpha_ptr256[2] = _mm256_permute2x128_si256(a45,a67,0x02);  //a64
-      alpha_ptr256[3] = _mm256_permute2x128_si256(a45,a67,0x31);  //a75
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       alpha_ptr[0] = vqsubq_s16(a0,alpha_max);
       alpha_ptr[1] = vqsubq_s16(a1,alpha_max);
       alpha_ptr[2] = vqsubq_s16(a2,alpha_max);
@@ -511,7 +450,7 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
   __m128i new0,new1,new2,new3,new4,new5,new6,new7;
   __m128i *beta128,*alpha128,*beta_ptr;
   __m128i beta_max;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t m11_128,m10_128;
   int16x8_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
   int16x8_t new0,new1,new2,new3,new4,new5,new6,new7;
@@ -527,7 +466,6 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
   // termination for beta initialization
   //  fprintf(fdsse4,"beta init: offset8 %d\n",offset8_flag);
   m11=(int16_t)m_11[2+frame_length];
-  //#ifndef __AVX2__
 #if 1
   m10=(int16_t)m_10[2+frame_length];
 #else
@@ -580,7 +518,7 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
 #if defined(__x86_64__) || defined(__i386__)
     beta_ptr   = (__m128i *)&beta[frame_length<<3];
     alpha128   = (__m128i *)&alpha[0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     beta_ptr   = (int16x8_t *)&beta[frame_length<<3];
     alpha128   = (int16x8_t *)&alpha[0];
 #endif
@@ -616,7 +554,7 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
       beta_ptr[5] = _mm_srli_si128(beta128[5],2);
       beta_ptr[6] = _mm_srli_si128(beta128[6],2);
       beta_ptr[7] = _mm_srli_si128(beta128[7],2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       beta128 = (int16x8_t *)&beta[0];
       beta_ptr   = (int16x8_t *)&beta[frame_length<<3];
       beta_ptr[0] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[0],16);
@@ -658,7 +596,7 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
     beta_ptr[5] = _mm_insert_epi16(beta_ptr[5],beta5_16,7);
     beta_ptr[6] = _mm_insert_epi16(beta_ptr[6],beta6_16,7);
     beta_ptr[7] = _mm_insert_epi16(beta_ptr[7],beta7_16,7);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     beta_ptr[0] = vsetq_lane_s16(beta0_16,beta_ptr[0],7);
     beta_ptr[1] = vsetq_lane_s16(beta1_16,beta_ptr[1],7);
     beta_ptr[2] = vsetq_lane_s16(beta2_16,beta_ptr[2],7);
@@ -685,8 +623,6 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
 #if defined(__x86_64__) || defined(__i386__)
       m11_128=((__m128i *)m_11)[k];
       m10_128=((__m128i *)m_10)[k];
-      //#ifndef __AVX2__
-#if 1
       m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128);  //m11
       m_b1 = _mm_subs_epi16(beta_ptr[4],m11_128);  //m00
       m_b2 = _mm_subs_epi16(beta_ptr[5],m10_128);  //m01
@@ -703,25 +639,8 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
       new5 = _mm_adds_epi16(beta_ptr[2],m10_128);  //m10
       new6 = _mm_adds_epi16(beta_ptr[3],m11_128);  //m11
       new7 = _mm_subs_epi16(beta_ptr[3],m11_128);  //m00
-#else
-      b01=_mm256_load_si256(&((_m256i *)beta_ptr)[0]);
-      b23=_mm256_load_si256(&((_m256i *)beta_ptr)[1]);
-      b45=_mm256_load_si256(&((_m256i *)beta_ptr)[2]);
-      b67=_mm256_load_si256(&((_m256i *)beta_ptr)[3]);
-      m11m10_256 = _mm256_insertf128_si256(m11m10_256,m11_128,0);
-      m11m10_256 = _mm256_insertf128_si256(m11m10_256,m10_128,1);
-      m_b02 = _mm256_adds_epi16(b45,m11m10_256); //negative m10
-      m_b13 = _mm256_subs_epi16(b45,m11m10_256); //negative m10
-      m_b64 = _mm256_subs_epi16(b67,m11m10_256); //negative m10
-      m_b75 = _mm256_adds_epi16(b67,m11m10_256); //negative m10
-      new02 = _mm256_subs_epi16(b01,m11m10_256);  //negative m10
-      new13 = _mm256_adds_epi16(b01,m11m10_256);  //negative m10
-      new64 = _mm256_adds_epi16(b23,m11m10_256);  //negative m10
-      new75 = _mm256_subs_epi16(b24,m11m10_256);  //negative m10
-#endif
+
       beta_ptr-=8;
-      //#ifndef __AVX2__
-#if 1
       beta_ptr[0] = _mm_max_epi16(m_b0,new0);
       beta_ptr[1] = _mm_max_epi16(m_b1,new1);
       beta_ptr[2] = _mm_max_epi16(m_b2,new2);
@@ -745,25 +664,7 @@ void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
       beta_ptr[5] = _mm_subs_epi16(beta_ptr[5],beta_max);
       beta_ptr[6] = _mm_subs_epi16(beta_ptr[6],beta_max);
       beta_ptr[7] = _mm_subs_epi16(beta_ptr[7],beta_max);
-#else
-      b02   = _mm256_max_epi16(m_b02,new02);
-      b13   = _mm256_max_epi16(m_b13,new13);
-      b64   = _mm256_max_epi16(m_b64,new64);
-      b75   = _mm256_max_epi16(m_b75,new75);
-      beta_max = _mm256_max_epi16(b02,b13);
-      beta_max = _mm256_max_epi16(beta_max,b64);
-      beta_max = _mm256_max_epi16(beta_max,b75);
-      beta_max = _mm256_max_epi16(beta_max,_mm256_permutevar8x32_epi32(betaa_max,_mm256_set_epi32(3,2,1,0,7,6,5,4)));
-      b02   = _mm256_subs_epi16(b02,beta_max);
-      b13   = _mm256_subs_epi16(b13,beta_max);
-      b64   = _mm256_subs_epi16(b64,beta_max);
-      b75   = _mm256_subs_epi16(b75,beta_max);
-      ((_m256i *)beta_ptr)[0]) = _mm256_permute2x128_si256(b02,b13,0x02); //b01
-      ((_m256i *)beta_ptr)[1]) = _mm256_permute2x128_si256(b02,b13,0x31); //b23
-      ((_m256i *)beta_ptr)[2]) = _mm256_permute2x128_si256(b64,b75,0x13); //b45
-      ((_m256i *)beta_ptr)[3]) = _mm256_permute2x128_si256(b64,b75,0x20); //b67
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       m11_128=((int16x8_t *)m_11)[k];
       m10_128=((int16x8_t *)m_10)[k];
       m_b0 = vqaddq_s16(beta_ptr[4],m11_128);  //m11
@@ -836,7 +737,7 @@ void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext,
   __m128i m01_1,m01_2,m01_3,m01_4;
   __m128i m10_1,m10_2,m10_3,m10_4;
   __m128i m11_1,m11_2,m11_3,m11_4;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *alpha128=(int16x8_t *)alpha;
   int16x8_t *beta128=(int16x8_t *)beta;
   int16x8_t *m11_128,*m10_128,*ext_128;
@@ -880,7 +781,6 @@ void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext,
       print_shorts("b6:",&beta_ptr[6]);
       print_shorts("b7:",&beta_ptr[7]);
     */
-    //#ifndef __AVX2__
 #if 1
     m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00;
     m11_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11;
@@ -966,7 +866,7 @@ void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext,
     print_shorts("m10_1:",(int16_t *)&m10_1);
     print_shorts("m01_1:",(int16_t *)&m01_1);
 #endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m11_128        = (int16x8_t *)&m_11[k<<3];
     m10_128        = (int16x8_t *)&m_10[k<<3];
     ext_128        = (int16x8_t *)&ext[k<<3];
@@ -1107,7 +1007,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
   __m128i *yp128;
   __m128i tmp={0}, zeros=_mm_setzero_si128();
   __m128i tmpe;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *yp128;
   //  int16x8_t tmp128[(n+8)>>3];
   int16x8_t tmp, zeros=vdupq_n_s16(0);
@@ -1155,7 +1055,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
 
 #if defined(__x86_64__) || defined(__i386__)
   yp128 = (__m128i *)y;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   yp128 = (int16x8_t *)y;
 #endif
   s = systematic0;
@@ -1228,7 +1128,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
 #ifdef DEBUG_LOGMAP
     fprintf(fdsse4,"init7: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]);
 #endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     s[j]   = vgetq_lane_s16(yp128[0],0);
     yp1[j] = vgetq_lane_s16(yp128[0],1);
     yp2[j] = vgetq_lane_s16(yp128[0],2);
@@ -1315,7 +1215,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
       ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],5);
       ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],6);
       ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],7);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],0);
       ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],1);
       ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],2);
@@ -1346,7 +1246,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
       tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6);
       tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7);
       ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,0);
       tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,1);
       tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,2);
@@ -1381,7 +1281,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
 #endif
         tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros);
         decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,7);
         tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,6);
         tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,5);
@@ -1464,7 +1364,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
       __m128i *ext_128=(__m128i *) ext;
       __m128i *s1_128=(__m128i *) systematic1;
       __m128i *s0_128=(__m128i *) systematic0;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       int16x8_t *ext_128=(int16x8_t *) ext;
       int16x8_t *s1_128=(int16x8_t *) systematic1;
       int16x8_t *s0_128=(int16x8_t *) systematic0;
@@ -1474,7 +1374,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
       for (i=0; i<myloop; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         *ext_128=_mm_adds_epi16(_mm_subs_epi16(*ext_128,*s1_128++),*s0_128++);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         *ext_128=vqaddq_s16(vqsubq_s16(*ext_128,*s1_128++),*s0_128++);
 #endif
         ext_128++;
diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
index af02a1605475b1600d97ecb20c791e57494307f3..0614a9d07107cae3344e9a69e0646ea99366f0e1 100644
--- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
+++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
@@ -153,7 +153,7 @@ void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
   __m128i *y_parity128   = (__m128i *)y_parity;
   __m128i *m10_128        = (__m128i *)m10;
   __m128i *m11_128        = (__m128i *)m11;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int8x16_t *systematic128  = (int8x16_t *)systematic;
   int8x16_t *y_parity128    = (int8x16_t *)y_parity;
   int8x16_t *m10_128        = (int8x16_t *)m10;
@@ -177,7 +177,7 @@ void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
                                  _mm_srai_epi16(_mm_adds_epi16(sh,yph),1));
     m10_128[k] = _mm_packs_epi16(_mm_srai_epi16(_mm_subs_epi16(sl,ypl),1),
                                  _mm_srai_epi16(_mm_subs_epi16(sh,yph),1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m11_128[k] = vhaddq_s8(systematic128[k],y_parity128[k]);
     m10_128[k] = vhsubq_s8(systematic128[k],y_parity128[k]);
 #endif
@@ -193,7 +193,7 @@ void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity,
                                _mm_srai_epi16(_mm_adds_epi16(sh,yph),1));
   m10_128[k] = _mm_packs_epi16(_mm_srai_epi16(_mm_subs_epi16(sl,ypl),1),
                                _mm_srai_epi16(_mm_subs_epi16(sh,yph),1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   m11_128[k] = vhaddq_s8(systematic128[k+term_flag],y_parity128[k]);
   m10_128[k] = vhsubq_s8(systematic128[k+term_flag],y_parity128[k]);
 #endif
@@ -209,7 +209,7 @@ void compute_alpha8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
   __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
   __m128i new0,new1,new2,new3,new4,new5,new6,new7;
   __m128i alpha_max;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int8x16_t *alpha128=(int8x16_t *)alpha,*alpha_ptr;
   int8x16_t *m11p,*m10p;
   int8x16_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
@@ -299,7 +299,7 @@ void compute_alpha8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sh
     alpha[112] = -MAX8/2;
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   alpha128[0] = vdupq_n_s8(-MAX8/2);
   alpha128[0] = vsetq_lane_s8(0,alpha128[0],0);
   alpha128[1] = vdupq_n_s8(-MAX8/2);
@@ -401,7 +401,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
   __m128i new0,new1,new2,new3,new4,new5,new6,new7;
   __m128i *beta128,*alpha128,*beta_ptr;
   __m128i beta_max;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int8x16_t m11_128,m10_128;
   int8x16_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7;
   int8x16_t new0,new1,new2,new3,new4,new5,new6,new7;
@@ -421,7 +421,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
 #if defined(__x86_64__) || defined(__i386__)
   beta_ptr   = (__m128i *)&beta[frame_length<<3];
   alpha128   = (__m128i *)&alpha[0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   beta_ptr   = (int8x16_t *)&beta[frame_length<<3];
   alpha128   = (int8x16_t *)&alpha[0];
 #endif
@@ -451,7 +451,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
       beta_ptr[5] = _mm_insert_epi8(beta_ptr[5],beta5,15);
       beta_ptr[6] = _mm_insert_epi8(beta_ptr[6],beta6,15);
       beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       beta_ptr[0] = vsetq_lane_s8(beta0,beta_ptr[0],15);
       beta_ptr[1] = vsetq_lane_s8(beta1,beta_ptr[1],15);
       beta_ptr[2] = vsetq_lane_s8(beta2,beta_ptr[2],15);
@@ -465,7 +465,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
 
 #if defined(__x86_64__) || defined(__i386__)
     beta_ptr = (__m128i *)&beta[frame_length<<3];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     beta_ptr = (int8x16_t *)&beta[frame_length<<3];
 #endif
 
@@ -515,7 +515,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
       beta_ptr[5] = _mm_subs_epi8(beta_ptr[5],beta_max);
       beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max);
       beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       m11_128=((int8x16_t *)m_11)[k];
       m10_128=((int8x16_t *)m_10)[k];
       m_b0 = vqaddq_s8(beta_ptr[4],m11_128);  //m11
@@ -575,7 +575,7 @@ void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho
     beta_ptr[5] = _mm_srli_si128(beta128[5],1);
     beta_ptr[6] = _mm_srli_si128(beta128[6],1);
     beta_ptr[7] = _mm_srli_si128(beta128[7],1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     beta128 = (int8x16_t *)&beta[0];
     beta_ptr   = (int8x16_t *)&beta[frame_length<<3];
     beta_ptr[0] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[0],8);
@@ -608,7 +608,7 @@ void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, l
   __m128i m01_1,m01_2,m01_3,m01_4;
   __m128i m10_1,m10_2,m10_3,m10_4;
   __m128i m11_1,m11_2,m11_3,m11_4;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int8x16_t *alpha128=(int8x16_t *)alpha;
   int8x16_t *beta128=(int8x16_t *)beta;
   int8x16_t *m11_128,*m10_128,*ext_128;
@@ -670,7 +670,7 @@ void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, l
     *ext_128 = _mm_subs_epi8(m10_1,m01_1);
     alpha_ptr+=8;
     beta_ptr+=8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m11_128        = (int8x16_t *)&m_11[k<<4];
     m10_128        = (int8x16_t *)&m_10[k<<4];
     ext_128        = (int8x16_t *)&ext[k<<4];
@@ -820,7 +820,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
   __m128i *yp128;
   __m128i tmp128[(n+8)>>3];
   __m128i tmp={0}, zeros=_mm_setzero_si128();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int8x16_t *yp128;
   int8x16_t tmp128[(n+8)>>3];
   int8x16_t tmp, zeros=vdupq_n_s8(0);
@@ -900,7 +900,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
       ((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],4));
 
   yp128 = (__m128i *)y8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg=vdupq_n_s32(0);
 
   for (i=0; i<(3*(n>>4))+1; i++) {
@@ -1019,7 +1019,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
       tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],13);
       tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],14);
       ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],15);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,0);
       tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,1);
       tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,2);
@@ -1067,7 +1067,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
         tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15);
         decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros));
         ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0);
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1);
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,2);
@@ -1111,7 +1111,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
         tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15);
         tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]);
         ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0);
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1);
         tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,2);
@@ -1166,7 +1166,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
           }
         }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         uint8x16_t *dbytes=(uint8x16_t *)decoded_bytes_interl;
         uint16x8_t mask  __attribute__((aligned(16)));
         int n_128=n2>>7;
@@ -1208,7 +1208,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
           tmp=_mm_insert_epi8(tmp, ((llr_t *)tmp128)[*pi6_p++],8);
           tmp=_mm_cmpgt_epi8(tmp,zeros);
           ((uint16_t *)decoded_bytes)[i]=(uint16_t)_mm_movemask_epi8(tmp);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
           tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,7);
           tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,6);
           tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,5);
@@ -1286,7 +1286,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
       __m128i *ext_128=(__m128i *) ext;
       __m128i *s1_128=(__m128i *) systematic1;
       __m128i *s0_128=(__m128i *) systematic0;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       int8x16_t *ext_128=(int8x16_t *) ext;
       int8x16_t *s1_128=(int8x16_t *) systematic1;
       int8x16_t *s0_128=(int8x16_t *) systematic0;
@@ -1296,7 +1296,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y,
       for (i=0; i<myloop; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         *ext_128=_mm_adds_epi8(_mm_subs_epi8(*ext_128,*s1_128++),*s0_128++);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         *ext_128=vqaddq_s8(vqsubq_s8(*ext_128,*s1_128++),*s0_128++);
 #endif
         ext_128++;
diff --git a/openair1/PHY/CODING/TESTBENCH/ldpctest.c b/openair1/PHY/CODING/TESTBENCH/ldpctest.c
index 841a31fd850713839941888f9c4351b7980d7d9d..411d62a5ac8cf24c1b8e1ffc7b7413196b84cfe7 100644
--- a/openair1/PHY/CODING/TESTBENCH/ldpctest.c
+++ b/openair1/PHY/CODING/TESTBENCH/ldpctest.c
@@ -36,11 +36,7 @@
 #define MAX_BLOCK_LENGTH 8448
 
 #ifndef malloc16
-#  ifdef __AVX2__
 #    define malloc16(x) memalign(32,x)
-#  else
-#    define malloc16(x) memalign(16,x)
-#  endif
 #endif
 
 #define NR_LDPC_PROFILER_DETAIL
diff --git a/openair1/PHY/CODING/TESTBENCH/pdcch_test.c b/openair1/PHY/CODING/TESTBENCH/pdcch_test.c
index b301959fbbe83714f0bfda5bd6f490932d250389..f4574bb0c523391ec3c960a2cbf34f6e0a423341 100644
--- a/openair1/PHY/CODING/TESTBENCH/pdcch_test.c
+++ b/openair1/PHY/CODING/TESTBENCH/pdcch_test.c
@@ -26,7 +26,6 @@
 
 #include "PHY/defs.h"
 #include "PHY/vars.h"
-#include "ARCH/CBMIMO1/DEVICE_DRIVER/vars.h"
 #include "MAC_INTERFACE/vars.h"
 #include "PHY/CODING/defs.h"
 //#include "PHY/CODING/lte_interleaver_inline.h"
diff --git a/openair1/PHY/CODING/coding_defs.h b/openair1/PHY/CODING/coding_defs.h
index 1854f28fa788f6602366f959b191486a861340ec..2f5749df07d9e5b40b8d0ece77427218c0230ef1 100644
--- a/openair1/PHY/CODING/coding_defs.h
+++ b/openair1/PHY/CODING/coding_defs.h
@@ -472,6 +472,13 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f);
 
 void nr_deinterleaving_ldpc(uint32_t E, uint8_t Qm, int16_t *e,int16_t *f);
 
+int nr_get_R_ldpc_decoder(int rvidx,
+                          int E,
+                          int BG,
+                          int Z,
+                          int *llrLen,
+                          int round);
+
 int nr_rate_matching_ldpc(uint32_t Tbslbrm,
                           uint8_t BG,
                           uint16_t Z,
diff --git a/openair1/PHY/CODING/crc.h b/openair1/PHY/CODING/crc.h
index d6474eae9b7e49405724abccd147063a2b92c272..bca966d62bd1eb55db679b4a43f0bf64c6f07f34 100644
--- a/openair1/PHY/CODING/crc.h
+++ b/openair1/PHY/CODING/crc.h
@@ -44,8 +44,7 @@
 
 #include "crcext.h"
 #include "types.h"
-#include <immintrin.h>
-#include <wmmintrin.h>
+#include "PHY/sse_intrin.h"
 
 /**
  * PCLMULQDQ CRC computation context structure
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h
index c21169869cb56f7e9245c80365fa6951afcc4acd..09e15cea83a113743478f021a4f5784a733e4fce 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_bnProc.h
@@ -30,7 +30,7 @@
 
 #ifndef __NR_LDPC_BNPROC__H__
 #define __NR_LDPC_BNPROC__H__
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 /**
    \brief Performs first part of BN processing on the BN processing buffer and stores the results in the LLR results buffer.
           At every BN, the sum of the returned LLRs from the connected CNs and the LLR of the receiver input is computed.
@@ -81,22 +81,22 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         p_bnProcBufRes[i] = p_llrProcBuf256[i];
 
         // First 16 LLRs of first CN
-        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
-        ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);
+        ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
 
-        ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);
+        ymmRes0 = simde_mm256_adds_epi16(ymm0, ymm1);
 
         // Second 16 LLRs of first CN
-        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j+1]);
-        ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j+1]);
+        ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
 
-        ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);
+        ymmRes1 = simde_mm256_adds_epi16(ymm0, ymm1);
 
         // Pack results back to epi8
-        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
         // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
         // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+        *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
         // Next result
         p_llrRes++;
@@ -125,31 +125,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<2; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -179,31 +179,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<3; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -233,31 +233,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<4; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -287,31 +287,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<5; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -341,31 +341,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<6; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -395,31 +395,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<7; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -449,31 +449,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<8; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -503,31 +503,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<9; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -557,31 +557,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<10; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -611,31 +611,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<11; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -665,31 +665,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<12; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -719,31 +719,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<13; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -773,31 +773,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<14; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -827,31 +827,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<15; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -881,31 +881,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<16; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -935,31 +935,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<17; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -989,31 +989,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<18; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1043,31 +1043,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<19; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1097,31 +1097,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<20; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1151,31 +1151,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<21; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1205,31 +1205,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<22; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1259,31 +1259,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<23; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1313,31 +1313,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<24; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1367,31 +1367,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<25; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1421,31 +1421,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<26; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1475,31 +1475,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<27; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1529,31 +1529,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<28; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1583,31 +1583,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<29; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1637,31 +1637,31 @@ static inline void nrLDPC_bnProcPc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_
         for (i=0,j=0; i<M; i++,j+=2)
         {
             // First 16 LLRs of first CN
-            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j]);
-            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
+            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j]);
+            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);
 
             // Loop over CNs
             for (k=1; k<30; k++)
             {
-                ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
-                ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+                ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j]);
+                ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-                ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
-                ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+                ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[k*cnOffsetInGroup + j+1]);
+                ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
             }
 
             // Add LLR from receiver input
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
+            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);
+            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);
 
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
+            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]);
+            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);
 
             // Pack results back to epi8
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
+            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-            *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8);
+            *p_llrRes = simde_mm256_permute4x64_epi64(ymm0, 0xD8);
 
             // Next result
             p_llrRes++;
@@ -1730,7 +1730,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1765,7 +1765,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1800,7 +1800,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1835,7 +1835,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1870,7 +1870,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1905,7 +1905,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1940,7 +1940,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -1975,7 +1975,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2010,7 +2010,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2045,7 +2045,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2080,7 +2080,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
         // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2115,7 +2115,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2150,7 +2150,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2185,7 +2185,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2220,7 +2220,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2255,7 +2255,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2290,7 +2290,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2325,7 +2325,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2360,7 +2360,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2395,7 +2395,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2430,7 +2430,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2465,7 +2465,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2500,7 +2500,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2535,7 +2535,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2570,7 +2570,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2605,7 +2605,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2640,7 +2640,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2675,7 +2675,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2710,7 +2710,7 @@ static inline void nrLDPC_bnProc(t_nrLDPC_lut* p_lut, int8_t* bnProcBuf, int8_t*
             // Loop over BNs
             for (i=0; i<M; i++)
             {
-                *p_res = _mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
+                *p_res = simde_mm256_subs_epi8(*p_llrRes, p_bnProcBuf[k*cnOffsetInGroup + i]);
 
                 p_res++;
                 p_llrRes++;
@@ -2741,7 +2741,7 @@ static inline void nrLDPC_llr2bit(int8_t* out, int8_t* llrOut, uint16_t numLLR)
 
     for (i=0; i<M; i++)
     {
-        *p_out++ = _mm256_and_si256(*p_ones, _mm256_cmpgt_epi8(*p_zeros, *p_llrOut));
+        *p_out++ = simde_mm256_and_si256(*p_ones, simde_mm256_cmpgt_epi8(*p_zeros, *p_llrOut));
         p_llrOut++;
     }
 
@@ -2792,9 +2792,9 @@ static inline void nrLDPC_llr2bitPacked(int8_t* out, int8_t* llrOut, uint16_t nu
     for (i=0; i<M; i++)
     {
         // Move LSB to MSB on 8 bits
-        inPerm = _mm256_shuffle_epi8(*p_llrOut,*p_shuffle);
+        inPerm = simde_mm256_shuffle_epi8(*p_llrOut,*p_shuffle);
         // Hard decision
-        *p_bits++ = _mm256_movemask_epi8(inPerm);
+        *p_bits++ = simde_mm256_movemask_epi8(inPerm);
         p_llrOut++;
     }
 
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc.h
index f792f993e9c78066328ff859bb14afd9eaa69ca7..984cb5e11dfd43968df24f6afa5980f243352ddd 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_cnProc.h
@@ -32,6 +32,8 @@
 #ifndef __NR_LDPC_DECODER_CNPROC__H__
 #define __NR_LDPC_DECODER_CNPROC__H__
 
+#include "PHY/sse_intrin.h"
+
 /**
    \brief Performs CN processing for BG2 on the CN processing buffer and stores the results in the CN processing results buffer.
    \param p_lut Pointer to decoder LUTs
@@ -39,12 +41,9 @@
    \param Z Lifting size
 */
 
-
-#ifdef __AVX512BW__
+#if defined(__AVX512BW__)
 #include "nrLDPC_cnProc_avx512.h"
-
 #else
-
 static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z)
 {
     const uint8_t*  lut_numCnInCnGroups   = p_lut->numCnInCnGroups;
@@ -102,20 +101,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
                 // Abs and sign of 32 CNs (first BN)
               //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
                 ymm0 = pj0[i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // 32 CNs of second BN
                 //  ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
                 ymm0 = pj1[i];
-                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                sgn  = _mm256_sign_epi8(sgn, ymm0);
+                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                //*p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                //*p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 //p_cnProcBufResBit++;
-                p_cnProcBufResBit[i]=_mm256_sign_epi8(min, sgn);
+                p_cnProcBufResBit[i]=simde_mm256_sign_epi8(min, sgn);
             }
         }
     }
@@ -149,20 +148,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<3; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -198,20 +197,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<4; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -248,20 +247,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<5; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -299,20 +298,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<7; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -351,20 +350,20 @@ static inline void nrLDPC_cnProc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<9; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -440,18 +439,18 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // 32 CNs of second BN
                 ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
-                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                sgn  = _mm256_sign_epi8(sgn, ymm0);
+                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
 
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -487,20 +486,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<3; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG4[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -537,20 +536,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<4; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG5[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -588,20 +587,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<5; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG6[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -640,20 +639,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG7[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<6; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG7[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -692,20 +691,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<7; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG8[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -745,20 +744,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG9[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<8; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG9[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -798,20 +797,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<9; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG10[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -856,20 +855,20 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
             {
                 // Abs and sign of 32 CNs (first BN)
                 ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
-                sgn  = _mm256_sign_epi8(*p_ones, ymm0);
-                min  = _mm256_abs_epi8(ymm0);
+                sgn  = simde_mm256_sign_epi8(*p_ones, ymm0);
+                min  = simde_mm256_abs_epi8(ymm0);
 
                 // Loop over BNs
                 for (k=1; k<18; k++)
                 {
                     ymm0 = p_cnProcBuf[lut_idxCnProcG19[j][k] + i];
-                    min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-                    sgn  = _mm256_sign_epi8(sgn, ymm0);
+                    min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+                    sgn  = simde_mm256_sign_epi8(sgn, ymm0);
                 }
 
                 // Store result
-                min = _mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -127
-                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+                min = simde_mm256_min_epu8(min, *p_maxLLR); // 128 in epi8 is -128
+                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 p_cnProcBufResBit++;
             }
         }
@@ -878,6 +877,7 @@ static inline void nrLDPC_cnProc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBuf, int
 }
 
 #endif
+
 /**
    \brief Performs parity check for BG1 on the CN processing buffer. Stops as soon as error is detected.
    \param p_lut Pointer to decoder LUTs
@@ -937,7 +937,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -956,7 +956,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1005,7 +1005,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1024,7 +1024,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1073,7 +1073,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1093,7 +1093,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1142,7 +1142,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1161,7 +1161,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1210,7 +1210,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1229,7 +1229,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1278,7 +1278,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1297,7 +1297,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1346,7 +1346,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1365,7 +1365,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1414,7 +1414,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1433,7 +1433,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1482,7 +1482,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1501,7 +1501,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG1(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1578,7 +1578,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1597,7 +1597,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1646,7 +1646,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1665,7 +1665,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1714,7 +1714,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1733,7 +1733,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1782,7 +1782,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1801,7 +1801,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1850,7 +1850,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1869,7 +1869,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
@@ -1918,7 +1918,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
                 // Add BN and input LLR, extract the sign bit
                 // and add in GF(2) (xor)
-                pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+                pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
             }
 
             // If no error pcRes should be 0
@@ -1937,7 +1937,7 @@ static inline uint32_t nrLDPC_cnProcPc_BG2(t_nrLDPC_lut* p_lut, int8_t* cnProcBu
 
             // Add BN and input LLR, extract the sign bit
             // and add in GF(2) (xor)
-            pcRes ^= _mm256_movemask_epi8(_mm256_adds_epi8(ymm0,ymm1));
+            pcRes ^= simde_mm256_movemask_epi8(simde_mm256_adds_epi8(ymm0,ymm1));
         }
 
         // If no error pcRes should be 0
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
index 0ab3ea8e6b17b35b585af4687a5b902406559028..1f918ee83bf78975a40fb460dde3772a71748051 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c
@@ -26,7 +26,7 @@
 */
 
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "nrLDPCdecoder_defs.h"
 #include "nrLDPC_types.h"
 #include "nrLDPC_init.h"
@@ -42,7 +42,7 @@
 /----------------------------------------------------------------------*/
 
 //BG1-------------------------------------------------------------------
-#ifdef __AVX512BW__
+#if defined(__AVX512BW__)
 
 #include "cnProc_avx512/nrLDPC_cnProc_BG1_R13_AVX512.h"
 #include "cnProc_avx512/nrLDPC_cnProc_BG1_R23_AVX512.h"
@@ -85,7 +85,7 @@
 
 //bnProc----------------------------------------------------------------
 
-#ifdef __AVX512BW__
+#if defined(__AVX512BW__)
 //BG1-------------------------------------------------------------------
 #include "bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h"
 #include "bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h"
@@ -224,7 +224,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
         {
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_cnProc_BG1_R13_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R13_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -234,7 +234,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
 
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_cnProc_BG1_R23_AVX512(cnProcBuf,cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R23_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -244,7 +244,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
 
             case 89:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG1_R89_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R89_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -261,7 +261,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
         switch (R) {
             case 15:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_cnProc_BG2_R15_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R15_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -270,7 +270,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG2_R13_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R13_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -279,7 +279,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG2_R23_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R23_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -382,7 +382,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
         switch (R) {
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R13_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R13_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -391,7 +391,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R23_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R23_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -400,7 +400,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 89:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R89_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R89_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -416,7 +416,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
         switch (R) {
             case 15:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R15_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R15_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -425,7 +425,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R13_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R13_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -435,7 +435,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
 
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R23_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R23_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -490,7 +490,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
            switch (R) {
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_cnProc_BG1_R13_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R13_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -499,7 +499,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG1_R23_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R23_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -508,7 +508,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 89:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG1_R89_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG1_R89_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -524,7 +524,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
            switch (R) {
             case 15:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_cnProc_BG2_R15_AVX512(cnProcBuf,cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R15_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -533,7 +533,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG2_R13_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R13_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -542,7 +542,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             } 
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                  nrLDPC_cnProc_BG2_R23_AVX512(cnProcBuf, cnProcBufRes, Z);
                 #else
                 nrLDPC_cnProc_BG2_R23_AVX2(cnProcBuf, cnProcBufRes, Z);
@@ -639,7 +639,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
           switch (R) {
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R13_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R13_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -648,7 +648,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R23_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R23_AVX2(bnProcBuf,bnProcBufRes,llrRes, Z);
@@ -657,7 +657,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 89:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG1_R89_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG1_R89_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -670,7 +670,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
           {
             case 15:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R15_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R15_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -679,7 +679,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 13:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R13_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R13_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
@@ -688,7 +688,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, uint32_
             }
             case 23:
             {
-                #ifdef __AVX512BW__
+                #if defined(__AVX512BW__)
                 nrLDPC_bnProc_BG2_R23_AVX512(bnProcBuf, bnProcBufRes,llrRes, Z);
                 #else
                 nrLDPC_bnProc_BG2_R23_AVX2(bnProcBuf, bnProcBufRes,llrRes, Z);
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
index f51f607b885d20552442d9f72b83b504c83ecbe9..e53d7d4d640e71077ef9b2b7e6e8310324627c7c 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
@@ -43,8 +43,8 @@ void nrLDPC_bnProcPc_BG1_generator_AVX2(const char *dir, int R)
     abort();
   }
 
-//  fprintf(fd,"#include <stdint.h>\n");
-//  fprintf(fd,"#include <immintrin.h>\n");
+  fprintf(fd,"#include <stdint.h>\n");
+  fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
   fprintf(fd,"static inline void nrLDPC_bnProcPc_BG1_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]);
     const uint8_t*  lut_numBnInBnGroups;
@@ -120,21 +120,21 @@ void nrLDPC_bnProcPc_BG1_generator_AVX2(const char *dir, int R)
          fprintf(fd,"           p_bnProcBufRes[i] = p_llrProcBuf256[i];\n");
        
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"           ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           ymmRes0 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
 
         
             // Second 16 LLRs of first CN
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n");
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
 
         fprintf(fd,"}\n");
@@ -167,32 +167,32 @@ fprintf(fd,  "// Process group with 2 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
 
             // Loop over CNs
         for (k=1; k<2; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
 
         fprintf(fd,"}\n");
@@ -224,31 +224,31 @@ fprintf(fd,  "// Process group with 3 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<3; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
             }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -281,32 +281,32 @@ fprintf(fd,  "// Process group with 4 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<4; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -337,32 +337,32 @@ fprintf(fd,  "// Process group with 5 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<5; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -394,32 +394,32 @@ fprintf(fd,  "// Process group with 6 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<6; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -450,33 +450,33 @@ fprintf(fd,  "// Process group with 7 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<7; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -507,34 +507,34 @@ fprintf(fd,  "// Process group with 8 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<8; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
 
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -564,33 +564,33 @@ fprintf(fd,  "// Process group with 9 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<9; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -621,32 +621,32 @@ fprintf(fd,  "// Process group with 10 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<10; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -679,32 +679,32 @@ fprintf(fd,  "// Process group with 11 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<11; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -734,32 +734,32 @@ fprintf(fd,  "// Process group with 12 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<12; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -790,32 +790,32 @@ fprintf(fd,  "// Process group with 13 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<13; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
             }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -847,32 +847,32 @@ fprintf(fd,  "// Process group with 14 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<14; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -903,32 +903,32 @@ fprintf(fd,  "// Process group with 15 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<15; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-         fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+         fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -960,32 +960,32 @@ fprintf(fd,  "// Process group with 16 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<16; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1016,32 +1016,32 @@ fprintf(fd,  "// Process group with 17 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<17; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1072,32 +1072,32 @@ fprintf(fd,  "// Process group with 18 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<18; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1127,32 +1127,32 @@ fprintf(fd,  "// Process group with 19 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<19; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1183,32 +1183,32 @@ fprintf(fd,  "// Process group with 20 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<20; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1243,32 +1243,32 @@ fprintf(fd,  "// Process group with 21 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<21; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1298,32 +1298,32 @@ fprintf(fd,  "// Process group with 22 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<22; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-         fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+         fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1354,32 +1354,32 @@ fprintf(fd,  "// Process group with <23 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<23; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1411,32 +1411,32 @@ fprintf(fd,  "// Process group with 24 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<24; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1467,32 +1467,32 @@ fprintf(fd,  "// Process group with 25 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<25; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1524,32 +1524,32 @@ fprintf(fd,  "// Process group with 26 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<26; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1580,32 +1580,32 @@ fprintf(fd,  "// Process group with 27 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<27; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1636,32 +1636,32 @@ fprintf(fd,  "// Process group with 28 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<28; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1691,32 +1691,32 @@ fprintf(fd,  "// Process group with 29 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<29; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1747,32 +1747,32 @@ fprintf(fd,  "// Process group with 30 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<30; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c
index c2c96280e9a84a2554f3648b8afb27bd1cd216e2..049cca34bef5cce93c3cbd9cf700aea6b1d95ed7 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
@@ -43,8 +43,8 @@ void nrLDPC_bnProcPc_BG2_generator_AVX2(const char *dir, int R)
     abort();
   }
 
-//  fprintf(fd,"#include <stdint.h>\n");
-  //fprintf(fd,"#include <immintrin.h>\n");
+  fprintf(fd,"#include <stdint.h>\n");
+  fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
   fprintf(fd,"static inline void nrLDPC_bnProcPc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z  ) {\n",ratestr[R]);
     const uint8_t*  lut_numBnInBnGroups;
@@ -120,21 +120,21 @@ void nrLDPC_bnProcPc_BG2_generator_AVX2(const char *dir, int R)
          fprintf(fd,"           p_bnProcBufRes[i] = p_llrProcBuf256[i];\n");
 
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"           ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           ymmRes0 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
 
 
             // Second 16 LLRs of first CN
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n");
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymm0, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
 
         fprintf(fd,"}\n");
@@ -166,32 +166,32 @@ fprintf(fd,  "// Process group with 2 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
 
             // Loop over CNs
         for (k=1; k<2; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
 
         fprintf(fd,"}\n");
@@ -223,31 +223,31 @@ fprintf(fd,  "// Process group with 3 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<3; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
             }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -280,32 +280,32 @@ fprintf(fd,  "// Process group with 4 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<4; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -336,32 +336,32 @@ fprintf(fd,  "// Process group with 5 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<5; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -393,32 +393,32 @@ fprintf(fd,  "// Process group with 6 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<6; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -449,33 +449,33 @@ fprintf(fd,  "// Process group with 7 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<7; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -506,34 +506,34 @@ fprintf(fd,  "// Process group with 8 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<8; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
 
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -563,33 +563,33 @@ fprintf(fd,  "// Process group with 9 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<9; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        //fprintf(fd,"         (__m256i*) &llrRes[%d + i]    = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 );
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -620,32 +620,32 @@ fprintf(fd,  "// Process group with 10 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<10; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -678,32 +678,32 @@ fprintf(fd,  "// Process group with 11 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<11; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -733,32 +733,32 @@ fprintf(fd,  "// Process group with 12 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<12; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -789,32 +789,32 @@ fprintf(fd,  "// Process group with 13 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<13; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
             }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -846,32 +846,32 @@ fprintf(fd,  "// Process group with 14 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<14; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -902,32 +902,32 @@ fprintf(fd,  "// Process group with 15 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<15; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-         fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+         fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -959,32 +959,32 @@ fprintf(fd,  "// Process group with 16 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<16; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1015,32 +1015,32 @@ fprintf(fd,  "// Process group with 17 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<17; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1071,32 +1071,32 @@ fprintf(fd,  "// Process group with 18 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<18; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1126,32 +1126,32 @@ fprintf(fd,  "// Process group with 19 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<19; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1182,32 +1182,32 @@ fprintf(fd,  "// Process group with 20 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<20; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1242,32 +1242,32 @@ fprintf(fd,  "// Process group with 21 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<21; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1297,32 +1297,32 @@ fprintf(fd,  "// Process group with 22 CNs \n");
         // Loop over BNs
         fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"            ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<22; k++)
         {
-        fprintf(fd,"            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "           ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "           ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"            ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"            ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"            ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-         fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+         fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1353,32 +1353,32 @@ fprintf(fd,  "// Process group with <23 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<23; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1410,32 +1410,32 @@ fprintf(fd,  "// Process group with 24 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<24; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1466,32 +1466,32 @@ fprintf(fd,  "// Process group with 25 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<25; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1523,32 +1523,32 @@ fprintf(fd,  "// Process group with 26 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<26; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1579,32 +1579,32 @@ fprintf(fd,  "// Process group with 27 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<27; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1635,32 +1635,32 @@ fprintf(fd,  "// Process group with 28 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<28; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1690,32 +1690,32 @@ fprintf(fd,  "// Process group with 29 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<29; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
@@ -1746,32 +1746,32 @@ fprintf(fd,  "// Process group with 30 CNs \n");
         // Loop over BNs
         fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
             // First 16 LLRs of first CN
-        fprintf(fd,"        ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
 
             // Loop over CNs
         for (k=1; k<30; k++)
         {
-        fprintf(fd,"        ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        ymm1 = simde_mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
 
-        fprintf(fd, "       ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n");
+        fprintf(fd, "       ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1); \n");
         }
 
             // Add LLR from receiver input
-        fprintf(fd,"        ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
-        fprintf(fd,"        ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n");
+        fprintf(fd,"        ymm0    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        ymmRes0 = simde_mm256_adds_epi16(ymmRes0, ymm0);\n");
 
-        fprintf(fd,"        ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
-        fprintf(fd,"        ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n");
+        fprintf(fd,"        ymm1    = simde_mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        ymmRes1 = simde_mm256_adds_epi16(ymmRes1, ymm1);\n");
 
             // Pack results back to epi8
-        fprintf(fd,"        ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n");
+        fprintf(fd,"        ymm0 = simde_mm256_packs_epi16(ymmRes0, ymmRes1);\n");
             // ymm0     = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]]
             // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]]
-        fprintf(fd,"            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n");
+        fprintf(fd,"            p_llrRes[i] = simde_mm256_permute4x64_epi64(ymm0, 0xD8);\n");
 
         fprintf(fd,"}\n");
     }
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c
index 972a5720ca41f865b8b917cf884804682ee46b09..bfc22c0524dd76c67462eba76dc0d389ab50cf54 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c
@@ -22,1091 +22,981 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
-
-void nrLDPC_bnProc_BG1_generator_AVX2(const char *dir, int R)
+void nrLDPC_bnProc_BG1_generator_AVX2(const char* dir, int R)
 {
-  const char *ratestr[3]={"13","23","89"};
-
-  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+  const char* ratestr[3] = {"13", "23", "89"};
 
+  if (R < 0 || R > 2) {
+    printf("Illegal R %d\n", R);
+    abort();
+  }
 
- // system("mkdir -p ../ldpc_gen_files");
+  // system("mkdir -p ../ldpc_gen_files");
 
-  char fname[FILENAME_MAX+1];
+  char fname[FILENAME_MAX + 1];
   snprintf(fname, sizeof(fname), "%s/bnProc/nrLDPC_bnProc_BG1_R%s_AVX2.h", dir, ratestr[R]);
-  FILE *fd=fopen(fname,"w");
+  FILE* fd = fopen(fname, "w");
   if (fd == NULL) {
     printf("Cannot create file %s\n", fname);
     abort();
   }
 
-  //fprintf(fd,"#include <stdint.h>\n");
-  //fprintf(fd,"#include <immintrin.h>\n");
-
-
-    fprintf(fd,"static inline void nrLDPC_bnProc_BG1_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z ) {\n", ratestr[R]);
-
-    const uint8_t*  lut_numBnInBnGroups;
-    const uint32_t* lut_startAddrBnGroups;
-    const uint16_t* lut_startAddrBnGroupsLlr;
-    if (R==0) {
-
-
-      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG1_R13;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R13;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R13;
-
-    }
-    else if (R==1){
-
-      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG1_R23;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R23;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R23;
-    }
-    else if (R==2) {
-
-      lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R89;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R89;
-    }
-  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
-
-
-    //uint32_t M;
-    //uint32_t M32rem;
-   // uint32_t i;
-    uint32_t k;
-    // Offset to each bit within a group in terms of 32 Byte
-    uint32_t cnOffsetInGroup;
-    uint8_t idxBnGroup = 0;
-    fprintf(fd,"        uint32_t M, i; \n");
-
-
-
-// =====================================================================
-    // Process group with 1 CN
-    // Already done in bnProcBufPc
-
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 2 CNs \n");
+  // fprintf(fd,"#include <stdint.h>\n");
+  // fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
+
+  fprintf(fd, "static inline void nrLDPC_bnProc_BG1_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z ) {\n", ratestr[R]);
+
+  const uint8_t* lut_numBnInBnGroups;
+  const uint32_t* lut_startAddrBnGroups;
+  const uint16_t* lut_startAddrBnGroupsLlr;
+  if (R == 0) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R13;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R13;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R13;
+
+  } else if (R == 1) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R23;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R23;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R23;
+  } else if (R == 2) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R89;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R89;
+  } else {
+    printf("aborting, illegal R %d\n", R);
+    fclose(fd);
+    abort();
+  }
 
-    if (lut_numBnInBnGroups[1] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // uint32_t M;
+  // uint32_t M32rem;
+  // uint32_t i;
+  uint32_t k;
+  // Offset to each bit within a group in terms of 32 Byte
+  uint32_t cnOffsetInGroup;
+  uint8_t idxBnGroup = 0;
+  fprintf(fd, "        uint32_t M, i; \n");
 
-        // Number of groups of 32 BNs or parallel processing
-        fprintf(fd," M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[1] );
+  // =====================================================================
+  // Process group with 1 CN
+  // Already done in bnProcBufPc
 
+  // =====================================================================
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5;
+  fprintf(fd, "// Process group with 2 CNs \n");
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<2; k++)
-        {
-    
-          // Loop over BNs
-        fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+  if (lut_numBnInBnGroups[1] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-         fprintf(fd,"}\n");
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, " M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[1]);
 
-        }
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[1] * NR_LDPC_ZMAX) >> 5;
 
+    // Set pointers to start of group 2
 
+    // Loop over CNs
+    for (k = 0; k < 2; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 3 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[2] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 3 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-         fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[2] );
+  if (lut_numBnInBnGroups[2] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[2]);
 
-        // Set pointers to start of group 2
-        //fprintf(fd,"    ((__m256i*) bnProcBuf)     = ((__m256i*) &bnProcBuf)    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
-        
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[2] * NR_LDPC_ZMAX) >> 5;
 
-        for (k=0; k<3; k++)
-        {
-  
+    // Set pointers to start of group 2
+    // fprintf(fd,"    ((__m256i*) bnProcBuf)     = ((__m256i*) &bnProcBuf)    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    for (k = 0; k < 3; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-         fprintf(fd,"}\n");
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 4 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 4 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[3] > 0)
-    {
-        // If elements in group move to next address
-        idxBnGroup++;
+  if (lut_numBnInBnGroups[3] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[3] );
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[3]);
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5;
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[3] * NR_LDPC_ZMAX) >> 5;
 
-        // Set pointers to start of group 2
-      
-    
+    // Set pointers to start of group 2
 
-        for (k=0; k<4; k++)
-        {
-  
-          // Loop over BNs
-        fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), ((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup));
+    for (k = 0; k < 4; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              ((lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup),
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              ((lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup));
 
-         fprintf(fd,"}\n");
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-    fprintf(fd,  "// Process group with 5 CNs \n");
-
-
+  fprintf(fd, "// Process group with 5 CNs \n");
 
-    if (lut_numBnInBnGroups[4] > 0)
-    {
+  if (lut_numBnInBnGroups[4] > 0) {
     // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[4] );
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[4]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<5; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[4] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 5; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 6 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 6 CNs \n");
-
- // Process group with 6 CNs
-
-    if (lut_numBnInBnGroups[5] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 6 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[5] );
+  if (lut_numBnInBnGroups[5] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[5]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<6; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[5] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 6; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 7 CNs \n");
-
- // Process group with 7 CNs
+  fprintf(fd, "// Process group with 7 CNs \n");
 
-    if (lut_numBnInBnGroups[6] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 7 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[6] );
+  if (lut_numBnInBnGroups[6] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[6]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<7; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[6] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 7; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 8 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 8 CNs \n");
 
-    if (lut_numBnInBnGroups[7] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 8 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[7] );
+  if (lut_numBnInBnGroups[7] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[7]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<8; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[7] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 8; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 9 CNs \n");
+  // =====================================================================
 
- // Process group with 9 CNs
+  fprintf(fd, "// Process group with 9 CNs \n");
 
-    if (lut_numBnInBnGroups[8] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 9 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[8] );
+  if (lut_numBnInBnGroups[8] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[8]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<9; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[8] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 9; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 10 CNs \n");
-
- // Process group with 10 CNs
+  fprintf(fd, "// Process group with 10 CNs \n");
 
-    if (lut_numBnInBnGroups[9] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 10 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[9] );
+  if (lut_numBnInBnGroups[9] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[9]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<10; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[9] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 10; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 11 CNs \n");
 
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 11 CNs \n");
-
-    if (lut_numBnInBnGroups[10] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[10] );;
+  if (lut_numBnInBnGroups[10] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[10]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<11; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[10] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 11; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
-      // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 12 CNs \n");
-
+  }
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[11] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 12 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[11] );;
+  if (lut_numBnInBnGroups[11] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[11]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<12; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[11] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 12; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 13 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[12] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 13 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[12] );;
+  if (lut_numBnInBnGroups[12] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[12]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<13; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[12] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 13; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 14 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 14 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[13] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[13] );;
+  if (lut_numBnInBnGroups[13] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[13]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<14; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[13] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 14; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 15 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[14] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 15 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[14] );;
+  if (lut_numBnInBnGroups[14] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[14]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<15; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[14] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 15; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 16 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 16 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[15] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[15] );;
+  if (lut_numBnInBnGroups[15] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[15]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<16; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[15] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 16; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
+  // Process group with 17 CNs
 
-   // =====================================================================
-    // Process group with 17 CNs
-
-fprintf(fd,  "// Process group with 17 CNs \n");
-
- // Process group with 17 CNs
+  fprintf(fd, "// Process group with 17 CNs \n");
 
-    if (lut_numBnInBnGroups[16] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 17 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[16] );;
+  if (lut_numBnInBnGroups[16] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[16]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<17; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[16] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 17; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 18 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 18 CNs \n");
 
-    if (lut_numBnInBnGroups[17] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 18 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[17] );;
+  if (lut_numBnInBnGroups[17] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[17]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<18; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[17] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 18; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 19 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[18] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 19 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[18] );;
+  if (lut_numBnInBnGroups[18] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[18]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<19; k++)
-        {
-  
+    // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[18] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 19; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 20 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[19] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 20 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[19] );;
+  if (lut_numBnInBnGroups[19] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[19]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<20; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[19] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 20; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 21 CNs \n");
 
+  if (lut_numBnInBnGroups[20] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[20]);
+    ;
 
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 21 CNs \n");
-
-
-
-
-
-    if (lut_numBnInBnGroups[20] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[20] );;
-
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5;
-
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<21; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[20] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 21; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
-      // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 22 CNs \n");
-
+  }
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[21] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 22 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[21] );;
+  if (lut_numBnInBnGroups[21] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[21]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<22; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[21] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 22; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with <23 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[22] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with <23 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[22] );;
+  if (lut_numBnInBnGroups[22] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[22]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<23; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[22] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 23; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 24 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 24 CNs \n");
-
- // Process group with 4 CNs
-
-    if (lut_numBnInBnGroups[23] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 24 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[23] );;
+  if (lut_numBnInBnGroups[23] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[23]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<24; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[23] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 24; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 25 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[24] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[24] );;
+  fprintf(fd, "// Process group with 25 CNs \n");
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5;
+  if (lut_numBnInBnGroups[24] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<25; k++)
-        {
-  
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[24]);
+    ;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[24] * NR_LDPC_ZMAX) >> 5;
 
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 25; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 26 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 26 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[25] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[25] );;
+  if (lut_numBnInBnGroups[25] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[25]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<26; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[25] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 26; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 27 CNs \n");
-
- // Process group with 17 CNs
+  fprintf(fd, "// Process group with 27 CNs \n");
 
-    if (lut_numBnInBnGroups[26] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 27 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[26] );;
+  if (lut_numBnInBnGroups[26] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[26]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<27; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[26] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 27; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 28 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 28 CNs \n");
 
-    if (lut_numBnInBnGroups[27] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 28 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[27] );;
+  if (lut_numBnInBnGroups[27] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[27]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<28; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[27] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 28; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 29 CNs \n");
+  // =====================================================================
 
- // Process group with 9 CNs
+  fprintf(fd, "// Process group with 29 CNs \n");
 
-    if (lut_numBnInBnGroups[28] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 29 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[28] );;
+  if (lut_numBnInBnGroups[28] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[28]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<29; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[28] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 29; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 30 CNs \n");
-
- // Process group with 20 CNs
+  fprintf(fd, "// Process group with 30 CNs \n");
 
-    if (lut_numBnInBnGroups[29] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 30 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[29] );;
+  if (lut_numBnInBnGroups[29] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[29]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<30; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[29] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 30; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    fprintf(fd,"}\n");
+  fprintf(fd, "}\n");
   fclose(fd);
-}//end of the function  nrLDPC_bnProc_BG1
-
-
-
-
-
+} // end of the function  nrLDPC_bnProc_BG1
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c
index c23fd0b5a75d036513be1a6b0c6f9f7bc0346262..278b8883192de127d880e500d3ce644c6218f6bc 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c
@@ -25,1084 +25,974 @@
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
-
-
-void nrLDPC_bnProc_BG2_generator_AVX2(const char *dir, int R)
+void nrLDPC_bnProc_BG2_generator_AVX2(const char* dir, int R)
 {
-  const char *ratestr[3]={"15","13","23"};
-
-  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+  const char* ratestr[3] = {"15", "13", "23"};
 
+  if (R < 0 || R > 2) {
+    printf("Illegal R %d\n", R);
+    abort();
+  }
 
- // system("mkdir -p ../ldpc_gen_files");
+  // system("mkdir -p ../ldpc_gen_files");
 
-  char fname[FILENAME_MAX+1];
+  char fname[FILENAME_MAX + 1];
   snprintf(fname, sizeof(fname), "%s/bnProc/nrLDPC_bnProc_BG2_R%s_AVX2.h", dir, ratestr[R]);
-  FILE *fd=fopen(fname,"w");
+  FILE* fd = fopen(fname, "w");
   if (fd == NULL) {
     printf("Cannot create file %s\n", fname);
     abort();
   }
 
+  fprintf(fd, "static inline void nrLDPC_bnProc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) {\n", ratestr[R]);
+  const uint8_t* lut_numBnInBnGroups;
+  const uint32_t* lut_startAddrBnGroups;
+  const uint16_t* lut_startAddrBnGroupsLlr;
+  if (R == 0) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R15;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
+
+  } else if (R == 1) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R13;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
+  } else if (R == 2) {
+    lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
+    lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
+    lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
+  } else {
+    printf("aborting, illegal R %d\n", R);
+    fclose(fd);
+    abort();
+  }
 
+  // uint32_t M;
+  // uint32_t M32rem;
+  // uint32_t i;
+  uint32_t k;
+  // Offset to each bit within a group in terms of 32 Byte
+  uint32_t cnOffsetInGroup;
+  uint8_t idxBnGroup = 0;
 
-    fprintf(fd,"static inline void nrLDPC_bnProc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) {\n",ratestr[R]);
-    const uint8_t*  lut_numBnInBnGroups;
-    const uint32_t* lut_startAddrBnGroups;
-    const uint16_t* lut_startAddrBnGroupsLlr;
-    if (R==0) {
-
-
-      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG2_R15;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
-
-    }
-    else if (R==1){
-
-      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG2_R13;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
-    }
-    else if (R==2) {
-
-      lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
-      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
-      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
-    }
-  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
-
-
-    //uint32_t M;
-    //uint32_t M32rem;
-   // uint32_t i;
-    uint32_t k;
-    // Offset to each bit within a group in terms of 32 Byte
-    uint32_t cnOffsetInGroup;
-    uint8_t idxBnGroup = 0;
-
-
-
-     fprintf(fd,"        uint32_t M, i; \n");
-
-
-
-// =====================================================================
-    // Process group with 1 CN
-    // Already done in bnProcBufPc
-
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 2 CNs \n");
-
-    if (lut_numBnInBnGroups[1] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "        uint32_t M, i; \n");
 
-        // Number of groups of 32 BNs or parallel processing
-        fprintf(fd," M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[1] );
+  // =====================================================================
+  // Process group with 1 CN
+  // Already done in bnProcBufPc
 
+  // =====================================================================
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5;
+  fprintf(fd, "// Process group with 2 CNs \n");
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<2; k++)
-        {
-    
-          // Loop over BNs
-        fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+  if (lut_numBnInBnGroups[1] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-         fprintf(fd,"}\n");
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, " M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[1]);
 
-        }
+    // Set the offset to each CN within a group in terms of 32 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[1] * NR_LDPC_ZMAX) >> 5;
 
+    // Set pointers to start of group 2
 
+    // Loop over CNs
+    for (k = 0; k < 2; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 3 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[2] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 3 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-         fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[2] );
+  if (lut_numBnInBnGroups[2] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[2]);
 
-        // Set pointers to start of group 2
-        //fprintf(fd,"    ((__m256i*) bnProcBuf)     = ((__m256i*) &bnProcBuf)    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
-        
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[2] * NR_LDPC_ZMAX) >> 5;
 
-        for (k=0; k<3; k++)
-        {
-  
+    // Set pointers to start of group 2
+    // fprintf(fd,"    ((__m256i*) bnProcBuf)     = ((__m256i*) &bnProcBuf)    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    for (k = 0; k < 3; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-         fprintf(fd,"}\n");
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 4 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 4 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[3] > 0)
-    {
-        // If elements in group move to next address
-        idxBnGroup++;
+  if (lut_numBnInBnGroups[3] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[3] );
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[3]);
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5;
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[3] * NR_LDPC_ZMAX) >> 5;
 
-        // Set pointers to start of group 2
-      
-    
+    // Set pointers to start of group 2
 
-        for (k=0; k<4; k++)
-        {
-  
-          // Loop over BNs
-        fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), ((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup));
+    for (k = 0; k < 4; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              ((lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup),
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              ((lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup));
 
-         fprintf(fd,"}\n");
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-    fprintf(fd,  "// Process group with 5 CNs \n");
-
-
+  fprintf(fd, "// Process group with 5 CNs \n");
 
-    if (lut_numBnInBnGroups[4] > 0)
-    {
+  if (lut_numBnInBnGroups[4] > 0) {
     // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[4] );
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[4]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<5; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[4] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 5; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 6 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 6 CNs \n");
-
- // Process group with 6 CNs
-
-    if (lut_numBnInBnGroups[5] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 6 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[5] );
+  if (lut_numBnInBnGroups[5] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[5]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<6; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[5] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 6; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 7 CNs \n");
-
- // Process group with 7 CNs
+  fprintf(fd, "// Process group with 7 CNs \n");
 
-    if (lut_numBnInBnGroups[6] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 7 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[6] );
+  if (lut_numBnInBnGroups[6] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[6]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<7; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[6] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 7; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 8 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 8 CNs \n");
 
-    if (lut_numBnInBnGroups[7] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 8 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[7] );
+  if (lut_numBnInBnGroups[7] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[7]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<8; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[7] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 8; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 9 CNs \n");
+  // =====================================================================
 
- // Process group with 9 CNs
+  fprintf(fd, "// Process group with 9 CNs \n");
 
-    if (lut_numBnInBnGroups[8] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 9 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[8] );
+  if (lut_numBnInBnGroups[8] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[8]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<9; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[8] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 9; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 10 CNs \n");
-
- // Process group with 10 CNs
+  fprintf(fd, "// Process group with 10 CNs \n");
 
-    if (lut_numBnInBnGroups[9] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 10 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[9] );
+  if (lut_numBnInBnGroups[9] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[9]);
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<10; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[9] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 10; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 11 CNs \n");
 
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 11 CNs \n");
-
-    if (lut_numBnInBnGroups[10] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[10] );;
+  if (lut_numBnInBnGroups[10] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[10]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<11; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[10] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 11; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
-      // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 12 CNs \n");
-
+  }
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[11] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 12 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[11] );;
+  if (lut_numBnInBnGroups[11] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[11]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<12; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[11] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 12; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 13 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[12] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 13 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[12] );;
+  if (lut_numBnInBnGroups[12] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[12]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<13; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[12] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 13; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 14 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 14 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[13] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[13] );;
+  if (lut_numBnInBnGroups[13] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[13]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<14; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[13] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 14; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 15 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[14] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 15 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[14] );;
+  if (lut_numBnInBnGroups[14] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[14]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<15; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[14] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 15; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 16 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 16 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[15] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[15] );;
+  if (lut_numBnInBnGroups[15] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[15]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<16; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 16 Byte
+    cnOffsetInGroup = (lut_numBnInBnGroups[15] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 16; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
+  // Process group with 17 CNs
 
-   // =====================================================================
-    // Process group with 17 CNs
-
-fprintf(fd,  "// Process group with 17 CNs \n");
-
- // Process group with 17 CNs
+  fprintf(fd, "// Process group with 17 CNs \n");
 
-    if (lut_numBnInBnGroups[16] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 17 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[16] );;
+  if (lut_numBnInBnGroups[16] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[16]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<17; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[16] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 17; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 18 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 18 CNs \n");
 
-    if (lut_numBnInBnGroups[17] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 18 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[17] );;
+  if (lut_numBnInBnGroups[17] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[17]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<18; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[17] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 18; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 19 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[18] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 19 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[18] );;
+  if (lut_numBnInBnGroups[18] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[18]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<19; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[18] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 19; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 20 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[19] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 20 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[19] );;
+  if (lut_numBnInBnGroups[19] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[19]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<20; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[19] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 20; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 21 CNs \n");
 
+  if (lut_numBnInBnGroups[20] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[20]);
+    ;
 
-    // =====================================================================
-
-fprintf(fd,  "// Process group with 21 CNs \n");
-
-
-
-
-
-    if (lut_numBnInBnGroups[20] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[20] );;
-
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5;
-
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<21; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[20] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 21; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
-      // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with 22 CNs \n");
-
+  }
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[21] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with 22 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[21] );;
+  if (lut_numBnInBnGroups[21] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[21]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<22; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[21] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 22; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    // =====================================================================
-
-
-
-fprintf(fd,  "// Process group with <23 CNs \n");
-
-
+  // =====================================================================
 
-    if (lut_numBnInBnGroups[22] > 0)
-    {
-  // If elements in group move to next address
-        idxBnGroup++;
+  fprintf(fd, "// Process group with <23 CNs \n");
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[22] );;
+  if (lut_numBnInBnGroups[22] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[22]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<23; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[22] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 23; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 24 CNs \n");
 
-    // =====================================================================
-
-
-fprintf(fd,  "// Process group with 24 CNs \n");
-
- // Process group with 4 CNs
-
-    if (lut_numBnInBnGroups[23] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 24 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[23] );;
+  if (lut_numBnInBnGroups[23] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[23]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<24; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[23] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 24; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 25 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[24] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[24] );;
+  fprintf(fd, "// Process group with 25 CNs \n");
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5;
+  if (lut_numBnInBnGroups[24] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<25; k++)
-        {
-  
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[24]);
+    ;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[24] * NR_LDPC_ZMAX) >> 5;
 
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 25; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
+  fprintf(fd, "// Process group with 26 CNs \n");
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 26 CNs \n");
-
-
-
-    if (lut_numBnInBnGroups[25] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
-
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[25] );;
+  if (lut_numBnInBnGroups[25] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[25]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<26; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[25] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 26; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 27 CNs \n");
-
- // Process group with 17 CNs
+  fprintf(fd, "// Process group with 27 CNs \n");
 
-    if (lut_numBnInBnGroups[26] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 27 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[26] );;
+  if (lut_numBnInBnGroups[26] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[26]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<27; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[26] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 27; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-
-fprintf(fd,  "// Process group with 28 CNs \n");
-
- // Process group with 8 CNs
+  fprintf(fd, "// Process group with 28 CNs \n");
 
-    if (lut_numBnInBnGroups[27] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 28 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[27] );;
+  if (lut_numBnInBnGroups[27] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[27]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<28; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[27] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 28; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 29 CNs \n");
+  // =====================================================================
 
- // Process group with 9 CNs
+  fprintf(fd, "// Process group with 29 CNs \n");
 
-    if (lut_numBnInBnGroups[28] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 29 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[28] );;
+  if (lut_numBnInBnGroups[28] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[28]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<29; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[28] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 29; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
+  // =====================================================================
 
-   // =====================================================================
-
-fprintf(fd,  "// Process group with 30 CNs \n");
-
- // Process group with 20 CNs
+  fprintf(fd, "// Process group with 30 CNs \n");
 
-    if (lut_numBnInBnGroups[29] > 0)
-    {
- // If elements in group move to next address
-        idxBnGroup++;
+  // Process group with 30 CNs
 
-        // Number of groups of 32 BNs for parallel processing
-        fprintf(fd,"       M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[29] );;
+  if (lut_numBnInBnGroups[29] > 0) {
+    // If elements in group move to next address
+    idxBnGroup++;
 
-        // Set the offset to each CN within a group in terms of 16 Byte
-        cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5;
+    // Number of groups of 32 BNs for parallel processing
+    fprintf(fd, "       M = (%d*Z + 31)>>5;\n", lut_numBnInBnGroups[29]);
+    ;
 
-        // Set pointers to start of group 2
-  
-            // Loop over CNs
-        for (k=0; k<30; k++)
-        {
-  
+    // Set the offset to each CN within a group in terms of 32 Byte (one __m256i)
+    cnOffsetInGroup = (lut_numBnInBnGroups[29] * NR_LDPC_ZMAX) >> 5;
 
-          // Loop over BNs
-       fprintf(fd,"            for (i=0;i<M;i++) {\n");
-        fprintf(fd,"            ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup);
+    // Set pointers to start of group 2
 
-         fprintf(fd,"}\n");
+    // Loop over CNs
+    for (k = 0; k < 30; k++) {
+      // Loop over BNs
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      fprintf(fd,
+              "            ((__m256i*)bnProcBufRes)[%d + i ] = simde_mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup,
+              (lut_startAddrBnGroupsLlr[idxBnGroup] >> 5),
+              (lut_startAddrBnGroups[idxBnGroup] >> 5) + k * cnOffsetInGroup);
 
-        }
+      fprintf(fd, "}\n");
     }
+  }
 
-    fprintf(fd,"}\n");
+  fprintf(fd, "}\n");
   fclose(fd);
-}//end of the function  nrLDPC_bnProc_BG2
-
-
+} // end of the function  nrLDPC_bnProc_BG2
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c
index 2bbca1591f716fc004d3c4fe58e3ac899a62ff42..49b01926094f4c317de4a62e52ace326b18c98ca 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c
@@ -22,7 +22,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c
index 418fe1f36f8dfde85f126ee0d984cd977b31ecaa..b63c1ced8b991a50e39b059b801501d441728e2e 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c
@@ -22,7 +22,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
@@ -44,8 +44,8 @@ void nrLDPC_bnProcPc_BG2_generator_AVX512(const char *dir, int R)
     abort();
   }
 
-//  fprintf(fd,"#include <stdint.h>\n");
-  //fprintf(fd,"#include <immintrin.h>\n");
+  // fprintf(fd,"#include <stdint.h>\n");
+  // fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
   fprintf(fd,"static inline void nrLDPC_bnProcPc_BG2_R%s_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]);
     const uint8_t*  lut_numBnInBnGroups;
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c
index 2c049842208f982d6879379128499f4f6ad6d0bb..533ebc95f33d7e4dcb5da2c81748b62a7a7ef556 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c
@@ -22,7 +22,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
@@ -45,7 +45,7 @@ void nrLDPC_bnProc_BG1_generator_AVX512(const char *dir, int R)
   }
 
   //fprintf(fd,"#include <stdint.h>\n");
-  //fprintf(fd,"#include <immintrin.h>\n");
+  //fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
 
     fprintf(fd,"static inline void nrLDPC_bnProc_BG1_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z ) {\n", ratestr[R]);
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c
index bcd1179b126fa11572349cd81359db68f46a8f4d..bd7a9455a806ccea6488a84ce7371a208d0a3480 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c
@@ -22,7 +22,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 #include "../../nrLDPCdecoder_defs.h"
 #include "../../nrLDPC_types.h"
 
@@ -45,7 +45,7 @@ void nrLDPC_bnProc_BG2_generator_AVX512(const char *dir, int R)
   }
 
   fprintf(fd,"#include <stdint.h>\n");
-  fprintf(fd,"#include <immintrin.h>\n");
+  fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
     fprintf(fd,"void nrLDPC_bnProc_BG2_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) {\n",ratestr[R]);
     const uint8_t*  lut_numBnInBnGroups;
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c
index f998f138593a7086b8ddfce73401cd9081984343..a4a269a2f639ba1b3e6fc8a5b619901d146d6342 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG1_avx2.c
@@ -42,7 +42,7 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
   }
 
   fprintf(fd,"#include <stdint.h>\n");
-  fprintf(fd,"#include <immintrin.h>\n");
+  fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
 
   fprintf(fd,"static inline void nrLDPC_cnProc_BG1_R%s_AVX2(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n",ratestr[R]);
@@ -87,8 +87,8 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
   const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
 
   fprintf(fd,"                __m256i ymm0, min, sgn,ones,maxLLR;\n");
-  fprintf(fd,"                ones   = _mm256_set1_epi8((char)1);\n");
-  fprintf(fd,"                maxLLR = _mm256_set1_epi8((char)127);\n");
+  fprintf(fd,"                ones   = simde_mm256_set1_epi8((char)1);\n");
+  fprintf(fd,"                maxLLR = simde_mm256_set1_epi8((char)127);\n");
 
   fprintf(fd,"                uint32_t  M;\n");
 
@@ -121,27 +121,27 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  // 32 CNs of second BN
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]);
 	  
-	  //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	  fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	  //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	  fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	  
-	  //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
         }
     }
@@ -180,10 +180,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -191,19 +191,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	      //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
         }
     }
@@ -245,10 +245,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -256,19 +256,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"           }\n");
         }
     }
@@ -310,10 +310,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -321,19 +321,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
 	}
     }
@@ -378,10 +378,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -389,19 +389,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG7[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
 	}
     }
@@ -447,10 +447,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -458,19 +458,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG8[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"              }\n");
         }
     }
@@ -516,10 +516,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -527,19 +527,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>5)+lut_idxCnProcG9[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[6]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[6]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
 	}
     }
@@ -586,10 +586,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -597,19 +597,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>5)+lut_idxCnProcG10[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[7]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[7]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
         }
     }
@@ -659,10 +659,10 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	  // Abs and sign of 32 CNs (first BN)
 	  //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	  fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][0]);
-	  //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	  fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	  //                min  = _mm256_abs_epi8(ymm0);
-	  fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	  //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	  fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	  //                min  = simde_mm256_abs_epi8(ymm0);
+	  fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	  // Loop over BNs
@@ -670,19 +670,19 @@ void nrLDPC_cnProc_BG1_generator_AVX2(const char* dir, int R)
 	    {
 	      fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>5)+lut_idxCnProcG19[j][k]);
 	      
-	      //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	      fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	      //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	      fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	      //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	      fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	      //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
 	  // Store result
-	  //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-	  fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-	  //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+	  //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+	  fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+	  //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
 	  //                p_cnProcBufResBit++;
-	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[8]>>5)+(j*bitOffsetInGroup));
+	  fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[8]>>5)+(j*bitOffsetInGroup));
 	  fprintf(fd,"            }\n");
         }
     }
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG2_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG2_avx2.c
index 9a5ff84c868eefd34d53e0fe4b4e5cbffdd18858..0d0c1305a04b20266bcf12fec497a2ea240f6fe2 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG2_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc/cnProc_gen_BG2_avx2.c
@@ -45,7 +45,7 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
   }
 
   fprintf(fd,"#include <stdint.h>\n");
-  fprintf(fd,"#include <immintrin.h>\n");
+  fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
   fprintf(fd,"static inline void nrLDPC_cnProc_BG2_R%s_AVX2(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n",ratestr[R]);
 
   const uint8_t*  lut_numCnInCnGroups;
@@ -77,8 +77,8 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
 
 
   fprintf(fd,"                __m256i ymm0, min, sgn,ones,maxLLR;\n");
-  fprintf(fd,"                ones   = _mm256_set1_epi8((char)1);\n");
-  fprintf(fd,"                maxLLR = _mm256_set1_epi8((char)127);\n");
+  fprintf(fd,"                ones   = simde_mm256_set1_epi8((char)1);\n");
+  fprintf(fd,"                maxLLR = simde_mm256_set1_epi8((char)127);\n");
     fprintf(fd,"                uint32_t M;\n");
  
 
@@ -100,35 +100,35 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
             // Abs and sign of 32 CNs (first BN)
             //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
             fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]);
-            //                sgn  = _mm256_sign_epi8(ones, ymm0);
-            fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-            //                min  = _mm256_abs_epi8(ymm0);
-            fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+            fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+            //                min  = simde_mm256_abs_epi8(ymm0);
+            fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
             
             // 32 CNs of second BN
             //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][1] + i];
             fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][1]);
             
-            //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-            fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+            //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+            fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
             
-            //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-            fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+            fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
             
             // Store result
-            //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-            fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-            //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+            //                min = simde_mm256_min_epu8(min, maxLLR); // 128 (0x80) in epi8 is -128
+            fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+            //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
             //                p_cnProcBufResBit++;
-            fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup));
+            fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[0]>>5)+(j*bitOffsetInGroup));
 
             // Abs and sign of 32 CNs (first BN)
             //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
             fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>5)+lut_idxCnProcG3[j][0]+1);
-            //                sgn  = _mm256_sign_epi8(ones, ymm0);
-            fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-            //                min  = _mm256_abs_epi8(ymm0);
-            fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+            fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+            //                min  = simde_mm256_abs_epi8(ymm0);
+            fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
       
             fprintf(fd,"            }\n");
           }
@@ -161,10 +161,10 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
           // Abs and sign of 32 CNs (first BN)
           //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
           fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][0]);
-          //                sgn  = _mm256_sign_epi8(ones, ymm0);
-           fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-          //                min  = _mm256_abs_epi8(ymm0);
-          fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+          //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+           fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+          //                min  = simde_mm256_abs_epi8(ymm0);
+          fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
             
             
           // Loop over BNs
@@ -172,19 +172,19 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
             {
             fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>5)+lut_idxCnProcG4[j][k]);
                 
-            //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-            fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+            //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+            fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
                 
-            //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-                fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+                fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
             }
             
             // Store result
-            //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-            fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-            //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+            //                min = simde_mm256_min_epu8(min, maxLLR); // 128 in epi8 is -128
+            fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+            //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
                 //                p_cnProcBufResBit++;
-            fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup));
+            fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[1]>>5)+(j*bitOffsetInGroup));
             fprintf(fd,"            }\n");
           }
       }
@@ -218,10 +218,10 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
         // Abs and sign of 32 CNs (first BN)
         //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
         fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][0]);
-        //                sgn  = _mm256_sign_epi8(ones, ymm0);
-        fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-        //                min  = _mm256_abs_epi8(ymm0);
-        fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+        //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+        fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+        //                min  = simde_mm256_abs_epi8(ymm0);
+        fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
         
         
         // Loop over BNs
@@ -229,18 +229,18 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
         {
           fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>5)+lut_idxCnProcG5[j][k]);
             
-          //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-          fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+          //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+          fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
             
-          //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-          fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+          //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+          fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
         }
         
           // Store result
-        //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-        fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-        //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
-        fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup));
+        //                min = simde_mm256_min_epu8(min, maxLLR); // 128 in epi8 is -128
+        fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+        //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
+        fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[2]>>5)+(j*bitOffsetInGroup));
         fprintf(fd,"           }\n");
       }
     }
@@ -275,10 +275,10 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
 	    // Abs and sign of 32 CNs (first BN)
 	    //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
 	    fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][0]);
-	    //                sgn  = _mm256_sign_epi8(ones, ymm0);
-	    fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-	   //                min  = _mm256_abs_epi8(ymm0);
-	    fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+	    //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+	    fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+	   //                min  = simde_mm256_abs_epi8(ymm0);
+	    fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
 	  
 	  
 	    // Loop over BNs
@@ -286,19 +286,19 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
 	    {
 	    fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>5)+lut_idxCnProcG6[j][k]);
 	      
-	    //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-	    fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+	    //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+	    fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
 	      
-	    //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-	    fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+	    //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+	    fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
 	    }
 	  
       // Store result
-      //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-      fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-      //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+      //                min = simde_mm256_min_epu8(min, maxLLR); // 128 in epi8 is -128
+      fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+      //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
       //                p_cnProcBufResBit++;
-      fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup));
+      fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[3]>>5)+(j*bitOffsetInGroup));
       fprintf(fd,"            }\n");
 	  }
   }
@@ -338,30 +338,30 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
         // Abs and sign of 32 CNs (first BN)
         //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
         fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG8[j][0]);
-        //                sgn  = _mm256_sign_epi8(ones, ymm0);
-        fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-        //                min  = _mm256_abs_epi8(ymm0);
-        fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+        //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+        fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+        //                min  = simde_mm256_abs_epi8(ymm0);
+        fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
         
 	      // Loop over BNs
           for (k=1; k<7; k++)
           {
           fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>5)+lut_idxCnProcG8[j][k]);
             
-          //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-          fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+          //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+          fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
             
-            //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-          fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+          fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
           
           }
 	  
 	        // Store result
-          //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-          //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+          //                min = simde_mm256_min_epu8(min, maxLLR); // 128 in epi8 is -128
+          fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+          //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
           //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup));
+          fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[4]>>5)+(j*bitOffsetInGroup));
           fprintf(fd,"              }\n");
         }
     }
@@ -401,10 +401,10 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
       // Abs and sign of 32 CNs (first BN)
         //                ymm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
       fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG10[j][0]);
-        //                sgn  = _mm256_sign_epi8(ones, ymm0);
-      fprintf(fd,"                sgn  = _mm256_sign_epi8(ones, ymm0);\n");
-        //                min  = _mm256_abs_epi8(ymm0);
-      fprintf(fd,"                min  = _mm256_abs_epi8(ymm0);\n");
+        //                sgn  = simde_mm256_sign_epi8(ones, ymm0);
+      fprintf(fd,"                sgn  = simde_mm256_sign_epi8(ones, ymm0);\n");
+        //                min  = simde_mm256_abs_epi8(ymm0);
+      fprintf(fd,"                min  = simde_mm256_abs_epi8(ymm0);\n");
         
 	  
 	  // Loop over BNs
@@ -412,19 +412,19 @@ void nrLDPC_cnProc_BG2_generator_AVX2(const char* dir, int R)
 	     {
           fprintf(fd,"                ymm0 = ((__m256i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>5)+lut_idxCnProcG10[j][k]);
             
-            //                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));
-          fprintf(fd,"                min  = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0));\n");
+            //                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));
+          fprintf(fd,"                min  = simde_mm256_min_epu8(min, simde_mm256_abs_epi8(ymm0));\n");
             
-            //                sgn  = _mm256_sign_epi8(sgn, ymm0);
-          fprintf(fd,"                sgn  = _mm256_sign_epi8(sgn, ymm0);\n");
+            //                sgn  = simde_mm256_sign_epi8(sgn, ymm0);
+          fprintf(fd,"                sgn  = simde_mm256_sign_epi8(sgn, ymm0);\n");
         }
 	  
           // Store result
-            //                min = _mm256_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm256_min_epu8(min, maxLLR);\n");
-            //                *p_cnProcBufResBit = _mm256_sign_epi8(min, sgn);
+            //                min = simde_mm256_min_epu8(min, maxLLR); // 128 in epi8 is -128
+          fprintf(fd,"                min = simde_mm256_min_epu8(min, maxLLR);\n");
+            //                *p_cnProcBufResBit = simde_mm256_sign_epi8(min, sgn);
             //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = _mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup));
+          fprintf(fd,"                ((__m256i*)cnProcBufRes)[%d+i] = simde_mm256_sign_epi8(min, sgn);\n",(lut_startAddrCnGroups[5]>>5)+(j*bitOffsetInGroup));
           fprintf(fd,"            }\n");
       }
     }
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG1_avx512.c
index b007ac0e0bf156679463852e576636938bfec2fa..aafcc987905694c9de76d19e248492cc125b591b 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG1_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG1_avx512.c
@@ -26,572 +26,535 @@
 
 void nrLDPC_cnProc_BG1_generator_AVX512(const char *dir, int R)
 {
-  const char *ratestr[3]={"13","23","89"};
-
-  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+  const char *ratestr[3] = {"13", "23", "89"};
 
+  if (R < 0 || R > 2) {
+    printf("Illegal R %d\n", R);
+    abort();
+  }
 
- // system("mkdir -p ../ldpc_gen_files");
+  // system("mkdir -p ../ldpc_gen_files");
 
-  char fname[FILENAME_MAX+1];
+  char fname[FILENAME_MAX + 1];
   snprintf(fname, sizeof(fname), "%s/cnProc_avx512/nrLDPC_cnProc_BG1_R%s_AVX512.h", dir, ratestr[R]);
-  FILE *fd=fopen(fname,"w");
+  FILE *fd = fopen(fname, "w");
   if (fd == NULL) {
     printf("Cannot create file %s\n", fname);
     abort();
   }
 
- // fprintf(fd,"#include <stdint.h>\n");
-//  fprintf(fd,"#include <immintrin.h>\n");
-
-
-  fprintf(fd,   "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
-
+  // fprintf(fd,"#include <stdint.h>\n");
+  // fprintf(fd,"#include \"PHY/sse_intrin.h\"\n");
 
-  fprintf(fd,"static inline void nrLDPC_cnProc_BG1_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n",ratestr[R]);
+  // fprintf(fd,   "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,simde_mm512_movepi8_mask(b),z,a)\n");
 
-  const uint8_t*  lut_numCnInCnGroups;
-  const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;
-
-  if (R==0)      lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
-  else if (R==1) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
-  else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
-  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
+  fprintf(fd, "static inline void nrLDPC_cnProc_BG1_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]);
 
+  const uint8_t *lut_numCnInCnGroups;
+  const uint32_t *lut_startAddrCnGroups = lut_startAddrCnGroups_BG1;
 
+  if (R == 0)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R13;
+  else if (R == 1)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R23;
+  else if (R == 2)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG1_R89;
+  else {
+    printf("aborting, illegal R %d\n", R);
+    fclose(fd);
+    abort();
+  }
 
   uint32_t j;
   uint32_t k;
   // Offset to each bit within a group in terms of 64  Byte
   uint32_t bitOffsetInGroup;
 
+  fprintf(fd, "                uint32_t M, i;\n");
+  fprintf(fd, "                __m512i zmm0, min, sgn,zeros,maxLLR, ones;\n");
 
-  fprintf(fd,"                uint32_t M, i;\n");
-  fprintf(fd,"                __m512i zmm0, min, sgn,zeros,maxLLR, ones;\n");
-
-  fprintf(fd,"                  zeros  = _mm512_setzero_si512();\n");
-  fprintf(fd,"                  maxLLR = _mm512_set1_epi8((char)127);\n");
-  fprintf(fd,"                 ones = _mm512_set1_epi8((char)1);\n");
-
-
+  fprintf(fd, "                  zeros  = _mm512_setzero_si512();\n");
+  fprintf(fd, "                  maxLLR = _mm512_set1_epi8((char)127);\n");
+  fprintf(fd, "                 ones = _mm512_set1_epi8((char)1);\n");
 
   // =====================================================================
   // Process group with 3 BNs
-  fprintf(fd,"//Process group with 3 BNs\n");
+  fprintf(fd, "//Process group with 3 BNs\n");
   // LUT with offsets for bits that need to be processed
   // 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
   // Offsets are in units of bitOffsetInGroup (1*384/32)12
-     // Offsets are in units of bitOffsetInGroup (1*384/32)12
-
-  const uint8_t lut_idxCnProcG3[3][2] = {{12,24}, {0,24}, {0,12}};
-
-  if (lut_numCnInCnGroups[0] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      //M = (lut_numCnInCnGroups[0]*Z + 63)>>6;
+  // Offsets are in units of bitOffsetInGroup (1*384/32)12
 
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[0] );
+  const uint8_t lut_idxCnProcG3[3][2] = {{12, 24}, {0, 24}, {0, 12}};
 
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0]*NR_LDPC_ZMAX)>>6;
+  if (lut_numCnInCnGroups[0] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[0]*Z + 63)>>6;
 
-     // Loop over every BN
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[0]);
 
-      for (j=0; j<3; j++)
-        {
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[0] * NR_LDPC_ZMAX) >> 6;
 
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-            fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>6)+lut_idxCnProcG3[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-            fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
+    // Loop over every BN
 
+    for (j = 0; j < 3; j++) {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[0] >> 6) + lut_idxCnProcG3[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
 
-         // for (k=1; k<2; k++)
-            //{
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>6)+lut_idxCnProcG3[j][1]/2);
+      // for (k=1; k<2; k++)
+      //{
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[0] >> 6) + lut_idxCnProcG3[j][1] / 2);
 
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+      //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+      fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
 
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-           // }
+      //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      // }
 
-          // Store result
-          //                min = _mm512_min_epu8(min, *maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-          //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
-              //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[0]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
-    
+      // Store result
+      //                min = _mm512_min_epu8(min, *maxLLR); // 128 in epi8 is -128
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+      //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
+      //                p_cnProcBufResBit++;
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[0] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
+  }
 
   // =====================================================================
   // Process group with 4 BNs
-  fprintf(fd,"//Process group with 4 BNs\n");
-    // Offset is 5*384/32 = 30
-  const uint8_t lut_idxCnProcG4[4][3] = {{60,120,180}, {0,120,180}, {0,60,180}, {0,60,120}};
-
-  if (lut_numCnInCnGroups[1] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      //M = (lut_numCnInCnGroups[1]*Z + 63)>>6;
-        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[1] );
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-      for (j=0; j<4; j++)
-        {
-        // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"              zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>6)+lut_idxCnProcG4[j][0]/2);
-               fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-            fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<3; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>6)+lut_idxCnProcG4[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-          //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
-              //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[1]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  fprintf(fd, "//Process group with 4 BNs\n");
+  // Offset is 5*384/32 = 60
+  const uint8_t lut_idxCnProcG4[4][3] = {{60, 120, 180}, {0, 120, 180}, {0, 60, 180}, {0, 60, 120}};
+
+  if (lut_numCnInCnGroups[1] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[1]*Z + 63)>>6;
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[1]);
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[1] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+    for (j = 0; j < 4; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
+      fprintf(fd, "              zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[1] >> 6) + lut_idxCnProcG4[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 3; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[1] >> 6) + lut_idxCnProcG4[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -128
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+      //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
+      //                p_cnProcBufResBit++;
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[1] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 5 BNs
-  fprintf(fd,"//Process group with 5 BNs\n");
+  fprintf(fd, "//Process group with 5 BNs\n");
   // Offset is 18*384/32 = 216
-  const uint16_t lut_idxCnProcG5[5][4] = {{216,432,648,864}, {0,432,648,864},
-                                          {0,216,648,864}, {0,216,432,864}, {0,216,432,648}};
+  const uint16_t lut_idxCnProcG5[5][4] = {{216, 432, 648, 864}, {0, 432, 648, 864}, {0, 216, 648, 864}, {0, 216, 432, 864}, {0, 216, 432, 648}};
 
+  if (lut_numCnInCnGroups[2] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[2]*Z + 63)>>6;
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[2]);
 
-  if (lut_numCnInCnGroups[2] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      //M = (lut_numCnInCnGroups[2]*Z + 63)>>6;
-      fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[2] );
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2] * NR_LDPC_ZMAX) >> 6;
 
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[2]*NR_LDPC_ZMAX)>>6;
+    // Loop over every BN
 
-      // Loop over every BN
+    for (j = 0; j < 5; j++) {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[2] >> 6) + lut_idxCnProcG5[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
 
-      for (j=0; j<5; j++)
-        {
+      // Loop over BNs
+      for (k = 1; k < 4; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[2] >> 6) + lut_idxCnProcG5[j][k] / 2);
 
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>6)+lut_idxCnProcG5[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
 
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
 
-          // Loop over BNs
-          for (k=1; k<4; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>6)+lut_idxCnProcG5[j][k]/2);
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -128
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
 
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[2]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"           }\n");
-        }
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[2] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "           }\n");
     }
+  }
 
   // =====================================================================
   // Process group with 6 BNs
-  fprintf(fd,"//Process group with 6 BNs\n");
-    // Offset is 8*384/32 = 48
-  const uint16_t lut_idxCnProcG6[6][5] = {{96,192,288,384,480}, {0,192,288,384,480},
-                                          {0,96,288,384,480}, {0,96,192,384,480},
-                                          {0,96,192,288,480}, {0,96,192,288,384}};
-
+  fprintf(fd, "//Process group with 6 BNs\n");
+  // Offset is 8*384/32 = 96
+  const uint16_t lut_idxCnProcG6[6][5] = {{96, 192, 288, 384, 480}, {0, 192, 288, 384, 480}, {0, 96, 288, 384, 480}, {0, 96, 192, 384, 480}, {0, 96, 192, 288, 480}, {0, 96, 192, 288, 384}};
 
-  if (lut_numCnInCnGroups[3] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      //M = (lut_numCnInCnGroups[3]*Z + 63)>>6;
+  if (lut_numCnInCnGroups[3] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[3]*Z + 63)>>6;
 
-      fprintf(fd, "M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[3] );
+    fprintf(fd, "M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[3]);
 
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3]*NR_LDPC_ZMAX)>>6;
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[3] * NR_LDPC_ZMAX) >> 6;
 
-      // Loop over every BN
+    // Loop over every BN
 
-      for (j=0; j<6; j++)
-        {
+    for (j = 0; j < 6; j++) {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[3] >> 6) + lut_idxCnProcG6[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
 
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>6)+lut_idxCnProcG6[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
+      // Loop over BNs
+      for (k = 1; k < 5; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[3] >> 6) + lut_idxCnProcG6[j][k] / 2);
 
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
 
-          // Loop over BNs
-          for (k=1; k<5; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>6)+lut_idxCnProcG6[j][k]/2);
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
 
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
 
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[3]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[3] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 7 BNs
-  fprintf(fd,"//Process group with 7 BNs\n");
-    // Offset is 5*384/32 = 30
-  const uint16_t lut_idxCnProcG7[7][6] = {{60,120,180,240,300,360}, {0,120,180,240,300,360},
-                                          {0,60,180,240,300,360},   {0,60,120,240,300,360},
-                                          {0,60,120,180,300,360},   {0,60,120,180,240,360},
-                                          {0,60,120,180,240,300}};
-
-
-  if (lut_numCnInCnGroups[4] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-     // M = (lut_numCnInCnGroups[4]*Z + 63)>>6;
-     fprintf(fd, "M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[4] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-
-      for (j=0; j<7; j++)
-        {
-          // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0= ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>6)+lut_idxCnProcG7[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<6; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>6)+lut_idxCnProcG7[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[4]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  fprintf(fd, "//Process group with 7 BNs\n");
+  // Offset is 5*384/32 = 60 (table stride; entries are halved when the index is emitted)
+  const uint16_t lut_idxCnProcG7[7][6] = {{60, 120, 180, 240, 300, 360},
+                                          {0, 120, 180, 240, 300, 360},
+                                          {0, 60, 180, 240, 300, 360},
+                                          {0, 60, 120, 240, 300, 360},
+                                          {0, 60, 120, 180, 300, 360},
+                                          {0, 60, 120, 180, 240, 360},
+                                          {0, 60, 120, 180, 240, 300}};
+
+  if (lut_numCnInCnGroups[4] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[4]*Z + 63)>>6;
+    fprintf(fd, "M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[4]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[4] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 7; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG7[j][0] + i];
+      fprintf(fd, "                zmm0= ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[4] >> 6) + lut_idxCnProcG7[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 6; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[4] >> 6) + lut_idxCnProcG7[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[4] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 8 BNs
-  fprintf(fd,"//Process group with 8 BNs\n");
-    // Offset is 2*384/32 = 24
-    const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
-                                           {0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
-                                           {0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
-                                           {0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
-
- 
-  if (lut_numCnInCnGroups[5] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-     // M = (lut_numCnInCnGroups[5]*Z + 63)>>6;
-     fprintf(fd, "M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[5] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-
-      for (j=0; j<8; j++)
-        {
-          // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>6)+lut_idxCnProcG8[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<7; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>6)+lut_idxCnProcG8[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[5]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"              }\n");
-        }
+  fprintf(fd, "//Process group with 8 BNs\n");
+  // Offset is 2*384/32 = 24
+  const uint8_t lut_idxCnProcG8[8][7] = {{24, 48, 72, 96, 120, 144, 168},
+                                         {0, 48, 72, 96, 120, 144, 168},
+                                         {0, 24, 72, 96, 120, 144, 168},
+                                         {0, 24, 48, 96, 120, 144, 168},
+                                         {0, 24, 48, 72, 120, 144, 168},
+                                         {0, 24, 48, 72, 96, 144, 168},
+                                         {0, 24, 48, 72, 96, 120, 168},
+                                         {0, 24, 48, 72, 96, 120, 144}};
+
+  if (lut_numCnInCnGroups[5] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[5]*Z + 63)>>6;
+    fprintf(fd, "M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[5]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[5] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 8; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[5] >> 6) + lut_idxCnProcG8[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 7; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[5] >> 6) + lut_idxCnProcG8[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[5] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "              }\n");
     }
+  }
 
-
- // =====================================================================
+  // =====================================================================
   // Process group with 9 BNs
 
-  fprintf(fd,"//Process group with 9 BNs\n");
-    // Offset is 2*384/32 = 12
-  const uint8_t lut_idxCnProcG9[9][8] = {{24,48,72,96,120,144,168,192}, {0,48,72,96,120,144,168,192},
-                                         {0,24,72,96,120,144,168,192}, {0,24,48,96,120,144,168,192},
-                                         {0,24,48,72,120,144,168,192}, {0,24,48,72,96,144,168,192},
-                                         {0,24,48,72,96,120,168,192}, {0,24,48,72,96,120,144,192},
-                                         {0,24,48,72,96,120,144,168}};
-
-
-  if (lut_numCnInCnGroups[6] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-     // M = (lut_numCnInCnGroups[5]*Z + 63)>>6;
-     fprintf(fd, "M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[6] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-
-      for (j=0; j<9; j++)
-        {
-          // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>6)+lut_idxCnProcG9[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<8; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[6]>>6)+lut_idxCnProcG9[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[6]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"              }\n");
-        }
+  fprintf(fd, "//Process group with 9 BNs\n");
+  // Offset is 2*384/32 = 24 (table stride; entries are halved when the index is emitted)
+  const uint8_t lut_idxCnProcG9[9][8] = {{24, 48, 72, 96, 120, 144, 168, 192},
+                                         {0, 48, 72, 96, 120, 144, 168, 192},
+                                         {0, 24, 72, 96, 120, 144, 168, 192},
+                                         {0, 24, 48, 96, 120, 144, 168, 192},
+                                         {0, 24, 48, 72, 120, 144, 168, 192},
+                                         {0, 24, 48, 72, 96, 144, 168, 192},
+                                         {0, 24, 48, 72, 96, 120, 168, 192},
+                                         {0, 24, 48, 72, 96, 120, 144, 192},
+                                         {0, 24, 48, 72, 96, 120, 144, 168}};
+
+  if (lut_numCnInCnGroups[6] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[6]*Z + 63)>>6;
+    fprintf(fd, "M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[6]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[6] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 9; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG9[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[6] >> 6) + lut_idxCnProcG9[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 8; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[6] >> 6) + lut_idxCnProcG9[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[6] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "              }\n");
     }
-
-
-
-
-
+  }
 
   // =====================================================================
   // Process group with 10 BNs
-  fprintf(fd,"//Process group with 10 BNs\n");
-        // Offset is 1*384/32 = 6
-  const uint8_t lut_idxCnProcG10[10][9] = {{12,24,36,48,60,72,84,96,108}, {0,24,36,48,60,72,84,96,108},
-                                           {0,12,36,48,60,72,84,96,108}, {0,12,24,48,60,72,84,96,108},
-                                           {0,12,24,36,60,72,84,96,108}, {0,12,24,36,48,72,84,96,108},
-                                           {0,12,24,36,48,60,84,96,108}, {0,12,24,36,48,60,72,96,108},
-                                           {0,12,24,36,48,60,72,84,108}, {0,12,24,36,48,60,72,84,96}};
-
-  if (lut_numCnInCnGroups[7] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      //M = (lut_numCnInCnGroups[7]*Z + 63)>>6;
-      fprintf(fd, " M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[7] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7]*NR_LDPC_ZMAX)>>6;
-
-
-      // Loop over every BN
-
-      for (j=0; j<10; j++)
-        {
-          // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>6)+lut_idxCnProcG10[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<9; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[7]>>6)+lut_idxCnProcG10[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min,sgn,zeros);\n",(lut_startAddrCnGroups[7]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  fprintf(fd, "//Process group with 10 BNs\n");
+  // Offset is 1*384/32 = 12 (table stride; entries are halved when the index is emitted)
+  const uint8_t lut_idxCnProcG10[10][9] = {{12, 24, 36, 48, 60, 72, 84, 96, 108},
+                                           {0, 24, 36, 48, 60, 72, 84, 96, 108},
+                                           {0, 12, 36, 48, 60, 72, 84, 96, 108},
+                                           {0, 12, 24, 48, 60, 72, 84, 96, 108},
+                                           {0, 12, 24, 36, 60, 72, 84, 96, 108},
+                                           {0, 12, 24, 36, 48, 72, 84, 96, 108},
+                                           {0, 12, 24, 36, 48, 60, 84, 96, 108},
+                                           {0, 12, 24, 36, 48, 60, 72, 96, 108},
+                                           {0, 12, 24, 36, 48, 60, 72, 84, 108},
+                                           {0, 12, 24, 36, 48, 60, 72, 84, 96}};
+
+  if (lut_numCnInCnGroups[7] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[7]*Z + 63)>>6;
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[7]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[7] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 10; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[7] >> 6) + lut_idxCnProcG10[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 9; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[7] >> 6) + lut_idxCnProcG10[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min,sgn,zeros);\n", (lut_startAddrCnGroups[7] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 19 BNs
-  fprintf(fd,"//Process group with 19 BNs\n");
+  fprintf(fd, "//Process group with 19 BNs\n");
   // Offset is 4*384/32 = 24
-  const uint16_t lut_idxCnProcG19[19][18] = {{48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
-                                             {0,48,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,192,240,288,336,384,432,480,528,576,624,672,720,768,816,864},
-                                             {0,48,96,144,240,288,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,288,336,384,432,480,528,576,624,672,720,768,816,864},
-                                             {0,48,96,144,192,240,336,384,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,384,432,480,528,576,624,672,720,768,816,864},
-                                             {0,48,96,144,192,240,288,336,432,480,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,480,528,576,624,672,720,768,816,864},
-                                             {0,48,96,144,192,240,288,336,384,432,528,576,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,576,624,672,720,768,816,864},
-                                             {0,48,96,144,192,240,288,336,384,432,480,528,624,672,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,672,720,768,816,864},
-                                             {0,48,96,144,192,240,288,336,384,432,480,528,576,624,720,768,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,768,816,864},
-                                             {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,816,864}, {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,864},
-                                             {0,48,96,144,192,240,288,336,384,432,480,528,576,624,672,720,768,816}};
-
-
-  if (lut_numCnInCnGroups[8] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-     // M = (lut_numCnInCnGroups[8]*Z + 63)>>6;
-     fprintf(fd, " M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[8] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-
-      for (j=0; j<19; j++)
-        {
-          // Loop over CNs
-          //      for (i=0; i<M; i++,iprime++)
-          //            {
-          fprintf(fd,"            for (i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>6)+lut_idxCnProcG19[j][0]/2);
-           fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-           fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<18; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[8]>>6)+lut_idxCnProcG19[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[8]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  const uint16_t lut_idxCnProcG19[19][18] = {{48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 432, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 480, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 528, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 576, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 624, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 672, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 720, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 768, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 816, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 864},
+                                             {0, 48, 96, 144, 192, 240, 288, 336, 384, 432, 480, 528, 576, 624, 672, 720, 768, 816}};
+
+  if (lut_numCnInCnGroups[8] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    // M = (lut_numCnInCnGroups[8]*Z + 63)>>6;
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[8]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG1_R13[8] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 19; j++) {
+      // Loop over CNs
+      //      for (i=0; i<M; i++,iprime++)
+      //            {
+      fprintf(fd, "            for (i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG19[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[8] >> 6) + lut_idxCnProcG19[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 18; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[8] >> 6) + lut_idxCnProcG19[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[8] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
+  }
 
-  fprintf(fd,"}\n");
+  fprintf(fd, "}\n");
   fclose(fd);
-}//end of the function  nrLDPC_cnProc_BG1
-
-
-
-
-
+} // end of the function  nrLDPC_cnProc_BG1
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c
index 8a9e592f6f707669a64a42920551f70ff1cfd83a..70e760cc6a1251dd7cdd33efa41c0ab5a7daced7 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_cnProc_avx512/cnProc_gen_BG2_avx512.c
@@ -26,389 +26,336 @@
 
 void nrLDPC_cnProc_BG2_generator_AVX512(const char *dir, int R)
 {
-  const char *ratestr[3]={"15","13","23"};
-
-  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+  const char *ratestr[3] = {"15", "13", "23"};
 
+  if (R < 0 || R > 2) {
+    printf("Illegal R %d\n", R);
+    abort();
+  }
 
- // system("mkdir -p ../ldpc_gen_files");
+  // system("mkdir -p ../ldpc_gen_files");
 
-  char fname[FILENAME_MAX+1];
+  char fname[FILENAME_MAX + 1];
   snprintf(fname, sizeof(fname), "%s/cnProc_avx512/nrLDPC_cnProc_BG2_R%s_AVX512.h", dir, ratestr[R]);
-  FILE *fd=fopen(fname,"w");
+  FILE *fd = fopen(fname, "w");
   if (fd == NULL) {
     printf("Cannot create file %s\n", fname);
     abort();
   }
 
-  //fprintf(fd,"#include <stdint.h>\n");
-//  fprintf(fd,"#include <immintrin.h>\n");
-
-
-  fprintf(fd,   "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
-
-
-
- fprintf(fd,"static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n",ratestr[R]);
-  const uint8_t*  lut_numCnInCnGroups;
-  const uint32_t* lut_startAddrCnGroups = lut_startAddrCnGroups_BG2;
-
-  if (R==0)      lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R15;
-  else if (R==1) lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R13;
-  else if (R==2) lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R23;
-  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
-
+  fprintf(fd, "#define conditional_negate(a,b,z) _mm512_mask_sub_epi8(a,_mm512_movepi8_mask(b),z,a)\n");
+
+  fprintf(fd, "static inline void nrLDPC_cnProc_BG2_R%s_AVX512(int8_t* cnProcBuf, int8_t* cnProcBufRes, uint16_t Z) {\n", ratestr[R]);
+  const uint8_t *lut_numCnInCnGroups;
+  const uint32_t *lut_startAddrCnGroups = lut_startAddrCnGroups_BG2;
+
+  if (R == 0)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R15;
+  else if (R == 1)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R13;
+  else if (R == 2)
+    lut_numCnInCnGroups = lut_numCnInCnGroups_BG2_R23;
+  else {
+    printf("aborting, illegal R %d\n", R);
+    fclose(fd);
+    abort();
+  }
 
   // Number of CNs in Groups
-  //uint32_t M;
+  // uint32_t M;
   uint32_t j;
   uint32_t k;
   // Offset to each bit within a group in terms of 64  Byte
   uint32_t bitOffsetInGroup;
 
- fprintf(fd,"                uint32_t M;\n");
-  fprintf(fd,"                __m512i zmm0, min, sgn,zeros,ones,maxLLR;\n");
-  fprintf(fd,"                zeros  = _mm512_setzero_si512();\n");
-  fprintf(fd,"                maxLLR = _mm512_set1_epi8((char)127);\n");
-    fprintf(fd,"               ones = _mm512_set1_epi8((char)1);\n");
+  fprintf(fd, "                uint32_t M;\n");
+  fprintf(fd, "                __m512i zmm0, min, sgn,zeros,ones,maxLLR;\n");
+  fprintf(fd, "                zeros  = _mm512_setzero_si512();\n");
+  fprintf(fd, "                maxLLR = _mm512_set1_epi8((char)127);\n");
+  fprintf(fd, "               ones = _mm512_set1_epi8((char)1);\n");
   // =====================================================================
   // Process group with 3 BNs
-  fprintf(fd,"//Process group with 3 BNs\n");
+  fprintf(fd, "//Process group with 3 BNs\n");
   // LUT with offsets for bits that need to be processed
   // 1. bit proc requires LLRs of 2. and 3. bit, 2.bits of 1. and 3. etc.
   // Offsets are in units of bitOffsetInGroup
-   const uint8_t lut_idxCnProcG3[3][2] = {{72,144}, {0,144}, {0,72}};
-
-
-  if (lut_numCnInCnGroups[0] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[0] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[0]*NR_LDPC_ZMAX)>>6;
-
-
-      // Loop over every BN
-
-      for (j=0; j<3; j++)
-        {
-
-
-
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-            fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>6)+lut_idxCnProcG3[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-            fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-         // for (k=1; k<2; k++)
-            //{
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[0]>>6)+lut_idxCnProcG3[j][1]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(*p_ones, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-           // }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, *maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-          //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
-              //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[0]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
-
+  const uint8_t lut_idxCnProcG3[3][2] = {{72, 144}, {0, 144}, {0, 72}};
+
+  if (lut_numCnInCnGroups[0] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[0]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[0] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 3; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[0] >> 6) + lut_idxCnProcG3[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // for (k=1; k<2; k++)
+      //{
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[0] >> 6) + lut_idxCnProcG3[j][1] / 2);
+
+      //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+      fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+      //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      // }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, *maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+      //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
+      //                p_cnProcBufResBit++;
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[0] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 4 BNs
-  fprintf(fd,"//Process group with 4 BNs\n");
- // Offset is 20*384/32 = 240
-    const uint16_t lut_idxCnProcG4[4][3] = {{240,480,720}, {0,480,720}, {0,240,720}, {0,240,480}};
-
-
-  if (lut_numCnInCnGroups[1] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-      fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[1] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[1]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-      for (j=0; j<4; j++)
-        {
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>6)+lut_idxCnProcG4[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-          fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<3; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[1]>>6)+lut_idxCnProcG4[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(sgn, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-          //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
-              //                p_cnProcBufResBit++;
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[1]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  fprintf(fd, "//Process group with 4 BNs\n");
+  // Offset is 20*384/32 = 240
+  const uint16_t lut_idxCnProcG4[4][3] = {{240, 480, 720}, {0, 480, 720}, {0, 240, 720}, {0, 240, 480}};
+
+  if (lut_numCnInCnGroups[1] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[1]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[1] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+    for (j = 0; j < 4; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG4[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[1] >> 6) + lut_idxCnProcG4[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 3; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[1] >> 6) + lut_idxCnProcG4[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+      //                *p_cnProcBufResBit = _mm512_sign_epi8(min, sgn);
+      //                p_cnProcBufResBit++;
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[1] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 5 BNs
-  fprintf(fd,"//Process group with 5 BNs\n");
-    // Offset is 9*384/32 = 108
-    const uint16_t lut_idxCnProcG5[5][4] = {{108,216,324,432}, {0,216,324,432},
-                                            {0,108,324,432}, {0,108,216,432}, {0,108,216,324}};
-                                            
-
+  fprintf(fd, "//Process group with 5 BNs\n");
+  // Offset is 9*384/32 = 108
+  const uint16_t lut_idxCnProcG5[5][4] = {{108, 216, 324, 432}, {0, 216, 324, 432}, {0, 108, 324, 432}, {0, 108, 216, 432}, {0, 108, 216, 324}};
 
+  if (lut_numCnInCnGroups[2] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[2]);
 
-  if (lut_numCnInCnGroups[2] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[2] );
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[2] * NR_LDPC_ZMAX) >> 6;
 
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[2]*NR_LDPC_ZMAX)>>6;
+    // Loop over every BN
 
-      // Loop over every BN
+    for (j = 0; j < 5; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG5[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[2] >> 6) + lut_idxCnProcG5[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
 
-      for (j=0; j<5; j++)
-        {
+      // Loop over BNs
+      for (k = 1; k < 4; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[2] >> 6) + lut_idxCnProcG5[j][k] / 2);
 
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>6)+lut_idxCnProcG5[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-          fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
 
+        //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
 
-          // Loop over BNs
-          for (k=1; k<4; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[2]>>6)+lut_idxCnProcG5[j][k]/2);
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
 
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(sgn, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[2]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"           }\n");
-        }
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[2] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "           }\n");
     }
+  }
 
   // =====================================================================
   // Process group with 6 BNs
-  fprintf(fd,"//Process group with 6 BNs\n");
-    // Offset is 3*384/32 = 36
-  const uint16_t lut_idxCnProcG6[6][5] = {{36,72,108,144,180}, {0,72,108,144,180},
-                                            {0,36,108,144,180}, {0,36,72,144,180},
-                                            {0,36,72,108,180}, {0,36,72,108,144}};
-                                            
-
+  fprintf(fd, "//Process group with 6 BNs\n");
+  // Offset is 3*384/32 = 36
+  const uint16_t lut_idxCnProcG6[6][5] = {{36, 72, 108, 144, 180}, {0, 72, 108, 144, 180}, {0, 36, 108, 144, 180}, {0, 36, 72, 144, 180}, {0, 36, 72, 108, 180}, {0, 36, 72, 108, 144}};
 
+  if (lut_numCnInCnGroups[3] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[3]);
 
-  if (lut_numCnInCnGroups[3] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[3] );
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[3] * NR_LDPC_ZMAX) >> 6;
 
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[3]*NR_LDPC_ZMAX)>>6;
+    // Loop over every BN
 
-      // Loop over every BN
+    for (j = 0; j < 6; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG6[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[3] >> 6) + lut_idxCnProcG6[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
 
-      for (j=0; j<6; j++)
-        {
+      // Loop over BNs
+      for (k = 1; k < 5; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[3] >> 6) + lut_idxCnProcG6[j][k] / 2);
 
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>6)+lut_idxCnProcG6[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-          fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
 
+        //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
 
-          // Loop over BNs
-          for (k=1; k<5; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[3]>>6)+lut_idxCnProcG6[j][k]/2);
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
 
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(sgn, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[3]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[3] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
-
-
+  }
 
   // =====================================================================
   // Process group with 8 BNs
-  fprintf(fd,"//Process group with 8 BNs\n");
- // Offset is 2*384/32 = 24
-    const uint8_t lut_idxCnProcG8[8][7] = {{24,48,72,96,120,144,168}, {0,48,72,96,120,144,168},
-                                           {0,24,72,96,120,144,168}, {0,24,48,96,120,144,168},
-                                           {0,24,48,72,120,144,168}, {0,24,48,72,96,144,168},
-                                           {0,24,48,72,96,120,168}, {0,24,48,72,96,120,144}};
-
-                                           
-
-
-  if (lut_numCnInCnGroups[4] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[4] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[4]*NR_LDPC_ZMAX)>>6;
-
-      // Loop over every BN
-
-      for (j=0; j<8; j++)
-        {
-
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>6)+lut_idxCnProcG8[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-          fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<7; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[4]>>6)+lut_idxCnProcG8[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(sgn, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n",(lut_startAddrCnGroups[4]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"              }\n");
-        }
+  fprintf(fd, "//Process group with 8 BNs\n");
+  // Offset is 2*384/32 = 24
+  const uint8_t lut_idxCnProcG8[8][7] = {{24, 48, 72, 96, 120, 144, 168},
+                                         {0, 48, 72, 96, 120, 144, 168},
+                                         {0, 24, 72, 96, 120, 144, 168},
+                                         {0, 24, 48, 96, 120, 144, 168},
+                                         {0, 24, 48, 72, 120, 144, 168},
+                                         {0, 24, 48, 72, 96, 144, 168},
+                                         {0, 24, 48, 72, 96, 120, 168},
+                                         {0, 24, 48, 72, 96, 120, 144}};
+
+  if (lut_numCnInCnGroups[4] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[4]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[4] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 8; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG8[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[4] >> 6) + lut_idxCnProcG8[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 7; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[4] >> 6) + lut_idxCnProcG8[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min, sgn,zeros);\n", (lut_startAddrCnGroups[4] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "              }\n");
     }
-
+  }
 
   // =====================================================================
   // Process group with 10 BNs
-  fprintf(fd,"//Process group with 10 BNs\n");
-
-  const uint8_t lut_idxCnProcG10[10][9] = {{24,48,72,96,120,144,168,192,216}, {0,48,72,96,120,144,168,192,216},
-                                             {0,24,72,96,120,144,168,192,216}, {0,24,48,96,120,144,168,192,216},
-                                             {0,24,48,72,120,144,168,192,216}, {0,24,48,72,96,144,168,192,216},
-                                             {0,24,48,72,96,120,168,192,216}, {0,24,48,72,96,120,144,192,216},
-                                             {0,24,48,72,96,120,144,168,216}, {0,24,48,72,96,120,144,168,192}};
-
-                                             
-
-
-  if (lut_numCnInCnGroups[5] > 0)
-    {
-      // Number of groups of 64  CNs for parallel processing
-      // Ceil for values not divisible by 64
-       fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numCnInCnGroups[5] );
-
-      // Set the offset to each bit within a group in terms of 64  Byte
-      bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[5]*NR_LDPC_ZMAX)>>6;
-
-     // Loop over every BN
-
-      for (j=0; j<10; j++)
-        {
-
-          fprintf(fd,"            for (int i=0;i<M;i++) {\n");
-          // Abs and sign of 64  CNs (first BN)
-          //                zmm0 = p_cnProcBuf[lut_idxCnProcG3[j][0] + i];
-          fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>6)+lut_idxCnProcG10[j][0]/2);
-            fprintf(fd,"                sgn  = _mm512_xor_si512(ones, zmm0);\n");
-          fprintf(fd,"                min  = _mm512_abs_epi8(zmm0);\n");
-
-
-          // Loop over BNs
-          for (k=1; k<9; k++)
-            {
-              fprintf(fd,"                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n",(lut_startAddrCnGroups[5]>>6)+lut_idxCnProcG10[j][k]/2);
-
-              //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
-              fprintf(fd,"                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
-
-              //                sgn  = _mm512_sign_epi8(sgn, zmm0);
-              fprintf(fd,"                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
-            }
-
-          // Store result
-          //                min = _mm512_min_epu8(min, maxLLR); // 128 in epi8 is -127
-          fprintf(fd,"                min = _mm512_min_epu8(min, maxLLR);\n");
-
-          fprintf(fd,"                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min,sgn,zeros);\n",(lut_startAddrCnGroups[5]>>6)+(j*bitOffsetInGroup));
-          fprintf(fd,"            }\n");
-        }
+  fprintf(fd, "//Process group with 10 BNs\n");
+
+  const uint8_t lut_idxCnProcG10[10][9] = {{24, 48, 72, 96, 120, 144, 168, 192, 216},
+                                           {0, 48, 72, 96, 120, 144, 168, 192, 216},
+                                           {0, 24, 72, 96, 120, 144, 168, 192, 216},
+                                           {0, 24, 48, 96, 120, 144, 168, 192, 216},
+                                           {0, 24, 48, 72, 120, 144, 168, 192, 216},
+                                           {0, 24, 48, 72, 96, 144, 168, 192, 216},
+                                           {0, 24, 48, 72, 96, 120, 168, 192, 216},
+                                           {0, 24, 48, 72, 96, 120, 144, 192, 216},
+                                           {0, 24, 48, 72, 96, 120, 144, 168, 216},
+                                           {0, 24, 48, 72, 96, 120, 144, 168, 192}};
+
+  if (lut_numCnInCnGroups[5] > 0) {
+    // Number of groups of 64  CNs for parallel processing
+    // Ceil for values not divisible by 64
+    fprintf(fd, " M = (%d*Z + 63)>>6;\n", lut_numCnInCnGroups[5]);
+
+    // Set the offset to each bit within a group in terms of 64  Byte
+    bitOffsetInGroup = (lut_numCnInCnGroups_BG2_R15[5] * NR_LDPC_ZMAX) >> 6;
+
+    // Loop over every BN
+
+    for (j = 0; j < 10; j++) {
+      fprintf(fd, "            for (int i=0;i<M;i++) {\n");
+      // Abs and sign of 64  CNs (first BN)
+      //                zmm0 = p_cnProcBuf[lut_idxCnProcG10[j][0] + i];
+      fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[5] >> 6) + lut_idxCnProcG10[j][0] / 2);
+      fprintf(fd, "                sgn  = _mm512_xor_si512(ones, zmm0);\n");
+      fprintf(fd, "                min  = _mm512_abs_epi8(zmm0);\n");
+
+      // Loop over BNs
+      for (k = 1; k < 9; k++) {
+        fprintf(fd, "                zmm0 = ((__m512i*)cnProcBuf)[%d+i];\n", (lut_startAddrCnGroups[5] >> 6) + lut_idxCnProcG10[j][k] / 2);
+
+        //                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));
+        fprintf(fd, "                min  = _mm512_min_epu8(min, _mm512_abs_epi8(zmm0));\n");
+
+        //                sgn  = _mm512_sign_epi8(sgn, zmm0);
+        fprintf(fd, "                sgn  = _mm512_xor_si512(sgn, zmm0);\n");
+      }
+
+      // Store result
+      //                min = _mm512_min_epu8(min, maxLLR); // 128 (abs of -128) is -128 in epi8, so clamp to 127
+      fprintf(fd, "                min = _mm512_min_epu8(min, maxLLR);\n");
+
+      fprintf(fd, "                ((__m512i*)cnProcBufRes)[%d+i] = conditional_negate(min,sgn,zeros);\n", (lut_startAddrCnGroups[5] >> 6) + (j * bitOffsetInGroup));
+      fprintf(fd, "            }\n");
     }
+  }
 
-
-  fprintf(fd,"}\n");
+  fprintf(fd, "}\n");
   fclose(fd);
-}//end of the function  nrLDPC_cnProc_BG2
-
-
-
-
-
-
-
-
-
+} // end of the function nrLDPC_cnProc_BG2_generator_AVX512
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc192_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc192_byte.c
index bfdf416605a97918aad47979202ad9a366d968cb..1dfdfa3d9bb3b0ba0f433681d9df65fabdb797a8 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc192_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc192_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc192_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[5019],_mm256_xor_si256(c2[3170],_mm256_xor_si256(c2[3432],_mm256_xor_si256(c2[5282],_mm256_xor_si256(c2[5028],_mm256_xor_si256(c2[6877],_mm256_xor_si256(c2[6086],_mm256_xor_si256(c2[4777],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[4529],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[51],_mm256_xor_si256(c2[3220],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[5609],_mm256_xor_si256(c2[4288],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[6408],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[2992],_mm256_xor_si256(c2[2990],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[3004],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[6433],_mm256_xor_si256(c2[7767],_mm256_xor_si256(c2[4865],_mm256_xor_si256(c2[8293],_mm256_xor_si256(c2[123],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[3567],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[4623],_mm256_xor_si256(c2[4632],_mm256_xor_si256(c2[5956],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[2268],_mm256_xor_si256(c2[5704],_mm256_xor_si256(c2[6768],_mm256_xor_si256(c2[6506],_mm256_xor_si256(c2[5448],_mm256_xor_si256(c2[6252],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[5725],_mm256_xor_si256(c2[2835],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[4959],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[4969],_mm256_xor_si256(c2[4441],_mm256_xor_si256(c2[4178],_mm256_xor_si256(c2[5513],_mm256_xor_si256(c2[3136],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[2089],_mm256_xor_si256(c2[7120],_mm256_xor_si256(c2[4479],c2[1577]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[5019],simde_mm256_xor_si256(c2[3170],simde_mm256_xor_si256(c2[3432],simde_mm256_xor_si256(c2[5282],simde_mm256_xor_si256(c2[5028],simde_mm256_xor_si256(c2[6877],simde_mm256_xor_si256(c2[6086],simde_mm256_xor_si256(c2[4777],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[4529],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[51],simde_mm256_xor_si256(c2[3220],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[5609],simde_mm256_xor_si256(c2[4288],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[6408],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[2992],simde_mm256_xor_si256(c2[2990],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[3004],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[6433],simde_mm256_xor_si256(c2[7767],simde_mm256_xor_si256(c2[4865],simde_mm256_xor_si256(c2[8293],simde_mm256_xor_si256(c2[123],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[3567],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[4623],simde_mm256_xor_si256(c2[4632],simde_mm256_xor_si256(c2[5956],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[2268],simde_mm256_xor_si256(c2[5704],simde_mm256_xor_si256(c2[6768],simde_mm256_xor_si256(c2[6506],simde_mm256_xor_si256(c2[5448],simde_mm256_xor_si256(c2[6252],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[5725],simde_mm256_xor_si256(c2[2835],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[4959],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[4969],simde_mm256_xor_si256(c2[4441],simde_mm256_xor_si256(c2[4178],simde_mm256_xor_si256(c2[5513],simde_mm256_xor_si256(c2[3136],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[2089],simde_mm256_xor_
si256(c2[7120],simde_mm256_xor_si256(c2[4479],c2[1577]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[6]=_mm256_xor_si256(c2[5019],_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[5028],_mm256_xor_si256(c2[5292],_mm256_xor_si256(c2[7141],_mm256_xor_si256(c2[6350],_mm256_xor_si256(c2[4777],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[4529],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[3484],_mm256_xor_si256(c2[5330],_mm256_xor_si256(c2[5609],_mm256_xor_si256(c2[5873],_mm256_xor_si256(c2[4552],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[6408],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[3256],_mm256_xor_si256(c2[3254],_mm256_xor_si256(c2[6952],_mm256_xor_si256(c2[3268],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[6697],_mm256_xor_si256(c2[7767],_mm256_xor_si256(c2[8031],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[123],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[3567],_mm256_xor_si256(c2[3831],_mm256_xor_si256(c2[2247],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[4632],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[6220],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[2532],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[7032],_mm256_xor_si256(c2[6770],_mm256_xor_si256(c2[5712],_mm256_xor_si256(c2[6252],_mm256_xor_si256(c2[6516],_mm256_xor_si256(c2[1239],_mm256_xor_si256(c2[5989],_mm256_xor_si256(c2[2835],_mm256_xor_si256(c2[3099],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[4969],_mm256_xor_si256(c2[5233],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[5513],_mm256_xor_si256(c2[5777],_mm256_xor_si256(c2[3400],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[28
84],_mm256_xor_si256(c2[3148],_mm256_xor_si256(c2[3936],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[7120],_mm256_xor_si256(c2[7384],_mm256_xor_si256(c2[4743],c2[1841])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[6]=simde_mm256_xor_si256(c2[5019],simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[5028],simde_mm256_xor_si256(c2[5292],simde_mm256_xor_si256(c2[7141],simde_mm256_xor_si256(c2[6350],simde_mm256_xor_si256(c2[4777],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[4529],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[3484],simde_mm256_xor_si256(c2[5330],simde_mm256_xor_si256(c2[5609],simde_mm256_xor_si256(c2[5873],simde_mm256_xor_si256(c2[4552],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[6408],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[3256],simde_mm256_xor_si256(c2[3254],simde_mm256_xor_si256(c2[6952],simde_mm256_xor_si256(c2[3268],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[6697],simde_mm256_xor_si256(c2[7767],simde_mm256_xor_si256(c2[8031],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[123],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[3567],simde_mm256_xor_si256(c2[3831],simde_mm256_xor_si256(c2[2247],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[4632],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[6220],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[2532],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[7032],simde_mm256_xor_si256(c2[6770],simde_mm256_xor_si256(c2[5712],simde_mm256_xor_si256(c2[6252],simde_mm256_xor_si256(c2[6516],simde_mm256_xor_si256(c2[1239],simde_mm256_xor_si256(c2[5989],simde_mm256_xor_si256(c2[2835],simde_mm256_xor_si256(c2[3099],simde_mm256_xor_si256(c2[989],simde_mm256_xo
r_si256(c2[4683],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[4969],simde_mm256_xor_si256(c2[5233],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[5513],simde_mm256_xor_si256(c2[5777],simde_mm256_xor_si256(c2[3400],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[3148],simde_mm256_xor_si256(c2[3936],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[7120],simde_mm256_xor_si256(c2[7384],simde_mm256_xor_si256(c2[4743],c2[1841])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[12]=_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[3432],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[5282],_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[5292],_mm256_xor_si256(c2[6877],_mm256_xor_si256(c2[7141],_mm256_xor_si256(c2[6086],_mm256_xor_si256(c2[6350],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[3220],_mm256_xor_si256(c2[3484],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[5330],_mm256_xor_si256(c2[5873],_mm256_xor_si256(c2[4552],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[3256],_mm256_xor_si256(c2[2990],_mm256_xor_si256(c2[3254],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[6952],_mm256_xor_si256(c2[3268],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[6433],_mm256_xor_si256(c2[6697],_mm256_xor_si256(c2[8031],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[8293],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[3831],_mm256_xor_si256(c2[2247],_mm256_xor_si256(c2[4623],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[6220],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[2268],_mm256_xor_si256(c2[2532],_mm256_xor_si256(c2[5704],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[7032],_mm256_xor_si256(c2[6506],_mm256_xor_si256(c2[6770],_mm256_xor_si256(c2[5448],_mm256_xor_si256(c2[5712],_mm256_xor_si256(c2[6516],_mm256_xor_si256(c2[1239],_mm256_xor_si256(c2[5725],_mm256_xor_si256(c2[5989],_mm256_xor_si256(c2[3099],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[4419],_
mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[5233],_mm256_xor_si256(c2[4441],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[4178],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[5777],_mm256_xor_si256(c2[3400],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[3148],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[3936],_mm256_xor_si256(c2[2089],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[7384],_mm256_xor_si256(c2[4743],_mm256_xor_si256(c2[1577],c2[1841]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[3432],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[5282],simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[5292],simde_mm256_xor_si256(c2[6877],simde_mm256_xor_si256(c2[7141],simde_mm256_xor_si256(c2[6086],simde_mm256_xor_si256(c2[6350],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[3220],simde_mm256_xor_si256(c2[3484],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[5330],simde_mm256_xor_si256(c2[5873],simde_mm256_xor_si256(c2[4552],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[3256],simde_mm256_xor_si256(c2[2990],simde_mm256_xor_si256(c2[3254],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[6952],simde_mm256_xor_si256(c2[3268],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[6433],simde_mm256_xor_si256(c2[6697],simde_mm256_xor_si256(c2[8031],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[8293],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[3831],simde_mm256_xor_si256(c2[2247],simde_mm256_xor_si256(c2[4623],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[6220],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[2268],simde_mm256_xor_si256(c2[2532],simde_mm256_xor_si256(c2[5704],simde_mm256_xor_si
256(c2[5968],simde_mm256_xor_si256(c2[7032],simde_mm256_xor_si256(c2[6506],simde_mm256_xor_si256(c2[6770],simde_mm256_xor_si256(c2[5448],simde_mm256_xor_si256(c2[5712],simde_mm256_xor_si256(c2[6516],simde_mm256_xor_si256(c2[1239],simde_mm256_xor_si256(c2[5725],simde_mm256_xor_si256(c2[5989],simde_mm256_xor_si256(c2[3099],simde_mm256_xor_si256(c2[989],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[5233],simde_mm256_xor_si256(c2[4441],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[4178],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[5777],simde_mm256_xor_si256(c2[3400],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[3148],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[3936],simde_mm256_xor_si256(c2[2089],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[7384],simde_mm256_xor_si256(c2[4743],simde_mm256_xor_si256(c2[1577],c2[1841]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[18]=_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[5282],_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[5292],_mm256_xor_si256(c2[7141],_mm256_xor_si256(c2[6086],_mm256_xor_si256(c2[6350],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[3484],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[5330],_mm256_xor_si256(c2[5873],_mm256_xor_si256(c2[4552],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[3256],_mm256_xor_si256(c2[3254],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[6952],_mm256_xor_si256(c2[3268],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[6433],_mm256_xor_si256(c2[6697],_mm256_xor_si256(c2[8031],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[3831],_mm256_xor_si256(c2[2247],_mm256_xor_si256(c2[4623],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[6220],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[2532],_mm256_xor_si256(c2[5704],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[7032],_mm256_xor_si256(c2[6770],_mm256_xor_si256(c2[5448],_mm256_xor_si256(c2[5712],_mm256_xor_si256(c2[6516],_mm256_xor_si256(c2[1239],_mm256_xor_si256(c2[5989],_mm256_xor_si256(c2[3099],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[5233],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[4178],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[5777],_mm256_xor_si256(c2[3400],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[31
48],_mm256_xor_si256(c2[3936],_mm256_xor_si256(c2[2089],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[7384],_mm256_xor_si256(c2[4743],_mm256_xor_si256(c2[1577],c2[1841])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[5282],simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[5292],simde_mm256_xor_si256(c2[7141],simde_mm256_xor_si256(c2[6086],simde_mm256_xor_si256(c2[6350],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[3484],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[5330],simde_mm256_xor_si256(c2[5873],simde_mm256_xor_si256(c2[4552],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[3256],simde_mm256_xor_si256(c2[3254],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[6952],simde_mm256_xor_si256(c2[3268],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[6433],simde_mm256_xor_si256(c2[6697],simde_mm256_xor_si256(c2[8031],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[3831],simde_mm256_xor_si256(c2[2247],simde_mm256_xor_si256(c2[4623],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[6220],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[2532],simde_mm256_xor_si256(c2[5704],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[7032],simde_mm256_xor_si256(c2[6770],simde_mm256_xor_si256(c2[5448],simde_mm256_xor_si256(c2[5712],simde_mm256_xor_si256(c2[6516],simde_mm256_xor_si256(c2[1239],simde_mm256_xor_si256(c2[5989],simde_mm256_xor_si256(c2[3099],simde_mm256_xor_si256(c2[989],simde_mm256_xor_si256(c2[4419],simde_mm256_xo
r_si256(c2[4683],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[5233],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[4178],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[5777],simde_mm256_xor_si256(c2[3400],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[3148],simde_mm256_xor_si256(c2[3936],simde_mm256_xor_si256(c2[2089],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[7384],simde_mm256_xor_si256(c2[4743],simde_mm256_xor_si256(c2[1577],c2[1841])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[24]=_mm256_xor_si256(c2[3172],c2[5561]);
+     d2[24]=simde_mm256_xor_si256(c2[3172],c2[5561]);
 
 //row: 5
-     d2[30]=_mm256_xor_si256(c2[4226],_mm256_xor_si256(c2[2377],_mm256_xor_si256(c2[2645],_mm256_xor_si256(c2[4489],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[4241],_mm256_xor_si256(c2[6084],_mm256_xor_si256(c2[5293],_mm256_xor_si256(c2[3708],_mm256_xor_si256(c2[3984],_mm256_xor_si256(c2[2401],_mm256_xor_si256(c2[1347],_mm256_xor_si256(c2[3736],_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[7696],_mm256_xor_si256(c2[5055],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[2427],_mm256_xor_si256(c2[4273],_mm256_xor_si256(c2[4816],_mm256_xor_si256(c2[3495],_mm256_xor_si256(c2[7716],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[7995],_mm256_xor_si256(c2[1133],_mm256_xor_si256(c2[2199],_mm256_xor_si256(c2[2197],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[2211],_mm256_xor_si256(c2[7755],_mm256_xor_si256(c2[5640],_mm256_xor_si256(c2[6974],_mm256_xor_si256(c2[4072],_mm256_xor_si256(c2[7500],_mm256_xor_si256(c2[7777],_mm256_xor_si256(c2[8042],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[1190],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[3845],_mm256_xor_si256(c2[5163],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[1733],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[4911],_mm256_xor_si256(c2[5981],_mm256_xor_si256(c2[5713],_mm256_xor_si256(c2[4661],_mm256_xor_si256(c2[5465],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[4932],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[8379],_mm256_xor_si256(c2[3626],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[4166],_mm256_xor_si256(c2[207],_mm256_xor_si256(c2[2582],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[3648],_mm256_xor_si256(c2[3385],_mm256_xor_si256(c2[4720],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[2873],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[2885],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[6327],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[784],c2[5269]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[4226],simde_mm256_xor_si256(c2[2377],simde_mm256_xor_si256(c2[2645],simde_mm256_xor_si256(c2[4489],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[4241],simde_mm256_xor_si256(c2[6084],simde_mm256_xor_si256(c2[5293],simde_mm256_xor_si256(c2[3708],simde_mm256_xor_si256(c2[3984],simde_mm256_xor_si256(c2[2401],simde_mm256_xor_si256(c2[1347],simde_mm256_xor_si256(c2[3736],simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[7696],simde_mm256_xor_si256(c2[5055],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[2427],simde_mm256_xor_si256(c2[4273],simde_mm256_xor_si256(c2[4816],simde_mm256_xor_si256(c2[3495],simde_mm256_xor_si256(c2[7716],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[7995],simde_mm256_xor_si256(c2[1133],simde_mm256_xor_si256(c2[2199],simde_mm256_xor_si256(c2[2197],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[2211],simde_mm256_xor_si256(c2[7755],simde_mm256_xor_si256(c2[5640],simde_mm256_xor_si256(c2[6974],simde_mm256_xor_si256(c2[4072],simde_mm256_xor_si256(c2[7500],simde_mm256_xor_si256(c2[7777],simde_mm256_xor_si256(c2[8042],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[1190],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[3845],simde_mm256_xor_si256(c2[5163],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[1733],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[4911],simde_mm256_xor_si256(c2[5981],simde_mm256_xor_si256(c2[5713],simde_mm256_xor_si256(c2[4661],simde_mm256_xor_si256(c2[5465],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[4932],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[8379],simde_mm256_xor_si256(c2[3626],simde_mm256_xor_si256(c2[4681],simde_mm256_xor_si256(c2[4166],simde_mm256_xor_si256(c2[207],simde_mm256_xor_si256(c2[2582],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[3648],simde_mm256_xor_si256(c2[3385],simde_mm256_xor_si256(c2[4720],simde_mm
256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[2873],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[2885],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[6327],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[784],c2[5269]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[36]=_mm256_xor_si256(c2[5810],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[8320],_mm256_xor_si256(c2[7550],_mm256_xor_si256(c2[7861],_mm256_xor_si256(c2[4968],c2[505])))))));
+     d2[36]=simde_mm256_xor_si256(c2[5810],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[8320],simde_mm256_xor_si256(c2[7550],simde_mm256_xor_si256(c2[7861],simde_mm256_xor_si256(c2[4968],c2[505])))))));
 
 //row: 7
-     d2[42]=_mm256_xor_si256(c2[2376],_mm256_xor_si256(c2[7933],_mm256_xor_si256(c2[7443],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[627],c2[4923])))));
+     d2[42]=simde_mm256_xor_si256(c2[2376],simde_mm256_xor_si256(c2[7933],simde_mm256_xor_si256(c2[7443],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[627],c2[4923])))));
 
 //row: 8
-     d2[48]=_mm256_xor_si256(c2[5813],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[3964],_mm256_xor_si256(c2[6073],_mm256_xor_si256(c2[4226],_mm256_xor_si256(c2[6077],_mm256_xor_si256(c2[6341],_mm256_xor_si256(c2[6076],_mm256_xor_si256(c2[7921],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[5019],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[7937],_mm256_xor_si256(c2[7671],_mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[6880],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[5033],_mm256_xor_si256(c2[5571],_mm256_xor_si256(c2[7680],_mm256_xor_si256(c2[3988],_mm256_xor_si256(c2[6097],_mm256_xor_si256(c2[2928],_mm256_xor_si256(c2[4779],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[5317],_mm256_xor_si256(c2[7432],_mm256_xor_si256(c2[3208],_mm256_xor_si256(c2[5317],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[2681],_mm256_xor_si256(c2[2945],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[4008],_mm256_xor_si256(c2[5859],_mm256_xor_si256(c2[6123],_mm256_xor_si256(c2[5860],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[7969],_mm256_xor_si256(c2[6397],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[5076],_mm256_xor_si256(c2[7191],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[2701],_mm256_xor_si256(c2[2965],_mm256_xor_si256(c2[7202],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[3244],_mm256_xor_si256(c2[2714],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[4829],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[5629],_mm256_xor_si256(c2[5893],_mm256_xor_si256(c2[7476],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[1144],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[5907],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[3004],_mm256_xor_si256(c2[7227],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[2223],_mm256_xor_si256(c2[5653
],_mm256_xor_si256(c2[7768],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[2485],_mm256_xor_si256(c2[2749],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[3026],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[1968],_mm256_xor_si256(c2[3819],_mm256_xor_si256(c2[4083],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[6470],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[4886],_mm256_xor_si256(c2[5417],_mm256_xor_si256(c2[7262],_mm256_xor_si256(c2[7526],_mm256_xor_si256(c2[5426],_mm256_xor_si256(c2[7541],_mm256_xor_si256(c2[6744],_mm256_xor_si256(c2[412],_mm256_xor_si256(c2[2787],_mm256_xor_si256(c2[4632],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[4384],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[4913],_mm256_xor_si256(c2[5177],_mm256_xor_si256(c2[6492],_mm256_xor_si256(c2[8343],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[7562],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[7300],_mm256_xor_si256(c2[698],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[6242],_mm256_xor_si256(c2[8093],_mm256_xor_si256(c2[8357],_mm256_xor_si256(c2[7046],_mm256_xor_si256(c2[708],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[3878],_mm256_xor_si256(c2[6519],_mm256_xor_si256(c2[8364],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[3629],_mm256_xor_si256(c2[5738],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[5213],_mm256_xor_si256(c2[7058],_mm256_xor_si256(c2[7322],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[5753],_mm256_xor_si256(c2[7862],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[3903],_mm256_xor_si256(c2[4169],_mm256_xor_si256(c2[6014],_mm256_xor_si256(c2[6278],_mm256_xor_si256(c2[5763],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[5235],_mm256_xor_si256(c2[7080],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[6817],_mm256_xor_si256(c2[7081],_mm256_xor_si256(c2[6301],_mm256_xor_si256(c2[8416],_mm256_xor_si256(c2[3924],_mm2
56_xor_si256(c2[6039],_mm256_xor_si256(c2[4454],_mm256_xor_si256(c2[6305],_mm256_xor_si256(c2[6569],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[5787],_mm256_xor_si256(c2[4466],_mm256_xor_si256(c2[6317],_mm256_xor_si256(c2[6581],_mm256_xor_si256(c2[2883],_mm256_xor_si256(c2[4728],_mm256_xor_si256(c2[4992],_mm256_xor_si256(c2[7908],_mm256_xor_si256(c2[1576],_mm256_xor_si256(c2[5273],_mm256_xor_si256(c2[7382],_mm256_xor_si256(c2[2365],_mm256_xor_si256(c2[4216],_mm256_xor_si256(c2[4480],c2[4481]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[5813],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[3964],simde_mm256_xor_si256(c2[6073],simde_mm256_xor_si256(c2[4226],simde_mm256_xor_si256(c2[6077],simde_mm256_xor_si256(c2[6341],simde_mm256_xor_si256(c2[6076],simde_mm256_xor_si256(c2[7921],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[5019],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[7937],simde_mm256_xor_si256(c2[7671],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[6880],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[5033],simde_mm256_xor_si256(c2[5571],simde_mm256_xor_si256(c2[7680],simde_mm256_xor_si256(c2[3988],simde_mm256_xor_si256(c2[6097],simde_mm256_xor_si256(c2[2928],simde_mm256_xor_si256(c2[4779],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[5317],simde_mm256_xor_si256(c2[7432],simde_mm256_xor_si256(c2[3208],simde_mm256_xor_si256(c2[5317],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[2681],simde_mm256_xor_si256(c2[2945],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[4008],simde_mm256_xor_si256(c2[5859],simde_mm256_xor_si256(c2[6123],simde_mm256_xor_si256(c2[5860],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[7969],simde_mm256_xor_si256(c2[6397],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[5076],simde_mm256_xor_si256(c2[7191],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[2701],simde_mm256_xor_si256(c2[2965],simde_mm256_xor_si256(c2[7202],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[3244],simde_mm256_xor_si256(c2[2714],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[4829],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[5629],simde_mm256_xor_si256(c2[5893],simde_mm256_xor_si256(c2[7476],simde_mm256
_xor_si256(c2[880],simde_mm256_xor_si256(c2[1144],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[5907],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[3004],simde_mm256_xor_si256(c2[7227],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[2223],simde_mm256_xor_si256(c2[5653],simde_mm256_xor_si256(c2[7768],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[2485],simde_mm256_xor_si256(c2[2749],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[3026],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[1968],simde_mm256_xor_si256(c2[3819],simde_mm256_xor_si256(c2[4083],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[6470],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[4886],simde_mm256_xor_si256(c2[5417],simde_mm256_xor_si256(c2[7262],simde_mm256_xor_si256(c2[7526],simde_mm256_xor_si256(c2[5426],simde_mm256_xor_si256(c2[7541],simde_mm256_xor_si256(c2[6744],simde_mm256_xor_si256(c2[412],simde_mm256_xor_si256(c2[2787],simde_mm256_xor_si256(c2[4632],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[4384],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[4913],simde_mm256_xor_si256(c2[5177],simde_mm256_xor_si256(c2[6492],simde_mm256_xor_si256(c2[8343],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[7562],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[7300],simde_mm256_xor_si256(c2[698],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[6242],simde_mm256_xor_si256(c2[8093],simde_mm256_xor_si256(c2[8357],simde_mm256_xor_si256(c2[7046],simde_mm256_xor_si256(c2[708],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[3878],simde_mm256_xor_si256(c2[6519],simde_mm256_xor_si256(c2[8364],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[3629],simde_mm256_xor_si256(c2[5738],simde_mm2
56_xor_si256(c2[1513],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[5213],simde_mm256_xor_si256(c2[7058],simde_mm256_xor_si256(c2[7322],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[5753],simde_mm256_xor_si256(c2[7862],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[3903],simde_mm256_xor_si256(c2[4169],simde_mm256_xor_si256(c2[6014],simde_mm256_xor_si256(c2[6278],simde_mm256_xor_si256(c2[5763],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[5235],simde_mm256_xor_si256(c2[7080],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[6817],simde_mm256_xor_si256(c2[7081],simde_mm256_xor_si256(c2[6301],simde_mm256_xor_si256(c2[8416],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[6039],simde_mm256_xor_si256(c2[4454],simde_mm256_xor_si256(c2[6305],simde_mm256_xor_si256(c2[6569],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[5787],simde_mm256_xor_si256(c2[4466],simde_mm256_xor_si256(c2[6317],simde_mm256_xor_si256(c2[6581],simde_mm256_xor_si256(c2[2883],simde_mm256_xor_si256(c2[4728],simde_mm256_xor_si256(c2[4992],simde_mm256_xor_si256(c2[7908],simde_mm256_xor_si256(c2[1576],simde_mm256_xor_si256(c2[5273],simde_mm256_xor_si256(c2[7382],simde_mm256_xor_si256(c2[2365],simde_mm256_xor_si256(c2[4216],simde_mm256_xor_si256(c2[4480],c2[4481]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[54]=_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[2125],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[1456],_mm256_xor_si256(c2[6757],_mm256_xor_si256(c2[4167],_mm256_xor_si256(c2[8401],c2[4994])))))));
+     d2[54]=simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[2125],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[1456],simde_mm256_xor_si256(c2[6757],simde_mm256_xor_si256(c2[4167],simde_mm256_xor_si256(c2[8401],c2[4994])))))));
 
 //row: 10
-     d2[60]=_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[4047],_mm256_xor_si256(c2[8285],c2[701])))));
+     d2[60]=simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[4047],simde_mm256_xor_si256(c2[8285],c2[701])))));
 
 //row: 11
-     d2[66]=_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[6864],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[1600],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[2124],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[2916],_mm256_xor_si256(c2[1599],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[1349],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[5834],_mm256_xor_si256(c2[7417],_mm256_xor_si256(c2[8223],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[1359],_mm256_xor_si256(c2[6108],_mm256_xor_si256(c2[7697],_mm256_xor_si256(c2[3736],_mm256_xor_si256(c2[5319],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[5328],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[2175],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[7982],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[3756],_mm256_xor_si256(c2[5345],_mm256_xor_si256(c2[1661],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[3244],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[5618],_mm256_xor_si256(c2[5620],_mm256_xor_si256(c2[7203],_mm256_xor_si256(c2[6686],_mm256_xor_si256(c2[8269],_mm256_xor_si256(c2[6684],_mm256_xor_si256(c2[8273],_mm256_xor_si256(c2[1935],_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[6698],_mm256_xor_si256(c2[8281],_mm256_xor_si256(c2[3795],_mm256_xor_si256(c2[5378],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3014],_mm256_xor_si256(c2[4333],_mm256_xor_si256(c2[4597],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[1695],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[3817],_mm256_xor_si256(c2[5136],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[4082],_mm256_xor_si256(c2[5665],_mm256_xor_si256(c2[4874],_
mm256_xor_si256(c2[6457],_mm256_xor_si256(c2[7261],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[5677],_mm256_xor_si256(c2[7260],_mm256_xor_si256(c2[8317],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[8332],_mm256_xor_si256(c2[1204],_mm256_xor_si256(c2[1468],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[2786],_mm256_xor_si256(c2[5693],_mm256_xor_si256(c2[7276],_mm256_xor_si256(c2[2256],_mm256_xor_si256(c2[5175],_mm256_xor_si256(c2[6494],_mm256_xor_si256(c2[6758],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[7551],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[3604],_mm256_xor_si256(c2[1753],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[2284],_mm256_xor_si256(c2[1505],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[3088],_mm256_xor_si256(c2[4669],_mm256_xor_si256(c2[6252],_mm256_xor_si256(c2[972],_mm256_xor_si256(c2[2561],_mm256_xor_si256(c2[6529],_mm256_xor_si256(c2[7848],_mm256_xor_si256(c2[8112],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[8113],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[4153],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[1789],_mm256_xor_si256(c2[4694],_mm256_xor_si256(c2[6277],_mm256_xor_si256(c2[7069],_mm256_xor_si256(c2[205],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[1805],_mm256_xor_si256(c2[8141],_mm256_xor_si256(c2[1277],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[2079],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[6830],_mm256_xor_si256(c2[8413],_mm256_xor_si256(c2[7360],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[6578],_mm256_xor_si256(c2[7897],_mm256_xor_si256(c2[8161],_mm256_xor_si256(c2[7372],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[5789],_mm256_xor_si256(c2[7372],_mm256_xor_si256(c2[2367],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[3950],_mm256_xor_si256(c2[8173],_mm256_xor_si256(c2[1309],_mm256_xor_si256(c2[5271],_mm256_xor
_si256(c2[6854],c2[7649])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[66]=simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[6864],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[1600],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[2124],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[2916],simde_mm256_xor_si256(c2[1599],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[1349],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[5834],simde_mm256_xor_si256(c2[7417],simde_mm256_xor_si256(c2[8223],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[1359],simde_mm256_xor_si256(c2[6108],simde_mm256_xor_si256(c2[7697],simde_mm256_xor_si256(c2[3736],simde_mm256_xor_si256(c2[5319],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[5328],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[2175],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[7982],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[3756],simde_mm256_xor_si256(c2[5345],simde_mm256_xor_si256(c2[1661],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[3244],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[5618],simde_mm256_xor_si256(c2[5620],simde_mm256_xor_si256(c2[7203],simde_mm256_xor_si256(c2[6686],simde_mm256_xor_si256(c2[8269],simde_mm256_xor_si256(c2[6684],simde_mm256_xor_si256(c2[8273],simde_mm256_xor_si256(c2[1935],simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[6698],simde_mm256_xor_si256(c2[8281],simde_mm256_xor_si256(c2[3795],simde_mm256_xor_si256(c2[5378],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si
256(c2[3014],simde_mm256_xor_si256(c2[4333],simde_mm256_xor_si256(c2[4597],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[1695],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[3817],simde_mm256_xor_si256(c2[5136],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[4082],simde_mm256_xor_si256(c2[5665],simde_mm256_xor_si256(c2[4874],simde_mm256_xor_si256(c2[6457],simde_mm256_xor_si256(c2[7261],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[5677],simde_mm256_xor_si256(c2[7260],simde_mm256_xor_si256(c2[8317],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[8332],simde_mm256_xor_si256(c2[1204],simde_mm256_xor_si256(c2[1468],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[2786],simde_mm256_xor_si256(c2[5693],simde_mm256_xor_si256(c2[7276],simde_mm256_xor_si256(c2[2256],simde_mm256_xor_si256(c2[5175],simde_mm256_xor_si256(c2[6494],simde_mm256_xor_si256(c2[6758],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[7551],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[3604],simde_mm256_xor_si256(c2[1753],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[2284],simde_mm256_xor_si256(c2[1505],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[3088],simde_mm256_xor_si256(c2[4669],simde_mm256_xor_si256(c2[6252],simde_mm256_xor_si256(c2[972],simde_mm256_xor_si256(c2[2561],simde_mm256_xor_si256(c2[6529],simde_mm256_xor_si256(c2[7848],simde_mm256_xor_si256(c2[8112],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[8113],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[4153],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[1789],simde_mm256_xor_si256(c2[4694],simde_mm256_xor_si256(c2[6277],simde_mm256_xor_si256(c2[7069],simde_mm256_xor_si256(c2[205],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[1541],simde_mm256_
xor_si256(c2[1805],simde_mm256_xor_si256(c2[8141],simde_mm256_xor_si256(c2[1277],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[2079],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[6830],simde_mm256_xor_si256(c2[8413],simde_mm256_xor_si256(c2[7360],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[6578],simde_mm256_xor_si256(c2[7897],simde_mm256_xor_si256(c2[8161],simde_mm256_xor_si256(c2[7372],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[5789],simde_mm256_xor_si256(c2[7372],simde_mm256_xor_si256(c2[2367],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[3950],simde_mm256_xor_si256(c2[8173],simde_mm256_xor_si256(c2[1309],simde_mm256_xor_si256(c2[5271],simde_mm256_xor_si256(c2[6854],c2[7649])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[72]=_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[6881],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[4909],c2[2858])))));
+     d2[72]=simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[6881],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[4909],c2[2858])))));
 
 //row: 13
-     d2[78]=_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[6603],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[3445],_mm256_xor_si256(c2[2654],_mm256_xor_si256(c2[1081],_mm256_xor_si256(c2[1345],_mm256_xor_si256(c2[8209],_mm256_xor_si256(c2[7155],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[1097],_mm256_xor_si256(c2[7429],_mm256_xor_si256(c2[5057],_mm256_xor_si256(c2[4529],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[8235],_mm256_xor_si256(c2[1634],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[2177],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[2712],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[5356],_mm256_xor_si256(c2[6941],_mm256_xor_si256(c2[8007],_mm256_xor_si256(c2[8005],_mm256_xor_si256(c2[3256],_mm256_xor_si256(c2[2726],_mm256_xor_si256(c2[8019],_mm256_xor_si256(c2[5116],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[4071],_mm256_xor_si256(c2[4335],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[4861],_mm256_xor_si256(c2[4874],_mm256_xor_si256(c2[5138],_mm256_xor_si256(c2[5403],_mm256_xor_si256(c2[6195],_mm256_xor_si256(c2[8318],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[6998],_mm256_xor_si256(c2[1191],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[7008],_mm256_xor_si256(c2[6232],_mm256_xor_si256(c2[6496],_mm256_xor_si256(c2[7289],_mm256_xor_si256(c2[2272],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[3074],_mm256_xor_si256(c2[2016],_mm256_xor_si256(c2[2556],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[5990],_mm256_xor_si256(c2[2293],_mm256_xor_si256(c2[7586],_mm256_xor_si256(c2[7850],_mm256_xor_si256(c2[5740],_mm256_xor_si256(c2[987],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[6015],_mm256_xor_si256(c2[8390],_mm256_xor_si256(c2[1273],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[1817],_mm256_xor_si256(c2[
2081],_mm256_xor_si256(c2[8151],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[7635],_mm256_xor_si256(c2[7899],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[7104],_mm256_xor_si256(c2[5259],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[3688],_mm256_xor_si256(c2[1047],c2[6592])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[78]=simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[6603],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[3445],simde_mm256_xor_si256(c2[2654],simde_mm256_xor_si256(c2[1081],simde_mm256_xor_si256(c2[1345],simde_mm256_xor_si256(c2[8209],simde_mm256_xor_si256(c2[7155],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[1097],simde_mm256_xor_si256(c2[7429],simde_mm256_xor_si256(c2[5057],simde_mm256_xor_si256(c2[4529],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[8235],simde_mm256_xor_si256(c2[1634],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[2177],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[2712],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[5356],simde_mm256_xor_si256(c2[6941],simde_mm256_xor_si256(c2[8007],simde_mm256_xor_si256(c2[8005],simde_mm256_xor_si256(c2[3256],simde_mm256_xor_si256(c2[2726],simde_mm256_xor_si256(c2[8019],simde_mm256_xor_si256(c2[5116],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[4071],simde_mm256_xor_si256(c2[4335],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[4861],simde_mm256_xor_si256(c2[4874],simde_mm256_xor_si256(c2[5138],simde_mm256_xor_si256(c2[5403],simde_mm256_xor_si256(c2[6195],simde_mm256_xor_si256(c2[8318],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[6998],simde_mm256_xor_si256(c2[1191],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[7008],simde_mm256_xor_si256(c2[6232],simde_mm256_xor_si256(c2[6496],simde_mm256_xor_si256(c2[7289],simde_mm256_xor_si256(c2[2272],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[3074],simde_mm256_xor_si256(c2[2016],simde_mm256_xor_si256(c2[2556],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[5990],simde_mm256_xor_si256(c2[2293],simde_mm25
6_xor_si256(c2[7586],simde_mm256_xor_si256(c2[7850],simde_mm256_xor_si256(c2[5740],simde_mm256_xor_si256(c2[987],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[6015],simde_mm256_xor_si256(c2[8390],simde_mm256_xor_si256(c2[1273],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[1817],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[8151],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[7635],simde_mm256_xor_si256(c2[7899],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[7104],simde_mm256_xor_si256(c2[5259],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[3688],simde_mm256_xor_si256(c2[1047],c2[6592])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[84]=_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[6481],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[6794],_mm256_xor_si256(c2[7336],c2[3420])))));
+     d2[84]=simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[6481],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[6794],simde_mm256_xor_si256(c2[7336],c2[3420])))));
 
 //row: 15
-     d2[90]=_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[6868],_mm256_xor_si256(c2[7130],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[4489],_mm256_xor_si256(c2[279],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[1073],_mm256_xor_si256(c2[1337],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[6892],_mm256_xor_si256(c2[5832],_mm256_xor_si256(c2[8221],_mm256_xor_si256(c2[6112],_mm256_xor_si256(c2[3470],_mm256_xor_si256(c2[3734],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[6912],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[317],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[7980],_mm256_xor_si256(c2[3760],_mm256_xor_si256(c2[1659],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[5354],_mm256_xor_si256(c2[5618],_mm256_xor_si256(c2[6684],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[1933],_mm256_xor_si256(c2[6696],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[1684],_mm256_xor_si256(c2[3012],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[3544],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[4080],_mm256_xor_si256(c2[4608],_mm256_xor_si256(c2[4872],_mm256_xor_si256(c2[4872],_mm256_xor_si256(c2[7265],_mm256_xor_si256(c2[5681],_mm256_xor_si256(c2[8057],_mm256_xor_si256(c2[8321],_mm256_xor_si256(c2[8330],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[5427],_mm256_xor_si256(c2[5691],_mm256_xor_si256(c2[5173],_mm256_xor_si256(c2[5966],_mm256_xor_si256(c2[685],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[8079],_mm256_xor_si256(c2[2019],_mm256_xor_si256(c2[1757],_mm256_xor_si256(c2[435],_mm256_xor_si256(c2[699],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[4673],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[6533],_mm256_xor_si256(c2[4417],_mm256_xor_si256(c2[7853],_mm256_xor_si256(c2[8117],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[4692],_mm256_xor_si256(c2[6809],_mm256_xor_si256(c2[7073],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[8139],_mm256_xor_si256(c2[7612],_mm256_xor_si256(c2[7876],_mm2
56_xor_si256(c2[6289],_mm256_xor_si256(c2[758],_mm256_xor_si256(c2[6828],_mm256_xor_si256(c2[7358],_mm256_xor_si256(c2[6576],_mm256_xor_si256(c2[7370],_mm256_xor_si256(c2[5523],_mm256_xor_si256(c2[5787],_mm256_xor_si256(c2[2365],_mm256_xor_si256(c2[8177],_mm256_xor_si256(c2[5005],c2[5269]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[90]=simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[6868],simde_mm256_xor_si256(c2[7130],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[4489],simde_mm256_xor_si256(c2[279],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[1073],simde_mm256_xor_si256(c2[1337],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[6892],simde_mm256_xor_si256(c2[5832],simde_mm256_xor_si256(c2[8221],simde_mm256_xor_si256(c2[6112],simde_mm256_xor_si256(c2[3470],simde_mm256_xor_si256(c2[3734],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[6912],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[317],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[7980],simde_mm256_xor_si256(c2[3760],simde_mm256_xor_si256(c2[1659],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[5354],simde_mm256_xor_si256(c2[5618],simde_mm256_xor_si256(c2[6684],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[1933],simde_mm256_xor_si256(c2[6696],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[1684],simde_mm256_xor_si256(c2[3012],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[3544],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[4080],simde_mm256_xor_si256(c2[4608],simde_mm256_xor_si256(c2[4872],simde_mm256_xor_si256(c2[4872],simde_mm256_xor_si256(c2[7265],simde_mm256_xor_si256(c2[5681],simde_mm256_xor_si256(c2[8057],simde_mm256_xor_si256(c2[8321],simde_mm256_xor_si256(c2[8330],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[5427],simde_mm256_xor_si256(c2[5691],simde_mm256_xor_si256(c2[5173],simde_mm256_xor_si256(c2[5966],simde_mm256_xor_si256(c2[685],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[8079],simde_mm256_xor_si256(c2[2019],simde_mm256_xor_si256(c2[1757],simde_mm256_xor_si256(c2[435],simde_mm256_xor_si256(c2[699],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[4673],simde_mm256_xor_si2
56(c2[976],simde_mm256_xor_si256(c2[6533],simde_mm256_xor_si256(c2[4417],simde_mm256_xor_si256(c2[7853],simde_mm256_xor_si256(c2[8117],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[4692],simde_mm256_xor_si256(c2[6809],simde_mm256_xor_si256(c2[7073],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[8139],simde_mm256_xor_si256(c2[7612],simde_mm256_xor_si256(c2[7876],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[758],simde_mm256_xor_si256(c2[6828],simde_mm256_xor_si256(c2[7358],simde_mm256_xor_si256(c2[6576],simde_mm256_xor_si256(c2[7370],simde_mm256_xor_si256(c2[5523],simde_mm256_xor_si256(c2[5787],simde_mm256_xor_si256(c2[2365],simde_mm256_xor_si256(c2[8177],simde_mm256_xor_si256(c2[5005],c2[5269]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[96]=_mm256_xor_si256(c2[3173],_mm256_xor_si256(c2[1324],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[3436],_mm256_xor_si256(c2[3182],_mm256_xor_si256(c2[5031],_mm256_xor_si256(c2[4240],_mm256_xor_si256(c2[3444],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[2677],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[6637],_mm256_xor_si256(c2[4792],_mm256_xor_si256(c2[6652],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[3220],_mm256_xor_si256(c2[3757],_mm256_xor_si256(c2[2436],_mm256_xor_si256(c2[6663],_mm256_xor_si256(c2[4562],_mm256_xor_si256(c2[6936],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[1140],_mm256_xor_si256(c2[1144],_mm256_xor_si256(c2[4836],_mm256_xor_si256(c2[1152],_mm256_xor_si256(c2[6696],_mm256_xor_si256(c2[4587],_mm256_xor_si256(c2[5921],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[6447],_mm256_xor_si256(c2[6724],_mm256_xor_si256(c2[6989],_mm256_xor_si256(c2[7781],_mm256_xor_si256(c2[1721],_mm256_xor_si256(c2[137],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[6733],_mm256_xor_si256(c2[2786],_mm256_xor_si256(c2[4104],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[8076],_mm256_xor_si256(c2[422],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[4922],_mm256_xor_si256(c2[4660],_mm256_xor_si256(c2[3602],_mm256_xor_si256(c2[4406],_mm256_xor_si256(c2[7576],_mm256_xor_si256(c2[3879],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[7320],_mm256_xor_si256(c2[2573],_mm256_xor_si256(c2[3113],_mm256_xor_si256(c2[7601],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[2595],_mm256_xor_si256(c2[2332],_mm256_xor_si256(c2[3661],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[1814],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[507],_mm256_xor_si256(c2[5268],_mm256_xor_si256(c2[2633],c2[8172]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[3173],simde_mm256_xor_si256(c2[1324],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[3436],simde_mm256_xor_si256(c2[3182],simde_mm256_xor_si256(c2[5031],simde_mm256_xor_si256(c2[4240],simde_mm256_xor_si256(c2[3444],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[2677],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[6637],simde_mm256_xor_si256(c2[4792],simde_mm256_xor_si256(c2[6652],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[3220],simde_mm256_xor_si256(c2[3757],simde_mm256_xor_si256(c2[2436],simde_mm256_xor_si256(c2[6663],simde_mm256_xor_si256(c2[4562],simde_mm256_xor_si256(c2[6936],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[1140],simde_mm256_xor_si256(c2[1144],simde_mm256_xor_si256(c2[4836],simde_mm256_xor_si256(c2[1152],simde_mm256_xor_si256(c2[6696],simde_mm256_xor_si256(c2[4587],simde_mm256_xor_si256(c2[5921],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[6447],simde_mm256_xor_si256(c2[6724],simde_mm256_xor_si256(c2[6989],simde_mm256_xor_si256(c2[7781],simde_mm256_xor_si256(c2[1721],simde_mm256_xor_si256(c2[137],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[6733],simde_mm256_xor_si256(c2[2786],simde_mm256_xor_si256(c2[4104],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[8076],simde_mm256_xor_si256(c2[422],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[4922],simde_mm256_xor_si256(c2[4660],simde_mm256_xor_si256(c2[3602],simde_mm256_xor_si256(c2[4406],simde_mm256_xor_si256(c2[7576],simde_mm256_xor_si256(c2[3879],simde_mm256_xor_si256(c2[989],simde_mm256_xor_si256(c2[7320],simde_mm256_xor_si256(c2[2573],simde_mm256_xor_si256(c2[3113],simde_mm256_xor_si256(c2[7601],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[2595],simde_mm256_xor_si256(c2[2332],simde_mm256_xor_si256(c2[3661],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[1814],simde_mm256
_xor_si256(c2[1032],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[507],simde_mm256_xor_si256(c2[5268],simde_mm256_xor_si256(c2[2633],c2[8172]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[102]=_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[4131],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[1793],c2[6329]))));
+     d2[102]=simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[4131],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[1793],c2[6329]))));
 
 //row: 18
-     d2[108]=_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[941],_mm256_xor_si256(c2[6494],_mm256_xor_si256(c2[1276],c2[1284]))));
+     d2[108]=simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[941],simde_mm256_xor_si256(c2[6494],simde_mm256_xor_si256(c2[1276],c2[1284]))));
 
 //row: 19
-     d2[114]=_mm256_xor_si256(c2[4492],_mm256_xor_si256(c2[5556],_mm256_xor_si256(c2[6424],_mm256_xor_si256(c2[4849],c2[1440]))));
+     d2[114]=simde_mm256_xor_si256(c2[4492],simde_mm256_xor_si256(c2[5556],simde_mm256_xor_si256(c2[6424],simde_mm256_xor_si256(c2[4849],c2[1440]))));
 
 //row: 20
-     d2[120]=_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[1852],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[3964],_mm256_xor_si256(c2[7133],_mm256_xor_si256(c2[3710],_mm256_xor_si256(c2[5559],_mm256_xor_si256(c2[4768],_mm256_xor_si256(c2[3459],_mm256_xor_si256(c2[1876],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[1096],_mm256_xor_si256(c2[7165],_mm256_xor_si256(c2[3732],_mm256_xor_si256(c2[7180],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[4285],_mm256_xor_si256(c2[2964],_mm256_xor_si256(c2[7191],_mm256_xor_si256(c2[5090],_mm256_xor_si256(c2[7464],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[1672],_mm256_xor_si256(c2[5364],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[7224],_mm256_xor_si256(c2[5115],_mm256_xor_si256(c2[6449],_mm256_xor_si256(c2[3541],_mm256_xor_si256(c2[6975],_mm256_xor_si256(c2[2222],_mm256_xor_si256(c2[7252],_mm256_xor_si256(c2[7517],_mm256_xor_si256(c2[8309],_mm256_xor_si256(c2[2249],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[3305],_mm256_xor_si256(c2[5680],_mm256_xor_si256(c2[3314],_mm256_xor_si256(c2[4632],_mm256_xor_si256(c2[675],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[950],_mm256_xor_si256(c2[4380],_mm256_xor_si256(c2[5450],_mm256_xor_si256(c2[5188],_mm256_xor_si256(c2[4130],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[8104],_mm256_xor_si256(c2[4407],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[7848],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[8129],_mm256_xor_si256(c2[2057],_mm256_xor_si256(c2[3651],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[2860],_mm256_xor_si256(c2[4189],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[2354],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[5796],_mm256_xor_si256(c2[3161],c2[253]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[1852],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[3964],simde_mm256_xor_si256(c2[7133],simde_mm256_xor_si256(c2[3710],simde_mm256_xor_si256(c2[5559],simde_mm256_xor_si256(c2[4768],simde_mm256_xor_si256(c2[3459],simde_mm256_xor_si256(c2[1876],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[1096],simde_mm256_xor_si256(c2[7165],simde_mm256_xor_si256(c2[3732],simde_mm256_xor_si256(c2[7180],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[4285],simde_mm256_xor_si256(c2[2964],simde_mm256_xor_si256(c2[7191],simde_mm256_xor_si256(c2[5090],simde_mm256_xor_si256(c2[7464],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[1672],simde_mm256_xor_si256(c2[5364],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[7224],simde_mm256_xor_si256(c2[5115],simde_mm256_xor_si256(c2[6449],simde_mm256_xor_si256(c2[3541],simde_mm256_xor_si256(c2[6975],simde_mm256_xor_si256(c2[2222],simde_mm256_xor_si256(c2[7252],simde_mm256_xor_si256(c2[7517],simde_mm256_xor_si256(c2[8309],simde_mm256_xor_si256(c2[2249],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[3305],simde_mm256_xor_si256(c2[5680],simde_mm256_xor_si256(c2[3314],simde_mm256_xor_si256(c2[4632],simde_mm256_xor_si256(c2[675],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[950],simde_mm256_xor_si256(c2[4380],simde_mm256_xor_si256(c2[5450],simde_mm256_xor_si256(c2[5188],simde_mm256_xor_si256(c2[4130],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[8104],simde_mm256_xor_si256(c2[4407],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[7848],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[8129],simde_mm256_xor_si256(c2[2057],simde_mm256_xor_si256(c2[3651],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[2860],simde_mm256_xor_si256(c2[4189],simde_mm256_xor_si256(c2[1812],simde_mm
256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[2354],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[5796],simde_mm256_xor_si256(c2[3161],c2[253]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[126]=_mm256_xor_si256(c2[3444],_mm256_xor_si256(c2[1647],_mm256_xor_si256(c2[2308],_mm256_xor_si256(c2[5784],c2[519]))));
+     d2[126]=simde_mm256_xor_si256(c2[3444],simde_mm256_xor_si256(c2[1647],simde_mm256_xor_si256(c2[2308],simde_mm256_xor_si256(c2[5784],c2[519]))));
 
 //row: 22
-     d2[132]=_mm256_xor_si256(c2[7920],_mm256_xor_si256(c2[3048],_mm256_xor_si256(c2[2533],c2[6012])));
+     d2[132]=simde_mm256_xor_si256(c2[7920],simde_mm256_xor_si256(c2[3048],simde_mm256_xor_si256(c2[2533],c2[6012])));
 
 //row: 23
-     d2[138]=_mm256_xor_si256(c2[6348],_mm256_xor_si256(c2[6626],_mm256_xor_si256(c2[7777],c2[7344])));
+     d2[138]=simde_mm256_xor_si256(c2[6348],simde_mm256_xor_si256(c2[6626],simde_mm256_xor_si256(c2[7777],c2[7344])));
 
 //row: 24
-     d2[144]=_mm256_xor_si256(c2[7660],_mm256_xor_si256(c2[5811],_mm256_xor_si256(c2[6073],_mm256_xor_si256(c2[7923],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[7669],_mm256_xor_si256(c2[1071],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[7418],_mm256_xor_si256(c2[5835],_mm256_xor_si256(c2[4781],_mm256_xor_si256(c2[7164],_mm256_xor_si256(c2[5055],_mm256_xor_si256(c2[2677],_mm256_xor_si256(c2[7960],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[5861],_mm256_xor_si256(c2[7707],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[6929],_mm256_xor_si256(c2[2703],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[5633],_mm256_xor_si256(c2[5631],_mm256_xor_si256(c2[876],_mm256_xor_si256(c2[5645],_mm256_xor_si256(c2[2736],_mm256_xor_si256(c2[627],_mm256_xor_si256(c2[1961],_mm256_xor_si256(c2[7500],_mm256_xor_si256(c2[2487],_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[6208],_mm256_xor_si256(c2[4624],_mm256_xor_si256(c2[7264],_mm256_xor_si256(c2[5152],_mm256_xor_si256(c2[7273],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[4634],_mm256_xor_si256(c2[4116],_mm256_xor_si256(c2[4909],_mm256_xor_si256(c2[8345],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[700],_mm256_xor_si256(c2[8089],_mm256_xor_si256(c2[446],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[8366],_mm256_xor_si256(c2[5476],_mm256_xor_si256(c2[3360],_mm256_xor_si256(c2[7060],_mm256_xor_si256(c2[7600],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[6016],_mm256_xor_si256(c2[7610],_mm256_xor_si256(c2[7082],_mm256_xor_si256(c2[6819],_mm256_xor_si256(c2[8148],_mm256_xor_si256(c2[5777],_mm256_xor_si256(c2[6301],_mm256_xor_si256(c2[5525],_mm256_xor_si256(c2[6313],_mm256_xor_si256(c2[4730],_mm256_xor_si256(c2[1308],_mm256_xor_si256(c2[7120],c2[4212]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[144]=simde_mm256_xor_si256(c2[7660],simde_mm256_xor_si256(c2[5811],simde_mm256_xor_si256(c2[6073],simde_mm256_xor_si256(c2[7923],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[7669],simde_mm256_xor_si256(c2[1071],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[7418],simde_mm256_xor_si256(c2[5835],simde_mm256_xor_si256(c2[4781],simde_mm256_xor_si256(c2[7164],simde_mm256_xor_si256(c2[5055],simde_mm256_xor_si256(c2[2677],simde_mm256_xor_si256(c2[7960],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[5861],simde_mm256_xor_si256(c2[7707],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[6929],simde_mm256_xor_si256(c2[2703],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[5633],simde_mm256_xor_si256(c2[5631],simde_mm256_xor_si256(c2[876],simde_mm256_xor_si256(c2[5645],simde_mm256_xor_si256(c2[2736],simde_mm256_xor_si256(c2[627],simde_mm256_xor_si256(c2[1961],simde_mm256_xor_si256(c2[7500],simde_mm256_xor_si256(c2[2487],simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[6208],simde_mm256_xor_si256(c2[4624],simde_mm256_xor_si256(c2[7264],simde_mm256_xor_si256(c2[5152],simde_mm256_xor_si256(c2[7273],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[4634],simde_mm256_xor_si256(c2[4116],simde_mm256_xor_si256(c2[4909],simde_mm256_xor_si256(c2[8345],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[700],simde_mm256_xor_si256(c2[8089],simde_mm256_xor_si256(c2[446],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[8366],simde_mm256_xor_si256(c2[5476],simde_mm256_xor_si256(c2[3360],simde_mm256_xor_si256(c2[7060],simde_mm256_xor_si256(c2[7600],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[6016],simde_mm256_xor_si256(c2[7610],simde_mm256_xor_si256(c2[7082],simde_mm256_xor_si256(c2[6819],simde_mm256_xor_si256(c2[8148],simde_mm256_xor_si256(c2[5777],simde_mm25
6_xor_si256(c2[6301],simde_mm256_xor_si256(c2[5525],simde_mm256_xor_si256(c2[6313],simde_mm256_xor_si256(c2[4730],simde_mm256_xor_si256(c2[1308],simde_mm256_xor_si256(c2[7120],c2[4212]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[150]=_mm256_xor_si256(c2[2126],_mm256_xor_si256(c2[4560],_mm256_xor_si256(c2[8273],c2[6507])));
+     d2[150]=simde_mm256_xor_si256(c2[2126],simde_mm256_xor_si256(c2[4560],simde_mm256_xor_si256(c2[8273],c2[6507])));
 
 //row: 26
-     d2[156]=_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[4514],_mm256_xor_si256(c2[3218],c2[2296])));
+     d2[156]=simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[4514],simde_mm256_xor_si256(c2[3218],c2[2296])));
 
 //row: 27
-     d2[162]=_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[600],c2[1419]));
+     d2[162]=simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[600],c2[1419]));
 
 //row: 28
-     d2[168]=_mm256_xor_si256(c2[7920],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[5509],c2[5006])));
+     d2[168]=simde_mm256_xor_si256(c2[7920],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[5509],c2[5006])));
 
 //row: 29
-     d2[174]=_mm256_xor_si256(c2[528],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[7394],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[2392],_mm256_xor_si256(c2[1337],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[7405],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[7156],_mm256_xor_si256(c2[6096],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[6376],_mm256_xor_si256(c2[3734],_mm256_xor_si256(c2[3998],_mm256_xor_si256(c2[4013],_mm256_xor_si256(c2[7176],_mm256_xor_si256(c2[317],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[4297],_mm256_xor_si256(c2[5618],_mm256_xor_si256(c2[5882],_mm256_xor_si256(c2[6948],_mm256_xor_si256(c2[6952],_mm256_xor_si256(c2[1933],_mm256_xor_si256(c2[2197],_mm256_xor_si256(c2[6960],_mm256_xor_si256(c2[4057],_mm256_xor_si256(c2[1684],_mm256_xor_si256(c2[1948],_mm256_xor_si256(c2[3276],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[3808],_mm256_xor_si256(c2[4085],_mm256_xor_si256(c2[4344],_mm256_xor_si256(c2[4872],_mm256_xor_si256(c2[5136],_mm256_xor_si256(c2[7529],_mm256_xor_si256(c2[5945],_mm256_xor_si256(c2[8321],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[5691],_mm256_xor_si256(c2[5955],_mm256_xor_si256(c2[5437],_mm256_xor_si256(c2[6230],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[699],_mm256_xor_si256(c2[963],_mm256_xor_si256(c2[1488],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[4937],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[6797],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[8117],_mm256_xor_si256(c2[8381],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[4956],_mm256_xor_si256(c2[7073],_mm256_xor_si256(c2[7337],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[8403],_mm256_xor_si256(c2[7876],_mm256_xor_si256(c2[8140],_mm256_xor_si256(c2[5236],_mm256_xor_si256(c2[1022],_
mm256_xor_si256(c2[7092],_mm256_xor_si256(c2[7622],_mm256_xor_si256(c2[6840],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[5787],_mm256_xor_si256(c2[6051],_mm256_xor_si256(c2[2629],_mm256_xor_si256(c2[8441],_mm256_xor_si256(c2[5269],c2[5533]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[174]=simde_mm256_xor_si256(c2[528],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[7394],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[2392],simde_mm256_xor_si256(c2[1337],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[7405],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[7156],simde_mm256_xor_si256(c2[6096],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[6376],simde_mm256_xor_si256(c2[3734],simde_mm256_xor_si256(c2[3998],simde_mm256_xor_si256(c2[4013],simde_mm256_xor_si256(c2[7176],simde_mm256_xor_si256(c2[317],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[4297],simde_mm256_xor_si256(c2[5618],simde_mm256_xor_si256(c2[5882],simde_mm256_xor_si256(c2[6948],simde_mm256_xor_si256(c2[6952],simde_mm256_xor_si256(c2[1933],simde_mm256_xor_si256(c2[2197],simde_mm256_xor_si256(c2[6960],simde_mm256_xor_si256(c2[4057],simde_mm256_xor_si256(c2[1684],simde_mm256_xor_si256(c2[1948],simde_mm256_xor_si256(c2[3276],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[3808],simde_mm256_xor_si256(c2[4085],simde_mm256_xor_si256(c2[4344],simde_mm256_xor_si256(c2[4872],simde_mm256_xor_si256(c2[5136],simde_mm256_xor_si256(c2[7529],simde_mm256_xor_si256(c2[5945],simde_mm256_xor_si256(c2[8321],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[5691],simde_mm256_xor_si256(c2[5955],simde_mm256_xor_si256(c2[5437],simde_mm256_xor_si256(c2[6230],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[699],simde_mm256_xor_si256(c2[963],simde_mm256_xor_si256(c2[1488],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[4937],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[6797],simde_mm256_xor_s
i256(c2[4681],simde_mm256_xor_si256(c2[8117],simde_mm256_xor_si256(c2[8381],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[4956],simde_mm256_xor_si256(c2[7073],simde_mm256_xor_si256(c2[7337],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[8403],simde_mm256_xor_si256(c2[7876],simde_mm256_xor_si256(c2[8140],simde_mm256_xor_si256(c2[5236],simde_mm256_xor_si256(c2[1022],simde_mm256_xor_si256(c2[7092],simde_mm256_xor_si256(c2[7622],simde_mm256_xor_si256(c2[6840],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[5787],simde_mm256_xor_si256(c2[6051],simde_mm256_xor_si256(c2[2629],simde_mm256_xor_si256(c2[8441],simde_mm256_xor_si256(c2[5269],c2[5533]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[180]=_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[2115],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[8188],_mm256_xor_si256(c2[3711],_mm256_xor_si256(c2[5296],_mm256_xor_si256(c2[5560],_mm256_xor_si256(c2[4505],_mm256_xor_si256(c2[4769],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[817],_mm256_xor_si256(c2[3206],_mm256_xor_si256(c2[1097],_mm256_xor_si256(c2[6902],_mm256_xor_si256(c2[7166],_mm256_xor_si256(c2[7181],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[3485],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[4286],_mm256_xor_si256(c2[2965],_mm256_xor_si256(c2[6928],_mm256_xor_si256(c2[7192],_mm256_xor_si256(c2[5091],_mm256_xor_si256(c2[7201],_mm256_xor_si256(c2[7465],_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[1409],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[5101],_mm256_xor_si256(c2[5365],_mm256_xor_si256(c2[1681],_mm256_xor_si256(c2[6961],_mm256_xor_si256(c2[7225],_mm256_xor_si256(c2[4852],_mm256_xor_si256(c2[5116],_mm256_xor_si256(c2[6444],_mm256_xor_si256(c2[3542],_mm256_xor_si256(c2[6712],_mm256_xor_si256(c2[6976],_mm256_xor_si256(c2[7253],_mm256_xor_si256(c2[7248],_mm256_xor_si256(c2[7512],_mm256_xor_si256(c2[8040],_mm256_xor_si256(c2[8304],_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[2244],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[3036],_mm256_xor_si256(c2[3300],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[4633],_mm256_xor_si256(c2[412],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[687],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[4117],_mm256_xor_si256(c2[4381],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[5451],_mm256_xor_si256(c2[4925],_mm256_xor_si256(c2[5189],_mm256_xor_si256(c2[3867],_mm256_xor_si256(c2[4131],_mm256_xor_si256(c2[4935],_mm256_xor_si256(c2[8105],_mm256_xor_si256(c2[4144],_mm256_xor_si256(c2[
4408],_mm256_xor_si256(c2[1512],_mm256_xor_si256(c2[7849],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[3096],_mm256_xor_si256(c2[3636],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[8124],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[2052],_mm256_xor_si256(c2[3652],_mm256_xor_si256(c2[2860],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[4190],_mm256_xor_si256(c2[1813],_mm256_xor_si256(c2[2079],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[5797],_mm256_xor_si256(c2[3156],_mm256_xor_si256(c2[8437],c2[254])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[180]=simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[2115],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[8188],simde_mm256_xor_si256(c2[3711],simde_mm256_xor_si256(c2[5296],simde_mm256_xor_si256(c2[5560],simde_mm256_xor_si256(c2[4505],simde_mm256_xor_si256(c2[4769],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[817],simde_mm256_xor_si256(c2[3206],simde_mm256_xor_si256(c2[1097],simde_mm256_xor_si256(c2[6902],simde_mm256_xor_si256(c2[7166],simde_mm256_xor_si256(c2[7181],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[3485],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[4286],simde_mm256_xor_si256(c2[2965],simde_mm256_xor_si256(c2[6928],simde_mm256_xor_si256(c2[7192],simde_mm256_xor_si256(c2[5091],simde_mm256_xor_si256(c2[7201],simde_mm256_xor_si256(c2[7465],simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[1409],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[5101],simde_mm256_xor_si256(c2[5365],simde_mm256_xor_si256(c2[1681],simde_mm256_xor_si256(c2[6961],simde_mm256_xor_si256(c2[7225],simde_mm256_xor_si256(c2[4852],simde_mm256_xor_si256(c2[5116],simde_mm256_xor_si256(c2[6444],simde_mm256_xor_si256(c2[3542],simde_mm256_xor_si256(c2[6712],simde_mm256_xor_si256(c2[6976],simde_mm256_xor_si256(c2[7253],simde_mm256_xor_si256(c2[7248],simde_mm256_xor_si256(c2[7512],simde_mm256_xor_si256(c2[8040],simde_mm256_xor_si256(c2[8304],simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[2244],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[3036],simde_mm256_xor_si256(c2[3300],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[4633],simde_mm256_xor_si256(c2[412],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[687],simde_mm256
_xor_si256(c2[951],simde_mm256_xor_si256(c2[4117],simde_mm256_xor_si256(c2[4381],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[5451],simde_mm256_xor_si256(c2[4925],simde_mm256_xor_si256(c2[5189],simde_mm256_xor_si256(c2[3867],simde_mm256_xor_si256(c2[4131],simde_mm256_xor_si256(c2[4935],simde_mm256_xor_si256(c2[8105],simde_mm256_xor_si256(c2[4144],simde_mm256_xor_si256(c2[4408],simde_mm256_xor_si256(c2[1512],simde_mm256_xor_si256(c2[7849],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[3096],simde_mm256_xor_si256(c2[3636],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[8124],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[2052],simde_mm256_xor_si256(c2[3652],simde_mm256_xor_si256(c2[2860],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[4190],simde_mm256_xor_si256(c2[1813],simde_mm256_xor_si256(c2[2079],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[5797],simde_mm256_xor_si256(c2[3156],simde_mm256_xor_si256(c2[8437],c2[254])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[186]=_mm256_xor_si256(c2[5549],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[3436],_mm256_xor_si256(c2[3962],_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[5812],_mm256_xor_si256(c2[5284],_mm256_xor_si256(c2[5548],_mm256_xor_si256(c2[5558],_mm256_xor_si256(c2[5294],_mm256_xor_si256(c2[7407],_mm256_xor_si256(c2[7143],_mm256_xor_si256(c2[6616],_mm256_xor_si256(c2[6088],_mm256_xor_si256(c2[6352],_mm256_xor_si256(c2[1071],_mm256_xor_si256(c2[5307],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[5053],_mm256_xor_si256(c2[4789],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[317],_mm256_xor_si256(c2[3744],_mm256_xor_si256(c2[3480],_mm256_xor_si256(c2[5596],_mm256_xor_si256(c2[5068],_mm256_xor_si256(c2[5332],_mm256_xor_si256(c2[6133],_mm256_xor_si256(c2[5869],_mm256_xor_si256(c2[4812],_mm256_xor_si256(c2[4548],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[6938],_mm256_xor_si256(c2[6674],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[2186],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3252],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[3256],_mm256_xor_si256(c2[7212],_mm256_xor_si256(c2[6684],_mm256_xor_si256(c2[6948],_mm256_xor_si256(c2[6156],_mm256_xor_si256(c2[3528],_mm256_xor_si256(c2[3264],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[6963],_mm256_xor_si256(c2[6435],_mm256_xor_si256(c2[6699],_mm256_xor_si256(c2[8297],_mm256_xor_si256(c2[8033],_mm256_xor_si256(c2[5389],_mm256_xor_si256(c2[5125],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[653],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[912],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[1704],_mm256_xor_si256(c2[1176],_m
m256_xor_si256(c2[1440],_mm256_xor_si256(c2[4097],_mm256_xor_si256(c2[3833],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2249],_mm256_xor_si256(c2[5153],_mm256_xor_si256(c2[4625],_mm256_xor_si256(c2[4889],_mm256_xor_si256(c2[5162],_mm256_xor_si256(c2[4898],_mm256_xor_si256(c2[6480],_mm256_xor_si256(c2[6216],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[1995],_mm256_xor_si256(c2[2259],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[1741],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[6228],_mm256_xor_si256(c2[5700],_mm256_xor_si256(c2[5964],_mm256_xor_si256(c2[7298],_mm256_xor_si256(c2[7034],_mm256_xor_si256(c2[7036],_mm256_xor_si256(c2[6772],_mm256_xor_si256(c2[5978],_mm256_xor_si256(c2[5450],_mm256_xor_si256(c2[5714],_mm256_xor_si256(c2[6782],_mm256_xor_si256(c2[6518],_mm256_xor_si256(c2[1505],_mm256_xor_si256(c2[1241],_mm256_xor_si256(c2[6255],_mm256_xor_si256(c2[5991],_mm256_xor_si256(c2[3365],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[4949],_mm256_xor_si256(c2[4421],_mm256_xor_si256(c2[4685],_mm256_xor_si256(c2[5489],_mm256_xor_si256(c2[5225],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[3905],_mm256_xor_si256(c2[3377],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[5499],_mm256_xor_si256(c2[5235],_mm256_xor_si256(c2[4971],_mm256_xor_si256(c2[4707],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4180],_mm256_xor_si256(c2[4444],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[5773],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[3396],_mm256_xor_si256(c2[4190],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[3408],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[4202],_mm256_xor_si256(c2[3938],_mm256_xor_si256(c2[2619],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[7644],_mm256_xor_si256(c2[7380],_mm256_xor_si256(c2[5009],_mm256_xor_si256(c2[4745],_mm256_xor_si256(c2[2101],_mm256_xor_si256(c2[1573],c2[1837])))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[186]=simde_mm256_xor_si256(c2[5549],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[3436],simde_mm256_xor_si256(c2[3962],simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[5812],simde_mm256_xor_si256(c2[5284],simde_mm256_xor_si256(c2[5548],simde_mm256_xor_si256(c2[5558],simde_mm256_xor_si256(c2[5294],simde_mm256_xor_si256(c2[7407],simde_mm256_xor_si256(c2[7143],simde_mm256_xor_si256(c2[6616],simde_mm256_xor_si256(c2[6088],simde_mm256_xor_si256(c2[6352],simde_mm256_xor_si256(c2[1071],simde_mm256_xor_si256(c2[5307],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[5053],simde_mm256_xor_si256(c2[4789],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[317],simde_mm256_xor_si256(c2[3744],simde_mm256_xor_si256(c2[3480],simde_mm256_xor_si256(c2[5596],simde_mm256_xor_si256(c2[5068],simde_mm256_xor_si256(c2[5332],simde_mm256_xor_si256(c2[6133],simde_mm256_xor_si256(c2[5869],simde_mm256_xor_si256(c2[4812],simde_mm256_xor_si256(c2[4548],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[6938],simde_mm256_xor_si256(c2[6674],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[2186],simde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[3252],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[3256],simde_mm256_xor_si256(c2[7212],simde_mm256_xor_si256(c2[6684],simde_mm256_xor_si256(c2[6948],simde_mm256_xor_si256(c2[6156],simde_mm256_xor_si256(c2[3528],simde_mm256_xor_si256(c2[3264],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[6963],simde_mm256_xor_si256(c2[6435],simde_mm256_xo
r_si256(c2[6699],simde_mm256_xor_si256(c2[8297],simde_mm256_xor_si256(c2[8033],simde_mm256_xor_si256(c2[5389],simde_mm256_xor_si256(c2[5125],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[653],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[912],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[1704],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[4097],simde_mm256_xor_si256(c2[3833],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2249],simde_mm256_xor_si256(c2[5153],simde_mm256_xor_si256(c2[4625],simde_mm256_xor_si256(c2[4889],simde_mm256_xor_si256(c2[5162],simde_mm256_xor_si256(c2[4898],simde_mm256_xor_si256(c2[6480],simde_mm256_xor_si256(c2[6216],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[1995],simde_mm256_xor_si256(c2[2259],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[1741],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[6228],simde_mm256_xor_si256(c2[5700],simde_mm256_xor_si256(c2[5964],simde_mm256_xor_si256(c2[7298],simde_mm256_xor_si256(c2[7034],simde_mm256_xor_si256(c2[7036],simde_mm256_xor_si256(c2[6772],simde_mm256_xor_si256(c2[5978],simde_mm256_xor_si256(c2[5450],simde_mm256_xor_si256(c2[5714],simde_mm256_xor_si256(c2[6782],simde_mm256_xor_si256(c2[6518],simde_mm256_xor_si256(c2[1505],simde_mm256_xor_si256(c2[1241],simde_mm256_xor_si256(c2[6255],simde_mm256_xor_si256(c2[5991],simde_mm256_xor_si256(c2[3365],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[4949],simde_mm256_xor_si256(c2[4421],simde_mm256_xor_si256(c2[4685],simde_mm256_xor_si256(c2[5489],simde_mm256_xor_si256(c2[5225],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[3905],simde_mm256_xor_si256(c2[3377],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[5499],simde_mm256_xor_si256(c2[5235],simde_mm256_xor_si256(c2[4971],simde_
mm256_xor_si256(c2[4707],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4180],simde_mm256_xor_si256(c2[4444],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[5773],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[3396],simde_mm256_xor_si256(c2[4190],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[4202],simde_mm256_xor_si256(c2[3938],simde_mm256_xor_si256(c2[2619],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[7644],simde_mm256_xor_si256(c2[7380],simde_mm256_xor_si256(c2[5009],simde_mm256_xor_si256(c2[4745],simde_mm256_xor_si256(c2[2101],simde_mm256_xor_si256(c2[1573],c2[1837]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[192]=_mm256_xor_si256(c2[5016],_mm256_xor_si256(c2[3173],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[3435],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[5031],_mm256_xor_si256(c2[6616],_mm256_xor_si256(c2[6880],_mm256_xor_si256(c2[5825],_mm256_xor_si256(c2[6089],_mm256_xor_si256(c2[4780],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[2137],_mm256_xor_si256(c2[4526],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[8222],_mm256_xor_si256(c2[39],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[3217],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[5069],_mm256_xor_si256(c2[5606],_mm256_xor_si256(c2[4285],_mm256_xor_si256(c2[8248],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[6411],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[1659],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[2989],_mm256_xor_si256(c2[2729],_mm256_xor_si256(c2[2993],_mm256_xor_si256(c2[6421],_mm256_xor_si256(c2[6685],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[8281],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[6172],_mm256_xor_si256(c2[6436],_mm256_xor_si256(c2[7764],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[8032],_mm256_xor_si256(c2[8296],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[385],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[1177],_mm256_xor_si256(c2[3564],_mm256_xor_si256(c2[1980],_mm256_xor_si256(c2[4356],_mm256_xor_si256(c2[4620],_mm256_xor_si256(c2[4635],_mm256_xor_si256(c2[5953],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[1996],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[2007],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[5437],_mm256_xor_si256(c2[5701],_mm256_xor_si256(c2[6771],_mm256_xor_si256(c2[6245],_mm256_xor_si256(c2[6509],_mm256_xor_si256(c2[5187],_mm256_xor_si256(c2[5451],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[6255],_mm256_xor_si256(c2[972],_mm256_xor_si256(c2[5464],_mm256_xor_si256(c2[5728],
_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[4152],_mm256_xor_si256(c2[4416],_mm256_xor_si256(c2[4956],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[3108],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[4180],_mm256_xor_si256(c2[4444],_mm256_xor_si256(c2[3917],_mm256_xor_si256(c2[4181],_mm256_xor_si256(c2[5510],_mm256_xor_si256(c2[3133],_mm256_xor_si256(c2[3399],_mm256_xor_si256(c2[3663],_mm256_xor_si256(c2[2881],_mm256_xor_si256(c2[3411],_mm256_xor_si256(c2[3675],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[7117],_mm256_xor_si256(c2[4476],_mm256_xor_si256(c2[1310],c2[1574])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[192]=simde_mm256_xor_si256(c2[5016],simde_mm256_xor_si256(c2[3173],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[3435],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[5031],simde_mm256_xor_si256(c2[6616],simde_mm256_xor_si256(c2[6880],simde_mm256_xor_si256(c2[5825],simde_mm256_xor_si256(c2[6089],simde_mm256_xor_si256(c2[4780],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[2137],simde_mm256_xor_si256(c2[4526],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[8222],simde_mm256_xor_si256(c2[39],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[3217],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[5069],simde_mm256_xor_si256(c2[5606],simde_mm256_xor_si256(c2[4285],simde_mm256_xor_si256(c2[8248],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[6411],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[1659],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[2989],simde_mm256_xor_si256(c2[2729],simde_mm256_xor_si256(c2[2993],simde_mm256_xor_si256(c2[6421],simde_mm256_xor_si256(c2[6685],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[8281],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[6172],simde_mm256_xor_si256(c2[6436],simde_mm256_xor_si256(c2[7764],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[8032],simde_mm256_xor_si256(c2[8296],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[385],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[1177],simde_mm256_xor_si256(c2[3564],simde_mm256_xor_si256(c2[1980],simde_mm256_xor_si256(c2[4356],simde_mm256_xor_si256(c2[4620],simde_mm256_xor_si256(c2[4635],simde_mm256_xor_si256(c2[5953],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[1996],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[2007],simde_mm256_xor_s
i256(c2[2271],simde_mm256_xor_si256(c2[5437],simde_mm256_xor_si256(c2[5701],simde_mm256_xor_si256(c2[6771],simde_mm256_xor_si256(c2[6245],simde_mm256_xor_si256(c2[6509],simde_mm256_xor_si256(c2[5187],simde_mm256_xor_si256(c2[5451],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[6255],simde_mm256_xor_si256(c2[972],simde_mm256_xor_si256(c2[5464],simde_mm256_xor_si256(c2[5728],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[4152],simde_mm256_xor_si256(c2[4416],simde_mm256_xor_si256(c2[4956],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[3108],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[4180],simde_mm256_xor_si256(c2[4444],simde_mm256_xor_si256(c2[3917],simde_mm256_xor_si256(c2[4181],simde_mm256_xor_si256(c2[5510],simde_mm256_xor_si256(c2[3133],simde_mm256_xor_si256(c2[3399],simde_mm256_xor_si256(c2[3663],simde_mm256_xor_si256(c2[2881],simde_mm256_xor_si256(c2[3411],simde_mm256_xor_si256(c2[3675],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[7117],simde_mm256_xor_si256(c2[4476],simde_mm256_xor_si256(c2[1310],c2[1574])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[198]=_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[2136],_mm256_xor_si256(c2[2513],c2[2631])));
+     d2[198]=simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[2136],simde_mm256_xor_si256(c2[2513],c2[2631])));
 
 //row: 34
-     d2[204]=_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[5368],_mm256_xor_si256(c2[2820],c2[6543])));
+     d2[204]=simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[5368],simde_mm256_xor_si256(c2[2820],c2[6543])));
 
 //row: 35
-     d2[210]=_mm256_xor_si256(c2[7661],_mm256_xor_si256(c2[5812],_mm256_xor_si256(c2[6074],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[7670],_mm256_xor_si256(c2[1072],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[7419],_mm256_xor_si256(c2[5836],_mm256_xor_si256(c2[4776],_mm256_xor_si256(c2[7165],_mm256_xor_si256(c2[5056],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[5856],_mm256_xor_si256(c2[7708],_mm256_xor_si256(c2[8245],_mm256_xor_si256(c2[6924],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[4562],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[5628],_mm256_xor_si256(c2[5632],_mm256_xor_si256(c2[877],_mm256_xor_si256(c2[5640],_mm256_xor_si256(c2[2737],_mm256_xor_si256(c2[628],_mm256_xor_si256(c2[1956],_mm256_xor_si256(c2[7501],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[2765],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[3816],_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[4625],_mm256_xor_si256(c2[7265],_mm256_xor_si256(c2[7274],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[4635],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[4117],_mm256_xor_si256(c2[4910],_mm256_xor_si256(c2[8340],_mm256_xor_si256(c2[963],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[8090],_mm256_xor_si256(c2[447],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[8367],_mm256_xor_si256(c2[5477],_mm256_xor_si256(c2[3361],_mm256_xor_si256(c2[7061],_mm256_xor_si256(c2[7601],_mm256_xor_si256(c2[3636],_mm256_xor_si256(c2[6017],_mm256_xor_si256(c2[7611],_mm256_xor_si256(c2[7083],_mm256_xor_si256(c2[6820],_mm256_xor_si256(c2[8149],_mm256_xor_si256(c2[5772],_mm256_xor_si256(c2[6302],_mm256_xor_si256(c2[5520],_mm256_xor_si256(c2[6314],_mm256_xor_si256(c2[4731],_mm256_xor_si256(c2[1309],_mm256_xor_si256(c2[7121],c2[4213])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[210]=simde_mm256_xor_si256(c2[7661],simde_mm256_xor_si256(c2[5812],simde_mm256_xor_si256(c2[6074],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[7670],simde_mm256_xor_si256(c2[1072],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[7419],simde_mm256_xor_si256(c2[5836],simde_mm256_xor_si256(c2[4776],simde_mm256_xor_si256(c2[7165],simde_mm256_xor_si256(c2[5056],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[5856],simde_mm256_xor_si256(c2[7708],simde_mm256_xor_si256(c2[8245],simde_mm256_xor_si256(c2[6924],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[4562],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[5628],simde_mm256_xor_si256(c2[5632],simde_mm256_xor_si256(c2[877],simde_mm256_xor_si256(c2[5640],simde_mm256_xor_si256(c2[2737],simde_mm256_xor_si256(c2[628],simde_mm256_xor_si256(c2[1956],simde_mm256_xor_si256(c2[7501],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[2765],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[3816],simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[4625],simde_mm256_xor_si256(c2[7265],simde_mm256_xor_si256(c2[7274],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[4635],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[4117],simde_mm256_xor_si256(c2[4910],simde_mm256_xor_si256(c2[8340],simde_mm256_xor_si256(c2[963],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[8090],simde_mm256_xor_si256(c2[447],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[8367],simde_mm256_xor_si256(c2[5477],simde_mm256_xor_si256(c2[3361],simde_mm256_xor_si256(c2[7061],simde_mm256_xor_si256(c2[7601],simde_mm256_xor_si256(c2[3636],simde_mm256_xor_si256(c2[6017],simde_mm256_xor_si256(c2[7611],simde_mm256_xor_si256(c2[7083],simde_mm256_xor_si256(c2[6820],simde_mm256_xor_si256(c2[8149],simde_mm256_xor_si256(c2[5772],simde_mm256_xor_si256(c2[6302],simde_mm256_x
or_si256(c2[5520],simde_mm256_xor_si256(c2[6314],simde_mm256_xor_si256(c2[4731],simde_mm256_xor_si256(c2[1309],simde_mm256_xor_si256(c2[7121],c2[4213])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[216]=_mm256_xor_si256(c2[4752],_mm256_xor_si256(c2[6771],_mm256_xor_si256(c2[2559],c2[5760])));
+     d2[216]=simde_mm256_xor_si256(c2[4752],simde_mm256_xor_si256(c2[6771],simde_mm256_xor_si256(c2[2559],c2[5760])));
 
 //row: 37
-     d2[222]=_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[1600],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[2916],_mm256_xor_si256(c2[3446],_mm256_xor_si256(c2[1349],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[7417],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[1359],_mm256_xor_si256(c2[7697],_mm256_xor_si256(c2[5319],_mm256_xor_si256(c2[5328],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[2175],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[5345],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[3244],_mm256_xor_si256(c2[5618],_mm256_xor_si256(c2[7203],_mm256_xor_si256(c2[8269],_mm256_xor_si256(c2[8273],_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[8281],_mm256_xor_si256(c2[5378],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[4333],_mm256_xor_si256(c2[4597],_mm256_xor_si256(c2[1695],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[5136],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[5665],_mm256_xor_si256(c2[6457],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[7260],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[1204],_mm256_xor_si256(c2[1468],_mm256_xor_si256(c2[2786],_mm256_xor_si256(c2[7276],_mm256_xor_si256(c2[6494],_mm256_xor_si256(c2[6758],_mm256_xor_si256(c2[7551],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[4910],_mm256_xor_si256(c2[3604],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[2284],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[3088],_mm256_xor_si256(c2[6252],_mm256_xor_si256(c2[2561],_mm256_xor_si256(c2[7848],_mm256_xor_si256(c2[8112],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[1789],_mm256_xor_si256(c2[6277],_mm256_xor_si256(c2[205],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[1805],_mm256_xor_si256(c2[1277],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[2079],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[8
413],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[7897],_mm256_xor_si256(c2[8161],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[7372],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[3950],_mm256_xor_si256(c2[1309],c2[6854])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[222]=simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[1600],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[2916],simde_mm256_xor_si256(c2[3446],simde_mm256_xor_si256(c2[1349],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[7417],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[1359],simde_mm256_xor_si256(c2[7697],simde_mm256_xor_si256(c2[5319],simde_mm256_xor_si256(c2[5328],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[2175],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[5345],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[3244],simde_mm256_xor_si256(c2[5618],simde_mm256_xor_si256(c2[7203],simde_mm256_xor_si256(c2[8269],simde_mm256_xor_si256(c2[8273],simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[8281],simde_mm256_xor_si256(c2[5378],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[4333],simde_mm256_xor_si256(c2[4597],simde_mm256_xor_si256(c2[1695],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[5136],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[5665],simde_mm256_xor_si256(c2[6457],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[7260],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[1204],simde_mm256_xor_si256(c2[1468],simde_mm256_xor_si256(c2[2786],simde_mm256_xor_si256(c2[7276],simde_mm256_xor_si256(c2[6494],simde_mm256_xor_si256(c2[6758],simde_mm256_xor_si256(c2[7551],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[4910],simde_mm256_xor_si256(c2[3604],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[2284],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[3088],simde_mm256_xor_si256(c2[6252],simde_mm256_xor_si256(c2[2561],simde_mm256_xor_si256(c2[7848],simde_mm256_
xor_si256(c2[8112],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[1789],simde_mm256_xor_si256(c2[6277],simde_mm256_xor_si256(c2[205],simde_mm256_xor_si256(c2[1541],simde_mm256_xor_si256(c2[1805],simde_mm256_xor_si256(c2[1277],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[2079],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[8413],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[7897],simde_mm256_xor_si256(c2[8161],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[7372],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[3950],simde_mm256_xor_si256(c2[1309],c2[6854])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[228]=_mm256_xor_si256(c2[6605],_mm256_xor_si256(c2[4601],_mm256_xor_si256(c2[387],c2[5952])));
+     d2[228]=simde_mm256_xor_si256(c2[6605],simde_mm256_xor_si256(c2[4601],simde_mm256_xor_si256(c2[387],c2[5952])));
 
 //row: 39
-     d2[234]=_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[7694],_mm256_xor_si256(c2[6952],c2[2607])));
+     d2[234]=simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[7694],simde_mm256_xor_si256(c2[6952],c2[2607])));
 
 //row: 40
-     d2[240]=_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[1417],c2[6543]));
+     d2[240]=simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[1417],c2[6543]));
 
 //row: 41
-     d2[246]=_mm256_xor_si256(c2[5293],_mm256_xor_si256(c2[6903],_mm256_xor_si256(c2[3016],c2[219])));
+     d2[246]=simde_mm256_xor_si256(c2[5293],simde_mm256_xor_si256(c2[6903],simde_mm256_xor_si256(c2[3016],c2[219])));
 
 //row: 42
-     d2[252]=_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[2113],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[3963],_mm256_xor_si256(c2[4491],_mm256_xor_si256(c2[3709],_mm256_xor_si256(c2[5294],_mm256_xor_si256(c2[5558],_mm256_xor_si256(c2[4503],_mm256_xor_si256(c2[4767],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[1875],_mm256_xor_si256(c2[557],_mm256_xor_si256(c2[821],_mm256_xor_si256(c2[3204],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[6900],_mm256_xor_si256(c2[7164],_mm256_xor_si256(c2[7179],_mm256_xor_si256(c2[1637],_mm256_xor_si256(c2[1901],_mm256_xor_si256(c2[3483],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[3744],_mm256_xor_si256(c2[4284],_mm256_xor_si256(c2[2969],_mm256_xor_si256(c2[6926],_mm256_xor_si256(c2[7190],_mm256_xor_si256(c2[5089],_mm256_xor_si256(c2[7205],_mm256_xor_si256(c2[7469],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[1407],_mm256_xor_si256(c2[1671],_mm256_xor_si256(c2[5105],_mm256_xor_si256(c2[5369],_mm256_xor_si256(c2[1685],_mm256_xor_si256(c2[6965],_mm256_xor_si256(c2[7229],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[5114],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[6710],_mm256_xor_si256(c2[6974],_mm256_xor_si256(c2[7251],_mm256_xor_si256(c2[7252],_mm256_xor_si256(c2[7516],_mm256_xor_si256(c2[8044],_mm256_xor_si256(c2[8308],_mm256_xor_si256(c2[2248],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3304],_mm256_xor_si256(c2[3313],_mm256_xor_si256(c2[4637],_mm256_xor_si256(c2[410],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[685],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[4121],_mm256_xor_si256(c2[4385],_mm256_xor_si256(c2[5449],_mm256_xor_si256(c2[4923],_mm256_xor_si256(c2[5187],_mm256_xor_si256(c2[3865],_mm256_xor_si256(c2[4129],_mm256_xor_si256(c2[4933],_mm256_xor_si256(c2[8103],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[4406],_mm256_xor_si256(c2[
1516],_mm256_xor_si256(c2[7853],_mm256_xor_si256(c2[2836],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[7864],_mm256_xor_si256(c2[8128],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[3650],_mm256_xor_si256(c2[2858],_mm256_xor_si256(c2[3122],_mm256_xor_si256(c2[2595],_mm256_xor_si256(c2[2859],_mm256_xor_si256(c2[4188],_mm256_xor_si256(c2[1817],_mm256_xor_si256(c2[2077],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[2089],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[5801],_mm256_xor_si256(c2[3160],_mm256_xor_si256(c2[8441],c2[252]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[252]=simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[2113],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[3963],simde_mm256_xor_si256(c2[4491],simde_mm256_xor_si256(c2[3709],simde_mm256_xor_si256(c2[5294],simde_mm256_xor_si256(c2[5558],simde_mm256_xor_si256(c2[4503],simde_mm256_xor_si256(c2[4767],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[1875],simde_mm256_xor_si256(c2[557],simde_mm256_xor_si256(c2[821],simde_mm256_xor_si256(c2[3204],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[6900],simde_mm256_xor_si256(c2[7164],simde_mm256_xor_si256(c2[7179],simde_mm256_xor_si256(c2[1637],simde_mm256_xor_si256(c2[1901],simde_mm256_xor_si256(c2[3483],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[3744],simde_mm256_xor_si256(c2[4284],simde_mm256_xor_si256(c2[2969],simde_mm256_xor_si256(c2[6926],simde_mm256_xor_si256(c2[7190],simde_mm256_xor_si256(c2[5089],simde_mm256_xor_si256(c2[7205],simde_mm256_xor_si256(c2[7469],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[1407],simde_mm256_xor_si256(c2[1671],simde_mm256_xor_si256(c2[5105],simde_mm256_xor_si256(c2[5369],simde_mm256_xor_si256(c2[1685],simde_mm256_xor_si256(c2[6965],simde_mm256_xor_si256(c2[7229],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[5114],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[6710],simde_mm256_xor_si256(c2[6974],simde_mm256_xor_si256(c2[7251],simde_mm256_xor_si256(c2[7252],simde_mm256_xor_si256(c2[7516],simde_mm256_xor_si256(c2[8044],simde_mm256_xor_si256(c2[8308],simde_mm256_xor_si256(c2[2248],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3304],simde_mm256_xor_si256(c2[3313],simde_mm256_xor_si256(c2[4637],simde_mm256_xor_si256(c2[410],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[685],simde_mm256
_xor_si256(c2[949],simde_mm256_xor_si256(c2[4121],simde_mm256_xor_si256(c2[4385],simde_mm256_xor_si256(c2[5449],simde_mm256_xor_si256(c2[4923],simde_mm256_xor_si256(c2[5187],simde_mm256_xor_si256(c2[3865],simde_mm256_xor_si256(c2[4129],simde_mm256_xor_si256(c2[4933],simde_mm256_xor_si256(c2[8103],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[4406],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[7853],simde_mm256_xor_si256(c2[2836],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[7864],simde_mm256_xor_si256(c2[8128],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[3650],simde_mm256_xor_si256(c2[2858],simde_mm256_xor_si256(c2[3122],simde_mm256_xor_si256(c2[2595],simde_mm256_xor_si256(c2[2859],simde_mm256_xor_si256(c2[4188],simde_mm256_xor_si256(c2[1817],simde_mm256_xor_si256(c2[2077],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[2089],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[5801],simde_mm256_xor_si256(c2[3160],simde_mm256_xor_si256(c2[8441],c2[252]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[258]=_mm256_xor_si256(c2[7394],_mm256_xor_si256(c2[5545],_mm256_xor_si256(c2[5813],_mm256_xor_si256(c2[7393],_mm256_xor_si256(c2[7657],_mm256_xor_si256(c2[7409],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[8197],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[4503],_mm256_xor_si256(c2[7152],_mm256_xor_si256(c2[5569],_mm256_xor_si256(c2[4515],_mm256_xor_si256(c2[6904],_mm256_xor_si256(c2[4789],_mm256_xor_si256(c2[2153],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[5595],_mm256_xor_si256(c2[7177],_mm256_xor_si256(c2[7441],_mm256_xor_si256(c2[7984],_mm256_xor_si256(c2[6663],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[2716],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[4301],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[5365],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[2476],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[1695],_mm256_xor_si256(c2[7240],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[2763],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[3555],_mm256_xor_si256(c2[5942],_mm256_xor_si256(c2[4358],_mm256_xor_si256(c2[6734],_mm256_xor_si256(c2[6998],_mm256_xor_si256(c2[7013],_mm256_xor_si256(c2[8331],_mm256_xor_si256(c2[4104],_mm256_xor_si256(c2[4368],_mm256_xor_si256(c2[3856],_mm256_xor_si256(c2[4649],_mm256_xor_si256(c2[7815],_mm256_xor_si256(c2[8079],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[7565],_mm256_xor_si256(c2[7829],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[3350],_mm256_xor_si256(c2[8100],_mm256_xor_si256(c2[5210],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[6530],_mm256_xor_si256(c2[6794],_mm256_xor_si256(c2[1252],_mm256_xor_si256(c2[7334],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[5486],_mm256_xor_si256(c2[5750],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[6816],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[6553],_mm256_xor_si256(c2[4971],_mm256_xor_si256(c2[78
88],_mm256_xor_si256(c2[5511],_mm256_xor_si256(c2[6041],_mm256_xor_si256(c2[5259],_mm256_xor_si256(c2[6053],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[4464],_mm256_xor_si256(c2[1048],_mm256_xor_si256(c2[6854],_mm256_xor_si256(c2[3688],c2[3952]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[258]=simde_mm256_xor_si256(c2[7394],simde_mm256_xor_si256(c2[5545],simde_mm256_xor_si256(c2[5813],simde_mm256_xor_si256(c2[7393],simde_mm256_xor_si256(c2[7657],simde_mm256_xor_si256(c2[7409],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[8197],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[4503],simde_mm256_xor_si256(c2[7152],simde_mm256_xor_si256(c2[5569],simde_mm256_xor_si256(c2[4515],simde_mm256_xor_si256(c2[6904],simde_mm256_xor_si256(c2[4789],simde_mm256_xor_si256(c2[2153],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[5595],simde_mm256_xor_si256(c2[7177],simde_mm256_xor_si256(c2[7441],simde_mm256_xor_si256(c2[7984],simde_mm256_xor_si256(c2[6663],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[2716],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[4301],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[5365],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[2476],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[1695],simde_mm256_xor_si256(c2[7240],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[2763],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[3555],simde_mm256_xor_si256(c2[5942],simde_mm256_xor_si256(c2[4358],simde_mm256_xor_si256(c2[6734],simde_mm256_xor_si256(c2[6998],simde_mm256_xor_si256(c2[7013],simde_mm256_xor_si256(c2[8331],simde_mm256_xor_si256(c2[4104],simde_mm256_xor_si256(c2[4368],simde_mm256_xor_si256(c2[3856],simde_mm256_xor_si256(c2[4649],simde_mm256_xor_si256(c2[7815],simde_mm256_xor_si256(c2[8079],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[7565],simde_mm256_xor_si256(c2[7829],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[3350],simde_mm256_xor_si256(c2[8100],simde_mm256_xor_si256(c2[5210],simde_mm256_xor_si256(c2[3100],simde_mm256_xo
r_si256(c2[6530],simde_mm256_xor_si256(c2[6794],simde_mm256_xor_si256(c2[1252],simde_mm256_xor_si256(c2[7334],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[5486],simde_mm256_xor_si256(c2[5750],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[6816],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[6553],simde_mm256_xor_si256(c2[4971],simde_mm256_xor_si256(c2[7888],simde_mm256_xor_si256(c2[5511],simde_mm256_xor_si256(c2[6041],simde_mm256_xor_si256(c2[5259],simde_mm256_xor_si256(c2[6053],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[4464],simde_mm256_xor_si256(c2[1048],simde_mm256_xor_si256(c2[6854],simde_mm256_xor_si256(c2[3688],c2[3952]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[264]=_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[8186],_mm256_xor_si256(c2[1589],_mm256_xor_si256(c2[4226],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[2393],_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[7948],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[7168],_mm256_xor_si256(c2[4790],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[7968],_mm256_xor_si256(c2[1373],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[589],_mm256_xor_si256(c2[4816],_mm256_xor_si256(c2[2715],_mm256_xor_si256(c2[5089],_mm256_xor_si256(c2[6674],_mm256_xor_si256(c2[7740],_mm256_xor_si256(c2[7744],_mm256_xor_si256(c2[2989],_mm256_xor_si256(c2[3782],_mm256_xor_si256(c2[7752],_mm256_xor_si256(c2[4849],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[4068],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[4600],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[4877],_mm256_xor_si256(c2[5136],_mm256_xor_si256(c2[5928],_mm256_xor_si256(c2[8321],_mm256_xor_si256(c2[6737],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[939],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[6747],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[7022],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[3075],_mm256_xor_si256(c2[2813],_mm256_xor_si256(c2[1755],_mm256_xor_si256(c2[2559],_mm256_xor_si256(c2[5729],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[7589],_mm256_xor_si256(c2[5473],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[5748],_mm256_xor_si256(c2[8129],_mm256_xor_si256(c2[1276],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[485],_mm256_xor_si256(c2[1814],_mm256_xor_si256(c2[7884],_mm256_xor_si256(c2[8414],_mm256_xor_si256(c2[7632],_mm256_xor_si256(c2[8426],_mm256_xor_si256(c2[6843],_mm256_xor_si256(c2[3421],_mm256_xor_si256(c2[780],c2[6325])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[264]=simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[8186],simde_mm256_xor_si256(c2[1589],simde_mm256_xor_si256(c2[4226],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[2393],simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[7948],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[7168],simde_mm256_xor_si256(c2[4790],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[7968],simde_mm256_xor_si256(c2[1373],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[589],simde_mm256_xor_si256(c2[4816],simde_mm256_xor_si256(c2[2715],simde_mm256_xor_si256(c2[5089],simde_mm256_xor_si256(c2[6674],simde_mm256_xor_si256(c2[7740],simde_mm256_xor_si256(c2[7744],simde_mm256_xor_si256(c2[2989],simde_mm256_xor_si256(c2[3782],simde_mm256_xor_si256(c2[7752],simde_mm256_xor_si256(c2[4849],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[4068],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[4600],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[4877],simde_mm256_xor_si256(c2[5136],simde_mm256_xor_si256(c2[5928],simde_mm256_xor_si256(c2[8321],simde_mm256_xor_si256(c2[6737],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[939],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[6747],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[7022],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[3075],simde_mm256_xor_si256(c2[2813],simde_mm256_xor_si256(c2[1755],simde_mm256_xor_si256(c2[2559],simde_mm256_xor_si256(c2[5729],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[7589],simde_mm256_xor_si256(c2[5473],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[5748],simde_mm256_xor_si256(c2[8129],simde_mm256_xor_si256(c2[1276],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[485],simde_mm256_xor_si256(c2[1814],simde_mm256_xor_si256(c2[7884],simde_mm256_xor_si256(c2[8414],simde_mm25
6_xor_si256(c2[7632],simde_mm256_xor_si256(c2[8426],simde_mm256_xor_si256(c2[6843],simde_mm256_xor_si256(c2[3421],simde_mm256_xor_si256(c2[780],c2[6325])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[270]=_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[5620],c2[4080]));
+     d2[270]=simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[5620],c2[4080]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc224_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc224_byte.c
index ed2ed129f9c889fdc7f574384e6a4a20d7f9b250..1a109e64f17609f7b2d506b085dc1228fc18705b 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc224_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc224_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc224_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[9554],_mm256_xor_si256(c2[4008],_mm256_xor_si256(c2[4626],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[4942],_mm256_xor_si256(c2[3408],_mm256_xor_si256(c2[5558],_mm256_xor_si256(c2[9270],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[9576],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[7130],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[4369],_mm256_xor_si256(c2[4989],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[7157],_mm256_xor_si256(c2[6235],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[8097],_mm256_xor_si256(c2[4089],_mm256_xor_si256(c2[9034],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[4414],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[7816],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[7212],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[4150],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[7552],_mm256_xor_si256(c2[4472],_mm256_xor_si256(c2[8474],_mm256_xor_si256(c2[6636],_mm256_xor_si256(c2[9102],_mm256_xor_si256(c2[6642],_mm256_xor_si256(c2[7272],_mm256_xor_si256(c2[4188],_mm256_xor_si256(c2[6038],_mm256_xor_si256(c2[4818],_mm256_xor_si256(c2[6978],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[4522],_mm256_xor_si256(c2[2982],_mm256_xor_si256(c2[4216],_mm256_xor_si256(c2[2074],_mm256_xor_si256(c2[7003],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[4260],_mm256_xor_si256(c2[6723],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[9814],_mm256_xor_si256(c2[9820],_mm256_xor_si256(c2[2747],_mm256_xor_si256(c2[8290],_mm256_xor_si256(c2[9520],_mm256_xor_si256(c2[5532],_mm256_xor_si256(c2[8615],c2[9538]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[9554],simde_mm256_xor_si256(c2[4008],simde_mm256_xor_si256(c2[4626],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[4942],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[5558],simde_mm256_xor_si256(c2[9270],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[9576],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[7130],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[4369],simde_mm256_xor_si256(c2[4989],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[7157],simde_mm256_xor_si256(c2[6235],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[8097],simde_mm256_xor_si256(c2[4089],simde_mm256_xor_si256(c2[9034],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[4414],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[7816],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[7212],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[4150],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[7552],simde_mm256_xor_si256(c2[4472],simde_mm256_xor_si256(c2[8474],simde_mm256_xor_si256(c2[6636],simde_mm256_xor_si256(c2[9102],simde_mm256_xor_si256(c2[6642],simde_mm256_xor_si256(c2[7272],simde_mm256_xor_si256(c2[4188],simde_mm256_xor_si256(c2[6038],simde_mm256_xor_si256(c2[4818],simde_mm256_xor_si256(c2[6978],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[4522],simde_mm256_xor_si256(c2[2982],simde_mm256_xor_si256(c2[4216],simde_mm256_xor_si256(c2[2074],simde_mm256_xor_si256(c2[7003],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[4260],simde_mm256_xor_si256(c2[6723],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[9814],simde_mm256_xor_si256(c2[9820],simde_mm256_xor_si256(c2[2747],simde_mm256_xor_si256(c2[8290],simde_mm256_xor_si256(c2[9520],simde_mm
256_xor_si256(c2[5532],simde_mm256_xor_si256(c2[8615],c2[9538]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[7]=_mm256_xor_si256(c2[9554],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[4316],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[4942],_mm256_xor_si256(c2[5250],_mm256_xor_si256(c2[3716],_mm256_xor_si256(c2[5866],_mm256_xor_si256(c2[9270],_mm256_xor_si256(c2[9578],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[8668],_mm256_xor_si256(c2[7438],_mm256_xor_si256(c2[1895],_mm256_xor_si256(c2[4677],_mm256_xor_si256(c2[5297],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[7465],_mm256_xor_si256(c2[6543],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[8405],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[9342],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[8124],_mm256_xor_si256(c2[5348],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[7520],_mm256_xor_si256(c2[5675],_mm256_xor_si256(c2[4150],_mm256_xor_si256(c2[4458],_mm256_xor_si256(c2[2298],_mm256_xor_si256(c2[1065],_mm256_xor_si256(c2[7552],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[4780],_mm256_xor_si256(c2[8782],_mm256_xor_si256(c2[6636],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[9410],_mm256_xor_si256(c2[6950],_mm256_xor_si256(c2[7272],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[6346],_mm256_xor_si256(c2[5126],_mm256_xor_si256(c2[7286],_mm256_xor_si256(c2[2046],_mm256_xor_si256(c2[4522],_mm256_xor_si256(c2[4830],_mm256_xor_si256(c2[3290],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[2074],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[7311],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[4260],_mm256_xor_si256(c2[4568],_mm256_xor_si256(c2[7031],_mm256_xor_si256(c2[1487],_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[2428],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[274
7],_mm256_xor_si256(c2[3055],_mm256_xor_si256(c2[8598],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[5532],_mm256_xor_si256(c2[5840],_mm256_xor_si256(c2[8923],c2[9846])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[7]=simde_mm256_xor_si256(c2[9554],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[4316],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[4942],simde_mm256_xor_si256(c2[5250],simde_mm256_xor_si256(c2[3716],simde_mm256_xor_si256(c2[5866],simde_mm256_xor_si256(c2[9270],simde_mm256_xor_si256(c2[9578],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[8668],simde_mm256_xor_si256(c2[7438],simde_mm256_xor_si256(c2[1895],simde_mm256_xor_si256(c2[4677],simde_mm256_xor_si256(c2[5297],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[7465],simde_mm256_xor_si256(c2[6543],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[8405],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[9342],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[8124],simde_mm256_xor_si256(c2[5348],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[7520],simde_mm256_xor_si256(c2[5675],simde_mm256_xor_si256(c2[4150],simde_mm256_xor_si256(c2[4458],simde_mm256_xor_si256(c2[2298],simde_mm256_xor_si256(c2[1065],simde_mm256_xor_si256(c2[7552],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[4780],simde_mm256_xor_si256(c2[8782],simde_mm256_xor_si256(c2[6636],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[9410],simde_mm256_xor_si256(c2[6950],simde_mm256_xor_si256(c2[7272],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[6346],simde_mm256_xor_si256(c2[5126],simde_mm256_xor_si256(c2[7286],simde_mm256_xor_si256(c2[2046],simde_mm256_xor_si256(c2[4522],simde_mm256_xor_si256(c2[4830],simde_mm256_xor_si256(c2[3290],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[2074],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[7311],simde_mm256_
xor_si256(c2[535],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[4260],simde_mm256_xor_si256(c2[4568],simde_mm256_xor_si256(c2[7031],simde_mm256_xor_si256(c2[1487],simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[2428],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[2747],simde_mm256_xor_si256(c2[3055],simde_mm256_xor_si256(c2[8598],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[5532],simde_mm256_xor_si256(c2[5840],simde_mm256_xor_si256(c2[8923],c2[9846])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[14]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[4316],_mm256_xor_si256(c2[4626],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[5250],_mm256_xor_si256(c2[3408],_mm256_xor_si256(c2[3716],_mm256_xor_si256(c2[5558],_mm256_xor_si256(c2[5866],_mm256_xor_si256(c2[9578],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[9576],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[8668],_mm256_xor_si256(c2[7438],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[1895],_mm256_xor_si256(c2[4677],_mm256_xor_si256(c2[4989],_mm256_xor_si256(c2[5297],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[7465],_mm256_xor_si256(c2[6235],_mm256_xor_si256(c2[6543],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[8097],_mm256_xor_si256(c2[8405],_mm256_xor_si256(c2[4089],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[9342],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[4414],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[7816],_mm256_xor_si256(c2[8124],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[5348],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[7520],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[5675],_mm256_xor_si256(c2[4458],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[2298],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[1065],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[4780],_mm256_xor_si256(c2[8474],_mm256_xor_si256(c2[8782],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[9410],_mm256_xor_si256(c2[6642],_mm256_xor_si256(c2[6950],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[4188],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[6038],_mm256_xor_si256(c2[6346],_mm256_xor_si256(c2[5126],_mm256_xor_si256(c2[6978],_mm256_xor_si256(c2[7286],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[2046],_mm256_xor_si256(c2[4830],_mm256_xor_si256(c2[3290],_mm256_xor_si256(c2[4216],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[7311],_mm256_xor_si256(c2
[227],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[4568],_mm256_xor_si256(c2[6723],_mm256_xor_si256(c2[7031],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[1487],_mm256_xor_si256(c2[2428],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[9820],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[3055],_mm256_xor_si256(c2[8290],_mm256_xor_si256(c2[8598],_mm256_xor_si256(c2[9520],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[5840],_mm256_xor_si256(c2[8923],_mm256_xor_si256(c2[9538],c2[9846]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[14]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[4316],simde_mm256_xor_si256(c2[4626],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[5250],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[3716],simde_mm256_xor_si256(c2[5558],simde_mm256_xor_si256(c2[5866],simde_mm256_xor_si256(c2[9578],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[9576],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[8668],simde_mm256_xor_si256(c2[7438],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[1895],simde_mm256_xor_si256(c2[4677],simde_mm256_xor_si256(c2[4989],simde_mm256_xor_si256(c2[5297],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[7465],simde_mm256_xor_si256(c2[6235],simde_mm256_xor_si256(c2[6543],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[8097],simde_mm256_xor_si256(c2[8405],simde_mm256_xor_si256(c2[4089],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[9342],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[4414],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[7816],simde_mm256_xor_si256(c2[8124],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[5348],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[7520],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[5675],simde_mm256_xor_si256(c2[4458],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[2298],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[1065],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[4780],simde_mm256_xor_si256(c2[8474],simde_mm256_xor_si256(c2[8782],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[9410],simde_mm256_xor_si256(c2[6642],simde_mm256_xor_si256(c2[6950],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[4188],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[6038],simde_mm256
_xor_si256(c2[6346],simde_mm256_xor_si256(c2[5126],simde_mm256_xor_si256(c2[6978],simde_mm256_xor_si256(c2[7286],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[2046],simde_mm256_xor_si256(c2[4830],simde_mm256_xor_si256(c2[3290],simde_mm256_xor_si256(c2[4216],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[7311],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[4568],simde_mm256_xor_si256(c2[6723],simde_mm256_xor_si256(c2[7031],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[1487],simde_mm256_xor_si256(c2[2428],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[9820],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[3055],simde_mm256_xor_si256(c2[8290],simde_mm256_xor_si256(c2[8598],simde_mm256_xor_si256(c2[9520],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[5840],simde_mm256_xor_si256(c2[8923],simde_mm256_xor_si256(c2[9538],c2[9846]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[21]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[4316],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[5250],_mm256_xor_si256(c2[3716],_mm256_xor_si256(c2[5558],_mm256_xor_si256(c2[5866],_mm256_xor_si256(c2[9578],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[8668],_mm256_xor_si256(c2[7438],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[1895],_mm256_xor_si256(c2[4677],_mm256_xor_si256(c2[5297],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[7465],_mm256_xor_si256(c2[6543],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[8405],_mm256_xor_si256(c2[4089],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[9342],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[4414],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[8124],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[5348],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[7520],_mm256_xor_si256(c2[5675],_mm256_xor_si256(c2[4458],_mm256_xor_si256(c2[2298],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[1065],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[4780],_mm256_xor_si256(c2[8474],_mm256_xor_si256(c2[8782],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[9410],_mm256_xor_si256(c2[6642],_mm256_xor_si256(c2[6950],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[6038],_mm256_xor_si256(c2[6346],_mm256_xor_si256(c2[5126],_mm256_xor_si256(c2[7286],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[2046],_mm256_xor_si256(c2[4830],_mm256_xor_si256(c2[3290],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[7311],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[4568],_mm256_xor_si256(c2[7031],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[1487],_mm256_xor_si256(c2[2428],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[305
5],_mm256_xor_si256(c2[8598],_mm256_xor_si256(c2[9520],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[5840],_mm256_xor_si256(c2[8923],_mm256_xor_si256(c2[9538],c2[9846])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[21]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[4316],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[5250],simde_mm256_xor_si256(c2[3716],simde_mm256_xor_si256(c2[5558],simde_mm256_xor_si256(c2[5866],simde_mm256_xor_si256(c2[9578],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[8668],simde_mm256_xor_si256(c2[7438],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[1895],simde_mm256_xor_si256(c2[4677],simde_mm256_xor_si256(c2[5297],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[7465],simde_mm256_xor_si256(c2[6543],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[8405],simde_mm256_xor_si256(c2[4089],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[9342],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[4414],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[8124],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[5348],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[7520],simde_mm256_xor_si256(c2[5675],simde_mm256_xor_si256(c2[4458],simde_mm256_xor_si256(c2[2298],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[1065],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[4780],simde_mm256_xor_si256(c2[8474],simde_mm256_xor_si256(c2[8782],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[9410],simde_mm256_xor_si256(c2[6642],simde_mm256_xor_si256(c2[6950],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[6038],simde_mm256_xor_si256(c2[6346],simde_mm256_xor_si256(c2[5126],simde_mm256_xor_si256(c2[7286],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[2046],simde_mm256_xor_si256(c2[4830],simde_mm256_xor_si256(c2[3290],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[7311],simde_mm256_xor_si256(c2[227],simde_mm256_
xor_si256(c2[535],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[4568],simde_mm256_xor_si256(c2[7031],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[1487],simde_mm256_xor_si256(c2[2428],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[3055],simde_mm256_xor_si256(c2[8598],simde_mm256_xor_si256(c2[9520],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[5840],simde_mm256_xor_si256(c2[8923],simde_mm256_xor_si256(c2[9538],c2[9846])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[28]=_mm256_xor_si256(c2[3085],c2[3094]);
+     d2[28]=simde_mm256_xor_si256(c2[3085],c2[3094]);
 
 //row: 5
-     d2[35]=_mm256_xor_si256(c2[7396],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[2468],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[2791],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[3407],_mm256_xor_si256(c2[8331],_mm256_xor_si256(c2[7112],_mm256_xor_si256(c2[1882],_mm256_xor_si256(c2[7425],_mm256_xor_si256(c2[6202],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[9284],_mm256_xor_si256(c2[6820],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[9300],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[4999],_mm256_xor_si256(c2[4077],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[5939],_mm256_xor_si256(c2[1938],_mm256_xor_si256(c2[6876],_mm256_xor_si256(c2[7490],_mm256_xor_si256(c2[2256],_mm256_xor_si256(c2[9353],_mm256_xor_si256(c2[5658],_mm256_xor_si256(c2[2889],_mm256_xor_si256(c2[7830],_mm256_xor_si256(c2[5054],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[1992],_mm256_xor_si256(c2[9694],_mm256_xor_si256(c2[8461],_mm256_xor_si256(c2[5394],_mm256_xor_si256(c2[2314],_mm256_xor_si256(c2[6316],_mm256_xor_si256(c2[4485],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[4484],_mm256_xor_si256(c2[5098],_mm256_xor_si256(c2[5114],_mm256_xor_si256(c2[2030],_mm256_xor_si256(c2[3880],_mm256_xor_si256(c2[2660],_mm256_xor_si256(c2[4820],_mm256_xor_si256(c2[9442],_mm256_xor_si256(c2[2371],_mm256_xor_si256(c2[831],_mm256_xor_si256(c2[2058],_mm256_xor_si256(c2[9778],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[7005],_mm256_xor_si256(c2[8557],_mm256_xor_si256(c2[9789],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[8876],_mm256_xor_si256(c2[9817],_mm256_xor_si256(c2[7663],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[589],_mm256_xor_si256(c2[6132],_mm256_xor_si256(c2[7369],_mm256_xor_si256(c2[3374],_mm256_xor_si256(c2[6457],_mm256_xor_si256(c2[7380],c2[914]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[35]=simde_mm256_xor_si256(c2[7396],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[2468],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[2791],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[3407],simde_mm256_xor_si256(c2[8331],simde_mm256_xor_si256(c2[7112],simde_mm256_xor_si256(c2[1882],simde_mm256_xor_si256(c2[7425],simde_mm256_xor_si256(c2[6202],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[9284],simde_mm256_xor_si256(c2[6820],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[9300],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[4999],simde_mm256_xor_si256(c2[4077],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[5939],simde_mm256_xor_si256(c2[1938],simde_mm256_xor_si256(c2[6876],simde_mm256_xor_si256(c2[7490],simde_mm256_xor_si256(c2[2256],simde_mm256_xor_si256(c2[9353],simde_mm256_xor_si256(c2[5658],simde_mm256_xor_si256(c2[2889],simde_mm256_xor_si256(c2[7830],simde_mm256_xor_si256(c2[5054],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[1992],simde_mm256_xor_si256(c2[9694],simde_mm256_xor_si256(c2[8461],simde_mm256_xor_si256(c2[5394],simde_mm256_xor_si256(c2[2314],simde_mm256_xor_si256(c2[6316],simde_mm256_xor_si256(c2[4485],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[4484],simde_mm256_xor_si256(c2[5098],simde_mm256_xor_si256(c2[5114],simde_mm256_xor_si256(c2[2030],simde_mm256_xor_si256(c2[3880],simde_mm256_xor_si256(c2[2660],simde_mm256_xor_si256(c2[4820],simde_mm256_xor_si256(c2[9442],simde_mm256_xor_si256(c2[2371],simde_mm256_xor_si256(c2[831],simde_mm256_xor_si256(c2[2058],simde_mm256_xor_si256(c2[9778],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[7005],simde_mm256_xor_si256(c2[8557],simde_mm256_xor_si256(c2[9789],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[2102],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[8876],simde_mm256_xor_si256(c2[9817],simde_
mm256_xor_si256(c2[7663],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[589],simde_mm256_xor_si256(c2[6132],simde_mm256_xor_si256(c2[7369],simde_mm256_xor_si256(c2[3374],simde_mm256_xor_si256(c2[6457],simde_mm256_xor_si256(c2[7380],c2[914]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[42]=_mm256_xor_si256(c2[9244],_mm256_xor_si256(c2[7171],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[6622],_mm256_xor_si256(c2[9731],_mm256_xor_si256(c2[6091],_mm256_xor_si256(c2[2720],c2[6443])))))));
+     d2[42]=simde_mm256_xor_si256(c2[9244],simde_mm256_xor_si256(c2[7171],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[6622],simde_mm256_xor_si256(c2[9731],simde_mm256_xor_si256(c2[6091],simde_mm256_xor_si256(c2[2720],c2[6443])))))));
 
 //row: 7
-     d2[49]=_mm256_xor_si256(c2[5236],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[6892],c2[9440])))));
+     d2[49]=simde_mm256_xor_si256(c2[5236],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[6892],c2[9440])))));
 
 //row: 8
-     d2[56]=_mm256_xor_si256(c2[3389],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[7087],_mm256_xor_si256(c2[8316],_mm256_xor_si256(c2[7397],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[6468],_mm256_xor_si256(c2[5549],_mm256_xor_si256(c2[5857],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[8639],_mm256_xor_si256(c2[8028],_mm256_xor_si256(c2[7098],_mm256_xor_si256(c2[6179],_mm256_xor_si256(c2[6487],_mm256_xor_si256(c2[9255],_mm256_xor_si256(c2[8336],_mm256_xor_si256(c2[8644],_mm256_xor_si256(c2[9564],_mm256_xor_si256(c2[3112],_mm256_xor_si256(c2[2494],_mm256_xor_si256(c2[7730],_mm256_xor_si256(c2[7112],_mm256_xor_si256(c2[3418],_mm256_xor_si256(c2[2492],_mm256_xor_si256(c2[2800],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[972],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[5284],_mm256_xor_si256(c2[4358],_mm256_xor_si256(c2[4666],_mm256_xor_si256(c2[1274],_mm256_xor_si256(c2[8066],_mm256_xor_si256(c2[7448],_mm256_xor_si256(c2[8686],_mm256_xor_si256(c2[7760],_mm256_xor_si256(c2[8068],_mm256_xor_si256(c2[5293],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[6849],_mm256_xor_si256(c2[6231],_mm256_xor_si256(c2[999],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[9006],_mm256_xor_si256(c2[9314],_mm256_xor_si256(c2[6861],_mm256_xor_si256(c2[6250],_mm256_xor_si256(c2[1932],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[7786],_mm256_xor_si256(c2[6860],_mm256_xor_si256(c2[7168],_mm256_xor_si256(c2[2876],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[3490],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[2872],_mm256_xor_si256(c2[8111],_mm256_xor_si256(c2[7185],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[5353],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[8737],_mm256_xor_si256(c2[7818],_mm256_xor_si256(c2[8126],_mm256_xor_si256(c2[3823],_mm256_xor_si256(c2[3212],_mm256_xor_si256(c2
[1054],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[9064],_mm256_xor_si256(c2[8138],_mm256_xor_si256(c2[8446],_mm256_xor_si256(c2[7840],_mm256_xor_si256(c2[7229],_mm256_xor_si256(c2[5687],_mm256_xor_si256(c2[4761],_mm256_xor_si256(c2[5069],_mm256_xor_si256(c2[4454],_mm256_xor_si256(c2[3528],_mm256_xor_si256(c2[3836],_mm256_xor_si256(c2[1387],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[8162],_mm256_xor_si256(c2[7551],_mm256_xor_si256(c2[2316],_mm256_xor_si256(c2[1390],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[478],_mm256_xor_si256(c2[9722],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[477],_mm256_xor_si256(c2[9413],_mm256_xor_si256(c2[9721],_mm256_xor_si256(c2[7874],_mm256_xor_si256(c2[1107],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[7885],_mm256_xor_si256(c2[6959],_mm256_xor_si256(c2[7267],_mm256_xor_si256(c2[9735],_mm256_xor_si256(c2[8809],_mm256_xor_si256(c2[9117],_mm256_xor_si256(c2[8515],_mm256_xor_si256(c2[7897],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[9749],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[5435],_mm256_xor_si256(c2[4509],_mm256_xor_si256(c2[4817],_mm256_xor_si256(c2[8219],_mm256_xor_si256(c2[7608],_mm256_xor_si256(c2[6679],_mm256_xor_si256(c2[6068],_mm256_xor_si256(c2[7913],_mm256_xor_si256(c2[6987],_mm256_xor_si256(c2[7295],_mm256_xor_si256(c2[5771],_mm256_xor_si256(c2[5153],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[2998],_mm256_xor_si256(c2[3306],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[4550],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[5782],_mm256_xor_si256(c2[4863],_mm256_xor_si256(c2[5171],_mm256_xor_si256(c2[7017],_mm256_xor_si256(c2[6091],_mm256_xor_si256(c2[6399],_mm256_xor_si256(c2[7957],_mm256_xor_si256(c2[7339],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[9494],_mm256_xor_si256(c2[9802],_mm256_xor_si256(c2[4876],_mm256_xor_si256(c2[3950],_mm256_xor_si256(c2[4258],_mm256_xor_si256(c2[5810],_mm256_xor_si256(c2[5199],_mm256_xor_si256(c2[3656],_
mm256_xor_si256(c2[3038],_mm256_xor_si256(c2[3655],_mm256_xor_si256(c2[2736],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[2423],_mm256_xor_si256(c2[6444],_mm256_xor_si256(c2[5826],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[1206],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[3362],_mm256_xor_si256(c2[2436],_mm256_xor_si256(c2[2744],_mm256_xor_si256(c2[9229],_mm256_xor_si256(c2[8611],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[1839],_mm256_xor_si256(c2[3380],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[2762],c2[8303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[56]=simde_mm256_xor_si256(c2[3389],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[7087],simde_mm256_xor_si256(c2[8316],simde_mm256_xor_si256(c2[7397],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[6468],simde_mm256_xor_si256(c2[5549],simde_mm256_xor_si256(c2[5857],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[8639],simde_mm256_xor_si256(c2[8028],simde_mm256_xor_si256(c2[7098],simde_mm256_xor_si256(c2[6179],simde_mm256_xor_si256(c2[6487],simde_mm256_xor_si256(c2[9255],simde_mm256_xor_si256(c2[8336],simde_mm256_xor_si256(c2[8644],simde_mm256_xor_si256(c2[9564],simde_mm256_xor_si256(c2[3112],simde_mm256_xor_si256(c2[2494],simde_mm256_xor_si256(c2[7730],simde_mm256_xor_si256(c2[7112],simde_mm256_xor_si256(c2[3418],simde_mm256_xor_si256(c2[2492],simde_mm256_xor_si256(c2[2800],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[972],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[5284],simde_mm256_xor_si256(c2[4358],simde_mm256_xor_si256(c2[4666],simde_mm256_xor_si256(c2[1274],simde_mm256_xor_si256(c2[8066],simde_mm256_xor_si256(c2[7448],simde_mm256_xor_si256(c2[8686],simde_mm256_xor_si256(c2[7760],simde_mm256_xor_si256(c2[8068],simde_mm256_xor_si256(c2[5293],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[6849],simde_mm256_xor_si256(c2[6231],simde_mm256_xor_si256(c2[999],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[9006],simde_mm256_xor_si256(c2[9314],simde_mm256_xor_si256(c2[6861],simde_mm256_xor_si256(c2[6250],simde_mm256_xor_si256(c2[1932],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[7786],simde_mm256_xor_si256(c2[6860],simde_mm256_xor_si256(c2[7168],simde_mm256_xor_si256(c2[2876],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[3490],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[2872],simde_mm256_xor_si256(c2[8111],simde_mm25
6_xor_si256(c2[7185],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[5353],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[8737],simde_mm256_xor_si256(c2[7818],simde_mm256_xor_si256(c2[8126],simde_mm256_xor_si256(c2[3823],simde_mm256_xor_si256(c2[3212],simde_mm256_xor_si256(c2[1054],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[9064],simde_mm256_xor_si256(c2[8138],simde_mm256_xor_si256(c2[8446],simde_mm256_xor_si256(c2[7840],simde_mm256_xor_si256(c2[7229],simde_mm256_xor_si256(c2[5687],simde_mm256_xor_si256(c2[4761],simde_mm256_xor_si256(c2[5069],simde_mm256_xor_si256(c2[4454],simde_mm256_xor_si256(c2[3528],simde_mm256_xor_si256(c2[3836],simde_mm256_xor_si256(c2[1387],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[8162],simde_mm256_xor_si256(c2[7551],simde_mm256_xor_si256(c2[2316],simde_mm256_xor_si256(c2[1390],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[478],simde_mm256_xor_si256(c2[9722],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[477],simde_mm256_xor_si256(c2[9413],simde_mm256_xor_si256(c2[9721],simde_mm256_xor_si256(c2[7874],simde_mm256_xor_si256(c2[1107],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[7885],simde_mm256_xor_si256(c2[6959],simde_mm256_xor_si256(c2[7267],simde_mm256_xor_si256(c2[9735],simde_mm256_xor_si256(c2[8809],simde_mm256_xor_si256(c2[9117],simde_mm256_xor_si256(c2[8515],simde_mm256_xor_si256(c2[7897],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[9749],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[5435],simde_mm256_xor_si256(c2[4509],simde_mm256_xor_si256(c2[4817],simde_mm256_xor_si256(c2[8219],simde_mm256_xor_si256(c2[7608],simde_mm256_xor_si256(c2[6679],simde_mm256_xor_si256(c2[6068],simde_mm256_xor_si256(c2[7913],simde_mm256_xor_si256(c2[6987],simde_mm256_xor_si256(c2[7295],simde_mm256_xor_si256(c2[5771],simde_mm256_xor_si256(c2[5153],sim
de_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[2998],simde_mm256_xor_si256(c2[3306],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4550],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[5782],simde_mm256_xor_si256(c2[4863],simde_mm256_xor_si256(c2[5171],simde_mm256_xor_si256(c2[7017],simde_mm256_xor_si256(c2[6091],simde_mm256_xor_si256(c2[6399],simde_mm256_xor_si256(c2[7957],simde_mm256_xor_si256(c2[7339],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[9494],simde_mm256_xor_si256(c2[9802],simde_mm256_xor_si256(c2[4876],simde_mm256_xor_si256(c2[3950],simde_mm256_xor_si256(c2[4258],simde_mm256_xor_si256(c2[5810],simde_mm256_xor_si256(c2[5199],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[3038],simde_mm256_xor_si256(c2[3655],simde_mm256_xor_si256(c2[2736],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[2423],simde_mm256_xor_si256(c2[6444],simde_mm256_xor_si256(c2[5826],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[1206],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[3362],simde_mm256_xor_si256(c2[2436],simde_mm256_xor_si256(c2[2744],simde_mm256_xor_si256(c2[9229],simde_mm256_xor_si256(c2[8611],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[1839],simde_mm256_xor_si256(c2[3380],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[2762],c2[8303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[63]=_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[6614],_mm256_xor_si256(c2[2929],_mm256_xor_si256(c2[7884],_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[4259],c2[8294])))))));
+     d2[63]=simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[6614],simde_mm256_xor_si256(c2[2929],simde_mm256_xor_si256(c2[7884],simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[4259],c2[8294])))))));
 
 //row: 10
-     d2[70]=_mm256_xor_si256(c2[5560],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[4373],_mm256_xor_si256(c2[5950],_mm256_xor_si256(c2[734],c2[2355])))));
+     d2[70]=simde_mm256_xor_si256(c2[5560],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[4373],simde_mm256_xor_si256(c2[5950],simde_mm256_xor_si256(c2[734],c2[2355])))));
 
 //row: 11
-     d2[77]=_mm256_xor_si256(c2[5854],_mm256_xor_si256(c2[2470],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[7087],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[8933],_mm256_xor_si256(c2[5857],_mm256_xor_si256(c2[6161],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[7720],_mm256_xor_si256(c2[8028],_mm256_xor_si256(c2[9563],_mm256_xor_si256(c2[6487],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[8644],_mm256_xor_si256(c2[3402],_mm256_xor_si256(c2[5577],_mm256_xor_si256(c2[2186],_mm256_xor_si256(c2[2494],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[7112],_mm256_xor_si256(c2[5883],_mm256_xor_si256(c2[2800],_mm256_xor_si256(c2[4667],_mm256_xor_si256(c2[1276],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[3430],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[7742],_mm256_xor_si256(c2[4666],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[7448],_mm256_xor_si256(c2[1289],_mm256_xor_si256(c2[8068],_mm256_xor_si256(c2[7758],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[9314],_mm256_xor_si256(c2[5923],_mm256_xor_si256(c2[6231],_mm256_xor_si256(c2[3464],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[9314],_mm256_xor_si256(c2[9326],_mm256_xor_si256(c2[5942],_mm256_xor_si256(c2[6250],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[7168],_mm256_xor_si256(c2[5334],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[5955],_mm256_xor_si256(c2[2872],_mm256_xor_si256(c2[714],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[7818],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[4116],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[1347],_mm256_xor_si256(c2[8126],_mm256_xor_si256(c2[6288],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[3212],_mm256_xor_si256(c2[3519],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[1667],_mm256_xor_si256(c2[8446],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[6921],_mm256_xor_si256(c2[7229],_mm256_xor_si256(c2[8152],_mm256_xor_si256(c2[5069],_mm256_xor_si256(c2[6
919],_mm256_xor_si256(c2[3836],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[7551],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[2943],_mm256_xor_si256(c2[9414],_mm256_xor_si256(c2[9722],_mm256_xor_si256(c2[5409],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[2942],_mm256_xor_si256(c2[9721],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[3572],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[495],_mm256_xor_si256(c2[7267],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[9117],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[7897],_mm256_xor_si256(c2[3278],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[7900],_mm256_xor_si256(c2[4817],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[7300],_mm256_xor_si256(c2[7608],_mm256_xor_si256(c2[9144],_mm256_xor_si256(c2[6068],_mm256_xor_si256(c2[523],_mm256_xor_si256(c2[7295],_mm256_xor_si256(c2[8236],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[5153],_mm256_xor_si256(c2[3310],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[3306],_mm256_xor_si256(c2[1149],_mm256_xor_si256(c2[7015],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[8247],_mm256_xor_si256(c2[5171],_mm256_xor_si256(c2[9482],_mm256_xor_si256(c2[6399],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[7031],_mm256_xor_si256(c2[7339],_mm256_xor_si256(c2[3030],_mm256_xor_si256(c2[9802],_mm256_xor_si256(c2[7341],_mm256_xor_si256(c2[4258],_mm256_xor_si256(c2[8275],_mm256_xor_si256(c2[4891],_mm256_xor_si256(c2[5199],_mm256_xor_si256(c2[6121],_mm256_xor_si256(c2[3038],_mm256_xor_si256(c2[6120],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[8909],_mm256_xor_si256(c2[5518],_mm256_xor_si256(c2[5826],_mm256_xor_si256(c2[4597],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[5827],_mm256_xor_si256(c2[2744],_mm256_xor_si256(c2[1839],_mm256_xor_si256(c2[8303],_mm256_xor_si256(c2[8611],_mm256_xor_si256(c2[4915],_mm256_xor_si256(c2[1839],_mm256_xor_si256(c2[5838],_mm25
6_xor_si256(c2[2762],c2[295])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[77]=simde_mm256_xor_si256(c2[5854],simde_mm256_xor_si256(c2[2470],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[7087],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[8933],simde_mm256_xor_si256(c2[5857],simde_mm256_xor_si256(c2[6161],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[7720],simde_mm256_xor_si256(c2[8028],simde_mm256_xor_si256(c2[9563],simde_mm256_xor_si256(c2[6487],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[8644],simde_mm256_xor_si256(c2[3402],simde_mm256_xor_si256(c2[5577],simde_mm256_xor_si256(c2[2186],simde_mm256_xor_si256(c2[2494],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[7112],simde_mm256_xor_si256(c2[5883],simde_mm256_xor_si256(c2[2800],simde_mm256_xor_si256(c2[4667],simde_mm256_xor_si256(c2[1276],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[3430],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[7742],simde_mm256_xor_si256(c2[4666],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[7448],simde_mm256_xor_si256(c2[1289],simde_mm256_xor_si256(c2[8068],simde_mm256_xor_si256(c2[7758],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[9314],simde_mm256_xor_si256(c2[5923],simde_mm256_xor_si256(c2[6231],simde_mm256_xor_si256(c2[3464],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[9314],simde_mm256_xor_si256(c2[9326],simde_mm256_xor_si256(c2[5942],simde_mm256_xor_si256(c2[6250],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[7168],simde_mm256_xor_si256(c2[5334],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[5955],simde_mm256_xor_si256(c2[2872],simde_mm256_xor_si256(c2[714],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[7818],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[4116],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[1347],simde_mm256_xor_si256(c2[8126],simde_mm256
_xor_si256(c2[6288],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[3212],simde_mm256_xor_si256(c2[3519],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[1667],simde_mm256_xor_si256(c2[8446],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[6921],simde_mm256_xor_si256(c2[7229],simde_mm256_xor_si256(c2[8152],simde_mm256_xor_si256(c2[5069],simde_mm256_xor_si256(c2[6919],simde_mm256_xor_si256(c2[3836],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[7551],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[2943],simde_mm256_xor_si256(c2[9414],simde_mm256_xor_si256(c2[9722],simde_mm256_xor_si256(c2[5409],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[2942],simde_mm256_xor_si256(c2[9721],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[3572],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[495],simde_mm256_xor_si256(c2[7267],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[9117],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[7897],simde_mm256_xor_si256(c2[3278],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[7900],simde_mm256_xor_si256(c2[4817],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[7300],simde_mm256_xor_si256(c2[7608],simde_mm256_xor_si256(c2[9144],simde_mm256_xor_si256(c2[6068],simde_mm256_xor_si256(c2[523],simde_mm256_xor_si256(c2[7295],simde_mm256_xor_si256(c2[8236],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[5153],simde_mm256_xor_si256(c2[3310],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[3306],simde_mm256_xor_si256(c2[1149],simde_mm256_xor_si256(c2[7015],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[8247],simde_mm256_xor_si256(c2[5171],simde_mm256_xor_si256(c2[9482],simde_mm256_xor_si256(c2[6399],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[7031],simde_mm25
6_xor_si256(c2[7339],simde_mm256_xor_si256(c2[3030],simde_mm256_xor_si256(c2[9802],simde_mm256_xor_si256(c2[7341],simde_mm256_xor_si256(c2[4258],simde_mm256_xor_si256(c2[8275],simde_mm256_xor_si256(c2[4891],simde_mm256_xor_si256(c2[5199],simde_mm256_xor_si256(c2[6121],simde_mm256_xor_si256(c2[3038],simde_mm256_xor_si256(c2[6120],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[8909],simde_mm256_xor_si256(c2[5518],simde_mm256_xor_si256(c2[5826],simde_mm256_xor_si256(c2[4597],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[5827],simde_mm256_xor_si256(c2[2744],simde_mm256_xor_si256(c2[1839],simde_mm256_xor_si256(c2[8303],simde_mm256_xor_si256(c2[8611],simde_mm256_xor_si256(c2[4915],simde_mm256_xor_si256(c2[1839],simde_mm256_xor_si256(c2[5838],simde_mm256_xor_si256(c2[2762],c2[295])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[84]=_mm256_xor_si256(c2[4316],_mm256_xor_si256(c2[4639],_mm256_xor_si256(c2[2608],_mm256_xor_si256(c2[1078],_mm256_xor_si256(c2[8806],c2[7033])))));
+     d2[84]=simde_mm256_xor_si256(c2[4316],simde_mm256_xor_si256(c2[4639],simde_mm256_xor_si256(c2[2608],simde_mm256_xor_si256(c2[1078],simde_mm256_xor_si256(c2[8806],c2[7033])))));
 
 //row: 13
-     d2[91]=_mm256_xor_si256(c2[4006],_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[8630],_mm256_xor_si256(c2[9241],_mm256_xor_si256(c2[7393],_mm256_xor_si256(c2[5238],_mm256_xor_si256(c2[9256],_mm256_xor_si256(c2[9564],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[3729],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[8655],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[2819],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[6202],_mm256_xor_si256(c2[7435],_mm256_xor_si256(c2[8991],_mm256_xor_si256(c2[9604],_mm256_xor_si256(c2[6218],_mm256_xor_si256(c2[7466],_mm256_xor_si256(c2[7774],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[7478],_mm256_xor_si256(c2[7786],_mm256_xor_si256(c2[2857],_mm256_xor_si256(c2[8711],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[4415],_mm256_xor_si256(c2[9036],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[6278],_mm256_xor_si256(c2[2576],_mm256_xor_si256(c2[9662],_mm256_xor_si256(c2[4440],_mm256_xor_si256(c2[4748],_mm256_xor_si256(c2[1979],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[8457],_mm256_xor_si256(c2[8765],_mm256_xor_si256(c2[6612],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[9087],_mm256_xor_si256(c2[3234],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[3869],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[1724],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[8810],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[9440],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[6360],_mm256_xor_si256(c2[8836],_mm256_xor_si256(c2[9144],_mm256_xor_si256(c2[7604],_mm256_xor_si256(c2[8838],_mm256_xor_si256(c2[6388],_mm256_xor_si256(c2[6696],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[4849],_mm256_xor_si256(c2[5475],_mm256_xor_si256(c2[6707],_mm256_xor_si256(c2[7942],_mm256_xor_si256(c2[8574],_mm256_xor_si256(c2[8882],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[5801],_mm256_xor_si256(c2[6427],_mm256_xor_si25
6(c2[6735],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[4580],_mm256_xor_si256(c2[7061],_mm256_xor_si256(c2[7369],_mm256_xor_si256(c2[3057],_mm256_xor_si256(c2[4287],_mm256_xor_si256(c2[7986],_mm256_xor_si256(c2[9846],_mm256_xor_si256(c2[299],_mm256_xor_si256(c2[3375],c2[4298])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[91]=simde_mm256_xor_si256(c2[4006],simde_mm256_xor_si256(c2[4314],simde_mm256_xor_si256(c2[8630],simde_mm256_xor_si256(c2[9241],simde_mm256_xor_si256(c2[7393],simde_mm256_xor_si256(c2[5238],simde_mm256_xor_si256(c2[9256],simde_mm256_xor_si256(c2[9564],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[3729],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[8655],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[2819],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[6202],simde_mm256_xor_si256(c2[7435],simde_mm256_xor_si256(c2[8991],simde_mm256_xor_si256(c2[9604],simde_mm256_xor_si256(c2[6218],simde_mm256_xor_si256(c2[7466],simde_mm256_xor_si256(c2[7774],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[7478],simde_mm256_xor_si256(c2[7786],simde_mm256_xor_si256(c2[2857],simde_mm256_xor_si256(c2[8711],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[4415],simde_mm256_xor_si256(c2[9036],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[6278],simde_mm256_xor_si256(c2[2576],simde_mm256_xor_si256(c2[9662],simde_mm256_xor_si256(c2[4440],simde_mm256_xor_si256(c2[4748],simde_mm256_xor_si256(c2[1979],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[8457],simde_mm256_xor_si256(c2[8765],simde_mm256_xor_si256(c2[6612],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[9087],simde_mm256_xor_si256(c2[3234],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[3869],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[1724],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[8810],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[9440],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[6360],simde_mm256_xor_si256(c2[8836],simde_mm256_xor_si256(c2[9144],simde_mm256_xor_si256(c2[7604],simde_mm256_xor_si256(c2[8838],simde_m
m256_xor_si256(c2[6388],simde_mm256_xor_si256(c2[6696],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[4849],simde_mm256_xor_si256(c2[5475],simde_mm256_xor_si256(c2[6707],simde_mm256_xor_si256(c2[7942],simde_mm256_xor_si256(c2[8574],simde_mm256_xor_si256(c2[8882],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[5801],simde_mm256_xor_si256(c2[6427],simde_mm256_xor_si256(c2[6735],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[4580],simde_mm256_xor_si256(c2[7061],simde_mm256_xor_si256(c2[7369],simde_mm256_xor_si256(c2[3057],simde_mm256_xor_si256(c2[4287],simde_mm256_xor_si256(c2[7986],simde_mm256_xor_si256(c2[9846],simde_mm256_xor_si256(c2[299],simde_mm256_xor_si256(c2[3375],c2[4298])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[98]=_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[6068],_mm256_xor_si256(c2[9777],_mm256_xor_si256(c2[6091],c2[3683])))));
+     d2[98]=simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[6068],simde_mm256_xor_si256(c2[9777],simde_mm256_xor_si256(c2[6091],c2[3683])))));
 
 //row: 15
-     d2[105]=_mm256_xor_si256(c2[5238],_mm256_xor_si256(c2[9554],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[8009],_mm256_xor_si256(c2[8317],_mm256_xor_si256(c2[8010],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[8947],_mm256_xor_si256(c2[941],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[7409],_mm256_xor_si256(c2[4961],_mm256_xor_si256(c2[9579],_mm256_xor_si256(c2[5267],_mm256_xor_si256(c2[4051],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[6818],_mm256_xor_si256(c2[7126],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[6834],_mm256_xor_si256(c2[7142],_mm256_xor_si256(c2[8698],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[1919],_mm256_xor_si256(c2[8710],_mm256_xor_si256(c2[3781],_mm256_xor_si256(c2[9327],_mm256_xor_si256(c2[9635],_mm256_xor_si256(c2[4718],_mm256_xor_si256(c2[5339],_mm256_xor_si256(c2[9652],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[7202],_mm256_xor_si256(c2[3500],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[731],_mm256_xor_si256(c2[5672],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[1051],_mm256_xor_si256(c2[9689],_mm256_xor_si256(c2[7536],_mm256_xor_si256(c2[5995],_mm256_xor_si256(c2[6303],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[3236],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[3850],_mm256_xor_si256(c2[4158],_mm256_xor_si256(c2[2327],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[2018],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[2956],_mm256_xor_si256(c2[9734],_mm256_xor_si256(c2[1414],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[5424],_mm256_xor_si256(c2[509],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[6976],_mm256_xor_si256(c2[7284],_mm256_xor_si256(c2[213],_mm256_xor_si256(c2[8528],_mm256_xor_si256(c2[9762],_mm256_xor_si256(c2[7620],_mm256_xor_si256(c2[2694],_mm256_xor_si256(c2[5465],_mm256_xor_si256(c2[5773],_mm256_xor_si256(c2[6399],_mm256_xor_si256(c2[7631],_mm256_xor_si256(c2[8558],_mm256_xor_si256(c2[8866],_mm256_xor_si256(c2[9806],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[6417],_mm256_xor_si256(c2[672
5],_mm256_xor_si256(c2[5494],_mm256_xor_si256(c2[7659],_mm256_xor_si256(c2[5505],_mm256_xor_si256(c2[5504],_mm256_xor_si256(c2[8293],_mm256_xor_si256(c2[3981],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[5211],_mm256_xor_si256(c2[1223],_mm256_xor_si256(c2[4299],_mm256_xor_si256(c2[4914],c2[5222]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[105]=simde_mm256_xor_si256(c2[5238],simde_mm256_xor_si256(c2[9554],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[8009],simde_mm256_xor_si256(c2[8317],simde_mm256_xor_si256(c2[8010],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[8947],simde_mm256_xor_si256(c2[941],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[7409],simde_mm256_xor_si256(c2[4961],simde_mm256_xor_si256(c2[9579],simde_mm256_xor_si256(c2[5267],simde_mm256_xor_si256(c2[4051],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[6818],simde_mm256_xor_si256(c2[7126],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[6834],simde_mm256_xor_si256(c2[7142],simde_mm256_xor_si256(c2[8698],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[1919],simde_mm256_xor_si256(c2[8710],simde_mm256_xor_si256(c2[3781],simde_mm256_xor_si256(c2[9327],simde_mm256_xor_si256(c2[9635],simde_mm256_xor_si256(c2[4718],simde_mm256_xor_si256(c2[5339],simde_mm256_xor_si256(c2[9652],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[7202],simde_mm256_xor_si256(c2[3500],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[731],simde_mm256_xor_si256(c2[5672],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[1051],simde_mm256_xor_si256(c2[9689],simde_mm256_xor_si256(c2[7536],simde_mm256_xor_si256(c2[5995],simde_mm256_xor_si256(c2[6303],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[3236],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[3850],simde_mm256_xor_si256(c2[4158],simde_mm256_xor_si256(c2[2327],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[2018],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[2956],simde_mm256_xor_si256(c2[9734],simde_mm256_xor_si256(c2[1414],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[5424],simde_mm256_xor_si256(c2[509],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[6976],simde_mm256_xor_si256(c2[7284],simde_mm256_xor_si256(c2[213],simde_mm256_xor_si256(c2[8528],simde_mm256_xor
_si256(c2[9762],simde_mm256_xor_si256(c2[7620],simde_mm256_xor_si256(c2[2694],simde_mm256_xor_si256(c2[5465],simde_mm256_xor_si256(c2[5773],simde_mm256_xor_si256(c2[6399],simde_mm256_xor_si256(c2[7631],simde_mm256_xor_si256(c2[8558],simde_mm256_xor_si256(c2[8866],simde_mm256_xor_si256(c2[9806],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[6417],simde_mm256_xor_si256(c2[6725],simde_mm256_xor_si256(c2[5494],simde_mm256_xor_si256(c2[7659],simde_mm256_xor_si256(c2[5505],simde_mm256_xor_si256(c2[5504],simde_mm256_xor_si256(c2[8293],simde_mm256_xor_si256(c2[3981],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[5211],simde_mm256_xor_si256(c2[1223],simde_mm256_xor_si256(c2[4299],simde_mm256_xor_si256(c2[4914],c2[5222]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[112]=_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[5544],_mm256_xor_si256(c2[6162],_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[6485],_mm256_xor_si256(c2[4944],_mm256_xor_si256(c2[7101],_mm256_xor_si256(c2[8026],_mm256_xor_si256(c2[958],_mm256_xor_si256(c2[5576],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[8666],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[1279],_mm256_xor_si256(c2[5912],_mm256_xor_si256(c2[6525],_mm256_xor_si256(c2[3139],_mm256_xor_si256(c2[4695],_mm256_xor_si256(c2[8700],_mm256_xor_si256(c2[7771],_mm256_xor_si256(c2[4707],_mm256_xor_si256(c2[9633],_mm256_xor_si256(c2[5632],_mm256_xor_si256(c2[715],_mm256_xor_si256(c2[1336],_mm256_xor_si256(c2[5950],_mm256_xor_si256(c2[3192],_mm256_xor_si256(c2[9352],_mm256_xor_si256(c2[6583],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[8755],_mm256_xor_si256(c2[6903],_mm256_xor_si256(c2[5686],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[2300],_mm256_xor_si256(c2[9088],_mm256_xor_si256(c2[6008],_mm256_xor_si256(c2[155],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[8179],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[8178],_mm256_xor_si256(c2[8808],_mm256_xor_si256(c2[5731],_mm256_xor_si256(c2[7574],_mm256_xor_si256(c2[6361],_mm256_xor_si256(c2[8514],_mm256_xor_si256(c2[3281],_mm256_xor_si256(c2[6065],_mm256_xor_si256(c2[4525],_mm256_xor_si256(c2[5759],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[8546],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[2396],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[4863],_mm256_xor_si256(c2[5796],_mm256_xor_si256(c2[8266],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[3656],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[4290],_mm256_xor_si256(c2[9833],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[9217],_mm256_xor_si256(c2[7075],_mm256_xor_si256(c2[296],c2[1219]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[112]=simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[5544],simde_mm256_xor_si256(c2[6162],simde_mm256_xor_si256(c2[4314],simde_mm256_xor_si256(c2[6485],simde_mm256_xor_si256(c2[4944],simde_mm256_xor_si256(c2[7101],simde_mm256_xor_si256(c2[8026],simde_mm256_xor_si256(c2[958],simde_mm256_xor_si256(c2[5576],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[8666],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[1279],simde_mm256_xor_si256(c2[5912],simde_mm256_xor_si256(c2[6525],simde_mm256_xor_si256(c2[3139],simde_mm256_xor_si256(c2[4695],simde_mm256_xor_si256(c2[8700],simde_mm256_xor_si256(c2[7771],simde_mm256_xor_si256(c2[4707],simde_mm256_xor_si256(c2[9633],simde_mm256_xor_si256(c2[5632],simde_mm256_xor_si256(c2[715],simde_mm256_xor_si256(c2[1336],simde_mm256_xor_si256(c2[5950],simde_mm256_xor_si256(c2[3192],simde_mm256_xor_si256(c2[9352],simde_mm256_xor_si256(c2[6583],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[8755],simde_mm256_xor_si256(c2[6903],simde_mm256_xor_si256(c2[5686],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[2300],simde_mm256_xor_si256(c2[9088],simde_mm256_xor_si256(c2[6008],simde_mm256_xor_si256(c2[155],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[8179],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[8178],simde_mm256_xor_si256(c2[8808],simde_mm256_xor_si256(c2[5731],simde_mm256_xor_si256(c2[7574],simde_mm256_xor_si256(c2[6361],simde_mm256_xor_si256(c2[8514],simde_mm256_xor_si256(c2[3281],simde_mm256_xor_si256(c2[6065],simde_mm256_xor_si256(c2[4525],simde_mm256_xor_si256(c2[5759],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[8546],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[2396],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[4863],simde_mm256_xor_si256(c2[5796],simde_mm256_xor_si256(c2[8266],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[1501],simde_mm
256_xor_si256(c2[4290],simde_mm256_xor_si256(c2[9833],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[9217],simde_mm256_xor_si256(c2[7075],simde_mm256_xor_si256(c2[296],c2[1219]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[119]=_mm256_xor_si256(c2[7393],_mm256_xor_si256(c2[4511],_mm256_xor_si256(c2[2694],_mm256_xor_si256(c2[9787],c2[1526]))));
+     d2[119]=simde_mm256_xor_si256(c2[7393],simde_mm256_xor_si256(c2[4511],simde_mm256_xor_si256(c2[2694],simde_mm256_xor_si256(c2[9787],c2[1526]))));
 
 //row: 18
-     d2[126]=_mm256_xor_si256(c2[2176],_mm256_xor_si256(c2[4483],_mm256_xor_si256(c2[2652],_mm256_xor_si256(c2[4876],c2[8279]))));
+     d2[126]=simde_mm256_xor_si256(c2[2176],simde_mm256_xor_si256(c2[4483],simde_mm256_xor_si256(c2[2652],simde_mm256_xor_si256(c2[4876],c2[8279]))));
 
 //row: 19
-     d2[133]=_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1966],c2[2604]))));
+     d2[133]=simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1966],c2[2604]))));
 
 //row: 20
-     d2[140]=_mm256_xor_si256(c2[6781],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[2176],_mm256_xor_si256(c2[635],_mm256_xor_si256(c2[2792],_mm256_xor_si256(c2[6497],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[6810],_mm256_xor_si256(c2[5587],_mm256_xor_si256(c2[4357],_mm256_xor_si256(c2[8669],_mm256_xor_si256(c2[5592],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[8685],_mm256_xor_si256(c2[379],_mm256_xor_si256(c2[4384],_mm256_xor_si256(c2[3462],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[5324],_mm256_xor_si256(c2[1316],_mm256_xor_si256(c2[6261],_mm256_xor_si256(c2[6875],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[8738],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[2274],_mm256_xor_si256(c2[7215],_mm256_xor_si256(c2[4439],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[7829],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[9072],_mm256_xor_si256(c2[7846],_mm256_xor_si256(c2[4779],_mm256_xor_si256(c2[1699],_mm256_xor_si256(c2[5701],_mm256_xor_si256(c2[6936],_mm256_xor_si256(c2[3870],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[3869],_mm256_xor_si256(c2[4499],_mm256_xor_si256(c2[1415],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[2045],_mm256_xor_si256(c2[4205],_mm256_xor_si256(c2[8820],_mm256_xor_si256(c2[1756],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[9156],_mm256_xor_si256(c2[4230],_mm256_xor_si256(c2[7309],_mm256_xor_si256(c2[7942],_mm256_xor_si256(c2[9174],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[1487],_mm256_xor_si256(c2[3950],_mm256_xor_si256(c2[8261],_mm256_xor_si256(c2[9202],_mm256_xor_si256(c2[7048],_mm256_xor_si256(c2[7047],_mm256_xor_si256(c2[9829],_mm256_xor_si256(c2[5517],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[2759],_mm256_xor_si256(c2[5842],c2[6765]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[140]=simde_mm256_xor_si256(c2[6781],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[2176],simde_mm256_xor_si256(c2[635],simde_mm256_xor_si256(c2[2792],simde_mm256_xor_si256(c2[6497],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[6810],simde_mm256_xor_si256(c2[5587],simde_mm256_xor_si256(c2[4357],simde_mm256_xor_si256(c2[8669],simde_mm256_xor_si256(c2[5592],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[8685],simde_mm256_xor_si256(c2[379],simde_mm256_xor_si256(c2[4384],simde_mm256_xor_si256(c2[3462],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[5324],simde_mm256_xor_si256(c2[1316],simde_mm256_xor_si256(c2[6261],simde_mm256_xor_si256(c2[6875],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[8738],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[2274],simde_mm256_xor_si256(c2[7215],simde_mm256_xor_si256(c2[4439],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[7829],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[9072],simde_mm256_xor_si256(c2[7846],simde_mm256_xor_si256(c2[4779],simde_mm256_xor_si256(c2[1699],simde_mm256_xor_si256(c2[5701],simde_mm256_xor_si256(c2[6936],simde_mm256_xor_si256(c2[3870],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[3869],simde_mm256_xor_si256(c2[4499],simde_mm256_xor_si256(c2[1415],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[2045],simde_mm256_xor_si256(c2[4205],simde_mm256_xor_si256(c2[8820],simde_mm256_xor_si256(c2[1756],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[9156],simde_mm256_xor_si256(c2[4230],simde_mm256_xor_si256(c2[7309],simde_mm256_xor_si256(c2[7942],simde_mm256_xor_si256(c2[9174],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[1487],simde_mm256_xor_si256(c2[3950],simde_mm256_xor_si256(c2[8261],simde_mm256_xor_si256(c2[9202],simde_mm256_xor_si256(c2[7048],simde_mm25
6_xor_si256(c2[7047],simde_mm256_xor_si256(c2[9829],simde_mm256_xor_si256(c2[5517],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[2759],simde_mm256_xor_si256(c2[5842],c2[6765]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[147]=_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[8854],_mm256_xor_si256(c2[5825],c2[3377]))));
+     d2[147]=simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[8854],simde_mm256_xor_si256(c2[5825],c2[3377]))));
 
 //row: 22
-     d2[154]=_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[1106],c2[6402])));
+     d2[154]=simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[1106],c2[6402])));
 
 //row: 23
-     d2[161]=_mm256_xor_si256(c2[7408],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[2609],c2[8263])));
+     d2[161]=simde_mm256_xor_si256(c2[7408],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[2609],c2[8263])));
 
 //row: 24
-     d2[168]=_mm256_xor_si256(c2[8319],_mm256_xor_si256(c2[2773],_mm256_xor_si256(c2[3391],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[5237],_mm256_xor_si256(c2[3714],_mm256_xor_si256(c2[2173],_mm256_xor_si256(c2[4330],_mm256_xor_si256(c2[8042],_mm256_xor_si256(c2[2805],_mm256_xor_si256(c2[8348],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[8978],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[3754],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[5922],_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[6862],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[7799],_mm256_xor_si256(c2[8420],_mm256_xor_si256(c2[3179],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[6581],_mm256_xor_si256(c2[3812],_mm256_xor_si256(c2[8753],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[4132],_mm256_xor_si256(c2[2915],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[9384],_mm256_xor_si256(c2[6317],_mm256_xor_si256(c2[3237],_mm256_xor_si256(c2[7239],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[5408],_mm256_xor_si256(c2[7874],_mm256_xor_si256(c2[5407],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[5743],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[3294],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[2988],_mm256_xor_si256(c2[846],_mm256_xor_si256(c2[5768],_mm256_xor_si256(c2[8854],_mm256_xor_si256(c2[9480],_mm256_xor_si256(c2[857],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[5488],_mm256_xor_si256(c2[9806],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[8586],_mm256_xor_si256(c2[8585],_mm256_xor_si256(c2[1512],_mm256_xor_si256(c2[7062],_mm256_xor_si256(c2[8292],_mm256_xor_si256(c2[4304],_mm256_xor_si256(c2[7380],c2[8303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[168]=simde_mm256_xor_si256(c2[8319],simde_mm256_xor_si256(c2[2773],simde_mm256_xor_si256(c2[3391],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[5237],simde_mm256_xor_si256(c2[3714],simde_mm256_xor_si256(c2[2173],simde_mm256_xor_si256(c2[4330],simde_mm256_xor_si256(c2[8042],simde_mm256_xor_si256(c2[2805],simde_mm256_xor_si256(c2[8348],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[8978],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[3754],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[5922],simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[6862],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[7799],simde_mm256_xor_si256(c2[8420],simde_mm256_xor_si256(c2[3179],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[6581],simde_mm256_xor_si256(c2[3812],simde_mm256_xor_si256(c2[8753],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[4132],simde_mm256_xor_si256(c2[2915],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[9384],simde_mm256_xor_si256(c2[6317],simde_mm256_xor_si256(c2[3237],simde_mm256_xor_si256(c2[7239],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[5408],simde_mm256_xor_si256(c2[7874],simde_mm256_xor_si256(c2[5407],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[5743],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[3294],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[2988],simde_mm256_xor_si256(c2[846],simde_mm256_xor_si256(c2[5768],simde_mm256_xor_si256(c2[8854],simde_mm256_xor_si256(c2[9480],simde_mm256_xor_si256(c2[857],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[5488],simde_mm256_xor_si256(c2[9806],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[8586],simde_mm256_x
or_si256(c2[8585],simde_mm256_xor_si256(c2[1512],simde_mm256_xor_si256(c2[7062],simde_mm256_xor_si256(c2[8292],simde_mm256_xor_si256(c2[4304],simde_mm256_xor_si256(c2[7380],c2[8303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[175]=_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[1937],_mm256_xor_si256(c2[408],c2[5434])));
+     d2[175]=simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[1937],simde_mm256_xor_si256(c2[408],c2[5434])));
 
 //row: 26
-     d2[182]=_mm256_xor_si256(c2[3391],_mm256_xor_si256(c2[4961],_mm256_xor_si256(c2[6222],c2[9761])));
+     d2[182]=simde_mm256_xor_si256(c2[3391],simde_mm256_xor_si256(c2[4961],simde_mm256_xor_si256(c2[6222],c2[9761])));
 
 //row: 27
-     d2[189]=_mm256_xor_si256(c2[4948],_mm256_xor_si256(c2[4092],c2[4429]));
+     d2[189]=simde_mm256_xor_si256(c2[4948],simde_mm256_xor_si256(c2[4092],c2[4429]));
 
 //row: 28
-     d2[196]=_mm256_xor_si256(c2[5548],_mm256_xor_si256(c2[7760],_mm256_xor_si256(c2[7972],c2[5841])));
+     d2[196]=simde_mm256_xor_si256(c2[5548],simde_mm256_xor_si256(c2[7760],simde_mm256_xor_si256(c2[7972],c2[5841])));
 
 //row: 29
-     d2[203]=_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[5550],_mm256_xor_si256(c2[6161],_mm256_xor_si256(c2[4005],_mm256_xor_si256(c2[4313],_mm256_xor_si256(c2[6484],_mm256_xor_si256(c2[4943],_mm256_xor_si256(c2[6792],_mm256_xor_si256(c2[7100],_mm256_xor_si256(c2[6794],_mm256_xor_si256(c2[957],_mm256_xor_si256(c2[5575],_mm256_xor_si256(c2[1263],_mm256_xor_si256(c2[47],_mm256_xor_si256(c2[8672],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[3122],_mm256_xor_si256(c2[5911],_mm256_xor_si256(c2[6524],_mm256_xor_si256(c2[2830],_mm256_xor_si256(c2[3138],_mm256_xor_si256(c2[4694],_mm256_xor_si256(c2[8699],_mm256_xor_si256(c2[7770],_mm256_xor_si256(c2[4706],_mm256_xor_si256(c2[9632],_mm256_xor_si256(c2[5323],_mm256_xor_si256(c2[5631],_mm256_xor_si256(c2[714],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[5648],_mm256_xor_si256(c2[5956],_mm256_xor_si256(c2[3198],_mm256_xor_si256(c2[9358],_mm256_xor_si256(c2[6274],_mm256_xor_si256(c2[6582],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[8754],_mm256_xor_si256(c2[6902],_mm256_xor_si256(c2[5685],_mm256_xor_si256(c2[3532],_mm256_xor_si256(c2[1991],_mm256_xor_si256(c2[2299],_mm256_xor_si256(c2[9087],_mm256_xor_si256(c2[6007],_mm256_xor_si256(c2[9708],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[8178],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[7869],_mm256_xor_si256(c2[8177],_mm256_xor_si256(c2[8807],_mm256_xor_si256(c2[5730],_mm256_xor_si256(c2[7272],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[6360],_mm256_xor_si256(c2[8513],_mm256_xor_si256(c2[2972],_mm256_xor_si256(c2[3280],_mm256_xor_si256(c2[3584],_mm256_xor_si256(c2[6064],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[5758],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[8545],_mm256_xor_si256(c2[1461],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[2395],_mm256_xor_si256(c2[3627],_mm256_xor_si256(c2[4554],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[5802],_mm256_xor_si256(c2[8265],_mm256_xor_si256(c2[2413],_mm256_xor_si256(c2[2721],_mm256_xor_si256(c2[6721],_mm256_xor_si256
(c2[3655],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[1500],_mm256_xor_si256(c2[4289],_mm256_xor_si256(c2[9832],_mm256_xor_si256(c2[899],_mm256_xor_si256(c2[1207],_mm256_xor_si256(c2[7074],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[910],c2[1218]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[203]=simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[5550],simde_mm256_xor_si256(c2[6161],simde_mm256_xor_si256(c2[4005],simde_mm256_xor_si256(c2[4313],simde_mm256_xor_si256(c2[6484],simde_mm256_xor_si256(c2[4943],simde_mm256_xor_si256(c2[6792],simde_mm256_xor_si256(c2[7100],simde_mm256_xor_si256(c2[6794],simde_mm256_xor_si256(c2[957],simde_mm256_xor_si256(c2[5575],simde_mm256_xor_si256(c2[1263],simde_mm256_xor_si256(c2[47],simde_mm256_xor_si256(c2[8672],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[3122],simde_mm256_xor_si256(c2[5911],simde_mm256_xor_si256(c2[6524],simde_mm256_xor_si256(c2[2830],simde_mm256_xor_si256(c2[3138],simde_mm256_xor_si256(c2[4694],simde_mm256_xor_si256(c2[8699],simde_mm256_xor_si256(c2[7770],simde_mm256_xor_si256(c2[4706],simde_mm256_xor_si256(c2[9632],simde_mm256_xor_si256(c2[5323],simde_mm256_xor_si256(c2[5631],simde_mm256_xor_si256(c2[714],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[5648],simde_mm256_xor_si256(c2[5956],simde_mm256_xor_si256(c2[3198],simde_mm256_xor_si256(c2[9358],simde_mm256_xor_si256(c2[6274],simde_mm256_xor_si256(c2[6582],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[8754],simde_mm256_xor_si256(c2[6902],simde_mm256_xor_si256(c2[5685],simde_mm256_xor_si256(c2[3532],simde_mm256_xor_si256(c2[1991],simde_mm256_xor_si256(c2[2299],simde_mm256_xor_si256(c2[9087],simde_mm256_xor_si256(c2[6007],simde_mm256_xor_si256(c2[9708],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[8178],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[7869],simde_mm256_xor_si256(c2[8177],simde_mm256_xor_si256(c2[8807],simde_mm256_xor_si256(c2[5730],simde_mm256_xor_si256(c2[7272],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[6360],simde_mm256_xor_si256(c2[8513],simde_mm256_xor_si256(c2[2972],simde_mm256_xor_si256(c2[3280],simde_mm256_xor_si256(c2[3584],simde_mm256_xor_si256(c2[6064],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[5758],simde_mm256_xor_si256(c2[3616],simde_mm
256_xor_si256(c2[8545],simde_mm256_xor_si256(c2[1461],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[2395],simde_mm256_xor_si256(c2[3627],simde_mm256_xor_si256(c2[4554],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[5802],simde_mm256_xor_si256(c2[8265],simde_mm256_xor_si256(c2[2413],simde_mm256_xor_si256(c2[2721],simde_mm256_xor_si256(c2[6721],simde_mm256_xor_si256(c2[3655],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[1500],simde_mm256_xor_si256(c2[4289],simde_mm256_xor_si256(c2[9832],simde_mm256_xor_si256(c2[899],simde_mm256_xor_si256(c2[1207],simde_mm256_xor_si256(c2[7074],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[910],c2[1218]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[210]=_mm256_xor_si256(c2[8319],_mm256_xor_si256(c2[2773],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[3391],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[3714],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[2173],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[4330],_mm256_xor_si256(c2[8042],_mm256_xor_si256(c2[2805],_mm256_xor_si256(c2[8040],_mm256_xor_si256(c2[8348],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[3446],_mm256_xor_si256(c2[3754],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[5922],_mm256_xor_si256(c2[4692],_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[6554],_mm256_xor_si256(c2[6862],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[7799],_mm256_xor_si256(c2[8112],_mm256_xor_si256(c2[8420],_mm256_xor_si256(c2[2871],_mm256_xor_si256(c2[3179],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[6273],_mm256_xor_si256(c2[6581],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[3812],_mm256_xor_si256(c2[8753],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[4132],_mm256_xor_si256(c2[2915],_mm256_xor_si256(c2[454],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[9076],_mm256_xor_si256(c2[9384],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[6317],_mm256_xor_si256(c2[3237],_mm256_xor_si256(c2[6931],_mm256_xor_si256(c2[7239],_mm256_xor_si256(c2[5408],_mm256_xor_si256(c2[7874],_mm256_xor_si256(c2[5099],_mm256_xor_si256(c2[5407],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[2652],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[4495],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[5732],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[5435],_mm256_xor_si256(c2[5743],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[3294],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[298
8],_mm256_xor_si256(c2[846],_mm256_xor_si256(c2[5768],_mm256_xor_si256(c2[8546],_mm256_xor_si256(c2[8854],_mm256_xor_si256(c2[9480],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[857],_mm256_xor_si256(c2[1784],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[5180],_mm256_xor_si256(c2[5488],_mm256_xor_si256(c2[9498],_mm256_xor_si256(c2[9806],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[8586],_mm256_xor_si256(c2[8277],_mm256_xor_si256(c2[8585],_mm256_xor_si256(c2[1512],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[7062],_mm256_xor_si256(c2[7984],_mm256_xor_si256(c2[8292],_mm256_xor_si256(c2[4304],_mm256_xor_si256(c2[7380],_mm256_xor_si256(c2[7995],c2[8303])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[210]=simde_mm256_xor_si256(c2[8319],simde_mm256_xor_si256(c2[2773],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[3391],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[3714],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[2173],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[4330],simde_mm256_xor_si256(c2[8042],simde_mm256_xor_si256(c2[2805],simde_mm256_xor_si256(c2[8040],simde_mm256_xor_si256(c2[8348],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[3446],simde_mm256_xor_si256(c2[3754],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[5922],simde_mm256_xor_si256(c2[4692],simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[6554],simde_mm256_xor_si256(c2[6862],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[7799],simde_mm256_xor_si256(c2[8112],simde_mm256_xor_si256(c2[8420],simde_mm256_xor_si256(c2[2871],simde_mm256_xor_si256(c2[3179],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[6273],simde_mm256_xor_si256(c2[6581],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[3812],simde_mm256_xor_si256(c2[8753],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[4132],simde_mm256_xor_si256(c2[2915],simde_mm256_xor_si256(c2[454],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[9076],simde_mm256_xor_si256(c2[9384],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[6317],simde_mm256_xor_si256(c2[3237],simde_mm256_xor_si256(c2[6931],simde_mm256_xor_si256(c2[7239],simde_mm256_xor_si256(c2[5408],simde_mm256_xor_si256(c2[7874],simde_mm256_xor_si256(c2[5099],simde_mm256_xor_si256(c2[5407],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[2652],simde_mm256_x
or_si256(c2[2960],simde_mm256_xor_si256(c2[4495],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[5732],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[5435],simde_mm256_xor_si256(c2[5743],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[3294],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[2988],simde_mm256_xor_si256(c2[846],simde_mm256_xor_si256(c2[5768],simde_mm256_xor_si256(c2[8546],simde_mm256_xor_si256(c2[8854],simde_mm256_xor_si256(c2[9480],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[857],simde_mm256_xor_si256(c2[1784],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[5180],simde_mm256_xor_si256(c2[5488],simde_mm256_xor_si256(c2[9498],simde_mm256_xor_si256(c2[9806],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[8586],simde_mm256_xor_si256(c2[8277],simde_mm256_xor_si256(c2[8585],simde_mm256_xor_si256(c2[1512],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[7062],simde_mm256_xor_si256(c2[7984],simde_mm256_xor_si256(c2[8292],simde_mm256_xor_si256(c2[4304],simde_mm256_xor_si256(c2[7380],simde_mm256_xor_si256(c2[7995],c2[8303])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[217]=_mm256_xor_si256(c2[4933],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[9242],_mm256_xor_si256(c2[4313],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[4931],_mm256_xor_si256(c2[8012],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[5254],_mm256_xor_si256(c2[8642],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[5562],_mm256_xor_si256(c2[5870],_mm256_xor_si256(c2[4639],_mm256_xor_si256(c2[4649],_mm256_xor_si256(c2[9582],_mm256_xor_si256(c2[9274],_mm256_xor_si256(c2[4345],_mm256_xor_si256(c2[4962],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[3739],_mm256_xor_si256(c2[8672],_mm256_xor_si256(c2[2509],_mm256_xor_si256(c2[7435],_mm256_xor_si256(c2[6821],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[1892],_mm256_xor_si256(c2[9610],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[5294],_mm256_xor_si256(c2[6837],_mm256_xor_si256(c2[1600],_mm256_xor_si256(c2[1908],_mm256_xor_si256(c2[8386],_mm256_xor_si256(c2[3464],_mm256_xor_si256(c2[2536],_mm256_xor_si256(c2[7462],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[6540],_mm256_xor_si256(c2[8405],_mm256_xor_si256(c2[3476],_mm256_xor_si256(c2[3476],_mm256_xor_si256(c2[8402],_mm256_xor_si256(c2[9330],_mm256_xor_si256(c2[4093],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[4413],_mm256_xor_si256(c2[9339],_mm256_xor_si256(c2[5027],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[4411],_mm256_xor_si256(c2[4719],_mm256_xor_si256(c2[5335],_mm256_xor_si256(c2[6890],_mm256_xor_si256(c2[1961],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[5352],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[2591],_mm256_xor_si256(c2[7524],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[5672],_mm256_xor_si256(c2[9384],_mm256_xor_si256(c2[4455],_mm256_xor_si256(c2[7224],_mm256_xor_si256(c2[2302],_mm256_xor_si256(c2[5998],_mm256_xor_si256(c2[761],_
mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[9706],_mm256_xor_si256(c2[4777],_mm256_xor_si256(c2[3853],_mm256_xor_si256(c2[8471],_mm256_xor_si256(c2[8779],_mm256_xor_si256(c2[2022],_mm256_xor_si256(c2[6948],_mm256_xor_si256(c2[4481],_mm256_xor_si256(c2[9414],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[6639],_mm256_xor_si256(c2[6947],_mm256_xor_si256(c2[2651],_mm256_xor_si256(c2[7577],_mm256_xor_si256(c2[9422],_mm256_xor_si256(c2[4500],_mm256_xor_si256(c2[1417],_mm256_xor_si256(c2[6035],_mm256_xor_si256(c2[6343],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[5130],_mm256_xor_si256(c2[2357],_mm256_xor_si256(c2[7283],_mm256_xor_si256(c2[6972],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[9763],_mm256_xor_si256(c2[4834],_mm256_xor_si256(c2[8223],_mm256_xor_si256(c2[3294],_mm256_xor_si256(c2[9450],_mm256_xor_si256(c2[4528],_mm256_xor_si256(c2[7308],_mm256_xor_si256(c2[2386],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[7308],_mm256_xor_si256(c2[5461],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[6094],_mm256_xor_si256(c2[1165],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[2397],_mm256_xor_si256(c2[8554],_mm256_xor_si256(c2[3324],_mm256_xor_si256(c2[3632],_mm256_xor_si256(c2[9494],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[7028],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[7354],_mm256_xor_si256(c2[2425],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[271],_mm256_xor_si256(c2[5199],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[7981],_mm256_xor_si256(c2[3052],_mm256_xor_si256(c2[3669],_mm256_xor_si256(c2[8602],_mm256_xor_si256(c2[4906],_mm256_xor_si256(c2[9524],_mm256_xor_si256(c2[9832],_mm256_xor_si256(c2[911],_mm256_xor_si256(c2[5844],_mm256_xor_si256(c2[3994],_mm256_xor_si256(c2[8920],_mm256_xor_si256(c2[4917],_mm256_xor_si256(c2[9535],c2[9843])))))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[217]=simde_mm256_xor_si256(c2[4933],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[9242],simde_mm256_xor_si256(c2[4313],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[4931],simde_mm256_xor_si256(c2[8012],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[5254],simde_mm256_xor_si256(c2[8642],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[5562],simde_mm256_xor_si256(c2[5870],simde_mm256_xor_si256(c2[4639],simde_mm256_xor_si256(c2[4649],simde_mm256_xor_si256(c2[9582],simde_mm256_xor_si256(c2[9274],simde_mm256_xor_si256(c2[4345],simde_mm256_xor_si256(c2[4962],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[3739],simde_mm256_xor_si256(c2[8672],simde_mm256_xor_si256(c2[2509],simde_mm256_xor_si256(c2[7435],simde_mm256_xor_si256(c2[6821],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[1892],simde_mm256_xor_si256(c2[9610],simde_mm256_xor_si256(c2[4681],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[5294],simde_mm256_xor_si256(c2[6837],simde_mm256_xor_si256(c2[1600],simde_mm256_xor_si256(c2[1908],simde_mm256_xor_si256(c2[8386],simde_mm256_xor_si256(c2[3464],simde_mm256_xor_si256(c2[2536],simde_mm256_xor_si256(c2[7462],simde_mm256_xor_si256(c2[1614],simde_mm256_xor_si256(c2[6540],simde_mm256_xor_si256(c2[8405],simde_mm256_xor_si256(c2[3476],simde_mm256_xor_si256(c2[3476],simde_mm256_xor_si256(c2[8402],simde_mm256_xor_si256(c2[9330],simde_mm256_xor_si256(c2[4093],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[4413],simde_mm256_xor_si256(c2[9339],simde_mm256_xor_si256(c2[5027],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[4411],simde_mm256_xor_si256(c2[4719],simde_mm256_xor_si256(c2[5335],simde_mm256_xor_si256(c2[6890],simde_mm256_xor_si256(c2[1961],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_
si256(c2[5352],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[438],simde_mm256_xor_si256(c2[2591],simde_mm256_xor_si256(c2[7524],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[5672],simde_mm256_xor_si256(c2[9384],simde_mm256_xor_si256(c2[4455],simde_mm256_xor_si256(c2[7224],simde_mm256_xor_si256(c2[2302],simde_mm256_xor_si256(c2[5998],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[9706],simde_mm256_xor_si256(c2[4777],simde_mm256_xor_si256(c2[3853],simde_mm256_xor_si256(c2[8471],simde_mm256_xor_si256(c2[8779],simde_mm256_xor_si256(c2[2022],simde_mm256_xor_si256(c2[6948],simde_mm256_xor_si256(c2[4481],simde_mm256_xor_si256(c2[9414],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[6639],simde_mm256_xor_si256(c2[6947],simde_mm256_xor_si256(c2[2651],simde_mm256_xor_si256(c2[7577],simde_mm256_xor_si256(c2[9422],simde_mm256_xor_si256(c2[4500],simde_mm256_xor_si256(c2[1417],simde_mm256_xor_si256(c2[6035],simde_mm256_xor_si256(c2[6343],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[5130],simde_mm256_xor_si256(c2[2357],simde_mm256_xor_si256(c2[7283],simde_mm256_xor_si256(c2[6972],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[9763],simde_mm256_xor_si256(c2[4834],simde_mm256_xor_si256(c2[8223],simde_mm256_xor_si256(c2[3294],simde_mm256_xor_si256(c2[9450],simde_mm256_xor_si256(c2[4528],simde_mm256_xor_si256(c2[7308],simde_mm256_xor_si256(c2[2386],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[7308],simde_mm256_xor_si256(c2[5461],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[6094],simde_mm256_xor_si256(c2[1165],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[2397],simde_mm256_xor_si256(c2[8554],simde_mm256_xor_si256(c2[3324],simde_mm256_xor_si256(c2[3632],simde_mm256_xor_si256(c2[9494],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[2102],simde_m
m256_xor_si256(c2[7028],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[7354],simde_mm256_xor_si256(c2[2425],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[271],simde_mm256_xor_si256(c2[5199],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[7981],simde_mm256_xor_si256(c2[3052],simde_mm256_xor_si256(c2[3669],simde_mm256_xor_si256(c2[8602],simde_mm256_xor_si256(c2[4906],simde_mm256_xor_si256(c2[9524],simde_mm256_xor_si256(c2[9832],simde_mm256_xor_si256(c2[911],simde_mm256_xor_si256(c2[5844],simde_mm256_xor_si256(c2[3994],simde_mm256_xor_si256(c2[8920],simde_mm256_xor_si256(c2[4917],simde_mm256_xor_si256(c2[9535],c2[9843]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[224]=_mm256_xor_si256(c2[9240],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[4004],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[2156],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[4635],_mm256_xor_si256(c2[2786],_mm256_xor_si256(c2[3094],_mm256_xor_si256(c2[4943],_mm256_xor_si256(c2[5251],_mm256_xor_si256(c2[8963],_mm256_xor_si256(c2[3726],_mm256_xor_si256(c2[8961],_mm256_xor_si256(c2[9269],_mm256_xor_si256(c2[8053],_mm256_xor_si256(c2[6823],_mm256_xor_si256(c2[972],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[4062],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[1289],_mm256_xor_si256(c2[2845],_mm256_xor_si256(c2[6850],_mm256_xor_si256(c2[5620],_mm256_xor_si256(c2[5928],_mm256_xor_si256(c2[2857],_mm256_xor_si256(c2[7482],_mm256_xor_si256(c2[7790],_mm256_xor_si256(c2[3474],_mm256_xor_si256(c2[3782],_mm256_xor_si256(c2[8727],_mm256_xor_si256(c2[9033],_mm256_xor_si256(c2[9341],_mm256_xor_si256(c2[3799],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1349],_mm256_xor_si256(c2[7201],_mm256_xor_si256(c2[7509],_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[4733],_mm256_xor_si256(c2[9674],_mm256_xor_si256(c2[6905],_mm256_xor_si256(c2[4752],_mm256_xor_si256(c2[5060],_mm256_xor_si256(c2[3836],_mm256_xor_si256(c2[1375],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[142],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[4158],_mm256_xor_si256(c2[7859],_mm256_xor_si256(c2[8167],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[8795],_mm256_xor_si256(c2[6020],_mm256_xor_si256(c2[6328],_mm256_xor_si256(c2[5410],_mm256_xor_si256(c2[6958],_mm256_xor_si256(c2[3573],_mm256_xor_si256(c2[3881],_mm256_xor_si256(c2[5423],_mm256_xor_si256(c2[5731],_mm256_xor_si256(c2[4511],_mm256_xor_si256(c2[6356],_mm256_xor_si256(c2[6664],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[8205],_mm256_xor_si256(c2[4215],_mm256_xor_si256(c2[2675],_mm256_xor_si256(c2[3601],_mm256_xor_si256(
c2[3909],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[6696],_mm256_xor_si256(c2[9467],_mm256_xor_si256(c2[9775],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[3953],_mm256_xor_si256(c2[6108],_mm256_xor_si256(c2[6416],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[1806],_mm256_xor_si256(c2[9507],_mm256_xor_si256(c2[9198],_mm256_xor_si256(c2[9506],_mm256_xor_si256(c2[2440],_mm256_xor_si256(c2[7675],_mm256_xor_si256(c2[7983],_mm256_xor_si256(c2[8905],_mm256_xor_si256(c2[9213],_mm256_xor_si256(c2[5225],_mm256_xor_si256(c2[8308],_mm256_xor_si256(c2[8923],c2[9231])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[224]=simde_mm256_xor_si256(c2[9240],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[4004],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[2156],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[4635],simde_mm256_xor_si256(c2[2786],simde_mm256_xor_si256(c2[3094],simde_mm256_xor_si256(c2[4943],simde_mm256_xor_si256(c2[5251],simde_mm256_xor_si256(c2[8963],simde_mm256_xor_si256(c2[3726],simde_mm256_xor_si256(c2[8961],simde_mm256_xor_si256(c2[9269],simde_mm256_xor_si256(c2[8053],simde_mm256_xor_si256(c2[6823],simde_mm256_xor_si256(c2[972],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[4062],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[1289],simde_mm256_xor_si256(c2[2845],simde_mm256_xor_si256(c2[6850],simde_mm256_xor_si256(c2[5620],simde_mm256_xor_si256(c2[5928],simde_mm256_xor_si256(c2[2857],simde_mm256_xor_si256(c2[7482],simde_mm256_xor_si256(c2[7790],simde_mm256_xor_si256(c2[3474],simde_mm256_xor_si256(c2[3782],simde_mm256_xor_si256(c2[8727],simde_mm256_xor_si256(c2[9033],simde_mm256_xor_si256(c2[9341],simde_mm256_xor_si256(c2[3799],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1349],simde_mm256_xor_si256(c2[7201],simde_mm256_xor_si256(c2[7509],simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[4733],simde_mm256_xor_si256(c2[9674],simde_mm256_xor_si256(c2[6905],simde_mm256_xor_si256(c2[4752],simde_mm256_xor_si256(c2[5060],simde_mm256_xor_si256(c2[3836],simde_mm256_xor_si256(c2[1375],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[142],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[4158],simde_mm256_xor_si256(c2[7859],simde_mm256_xor_si256(c2[8167],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[8795],simde_mm256_xor_si256(c2[6020],simde_mm256_xor_si256(c2[6328],simde_mm256_xor_si256(c2[5410],simde_mm256_xor_si256(c2[6958],simde_mm256_xor_si256(c2[3573],simde_mm2
56_xor_si256(c2[3881],simde_mm256_xor_si256(c2[5423],simde_mm256_xor_si256(c2[5731],simde_mm256_xor_si256(c2[4511],simde_mm256_xor_si256(c2[6356],simde_mm256_xor_si256(c2[6664],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[8205],simde_mm256_xor_si256(c2[4215],simde_mm256_xor_si256(c2[2675],simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[3909],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[6696],simde_mm256_xor_si256(c2[9467],simde_mm256_xor_si256(c2[9775],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[3953],simde_mm256_xor_si256(c2[6108],simde_mm256_xor_si256(c2[6416],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[1806],simde_mm256_xor_si256(c2[9507],simde_mm256_xor_si256(c2[9198],simde_mm256_xor_si256(c2[9506],simde_mm256_xor_si256(c2[2440],simde_mm256_xor_si256(c2[7675],simde_mm256_xor_si256(c2[7983],simde_mm256_xor_si256(c2[8905],simde_mm256_xor_si256(c2[9213],simde_mm256_xor_si256(c2[5225],simde_mm256_xor_si256(c2[8308],simde_mm256_xor_si256(c2[8923],c2[9231])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[231]=_mm256_xor_si256(c2[5561],_mm256_xor_si256(c2[5265],_mm256_xor_si256(c2[467],c2[3070])));
+     d2[231]=simde_mm256_xor_si256(c2[5561],simde_mm256_xor_si256(c2[5265],simde_mm256_xor_si256(c2[467],c2[3070])));
 
 //row: 34
-     d2[238]=_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[8111],_mm256_xor_si256(c2[1444],c2[7018])));
+     d2[238]=simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[8111],simde_mm256_xor_si256(c2[1444],c2[7018])));
 
 //row: 35
-     d2[245]=_mm256_xor_si256(c2[8013],_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[3408],_mm256_xor_si256(c2[1867],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[7729],_mm256_xor_si256(c2[2492],_mm256_xor_si256(c2[8042],_mm256_xor_si256(c2[6819],_mm256_xor_si256(c2[5589],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[2828],_mm256_xor_si256(c2[3448],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[5616],_mm256_xor_si256(c2[4694],_mm256_xor_si256(c2[1630],_mm256_xor_si256(c2[6556],_mm256_xor_si256(c2[2548],_mm256_xor_si256(c2[4093],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[8107],_mm256_xor_si256(c2[2873],_mm256_xor_si256(c2[115],_mm256_xor_si256(c2[6275],_mm256_xor_si256(c2[3506],_mm256_xor_si256(c2[8447],_mm256_xor_si256(c2[5671],_mm256_xor_si256(c2[3826],_mm256_xor_si256(c2[2609],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[9078],_mm256_xor_si256(c2[6011],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[6933],_mm256_xor_si256(c2[5102],_mm256_xor_si256(c2[7561],_mm256_xor_si256(c2[5101],_mm256_xor_si256(c2[8176],_mm256_xor_si256(c2[5731],_mm256_xor_si256(c2[2647],_mm256_xor_si256(c2[4497],_mm256_xor_si256(c2[3277],_mm256_xor_si256(c2[5437],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[2988],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[2675],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[5462],_mm256_xor_si256(c2[8541],_mm256_xor_si256(c2[9174],_mm256_xor_si256(c2[551],_mm256_xor_si256(c2[1779],_mm256_xor_si256(c2[2719],_mm256_xor_si256(c2[5182],_mm256_xor_si256(c2[9493],_mm256_xor_si256(c2[579],_mm256_xor_si256(c2[8280],_mm256_xor_si256(c2[8279],_mm256_xor_si256(c2[1206],_mm256_xor_si256(c2[6749],_mm256_xor_si256(c2[7986],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[7074],c2[7997])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[245]=simde_mm256_xor_si256(c2[8013],simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[1867],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[7729],simde_mm256_xor_si256(c2[2492],simde_mm256_xor_si256(c2[8042],simde_mm256_xor_si256(c2[6819],simde_mm256_xor_si256(c2[5589],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[2828],simde_mm256_xor_si256(c2[3448],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[5616],simde_mm256_xor_si256(c2[4694],simde_mm256_xor_si256(c2[1630],simde_mm256_xor_si256(c2[6556],simde_mm256_xor_si256(c2[2548],simde_mm256_xor_si256(c2[4093],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[8107],simde_mm256_xor_si256(c2[2873],simde_mm256_xor_si256(c2[115],simde_mm256_xor_si256(c2[6275],simde_mm256_xor_si256(c2[3506],simde_mm256_xor_si256(c2[8447],simde_mm256_xor_si256(c2[5671],simde_mm256_xor_si256(c2[3826],simde_mm256_xor_si256(c2[2609],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[9078],simde_mm256_xor_si256(c2[6011],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[6933],simde_mm256_xor_si256(c2[5102],simde_mm256_xor_si256(c2[7561],simde_mm256_xor_si256(c2[5101],simde_mm256_xor_si256(c2[8176],simde_mm256_xor_si256(c2[5731],simde_mm256_xor_si256(c2[2647],simde_mm256_xor_si256(c2[4497],simde_mm256_xor_si256(c2[3277],simde_mm256_xor_si256(c2[5437],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[2988],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[2675],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[5462],simde_mm256_xor_si256(c2[8541],simde_mm256_xor_si256(c2[9174],simde_mm256_xor_si256(c2[551],simde_mm256_xor_si256(c2[1779],simde_mm256_xor_si256(c2[2719],simde_mm256_xor_si256(c2[5182],simde_mm256_xor_si256(c2[9493],simde_mm256_xor_si256(c2[579],simde_mm256_xor_si256(c2[8280],simde_mm256_xor_si256(c2[8279],simde_mm256_xo
r_si256(c2[1206],simde_mm256_xor_si256(c2[6749],simde_mm256_xor_si256(c2[7986],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[7074],c2[7997])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[252]=_mm256_xor_si256(c2[9246],_mm256_xor_si256(c2[9132],_mm256_xor_si256(c2[210],c2[2100])));
+     d2[252]=simde_mm256_xor_si256(c2[9246],simde_mm256_xor_si256(c2[9132],simde_mm256_xor_si256(c2[210],c2[2100])));
 
 //row: 37
-     d2[259]=_mm256_xor_si256(c2[2776],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[7393],_mm256_xor_si256(c2[8011],_mm256_xor_si256(c2[6163],_mm256_xor_si256(c2[8026],_mm256_xor_si256(c2[8334],_mm256_xor_si256(c2[6793],_mm256_xor_si256(c2[8950],_mm256_xor_si256(c2[5252],_mm256_xor_si256(c2[2492],_mm256_xor_si256(c2[2800],_mm256_xor_si256(c2[7425],_mm256_xor_si256(c2[3113],_mm256_xor_si256(c2[1582],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[8374],_mm256_xor_si256(c2[4988],_mm256_xor_si256(c2[6236],_mm256_xor_si256(c2[6544],_mm256_xor_si256(c2[687],_mm256_xor_si256(c2[9620],_mm256_xor_si256(c2[6248],_mm256_xor_si256(c2[6556],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[7481],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[3178],_mm256_xor_si256(c2[7799],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[8432],_mm256_xor_si256(c2[3210],_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[742],_mm256_xor_si256(c2[8752],_mm256_xor_si256(c2[7227],_mm256_xor_si256(c2[7535],_mm256_xor_si256(c2[5382],_mm256_xor_si256(c2[4149],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[9720],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[494],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[9423],_mm256_xor_si256(c2[1112],_mm256_xor_si256(c2[8210],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[5130],_mm256_xor_si256(c2[7606],_mm256_xor_si256(c2[7914],_mm256_xor_si256(c2[6374],_mm256_xor_si256(c2[7608],_mm256_xor_si256(c2[5158],_mm256_xor_si256(c2[5466],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[3612],_mm256_xor_si256(c2[4245],_mm256_xor_si256(c2[5477],_mm256_xor_si256(c2[6712],_mm256_xor_si256(c2[7337],_mm256_xor_si256(c2[7645],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[4564],_mm256_xor_si256(c2[5197],_mm256_xor_si256(c2[5505],_mm256_xor_si256(c2[3
351],_mm256_xor_si256(c2[3350],_mm256_xor_si256(c2[5824],_mm256_xor_si256(c2[6132],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[3057],_mm256_xor_si256(c2[8616],_mm256_xor_si256(c2[8924],_mm256_xor_si256(c2[2145],c2[3068])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[259]=simde_mm256_xor_si256(c2[2776],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[7393],simde_mm256_xor_si256(c2[8011],simde_mm256_xor_si256(c2[6163],simde_mm256_xor_si256(c2[8026],simde_mm256_xor_si256(c2[8334],simde_mm256_xor_si256(c2[6793],simde_mm256_xor_si256(c2[8950],simde_mm256_xor_si256(c2[5252],simde_mm256_xor_si256(c2[2492],simde_mm256_xor_si256(c2[2800],simde_mm256_xor_si256(c2[7425],simde_mm256_xor_si256(c2[3113],simde_mm256_xor_si256(c2[1582],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[8374],simde_mm256_xor_si256(c2[4988],simde_mm256_xor_si256(c2[6236],simde_mm256_xor_si256(c2[6544],simde_mm256_xor_si256(c2[687],simde_mm256_xor_si256(c2[9620],simde_mm256_xor_si256(c2[6248],simde_mm256_xor_si256(c2[6556],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[7481],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[3178],simde_mm256_xor_si256(c2[7799],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[8432],simde_mm256_xor_si256(c2[3210],simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[742],simde_mm256_xor_si256(c2[8752],simde_mm256_xor_si256(c2[7227],simde_mm256_xor_si256(c2[7535],simde_mm256_xor_si256(c2[5382],simde_mm256_xor_si256(c2[4149],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[9720],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[494],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[9423],simde_mm256_xor_si256(c2[1112],simde_mm256_xor_si256(c2[8210],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[5130],simde_mm256_xor_si256(c2[7606],simde_mm256_xor_si256(c2[7914],simde_mm256_xor_si256(c2[6374],simde_mm256_xor_si256(c2[7608],simde_mm256_xor_si256(c2[5158],simde_mm256
_xor_si256(c2[5466],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[3612],simde_mm256_xor_si256(c2[4245],simde_mm256_xor_si256(c2[5477],simde_mm256_xor_si256(c2[6712],simde_mm256_xor_si256(c2[7337],simde_mm256_xor_si256(c2[7645],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[4564],simde_mm256_xor_si256(c2[5197],simde_mm256_xor_si256(c2[5505],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3350],simde_mm256_xor_si256(c2[5824],simde_mm256_xor_si256(c2[6132],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[3057],simde_mm256_xor_si256(c2[8616],simde_mm256_xor_si256(c2[8924],simde_mm256_xor_si256(c2[2145],c2[3068])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[266]=_mm256_xor_si256(c2[8319],_mm256_xor_si256(c2[8136],_mm256_xor_si256(c2[2914],c2[3248])));
+     d2[266]=simde_mm256_xor_si256(c2[8319],simde_mm256_xor_si256(c2[8136],simde_mm256_xor_si256(c2[2914],c2[3248])));
 
 //row: 39
-     d2[273]=_mm256_xor_si256(c2[3710],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[5335],c2[5813])));
+     d2[273]=simde_mm256_xor_si256(c2[3710],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[5335],c2[5813])));
 
 //row: 40
-     d2[280]=_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[4117],c2[241]));
+     d2[280]=simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[4117],c2[241]));
 
 //row: 41
-     d2[287]=_mm256_xor_si256(c2[7098],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[8751],c2[2413])));
+     d2[287]=simde_mm256_xor_si256(c2[7098],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[8751],c2[2413])));
 
 //row: 42
-     d2[294]=_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[8630],_mm256_xor_si256(c2[8933],_mm256_xor_si256(c2[9241],_mm256_xor_si256(c2[7085],_mm256_xor_si256(c2[7393],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[9564],_mm256_xor_si256(c2[7715],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[8655],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[5894],_mm256_xor_si256(c2[6202],_mm256_xor_si256(c2[8991],_mm256_xor_si256(c2[9296],_mm256_xor_si256(c2[9604],_mm256_xor_si256(c2[5910],_mm256_xor_si256(c2[6218],_mm256_xor_si256(c2[8374],_mm256_xor_si256(c2[7774],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[687],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[7786],_mm256_xor_si256(c2[2549],_mm256_xor_si256(c2[2857],_mm256_xor_si256(c2[8403],_mm256_xor_si256(c2[8711],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[4415],_mm256_xor_si256(c2[8728],_mm256_xor_si256(c2[9036],_mm256_xor_si256(c2[6278],_mm256_xor_si256(c2[2268],_mm256_xor_si256(c2[2576],_mm256_xor_si256(c2[9354],_mm256_xor_si256(c2[9662],_mm256_xor_si256(c2[4748],_mm256_xor_si256(c2[1979],_mm256_xor_si256(c2[9674],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[8765],_mm256_xor_si256(c2[6304],_mm256_xor_si256(c2[6612],_mm256_xor_si256(c2[5071],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[9087],_mm256_xor_si256(c2[2926],_mm256_xor_si256(c2[3234],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[3869],_mm256_xor_si256(c2[1094],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[8502],_mm256_xor_si256(c2[8810],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[9440],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[6052],_mm256_xor_si256(c2[6360],_mm256_xor_si256(c2[9144],_mm256_xor_si256(c2[7604],_mm256_xor_si256(c2[8530],_mm256_xor_si256(c2[8838],_mm256_xor_si256(c
2[6696],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[4849],_mm256_xor_si256(c2[5475],_mm256_xor_si256(c2[6399],_mm256_xor_si256(c2[6707],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[7942],_mm256_xor_si256(c2[8882],_mm256_xor_si256(c2[1182],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[5493],_mm256_xor_si256(c2[5801],_mm256_xor_si256(c2[6735],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[4272],_mm256_xor_si256(c2[4580],_mm256_xor_si256(c2[7369],_mm256_xor_si256(c2[2749],_mm256_xor_si256(c2[3057],_mm256_xor_si256(c2[3979],_mm256_xor_si256(c2[4287],_mm256_xor_si256(c2[299],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[3990],c2[4298]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[294]=simde_mm256_xor_si256(c2[4314],simde_mm256_xor_si256(c2[8630],simde_mm256_xor_si256(c2[8933],simde_mm256_xor_si256(c2[9241],simde_mm256_xor_si256(c2[7085],simde_mm256_xor_si256(c2[7393],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[9564],simde_mm256_xor_si256(c2[7715],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[8655],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[5894],simde_mm256_xor_si256(c2[6202],simde_mm256_xor_si256(c2[8991],simde_mm256_xor_si256(c2[9296],simde_mm256_xor_si256(c2[9604],simde_mm256_xor_si256(c2[5910],simde_mm256_xor_si256(c2[6218],simde_mm256_xor_si256(c2[8374],simde_mm256_xor_si256(c2[7774],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[687],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[7786],simde_mm256_xor_si256(c2[2549],simde_mm256_xor_si256(c2[2857],simde_mm256_xor_si256(c2[8403],simde_mm256_xor_si256(c2[8711],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[4415],simde_mm256_xor_si256(c2[8728],simde_mm256_xor_si256(c2[9036],simde_mm256_xor_si256(c2[6278],simde_mm256_xor_si256(c2[2268],simde_mm256_xor_si256(c2[2576],simde_mm256_xor_si256(c2[9354],simde_mm256_xor_si256(c2[9662],simde_mm256_xor_si256(c2[4748],simde_mm256_xor_si256(c2[1979],simde_mm256_xor_si256(c2[9674],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[8765],simde_mm256_xor_si256(c2[6304],simde_mm256_xor_si256(c2[6612],simde_mm256_xor_si256(c2[5071],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[9087],simde_mm256_xor_si256(c2[2926],simde_mm256_xor_si256(c2[3234],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[3869],simde_mm256_xor_si256(c2[1094],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[8502],simde_mm
256_xor_si256(c2[8810],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[9440],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[6052],simde_mm256_xor_si256(c2[6360],simde_mm256_xor_si256(c2[9144],simde_mm256_xor_si256(c2[7604],simde_mm256_xor_si256(c2[8530],simde_mm256_xor_si256(c2[8838],simde_mm256_xor_si256(c2[6696],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[4849],simde_mm256_xor_si256(c2[5475],simde_mm256_xor_si256(c2[6399],simde_mm256_xor_si256(c2[6707],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[7942],simde_mm256_xor_si256(c2[8882],simde_mm256_xor_si256(c2[1182],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[5493],simde_mm256_xor_si256(c2[5801],simde_mm256_xor_si256(c2[6735],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[4272],simde_mm256_xor_si256(c2[4580],simde_mm256_xor_si256(c2[7369],simde_mm256_xor_si256(c2[2749],simde_mm256_xor_si256(c2[3057],simde_mm256_xor_si256(c2[3979],simde_mm256_xor_si256(c2[4287],simde_mm256_xor_si256(c2[299],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[3990],c2[4298]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[301]=_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[4622],_mm256_xor_si256(c2[5240],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[3392],_mm256_xor_si256(c2[5563],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[5871],_mm256_xor_si256(c2[6179],_mm256_xor_si256(c2[4332],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[4654],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[8974],_mm256_xor_si256(c2[7744],_mm256_xor_si256(c2[1893],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[4990],_mm256_xor_si256(c2[5603],_mm256_xor_si256(c2[1909],_mm256_xor_si256(c2[2217],_mm256_xor_si256(c2[3766],_mm256_xor_si256(c2[7771],_mm256_xor_si256(c2[6849],_mm256_xor_si256(c2[3785],_mm256_xor_si256(c2[8711],_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[4710],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[407],_mm256_xor_si256(c2[4720],_mm256_xor_si256(c2[5028],_mm256_xor_si256(c2[2270],_mm256_xor_si256(c2[8430],_mm256_xor_si256(c2[5353],_mm256_xor_si256(c2[5661],_mm256_xor_si256(c2[747],_mm256_xor_si256(c2[7826],_mm256_xor_si256(c2[5981],_mm256_xor_si256(c2[4764],_mm256_xor_si256(c2[2604],_mm256_xor_si256(c2[1070],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[8166],_mm256_xor_si256(c2[5086],_mm256_xor_si256(c2[8780],_mm256_xor_si256(c2[9088],_mm256_xor_si256(c2[7257],_mm256_xor_si256(c2[9716],_mm256_xor_si256(c2[6948],_mm256_xor_si256(c2[7256],_mm256_xor_si256(c2[7886],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[6344],_mm256_xor_si256(c2[6652],_mm256_xor_si256(c2[5432],_mm256_xor_si256(c2[7592],_mm256_xor_si256(c2[2044],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[5143],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[4830],_mm256_xor_si256(c2[2688],_mm256_xor_si256(c2[7617],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[7000],_mm256_xor_si256(c2[1474],_mm256_xor_si256(c2[2706],_mm256_xor_si256(c2[3626],_mm256_xor_si256(c2[3934],_mm256_xor_si256(c2[4874],_mm256_xor_si256(c2[7337],_mm256_xor_si256(c2[1485],_mm256_xor_si256(c2[1793],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c
2[2734],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[579],_mm256_xor_si256(c2[3361],_mm256_xor_si256(c2[8904],_mm256_xor_si256(c2[9833],_mm256_xor_si256(c2[286],_mm256_xor_si256(c2[6146],_mm256_xor_si256(c2[9229],_mm256_xor_si256(c2[9844],c2[297]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[301]=simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[4622],simde_mm256_xor_si256(c2[5240],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[3392],simde_mm256_xor_si256(c2[5563],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[5871],simde_mm256_xor_si256(c2[6179],simde_mm256_xor_si256(c2[4332],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[4654],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[8974],simde_mm256_xor_si256(c2[7744],simde_mm256_xor_si256(c2[1893],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[4990],simde_mm256_xor_si256(c2[5603],simde_mm256_xor_si256(c2[1909],simde_mm256_xor_si256(c2[2217],simde_mm256_xor_si256(c2[3766],simde_mm256_xor_si256(c2[7771],simde_mm256_xor_si256(c2[6849],simde_mm256_xor_si256(c2[3785],simde_mm256_xor_si256(c2[8711],simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[4710],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[407],simde_mm256_xor_si256(c2[4720],simde_mm256_xor_si256(c2[5028],simde_mm256_xor_si256(c2[2270],simde_mm256_xor_si256(c2[8430],simde_mm256_xor_si256(c2[5353],simde_mm256_xor_si256(c2[5661],simde_mm256_xor_si256(c2[747],simde_mm256_xor_si256(c2[7826],simde_mm256_xor_si256(c2[5981],simde_mm256_xor_si256(c2[4764],simde_mm256_xor_si256(c2[2604],simde_mm256_xor_si256(c2[1070],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[8166],simde_mm256_xor_si256(c2[5086],simde_mm256_xor_si256(c2[8780],simde_mm256_xor_si256(c2[9088],simde_mm256_xor_si256(c2[7257],simde_mm256_xor_si256(c2[9716],simde_mm256_xor_si256(c2[6948],simde_mm256_xor_si256(c2[7256],simde_mm256_xor_si256(c2[7886],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[6344],simde_mm256_xor_si256(c2[6652],simde_mm256_xor_si256(c2[5432],simde_mm256_xor_si256(c2[7592],simde_mm256_xor_si256(c2[2044],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[5143],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[4830],simde_mm256_xor_si256(c2[2688],simde_mm256_xor_si256(c2[7617],simde_mm
256_xor_si256(c2[533],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[7000],simde_mm256_xor_si256(c2[1474],simde_mm256_xor_si256(c2[2706],simde_mm256_xor_si256(c2[3626],simde_mm256_xor_si256(c2[3934],simde_mm256_xor_si256(c2[4874],simde_mm256_xor_si256(c2[7337],simde_mm256_xor_si256(c2[1485],simde_mm256_xor_si256(c2[1793],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[2734],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[579],simde_mm256_xor_si256(c2[3361],simde_mm256_xor_si256(c2[8904],simde_mm256_xor_si256(c2[9833],simde_mm256_xor_si256(c2[286],simde_mm256_xor_si256(c2[6146],simde_mm256_xor_si256(c2[9229],simde_mm256_xor_si256(c2[9844],c2[297]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[308]=_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[7704],_mm256_xor_si256(c2[8322],_mm256_xor_si256(c2[6474],_mm256_xor_si256(c2[6162],_mm256_xor_si256(c2[8638],_mm256_xor_si256(c2[7104],_mm256_xor_si256(c2[9254],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[7729],_mm256_xor_si256(c2[3417],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[8065],_mm256_xor_si256(c2[8685],_mm256_xor_si256(c2[5292],_mm256_xor_si256(c2[6848],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[6860],_mm256_xor_si256(c2[1938],_mm256_xor_si256(c2[7785],_mm256_xor_si256(c2[2875],_mm256_xor_si256(c2[3489],_mm256_xor_si256(c2[8110],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[5352],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[8736],_mm256_xor_si256(c2[3822],_mm256_xor_si256(c2[1053],_mm256_xor_si256(c2[9063],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[7846],_mm256_xor_si256(c2[5686],_mm256_xor_si256(c2[4453],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[8168],_mm256_xor_si256(c2[2315],_mm256_xor_si256(c2[477],_mm256_xor_si256(c2[2943],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[7884],_mm256_xor_si256(c2[9734],_mm256_xor_si256(c2[8514],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[5434],_mm256_xor_si256(c2[8218],_mm256_xor_si256(c2[6678],_mm256_xor_si256(c2[7912],_mm256_xor_si256(c2[5770],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[3923],_mm256_xor_si256(c2[4556],_mm256_xor_si256(c2[5788],_mm256_xor_si256(c2[7016],_mm256_xor_si256(c2[7956],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[5816],_mm256_xor_si256(c2[3655],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[6443],_mm256_xor_si256(c2[2131],_mm256_xor_si256(c2[3361],_mm256_xor_si256(c2[9228],_mm256_xor_si256(c2[2456],c2[3379])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[308]=simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[7704],simde_mm256_xor_si256(c2[8322],simde_mm256_xor_si256(c2[6474],simde_mm256_xor_si256(c2[6162],simde_mm256_xor_si256(c2[8638],simde_mm256_xor_si256(c2[7104],simde_mm256_xor_si256(c2[9254],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[7729],simde_mm256_xor_si256(c2[3417],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[8065],simde_mm256_xor_si256(c2[8685],simde_mm256_xor_si256(c2[5292],simde_mm256_xor_si256(c2[6848],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[6860],simde_mm256_xor_si256(c2[1938],simde_mm256_xor_si256(c2[7785],simde_mm256_xor_si256(c2[2875],simde_mm256_xor_si256(c2[3489],simde_mm256_xor_si256(c2[8110],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[5352],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[8736],simde_mm256_xor_si256(c2[3822],simde_mm256_xor_si256(c2[1053],simde_mm256_xor_si256(c2[9063],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[7846],simde_mm256_xor_si256(c2[5686],simde_mm256_xor_si256(c2[4453],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[8168],simde_mm256_xor_si256(c2[2315],simde_mm256_xor_si256(c2[477],simde_mm256_xor_si256(c2[2943],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[7884],simde_mm256_xor_si256(c2[9734],simde_mm256_xor_si256(c2[8514],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[5434],simde_mm256_xor_si256(c2[8218],simde_mm256_xor_si256(c2[6678],simde_mm256_xor_si256(c2[7912],simde_mm256_xor_si256(c2[5770],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[3923],simde_mm256_xor_si256(c2[4556],simde_mm256_xor_si256(c2[5788],simde_mm256_xor_si256(c2[7016],simde_mm256_xor_si256(c2[7956],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[5816],simde_mm256_xor_si256(c2[3655],simde_mm256_xor_si256(c2[3654],simde_mm256
_xor_si256(c2[6443],simde_mm256_xor_si256(c2[2131],simde_mm256_xor_si256(c2[3361],simde_mm256_xor_si256(c2[9228],simde_mm256_xor_si256(c2[2456],c2[3379])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[315]=_mm256_xor_si256(c2[7411],_mm256_xor_si256(c2[7787],c2[9072]));
+     d2[315]=simde_mm256_xor_si256(c2[7411],simde_mm256_xor_si256(c2[7787],c2[9072]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc256_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc256_byte.c
index 30c446cdbfd7fca8065707663f6814ce3f2b9554..90448aa121c0022cb57084fec2f3e7d72b4bf893 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc256_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc256_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc256_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[9159],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[3523],_mm256_xor_si256(c2[8803],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[5299],_mm256_xor_si256(c2[8818],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[5319],_mm256_xor_si256(c2[8837],_mm256_xor_si256(c2[10964],_mm256_xor_si256(c2[7443],_mm256_xor_si256(c2[7090],_mm256_xor_si256(c2[9923],_mm256_xor_si256(c2[10977],_mm256_xor_si256(c2[7104],_mm256_xor_si256(c2[1491],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[7475],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[10306],_mm256_xor_si256(c2[7844],_mm256_xor_si256(c2[10678],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[2947],_mm256_xor_si256(c2[6117],_mm256_xor_si256(c2[6823],_mm256_xor_si256(c2[9649],_mm256_xor_si256(c2[4725],_mm256_xor_si256(c2[11058],_mm256_xor_si256(c2[1927],_mm256_xor_si256(c2[2625],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[10038],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[11109],_mm256_xor_si256(c2[2307],_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[9718],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[6918],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1652],_mm256_xor_si256(c2[599],_mm256_xor_si256(c2[8352],_mm256_xor_si256(c2[5188],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[9780],_mm256_xor_si256(c2[7671],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[10853],_mm256_xor_si256(c2[4870],_mm256_xor_si256(c2[7335],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[11223],_mm256_xor_si256(c2[9815],_mm256_xor_si256(c2[5607],_mm256_xor_si256(c2[7715],_mm256_xor_si256(c2[5956],_mm256_xor_si256(c2[11248],_mm256_xor_si256(c2[10192],c2[4560]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[9159],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[3523],simde_mm256_xor_si256(c2[8803],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[5299],simde_mm256_xor_si256(c2[8818],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[5319],simde_mm256_xor_si256(c2[8837],simde_mm256_xor_si256(c2[10964],simde_mm256_xor_si256(c2[7443],simde_mm256_xor_si256(c2[7090],simde_mm256_xor_si256(c2[9923],simde_mm256_xor_si256(c2[10977],simde_mm256_xor_si256(c2[7104],simde_mm256_xor_si256(c2[1491],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[7475],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[10306],simde_mm256_xor_si256(c2[7844],simde_mm256_xor_si256(c2[10678],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[2947],simde_mm256_xor_si256(c2[6117],simde_mm256_xor_si256(c2[6823],simde_mm256_xor_si256(c2[9649],simde_mm256_xor_si256(c2[4725],simde_mm256_xor_si256(c2[11058],simde_mm256_xor_si256(c2[1927],simde_mm256_xor_si256(c2[2625],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[10038],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[11109],simde_mm256_xor_si256(c2[2307],simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[9718],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[6918],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1652],simde_mm256_xor_si256(c2[599],simde_mm256_xor_si256(c2[8352],simde_mm256_xor_si256(c2[5188],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[9780],simde_mm256_xor_si256(c2[7671],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[10853],simde_mm256_xor_si256(c2[4870],simde_mm256_xor_si256(c2[7335],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[11223],simde_mm256_xor_si256(c2[9815],simde_mm256_xor_si256(c2[5607],simde_mm256_xor_si256(c2[7715],simde_mm256_xor_si256(c2[5956]
,simde_mm256_xor_si256(c2[11248],simde_mm256_xor_si256(c2[10192],c2[4560]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[8]=_mm256_xor_si256(c2[9159],_mm256_xor_si256(c2[9511],_mm256_xor_si256(c2[1056],_mm256_xor_si256(c2[3875],_mm256_xor_si256(c2[9155],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[5651],_mm256_xor_si256(c2[9170],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[5671],_mm256_xor_si256(c2[9189],_mm256_xor_si256(c2[10964],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[7795],_mm256_xor_si256(c2[7442],_mm256_xor_si256(c2[10275],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[7456],_mm256_xor_si256(c2[1491],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[7827],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[3968],_mm256_xor_si256(c2[10658],_mm256_xor_si256(c2[8196],_mm256_xor_si256(c2[11030],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[3299],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[7175],_mm256_xor_si256(c2[9649],_mm256_xor_si256(c2[10001],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[1927],_mm256_xor_si256(c2[2279],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[3332],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[10390],_mm256_xor_si256(c2[8274],_mm256_xor_si256(c2[11109],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[2659],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[3728],_mm256_xor_si256(c2[5492],_mm256_xor_si256(c2[10070],_mm256_xor_si256(c2[5155],_mm256_xor_si256(c2[935],_mm256_xor_si256(c2[7270],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1654],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[8352],_mm256_xor_si256(c2[8704],_mm256_xor_si256(c2[5540],_mm256_xor_si256(c2[6247],_mm256_xor_si256(c2[10132],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[10853],_mm256_xor_si256(c2[11205],_mm256_xor_si256(c2[5222],_mm256_xor_si256(c2[7687],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[10167],_mm256_xor
_si256(c2[5607],_mm256_xor_si256(c2[5959],_mm256_xor_si256(c2[8067],_mm256_xor_si256(c2[6308],_mm256_xor_si256(c2[11248],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[10544],c2[4912])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[8]=simde_mm256_xor_si256(c2[9159],simde_mm256_xor_si256(c2[9511],simde_mm256_xor_si256(c2[1056],simde_mm256_xor_si256(c2[3875],simde_mm256_xor_si256(c2[9155],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[5651],simde_mm256_xor_si256(c2[9170],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[5671],simde_mm256_xor_si256(c2[9189],simde_mm256_xor_si256(c2[10964],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[7795],simde_mm256_xor_si256(c2[7442],simde_mm256_xor_si256(c2[10275],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[7456],simde_mm256_xor_si256(c2[1491],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[7827],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[3968],simde_mm256_xor_si256(c2[10658],simde_mm256_xor_si256(c2[8196],simde_mm256_xor_si256(c2[11030],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[3299],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[7175],simde_mm256_xor_si256(c2[9649],simde_mm256_xor_si256(c2[10001],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[1927],simde_mm256_xor_si256(c2[2279],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[3332],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[10390],simde_mm256_xor_si256(c2[8274],simde_mm256_xor_si256(c2[11109],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[2659],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[3728],simde_mm256_xor_si256(c2[5492],simde_mm256_xor_si256(c2[10070],simde_mm256_xor_si256(c2[5155],simde_mm256_xor_si256(c2[935],simde_mm256_xor_si256(c2[7270],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1654],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[8352],simde_mm256_xor_si256(c2[8704],simde_mm256_xor_si256(c2[5540],simde
_mm256_xor_si256(c2[6247],simde_mm256_xor_si256(c2[10132],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[10853],simde_mm256_xor_si256(c2[11205],simde_mm256_xor_si256(c2[5222],simde_mm256_xor_si256(c2[7687],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[10167],simde_mm256_xor_si256(c2[5607],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[8067],simde_mm256_xor_si256(c2[6308],simde_mm256_xor_si256(c2[11248],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[10544],c2[4912])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[16]=_mm256_xor_si256(c2[9511],_mm256_xor_si256(c2[1056],_mm256_xor_si256(c2[3523],_mm256_xor_si256(c2[3875],_mm256_xor_si256(c2[8803],_mm256_xor_si256(c2[9155],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[5299],_mm256_xor_si256(c2[5651],_mm256_xor_si256(c2[8818],_mm256_xor_si256(c2[9170],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[5671],_mm256_xor_si256(c2[8837],_mm256_xor_si256(c2[9189],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[7795],_mm256_xor_si256(c2[7090],_mm256_xor_si256(c2[7442],_mm256_xor_si256(c2[10275],_mm256_xor_si256(c2[10977],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[7104],_mm256_xor_si256(c2[7456],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[7475],_mm256_xor_si256(c2[7827],_mm256_xor_si256(c2[3968],_mm256_xor_si256(c2[10306],_mm256_xor_si256(c2[10658],_mm256_xor_si256(c2[7844],_mm256_xor_si256(c2[8196],_mm256_xor_si256(c2[11030],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[3299],_mm256_xor_si256(c2[6117],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[6823],_mm256_xor_si256(c2[7175],_mm256_xor_si256(c2[10001],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[11058],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[2279],_mm256_xor_si256(c2[2625],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[3332],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[10390],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[8274],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[2659],_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[3728],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[5492],_mm256_xor_si256(c2[9718],_mm256_xor_si256(c2[10070],_mm256_xor_si256(c2[5155],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[935],_mm256_xor_si256(c2[6918],_mm256_xor_si256(c2[7270],_mm256_xor_si256(c2[1654],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[599],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[8704],_mm256_xor_si256(c2[5540],_mm256_xor_s
i256(c2[5895],_mm256_xor_si256(c2[6247],_mm256_xor_si256(c2[10132],_mm256_xor_si256(c2[7671],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[11205],_mm256_xor_si256(c2[4870],_mm256_xor_si256(c2[5222],_mm256_xor_si256(c2[7335],_mm256_xor_si256(c2[7687],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[9815],_mm256_xor_si256(c2[10167],_mm256_xor_si256(c2[5959],_mm256_xor_si256(c2[7715],_mm256_xor_si256(c2[8067],_mm256_xor_si256(c2[5956],_mm256_xor_si256(c2[6308],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[10544],_mm256_xor_si256(c2[4560],c2[4912]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[16]=simde_mm256_xor_si256(c2[9511],simde_mm256_xor_si256(c2[1056],simde_mm256_xor_si256(c2[3523],simde_mm256_xor_si256(c2[3875],simde_mm256_xor_si256(c2[8803],simde_mm256_xor_si256(c2[9155],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[5299],simde_mm256_xor_si256(c2[5651],simde_mm256_xor_si256(c2[8818],simde_mm256_xor_si256(c2[9170],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[5671],simde_mm256_xor_si256(c2[8837],simde_mm256_xor_si256(c2[9189],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[7795],simde_mm256_xor_si256(c2[7090],simde_mm256_xor_si256(c2[7442],simde_mm256_xor_si256(c2[10275],simde_mm256_xor_si256(c2[10977],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[7104],simde_mm256_xor_si256(c2[7456],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[7475],simde_mm256_xor_si256(c2[7827],simde_mm256_xor_si256(c2[3968],simde_mm256_xor_si256(c2[10306],simde_mm256_xor_si256(c2[10658],simde_mm256_xor_si256(c2[7844],simde_mm256_xor_si256(c2[8196],simde_mm256_xor_si256(c2[11030],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[3299],simde_mm256_xor_si256(c2[6117],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[6823],simde_mm256_xor_si256(c2[7175],simde_mm256_xor_si256(c2[10001],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[11058],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[2279],simde_mm256_xor_si256(c2[2625],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[3332],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[10390],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[8274],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[2659],simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[3728],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[5492],simde_mm256_xor_si256(c2[9718],s
imde_mm256_xor_si256(c2[10070],simde_mm256_xor_si256(c2[5155],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[935],simde_mm256_xor_si256(c2[6918],simde_mm256_xor_si256(c2[7270],simde_mm256_xor_si256(c2[1654],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[599],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[8704],simde_mm256_xor_si256(c2[5540],simde_mm256_xor_si256(c2[5895],simde_mm256_xor_si256(c2[6247],simde_mm256_xor_si256(c2[10132],simde_mm256_xor_si256(c2[7671],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[11205],simde_mm256_xor_si256(c2[4870],simde_mm256_xor_si256(c2[5222],simde_mm256_xor_si256(c2[7335],simde_mm256_xor_si256(c2[7687],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[9815],simde_mm256_xor_si256(c2[10167],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[7715],simde_mm256_xor_si256(c2[8067],simde_mm256_xor_si256(c2[5956],simde_mm256_xor_si256(c2[6308],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[10544],simde_mm256_xor_si256(c2[4560],c2[4912]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[24]=_mm256_xor_si256(c2[9511],_mm256_xor_si256(c2[1056],_mm256_xor_si256(c2[3875],_mm256_xor_si256(c2[8803],_mm256_xor_si256(c2[9155],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[5651],_mm256_xor_si256(c2[8818],_mm256_xor_si256(c2[9170],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[5671],_mm256_xor_si256(c2[9189],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[7795],_mm256_xor_si256(c2[7090],_mm256_xor_si256(c2[7442],_mm256_xor_si256(c2[10275],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[7104],_mm256_xor_si256(c2[7456],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[7827],_mm256_xor_si256(c2[3968],_mm256_xor_si256(c2[10658],_mm256_xor_si256(c2[7844],_mm256_xor_si256(c2[8196],_mm256_xor_si256(c2[11030],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[3299],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[6823],_mm256_xor_si256(c2[7175],_mm256_xor_si256(c2[10001],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[2279],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[3332],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[10390],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[8274],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[2659],_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[3728],_mm256_xor_si256(c2[5492],_mm256_xor_si256(c2[9718],_mm256_xor_si256(c2[10070],_mm256_xor_si256(c2[5155],_mm256_xor_si256(c2[935],_mm256_xor_si256(c2[6918],_mm256_xor_si256(c2[7270],_mm256_xor_si256(c2[1654],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[8704],_mm256_xor_si256(c2[5540],_mm256_xor_si256(c2[5895],_mm256_xor_si256(c2[6247],_mm256_xor_si256(c2[10132],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[11205],_mm256_xor_si256(c2[5222],_mm256_xor_si256(c2[7335],_mm256_xor_si256(c2[7687],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[10167],_mm256_xor_
si256(c2[5959],_mm256_xor_si256(c2[8067],_mm256_xor_si256(c2[5956],_mm256_xor_si256(c2[6308],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[10544],_mm256_xor_si256(c2[4560],c2[4912])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[9511],simde_mm256_xor_si256(c2[1056],simde_mm256_xor_si256(c2[3875],simde_mm256_xor_si256(c2[8803],simde_mm256_xor_si256(c2[9155],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[5651],simde_mm256_xor_si256(c2[8818],simde_mm256_xor_si256(c2[9170],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[5671],simde_mm256_xor_si256(c2[9189],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[7795],simde_mm256_xor_si256(c2[7090],simde_mm256_xor_si256(c2[7442],simde_mm256_xor_si256(c2[10275],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[7104],simde_mm256_xor_si256(c2[7456],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[7827],simde_mm256_xor_si256(c2[3968],simde_mm256_xor_si256(c2[10658],simde_mm256_xor_si256(c2[7844],simde_mm256_xor_si256(c2[8196],simde_mm256_xor_si256(c2[11030],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[3299],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[6823],simde_mm256_xor_si256(c2[7175],simde_mm256_xor_si256(c2[10001],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[2279],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[3332],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[10390],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[8274],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[2659],simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[3728],simde_mm256_xor_si256(c2[5492],simde_mm256_xor_si256(c2[9718],simde_mm256_xor_si256(c2[10070],simde_mm256_xor_si256(c2[5155],simde_mm256_xor_si256(c2[935],simde_mm256_xor_si256(c2[6918],simde_mm256_xor_si256(c2[7270],simde_mm256_xor_si256(c2[1654],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[8704],simde_mm256_xor_si256(c2[5540],simde_mm256_xor_si256(c2[5895],simde
_mm256_xor_si256(c2[6247],simde_mm256_xor_si256(c2[10132],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[11205],simde_mm256_xor_si256(c2[5222],simde_mm256_xor_si256(c2[7335],simde_mm256_xor_si256(c2[7687],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[10167],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[8067],simde_mm256_xor_si256(c2[5956],simde_mm256_xor_si256(c2[6308],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[10544],simde_mm256_xor_si256(c2[4560],c2[4912])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[32]=_mm256_xor_si256(c2[10212],c2[2131]);
+     d2[32]=simde_mm256_xor_si256(c2[10212],c2[2131]);
 
 //row: 5
-     d2[40]=_mm256_xor_si256(c2[4579],_mm256_xor_si256(c2[7395],_mm256_xor_si256(c2[10214],_mm256_xor_si256(c2[4231],_mm256_xor_si256(c2[4582],_mm256_xor_si256(c2[8469],_mm256_xor_si256(c2[727],_mm256_xor_si256(c2[4246],_mm256_xor_si256(c2[4247],_mm256_xor_si256(c2[7426],_mm256_xor_si256(c2[739],_mm256_xor_si256(c2[4257],_mm256_xor_si256(c2[6384],_mm256_xor_si256(c2[2871],_mm256_xor_si256(c2[2518],_mm256_xor_si256(c2[758],_mm256_xor_si256(c2[5351],_mm256_xor_si256(c2[6405],_mm256_xor_si256(c2[2532],_mm256_xor_si256(c2[8182],_mm256_xor_si256(c2[9237],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[10307],_mm256_xor_si256(c2[5734],_mm256_xor_si256(c2[3264],_mm256_xor_si256(c2[6098],_mm256_xor_si256(c2[8562],_mm256_xor_si256(c2[7863],_mm256_xor_si256(c2[9638],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[6486],_mm256_xor_si256(c2[8610],_mm256_xor_si256(c2[9316],_mm256_xor_si256(c2[9671],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[5458],_mm256_xor_si256(c2[3350],_mm256_xor_si256(c2[6529],_mm256_xor_si256(c2[8998],_mm256_xor_si256(c2[3363],_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[10067],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[5138],_mm256_xor_si256(c2[231],_mm256_xor_si256(c2[7266],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[7985],_mm256_xor_si256(c2[8343],_mm256_xor_si256(c2[7282],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1315],_mm256_xor_si256(c2[10112],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[3091],_mm256_xor_si256(c2[11189],_mm256_xor_si256(c2[6273],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[8052],_mm256_xor_si256(c2[6643],_mm256_xor_si256(c2[5235],_mm256_xor_si256(c2[1027],_mm256_xor_si256(c2[3143],_mm256_xor_si256(c2[1376],_mm256_xor_si256(c2[6676],_mm256_xor_si256(c2[5620],_mm256_xor_si256(c2[11251],c2[9843]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[4579],simde_mm256_xor_si256(c2[7395],simde_mm256_xor_si256(c2[10214],simde_mm256_xor_si256(c2[4231],simde_mm256_xor_si256(c2[4582],simde_mm256_xor_si256(c2[8469],simde_mm256_xor_si256(c2[727],simde_mm256_xor_si256(c2[4246],simde_mm256_xor_si256(c2[4247],simde_mm256_xor_si256(c2[7426],simde_mm256_xor_si256(c2[739],simde_mm256_xor_si256(c2[4257],simde_mm256_xor_si256(c2[6384],simde_mm256_xor_si256(c2[2871],simde_mm256_xor_si256(c2[2518],simde_mm256_xor_si256(c2[758],simde_mm256_xor_si256(c2[5351],simde_mm256_xor_si256(c2[6405],simde_mm256_xor_si256(c2[2532],simde_mm256_xor_si256(c2[8182],simde_mm256_xor_si256(c2[9237],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[10307],simde_mm256_xor_si256(c2[5734],simde_mm256_xor_si256(c2[3264],simde_mm256_xor_si256(c2[6098],simde_mm256_xor_si256(c2[8562],simde_mm256_xor_si256(c2[7863],simde_mm256_xor_si256(c2[9638],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[6486],simde_mm256_xor_si256(c2[8610],simde_mm256_xor_si256(c2[9316],simde_mm256_xor_si256(c2[9671],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[5458],simde_mm256_xor_si256(c2[3350],simde_mm256_xor_si256(c2[6529],simde_mm256_xor_si256(c2[8998],simde_mm256_xor_si256(c2[3363],simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[10067],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[5138],simde_mm256_xor_si256(c2[231],simde_mm256_xor_si256(c2[7266],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[7985],simde_mm256_xor_si256(c2[8343],simde_mm256_xor_si256(c2[7282],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1315],simde_mm256_xor_si256(c2[10112],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[3091],simde_mm256_xor_si256(c2[11189],simde_mm256_xor_si256(c2[6273],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[8052],simde_m
m256_xor_si256(c2[6643],simde_mm256_xor_si256(c2[5235],simde_mm256_xor_si256(c2[1027],simde_mm256_xor_si256(c2[3143],simde_mm256_xor_si256(c2[1376],simde_mm256_xor_si256(c2[6676],simde_mm256_xor_si256(c2[5620],simde_mm256_xor_si256(c2[11251],c2[9843]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[48]=_mm256_xor_si256(c2[8101],_mm256_xor_si256(c2[7840],_mm256_xor_si256(c2[10016],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[7255],_mm256_xor_si256(c2[4144],_mm256_xor_si256(c2[10500],c2[7014])))))));
+     d2[48]=simde_mm256_xor_si256(c2[8101],simde_mm256_xor_si256(c2[7840],simde_mm256_xor_si256(c2[10016],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[7255],simde_mm256_xor_si256(c2[4144],simde_mm256_xor_si256(c2[10500],c2[7014])))))));
 
 //row: 7
-     d2[56]=_mm256_xor_si256(c2[9862],_mm256_xor_si256(c2[4241],_mm256_xor_si256(c2[10980],_mm256_xor_si256(c2[11024],_mm256_xor_si256(c2[2597],c2[3043])))));
+     d2[56]=simde_mm256_xor_si256(c2[9862],simde_mm256_xor_si256(c2[4241],simde_mm256_xor_si256(c2[10980],simde_mm256_xor_si256(c2[11024],simde_mm256_xor_si256(c2[2597],c2[3043])))));
 
 //row: 8
-     d2[64]=_mm256_xor_si256(c2[3879],_mm256_xor_si256(c2[7394],_mm256_xor_si256(c2[6695],_mm256_xor_si256(c2[10210],_mm256_xor_si256(c2[9506],_mm256_xor_si256(c2[1414],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[3523],_mm256_xor_si256(c2[6694],_mm256_xor_si256(c2[7046],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[19],_mm256_xor_si256(c2[3190],_mm256_xor_si256(c2[3542],_mm256_xor_si256(c2[3538],_mm256_xor_si256(c2[6709],_mm256_xor_si256(c2[7061],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[6726],_mm256_xor_si256(c2[10241],_mm256_xor_si256(c2[39],_mm256_xor_si256(c2[3554],_mm256_xor_si256(c2[3557],_mm256_xor_si256(c2[6720],_mm256_xor_si256(c2[7072],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[9207],_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[5686],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[4981],_mm256_xor_si256(c2[5333],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[4643],_mm256_xor_si256(c2[8166],_mm256_xor_si256(c2[5697],_mm256_xor_si256(c2[8868],_mm256_xor_si256(c2[9220],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[4995],_mm256_xor_si256(c2[5347],_mm256_xor_si256(c2[7474],_mm256_xor_si256(c2[10997],_mm256_xor_si256(c2[8529],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[2195],_mm256_xor_si256(c2[5366],_mm256_xor_si256(c2[5718],_mm256_xor_si256(c2[9607],_mm256_xor_si256(c2[1859],_mm256_xor_si256(c2[5026],_mm256_xor_si256(c2[8197],_mm256_xor_si256(c2[8549],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[5735],_mm256_xor_si256(c2[6087],_mm256_xor_si256(c2[5398],_mm256_xor_si256(c2[8913],_mm256_xor_si256(c2[7862],_mm256_xor_si256(c2[11025],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[7155],_mm256_xor_si256(c2[10326],_mm256_xor_si256(c2[10678],_mm256_xor_si256(c2[8930],_mm256_xor_si256(c2[1190],_mm256_xor_si256(c2[837],_mm256_xor_si256(c2[4000],_mm256_xor_si256(c2[4352],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[4706],_mm256_xor_si256(c2[5058],_mm256_xor_si256(c2[4369],_mm256_xor_si256(c2[7892],_mm256_xor_si2
56(c2[10708],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[5778],_mm256_xor_si256(c2[8949],_mm256_xor_si256(c2[9301],_mm256_xor_si256(c2[7910],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[8608],_mm256_xor_si256(c2[516],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[8963],_mm256_xor_si256(c2[871],_mm256_xor_si256(c2[1223],_mm256_xor_si256(c2[11090],_mm256_xor_si256(c2[3350],_mm256_xor_si256(c2[4758],_mm256_xor_si256(c2[8273],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[5813],_mm256_xor_si256(c2[6165],_mm256_xor_si256(c2[5829],_mm256_xor_si256(c2[9344],_mm256_xor_si256(c2[8290],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[5826],_mm256_xor_si256(c2[6178],_mm256_xor_si256(c2[6886],_mm256_xor_si256(c2[9367],_mm256_xor_si256(c2[1619],_mm256_xor_si256(c2[11123],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[3383],_mm256_xor_si256(c2[4438],_mm256_xor_si256(c2[7601],_mm256_xor_si256(c2[7953],_mm256_xor_si256(c2[10786],_mm256_xor_si256(c2[3046],_mm256_xor_si256(c2[6566],_mm256_xor_si256(c2[9729],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[5153],_mm256_xor_si256(c2[7285],_mm256_xor_si256(c2[10800],_mm256_xor_si256(c2[7635],_mm256_xor_si256(c2[11158],_mm256_xor_si256(c2[6582],_mm256_xor_si256(c2[9745],_mm256_xor_si256(c2[10097],_mm256_xor_si256(c2[3072],_mm256_xor_si256(c2[6595],_mm256_xor_si256(c2[11171],_mm256_xor_si256(c2[3431],_mm256_xor_si256(c2[615],_mm256_xor_si256(c2[3778],_mm256_xor_si256(c2[4130],_mm256_xor_si256(c2[2371],_mm256_xor_si256(c2[4500],_mm256_xor_si256(c2[8023],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[5554],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[10481],_mm256_xor_si256(c2[2389],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[5573],_mm256_xor_si256(c2[9088],_mm256_xor_si256(c2[10853],_mm256_xor_si256(c2[2753],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[5218],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[10867],_mm256
_xor_si256(c2[5943],_mm256_xor_si256(c2[9458],_mm256_xor_si256(c2[4535],_mm256_xor_si256(c2[7698],_mm256_xor_si256(c2[8050],_mm256_xor_si256(c2[1717],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[3842],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[5606],_mm256_xor_si256(c2[5958],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[3847],_mm256_xor_si256(c2[4199],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[9491],_mm256_xor_si256(c2[4912],_mm256_xor_si256(c2[8435],_mm256_xor_si256(c2[10551],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[2803],c2[4915]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[64]=simde_mm256_xor_si256(c2[3879],simde_mm256_xor_si256(c2[7394],simde_mm256_xor_si256(c2[6695],simde_mm256_xor_si256(c2[10210],simde_mm256_xor_si256(c2[9506],simde_mm256_xor_si256(c2[1414],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[3523],simde_mm256_xor_si256(c2[6694],simde_mm256_xor_si256(c2[7046],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[19],simde_mm256_xor_si256(c2[3190],simde_mm256_xor_si256(c2[3542],simde_mm256_xor_si256(c2[3538],simde_mm256_xor_si256(c2[6709],simde_mm256_xor_si256(c2[7061],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[6726],simde_mm256_xor_si256(c2[10241],simde_mm256_xor_si256(c2[39],simde_mm256_xor_si256(c2[3554],simde_mm256_xor_si256(c2[3557],simde_mm256_xor_si256(c2[6720],simde_mm256_xor_si256(c2[7072],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[9207],simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[5686],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[4981],simde_mm256_xor_si256(c2[5333],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[4643],simde_mm256_xor_si256(c2[8166],simde_mm256_xor_si256(c2[5697],simde_mm256_xor_si256(c2[8868],simde_mm256_xor_si256(c2[9220],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[4995],simde_mm256_xor_si256(c2[5347],simde_mm256_xor_si256(c2[7474],simde_mm256_xor_si256(c2[10997],simde_mm256_xor_si256(c2[8529],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[2195],simde_mm256_xor_si256(c2[5366],simde_mm256_xor_si256(c2[5718],simde_mm256_xor_si256(c2[9607],simde_mm256_xor_si256(c2[1859],simde_mm256_xor_si256(c2[5026],simde_mm256_xor_si256(c2[8197],simde_mm256_xor_si256(c2[8549],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[5735],simde_mm256_xor_si256(c2[6087],simde_mm256_xor_si256(c2[5398],simde_mm256_xor_si256(c2[8913],simde_mm256_xor_si256(c2[7862],simde_mm256_xor_si256(c2[11025],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[7155],simde_m
m256_xor_si256(c2[10326],simde_mm256_xor_si256(c2[10678],simde_mm256_xor_si256(c2[8930],simde_mm256_xor_si256(c2[1190],simde_mm256_xor_si256(c2[837],simde_mm256_xor_si256(c2[4000],simde_mm256_xor_si256(c2[4352],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[4706],simde_mm256_xor_si256(c2[5058],simde_mm256_xor_si256(c2[4369],simde_mm256_xor_si256(c2[7892],simde_mm256_xor_si256(c2[10708],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[5778],simde_mm256_xor_si256(c2[8949],simde_mm256_xor_si256(c2[9301],simde_mm256_xor_si256(c2[7910],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[8608],simde_mm256_xor_si256(c2[516],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[8963],simde_mm256_xor_si256(c2[871],simde_mm256_xor_si256(c2[1223],simde_mm256_xor_si256(c2[11090],simde_mm256_xor_si256(c2[3350],simde_mm256_xor_si256(c2[4758],simde_mm256_xor_si256(c2[8273],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[5813],simde_mm256_xor_si256(c2[6165],simde_mm256_xor_si256(c2[5829],simde_mm256_xor_si256(c2[9344],simde_mm256_xor_si256(c2[8290],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[5826],simde_mm256_xor_si256(c2[6178],simde_mm256_xor_si256(c2[6886],simde_mm256_xor_si256(c2[9367],simde_mm256_xor_si256(c2[1619],simde_mm256_xor_si256(c2[11123],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[3383],simde_mm256_xor_si256(c2[4438],simde_mm256_xor_si256(c2[7601],simde_mm256_xor_si256(c2[7953],simde_mm256_xor_si256(c2[10786],simde_mm256_xor_si256(c2[3046],simde_mm256_xor_si256(c2[6566],simde_mm256_xor_si256(c2[9729],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[5153],simde_mm256_xor_si256(c2[7285],simde_mm256_xor_si256(c2[10800],simde_mm256_xor_si256(c2[7635],simde_mm256_xor_si256(c2[11158],simde_mm256_xor_si256(c2[6582],simde_mm256_xor_si256(c2[9745],simde_mm256_xor_si256(c2[10097],simde_mm256_xor_si256(c2[3072],simde_mm256_xor_si2
56(c2[6595],simde_mm256_xor_si256(c2[11171],simde_mm256_xor_si256(c2[3431],simde_mm256_xor_si256(c2[615],simde_mm256_xor_si256(c2[3778],simde_mm256_xor_si256(c2[4130],simde_mm256_xor_si256(c2[2371],simde_mm256_xor_si256(c2[4500],simde_mm256_xor_si256(c2[8023],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[5554],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[10481],simde_mm256_xor_si256(c2[2389],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[5573],simde_mm256_xor_si256(c2[9088],simde_mm256_xor_si256(c2[10853],simde_mm256_xor_si256(c2[2753],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[5218],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[10867],simde_mm256_xor_si256(c2[5943],simde_mm256_xor_si256(c2[9458],simde_mm256_xor_si256(c2[4535],simde_mm256_xor_si256(c2[7698],simde_mm256_xor_si256(c2[8050],simde_mm256_xor_si256(c2[1717],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[3842],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[5606],simde_mm256_xor_si256(c2[5958],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[3847],simde_mm256_xor_si256(c2[4199],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[9491],simde_mm256_xor_si256(c2[4912],simde_mm256_xor_si256(c2[8435],simde_mm256_xor_si256(c2[10551],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[2803],c2[4915]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[72]=_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[7765],_mm256_xor_si256(c2[4739],_mm256_xor_si256(c2[7568],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[10497],c2[8774])))))));
+     d2[72]=simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[7765],simde_mm256_xor_si256(c2[4739],simde_mm256_xor_si256(c2[7568],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[10497],c2[8774])))))));
 
 //row: 10
-     d2[80]=_mm256_xor_si256(c2[723],_mm256_xor_si256(c2[7428],_mm256_xor_si256(c2[2533],_mm256_xor_si256(c2[117],_mm256_xor_si256(c2[6113],c2[9377])))));
+     d2[80]=simde_mm256_xor_si256(c2[723],simde_mm256_xor_si256(c2[7428],simde_mm256_xor_si256(c2[2533],simde_mm256_xor_si256(c2[117],simde_mm256_xor_si256(c2[6113],c2[9377])))));
 
 //row: 11
-     d2[88]=_mm256_xor_si256(c2[7751],_mm256_xor_si256(c2[5632],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[10567],_mm256_xor_si256(c2[8800],_mm256_xor_si256(c2[2115],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[7395],_mm256_xor_si256(c2[5636],_mm256_xor_si256(c2[4578],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[9522],_mm256_xor_si256(c2[9874],_mm256_xor_si256(c2[3891],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[7410],_mm256_xor_si256(c2[5651],_mm256_xor_si256(c2[3185],_mm256_xor_si256(c2[10598],_mm256_xor_si256(c2[8487],_mm256_xor_si256(c2[8839],_mm256_xor_si256(c2[3911],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[7429],_mm256_xor_si256(c2[5670],_mm256_xor_si256(c2[9556],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[7797],_mm256_xor_si256(c2[6035],_mm256_xor_si256(c2[4276],_mm256_xor_si256(c2[5682],_mm256_xor_si256(c2[3923],_mm256_xor_si256(c2[8515],_mm256_xor_si256(c2[6756],_mm256_xor_si256(c2[9569],_mm256_xor_si256(c2[7810],_mm256_xor_si256(c2[5696],_mm256_xor_si256(c2[3937],_mm256_xor_si256(c2[83],_mm256_xor_si256(c2[9235],_mm256_xor_si256(c2[9587],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[10642],_mm256_xor_si256(c2[6067],_mm256_xor_si256(c2[4308],_mm256_xor_si256(c2[2208],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[8898],_mm256_xor_si256(c2[7139],_mm256_xor_si256(c2[6436],_mm256_xor_si256(c2[4677],_mm256_xor_si256(c2[9270],_mm256_xor_si256(c2[7511],_mm256_xor_si256(c2[471],_mm256_xor_si256(c2[9975],_mm256_xor_si256(c2[11027],_mm256_xor_si256(c2[9268],_mm256_xor_si256(c2[1539],_mm256_xor_si256(c2[11043],_mm256_xor_si256(c2[4709],_mm256_xor_si256(c2[2950],_mm256_xor_si256(c2[5415],_mm256_xor_si256(c2[3648],_mm256_xor_si256(c2[8241],_mm256_xor_si256(c2[6130],_mm256_xor_si256(c2[6482],_mm256_xor_si256(c2[3317],_mm256_xor_si256(c2[1558],_mm256_xor_si256(c2[9650],_mm256_xor_si256(c2[7891],_mm256_xor_si256(c2[519],_mm256_xor_si256(c2[9671],_mm256_xor_si256(c2[10023],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[10721],_mm256_xor_si
256(c2[1572],_mm256_xor_si256(c2[11076],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[1588],_mm256_xor_si256(c2[1940],_mm256_xor_si256(c2[8630],_mm256_xor_si256(c2[6871],_mm256_xor_si256(c2[6514],_mm256_xor_si256(c2[4755],_mm256_xor_si256(c2[9701],_mm256_xor_si256(c2[7590],_mm256_xor_si256(c2[7942],_mm256_xor_si256(c2[899],_mm256_xor_si256(c2[10403],_mm256_xor_si256(c2[6535],_mm256_xor_si256(c2[4768],_mm256_xor_si256(c2[6882],_mm256_xor_si256(c2[1968],_mm256_xor_si256(c2[11120],_mm256_xor_si256(c2[209],_mm256_xor_si256(c2[3732],_mm256_xor_si256(c2[1973],_mm256_xor_si256(c2[8310],_mm256_xor_si256(c2[6551],_mm256_xor_si256(c2[3395],_mm256_xor_si256(c2[1636],_mm256_xor_si256(c2[10438],_mm256_xor_si256(c2[8679],_mm256_xor_si256(c2[5510],_mm256_xor_si256(c2[3751],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[9046],_mm256_xor_si256(c2[9398],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[9748],_mm256_xor_si256(c2[10454],_mm256_xor_si256(c2[8695],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[4833],_mm256_xor_si256(c2[5185],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[4487],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[8005],_mm256_xor_si256(c2[8372],_mm256_xor_si256(c2[6613],_mm256_xor_si256(c2[6263],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[3090],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[9445],_mm256_xor_si256(c2[7334],_mm256_xor_si256(c2[7686],_mm256_xor_si256(c2[3462],_mm256_xor_si256(c2[1703],_mm256_xor_si256(c2[5927],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[11216],_mm256_xor_si256(c2[9105],_mm256_xor_si256(c2[9457],_mm256_xor_si256(c2[9815],_mm256_xor_si256(c2[8048],_mm256_xor_si256(c2[8407],_mm256_xor_si256(c2[6640],_mm256_xor_si256(c2[4199],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[6307],_mm256_xor_si256(c2[4548],_mm256_xor_si256(c2[4548],_mm256_xor_si256(c2[2789],_mm256_xor_si256(c2[9840],_mm256_xor_si256(c2[7729],_mm256_xor_si256(c2[8081],_mm256_xor_si256(c2[8784],_mm256_xor_si256(c2[7025],_mm256_
xor_si256(c2[3152],_mm256_xor_si256(c2[1393],c2[5266])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[88]=simde_mm256_xor_si256(c2[7751],simde_mm256_xor_si256(c2[5632],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[10567],simde_mm256_xor_si256(c2[8800],simde_mm256_xor_si256(c2[2115],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[7395],simde_mm256_xor_si256(c2[5636],simde_mm256_xor_si256(c2[4578],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[9522],simde_mm256_xor_si256(c2[9874],simde_mm256_xor_si256(c2[3891],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[7410],simde_mm256_xor_si256(c2[5651],simde_mm256_xor_si256(c2[3185],simde_mm256_xor_si256(c2[10598],simde_mm256_xor_si256(c2[8487],simde_mm256_xor_si256(c2[8839],simde_mm256_xor_si256(c2[3911],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[7429],simde_mm256_xor_si256(c2[5670],simde_mm256_xor_si256(c2[9556],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[7797],simde_mm256_xor_si256(c2[6035],simde_mm256_xor_si256(c2[4276],simde_mm256_xor_si256(c2[5682],simde_mm256_xor_si256(c2[3923],simde_mm256_xor_si256(c2[8515],simde_mm256_xor_si256(c2[6756],simde_mm256_xor_si256(c2[9569],simde_mm256_xor_si256(c2[7810],simde_mm256_xor_si256(c2[5696],simde_mm256_xor_si256(c2[3937],simde_mm256_xor_si256(c2[83],simde_mm256_xor_si256(c2[9235],simde_mm256_xor_si256(c2[9587],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[10642],simde_mm256_xor_si256(c2[6067],simde_mm256_xor_si256(c2[4308],simde_mm256_xor_si256(c2[2208],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[8898],simde_mm256_xor_si256(c2[7139],simde_mm256_xor_si256(c2[6436],simde_mm256_xor_si256(c2[4677],simde_mm256_xor_si256(c2[9270],simde_mm256_xor_si256(c2[7511],simde_mm256_xor_si256(c2[471],simde_mm256_xor_si256(c2[9975],simde_mm256_xor_si256(c2[11027],simde_mm256_xor_si256(c2[9268],simde_mm256_xor_si256(c2[1539],simde_mm256_xor_si256(c2[11043],simde_mm256_xor_si256(c2[4709],simde_mm256_xor_si256(c2[2950],simde_mm256_xor_si256(c2[5415],simde_mm256_xor_si256(c2[3648],simde_
mm256_xor_si256(c2[8241],simde_mm256_xor_si256(c2[6130],simde_mm256_xor_si256(c2[6482],simde_mm256_xor_si256(c2[3317],simde_mm256_xor_si256(c2[1558],simde_mm256_xor_si256(c2[9650],simde_mm256_xor_si256(c2[7891],simde_mm256_xor_si256(c2[519],simde_mm256_xor_si256(c2[9671],simde_mm256_xor_si256(c2[10023],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[10721],simde_mm256_xor_si256(c2[1572],simde_mm256_xor_si256(c2[11076],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[1588],simde_mm256_xor_si256(c2[1940],simde_mm256_xor_si256(c2[8630],simde_mm256_xor_si256(c2[6871],simde_mm256_xor_si256(c2[6514],simde_mm256_xor_si256(c2[4755],simde_mm256_xor_si256(c2[9701],simde_mm256_xor_si256(c2[7590],simde_mm256_xor_si256(c2[7942],simde_mm256_xor_si256(c2[899],simde_mm256_xor_si256(c2[10403],simde_mm256_xor_si256(c2[6535],simde_mm256_xor_si256(c2[4768],simde_mm256_xor_si256(c2[6882],simde_mm256_xor_si256(c2[1968],simde_mm256_xor_si256(c2[11120],simde_mm256_xor_si256(c2[209],simde_mm256_xor_si256(c2[3732],simde_mm256_xor_si256(c2[1973],simde_mm256_xor_si256(c2[8310],simde_mm256_xor_si256(c2[6551],simde_mm256_xor_si256(c2[3395],simde_mm256_xor_si256(c2[1636],simde_mm256_xor_si256(c2[10438],simde_mm256_xor_si256(c2[8679],simde_mm256_xor_si256(c2[5510],simde_mm256_xor_si256(c2[3751],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[9046],simde_mm256_xor_si256(c2[9398],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[9748],simde_mm256_xor_si256(c2[10454],simde_mm256_xor_si256(c2[8695],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[4833],simde_mm256_xor_si256(c2[5185],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[4487],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[8005],simde_mm256_xor_si256(c2[8372],simde_mm256_xor_si256(c2[6613],simde_mm256_xor_si256(c2[6263],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[3090],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[9445],simde_mm256_xor_si
256(c2[7334],simde_mm256_xor_si256(c2[7686],simde_mm256_xor_si256(c2[3462],simde_mm256_xor_si256(c2[1703],simde_mm256_xor_si256(c2[5927],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[11216],simde_mm256_xor_si256(c2[9105],simde_mm256_xor_si256(c2[9457],simde_mm256_xor_si256(c2[9815],simde_mm256_xor_si256(c2[8048],simde_mm256_xor_si256(c2[8407],simde_mm256_xor_si256(c2[6640],simde_mm256_xor_si256(c2[4199],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[6307],simde_mm256_xor_si256(c2[4548],simde_mm256_xor_si256(c2[4548],simde_mm256_xor_si256(c2[2789],simde_mm256_xor_si256(c2[9840],simde_mm256_xor_si256(c2[7729],simde_mm256_xor_si256(c2[8081],simde_mm256_xor_si256(c2[8784],simde_mm256_xor_si256(c2[7025],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[1393],c2[5266])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[96]=_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[3537],_mm256_xor_si256(c2[7552],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[3735],c2[2752])))));
+     d2[96]=simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[3537],simde_mm256_xor_si256(c2[7552],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[3735],c2[2752])))));
 
 //row: 13
-     d2[104]=_mm256_xor_si256(c2[8449],_mm256_xor_si256(c2[8801],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[3173],_mm256_xor_si256(c2[8453],_mm256_xor_si256(c2[5989],_mm256_xor_si256(c2[1076],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[4949],_mm256_xor_si256(c2[8468],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[385],_mm256_xor_si256(c2[4961],_mm256_xor_si256(c2[8487],_mm256_xor_si256(c2[10262],_mm256_xor_si256(c2[10614],_mm256_xor_si256(c2[7093],_mm256_xor_si256(c2[6740],_mm256_xor_si256(c2[8503],_mm256_xor_si256(c2[9573],_mm256_xor_si256(c2[10627],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[1141],_mm256_xor_si256(c2[2196],_mm256_xor_si256(c2[7125],_mm256_xor_si256(c2[2914],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[9956],_mm256_xor_si256(c2[7494],_mm256_xor_si256(c2[10320],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[822],_mm256_xor_si256(c2[8212],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[5767],_mm256_xor_si256(c2[6465],_mm256_xor_si256(c2[8947],_mm256_xor_si256(c2[9299],_mm256_xor_si256(c2[4375],_mm256_xor_si256(c2[10708],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[2630],_mm256_xor_si256(c2[4405],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[9680],_mm256_xor_si256(c2[7572],_mm256_xor_si256(c2[10407],_mm256_xor_si256(c2[10759],_mm256_xor_si256(c2[1957],_mm256_xor_si256(c2[7585],_mm256_xor_si256(c2[2674],_mm256_xor_si256(c2[3026],_mm256_xor_si256(c2[4790],_mm256_xor_si256(c2[9360],_mm256_xor_si256(c2[4453],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[6560],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[7650],_mm256_xor_si256(c2[8002],_mm256_xor_si256(c2[4838],_mm256_xor_si256(c2[5537],_mm256_xor_si256(c2[9430],_mm256_xor_si256(c2[7313],_mm256_xor_si256(c2[4148],_mm256_xor_si256(c2[10151],_mm256_xor_si256(c2[10503],_mm256_xor_si256(c2[4512],_mm256_xor_si256(c2[6977],_mm256_xor_si256(c2[659],_mm256_xor_s
i256(c2[1011],_mm256_xor_si256(c2[10865],_mm256_xor_si256(c2[9457],_mm256_xor_si256(c2[4897],_mm256_xor_si256(c2[5249],_mm256_xor_si256(c2[7365],_mm256_xor_si256(c2[5606],_mm256_xor_si256(c2[9125],_mm256_xor_si256(c2[10546],_mm256_xor_si256(c2[10898],_mm256_xor_si256(c2[9842],c2[4210])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[104]=simde_mm256_xor_si256(c2[8449],simde_mm256_xor_si256(c2[8801],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[3173],simde_mm256_xor_si256(c2[8453],simde_mm256_xor_si256(c2[5989],simde_mm256_xor_si256(c2[1076],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si256(c2[4949],simde_mm256_xor_si256(c2[8468],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[385],simde_mm256_xor_si256(c2[4961],simde_mm256_xor_si256(c2[8487],simde_mm256_xor_si256(c2[10262],simde_mm256_xor_si256(c2[10614],simde_mm256_xor_si256(c2[7093],simde_mm256_xor_si256(c2[6740],simde_mm256_xor_si256(c2[8503],simde_mm256_xor_si256(c2[9573],simde_mm256_xor_si256(c2[10627],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[1141],simde_mm256_xor_si256(c2[2196],simde_mm256_xor_si256(c2[7125],simde_mm256_xor_si256(c2[2914],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[9956],simde_mm256_xor_si256(c2[7494],simde_mm256_xor_si256(c2[10320],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[822],simde_mm256_xor_si256(c2[8212],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[5767],simde_mm256_xor_si256(c2[6465],simde_mm256_xor_si256(c2[8947],simde_mm256_xor_si256(c2[9299],simde_mm256_xor_si256(c2[4375],simde_mm256_xor_si256(c2[10708],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[2630],simde_mm256_xor_si256(c2[4405],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[9680],simde_mm256_xor_si256(c2[7572],simde_mm256_xor_si256(c2[10407],simde_mm256_xor_si256(c2[10759],simde_mm256_xor_si256(c2[1957],simde_mm256_xor_si256(c2[7585],simde_mm256_xor_si256(c2[2674],simde_mm256_xor_si256(c2[3026],simde_mm256_xor_si256(c2[4790],simde_mm256_xor_si256(c2[9360],simde_mm256_xor_si256(c2[4453],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[6560],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[241],simde
_mm256_xor_si256(c2[7650],simde_mm256_xor_si256(c2[8002],simde_mm256_xor_si256(c2[4838],simde_mm256_xor_si256(c2[5537],simde_mm256_xor_si256(c2[9430],simde_mm256_xor_si256(c2[7313],simde_mm256_xor_si256(c2[4148],simde_mm256_xor_si256(c2[10151],simde_mm256_xor_si256(c2[10503],simde_mm256_xor_si256(c2[4512],simde_mm256_xor_si256(c2[6977],simde_mm256_xor_si256(c2[659],simde_mm256_xor_si256(c2[1011],simde_mm256_xor_si256(c2[10865],simde_mm256_xor_si256(c2[9457],simde_mm256_xor_si256(c2[4897],simde_mm256_xor_si256(c2[5249],simde_mm256_xor_si256(c2[7365],simde_mm256_xor_si256(c2[5606],simde_mm256_xor_si256(c2[9125],simde_mm256_xor_si256(c2[10546],simde_mm256_xor_si256(c2[10898],simde_mm256_xor_si256(c2[9842],c2[4210])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[112]=_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[8289],_mm256_xor_si256(c2[5174],_mm256_xor_si256(c2[11171],_mm256_xor_si256(c2[5904],c2[2103])))));
+     d2[112]=simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[8289],simde_mm256_xor_si256(c2[5174],simde_mm256_xor_si256(c2[11171],simde_mm256_xor_si256(c2[5904],c2[2103])))));
 
 //row: 15
-     d2[120]=_mm256_xor_si256(c2[4933],_mm256_xor_si256(c2[7749],_mm256_xor_si256(c2[10560],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[4577],_mm256_xor_si256(c2[2817],_mm256_xor_si256(c2[8823],_mm256_xor_si256(c2[1073],_mm256_xor_si256(c2[4240],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[19],_mm256_xor_si256(c2[7780],_mm256_xor_si256(c2[1093],_mm256_xor_si256(c2[4611],_mm256_xor_si256(c2[6738],_mm256_xor_si256(c2[3217],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[5697],_mm256_xor_si256(c2[6759],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[8528],_mm256_xor_si256(c2[9591],_mm256_xor_si256(c2[3249],_mm256_xor_si256(c2[10661],_mm256_xor_si256(c2[6080],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[3618],_mm256_xor_si256(c2[6452],_mm256_xor_si256(c2[8916],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[8209],_mm256_xor_si256(c2[9984],_mm256_xor_si256(c2[1891],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[5431],_mm256_xor_si256(c2[499],_mm256_xor_si256(c2[6832],_mm256_xor_si256(c2[8964],_mm256_xor_si256(c2[9670],_mm256_xor_si256(c2[9665],_mm256_xor_si256(c2[10017],_mm256_xor_si256(c2[514],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[5812],_mm256_xor_si256(c2[3344],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[6883],_mm256_xor_si256(c2[9344],_mm256_xor_si256(c2[3365],_mm256_xor_si256(c2[3717],_mm256_xor_si256(c2[10421],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[5492],_mm256_xor_si256(c2[11121],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[7620],_mm256_xor_si256(c2[2340],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[8689],_mm256_xor_si256(c2[7636],_mm256_xor_si256(c2[4134],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1317],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[5554],_mm256_xor_si256(c2[3445],_mm256_xor_si256(c2[11191],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[6627],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[2757],_mm256_xor_si2
56(c2[3109],_mm256_xor_si256(c2[4162],_mm256_xor_si256(c2[8406],_mm256_xor_si256(c2[6997],_mm256_xor_si256(c2[5589],_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[3489],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[7030],_mm256_xor_si256(c2[5974],_mm256_xor_si256(c2[11253],c2[342]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[4933],simde_mm256_xor_si256(c2[7749],simde_mm256_xor_si256(c2[10560],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[4577],simde_mm256_xor_si256(c2[2817],simde_mm256_xor_si256(c2[8823],simde_mm256_xor_si256(c2[1073],simde_mm256_xor_si256(c2[4240],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[19],simde_mm256_xor_si256(c2[7780],simde_mm256_xor_si256(c2[1093],simde_mm256_xor_si256(c2[4611],simde_mm256_xor_si256(c2[6738],simde_mm256_xor_si256(c2[3217],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[5697],simde_mm256_xor_si256(c2[6759],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[8528],simde_mm256_xor_si256(c2[9591],simde_mm256_xor_si256(c2[3249],simde_mm256_xor_si256(c2[10661],simde_mm256_xor_si256(c2[6080],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[3618],simde_mm256_xor_si256(c2[6452],simde_mm256_xor_si256(c2[8916],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[8209],simde_mm256_xor_si256(c2[9984],simde_mm256_xor_si256(c2[1891],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[5431],simde_mm256_xor_si256(c2[499],simde_mm256_xor_si256(c2[6832],simde_mm256_xor_si256(c2[8964],simde_mm256_xor_si256(c2[9670],simde_mm256_xor_si256(c2[9665],simde_mm256_xor_si256(c2[10017],simde_mm256_xor_si256(c2[514],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[5812],simde_mm256_xor_si256(c2[3344],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[6883],simde_mm256_xor_si256(c2[9344],simde_mm256_xor_si256(c2[3365],simde_mm256_xor_si256(c2[3717],simde_mm256_xor_si256(c2[10421],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[5492],simde_mm256_xor_si256(c2[11121],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[7620],simde_mm256_xor_si256(c2[2340],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[8689],simd
e_mm256_xor_si256(c2[7636],simde_mm256_xor_si256(c2[4134],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1317],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[5554],simde_mm256_xor_si256(c2[3445],simde_mm256_xor_si256(c2[11191],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[6627],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[2757],simde_mm256_xor_si256(c2[3109],simde_mm256_xor_si256(c2[4162],simde_mm256_xor_si256(c2[8406],simde_mm256_xor_si256(c2[6997],simde_mm256_xor_si256(c2[5589],simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[3489],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[7030],simde_mm256_xor_si256(c2[5974],simde_mm256_xor_si256(c2[11253],c2[342]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[128]=_mm256_xor_si256(c2[7044],_mm256_xor_si256(c2[9860],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[10934],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[6711],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[9891],_mm256_xor_si256(c2[3204],_mm256_xor_si256(c2[6722],_mm256_xor_si256(c2[8849],_mm256_xor_si256(c2[5328],_mm256_xor_si256(c2[4983],_mm256_xor_si256(c2[6033],_mm256_xor_si256(c2[7808],_mm256_xor_si256(c2[8870],_mm256_xor_si256(c2[4997],_mm256_xor_si256(c2[10647],_mm256_xor_si256(c2[439],_mm256_xor_si256(c2[5360],_mm256_xor_si256(c2[1509],_mm256_xor_si256(c2[8199],_mm256_xor_si256(c2[5729],_mm256_xor_si256(c2[8563],_mm256_xor_si256(c2[11027],_mm256_xor_si256(c2[10320],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[4002],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[7542],_mm256_xor_si256(c2[2610],_mm256_xor_si256(c2[8951],_mm256_xor_si256(c2[11075],_mm256_xor_si256(c2[518],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[2992],_mm256_xor_si256(c2[7923],_mm256_xor_si256(c2[5815],_mm256_xor_si256(c2[6161],_mm256_xor_si256(c2[8994],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[5828],_mm256_xor_si256(c2[1269],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[7603],_mm256_xor_si256(c2[2688],_mm256_xor_si256(c2[9731],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[10450],_mm256_xor_si256(c2[10800],_mm256_xor_si256(c2[9747],_mm256_xor_si256(c2[6245],_mm256_xor_si256(c2[3073],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[7665],_mm256_xor_si256(c2[5556],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[8738],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[5220],_mm256_xor_si256(c2[10517],_mm256_xor_si256(c2[9108],_mm256_xor_si256(c2[7700],_mm256_xor_si256(c2[3492],_mm256_xor_si256(c2[5600],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[7009],_mm256_xor_si256(c2[9141],_mm256_xor_si256(c2[8085],c2[2453]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[128]=simde_mm256_xor_si256(c2[7044],simde_mm256_xor_si256(c2[9860],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[10934],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[6711],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[9891],simde_mm256_xor_si256(c2[3204],simde_mm256_xor_si256(c2[6722],simde_mm256_xor_si256(c2[8849],simde_mm256_xor_si256(c2[5328],simde_mm256_xor_si256(c2[4983],simde_mm256_xor_si256(c2[6033],simde_mm256_xor_si256(c2[7808],simde_mm256_xor_si256(c2[8870],simde_mm256_xor_si256(c2[4997],simde_mm256_xor_si256(c2[10647],simde_mm256_xor_si256(c2[439],simde_mm256_xor_si256(c2[5360],simde_mm256_xor_si256(c2[1509],simde_mm256_xor_si256(c2[8199],simde_mm256_xor_si256(c2[5729],simde_mm256_xor_si256(c2[8563],simde_mm256_xor_si256(c2[11027],simde_mm256_xor_si256(c2[10320],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[4002],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[7542],simde_mm256_xor_si256(c2[2610],simde_mm256_xor_si256(c2[8951],simde_mm256_xor_si256(c2[11075],simde_mm256_xor_si256(c2[518],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[2992],simde_mm256_xor_si256(c2[7923],simde_mm256_xor_si256(c2[5815],simde_mm256_xor_si256(c2[6161],simde_mm256_xor_si256(c2[8994],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[5828],simde_mm256_xor_si256(c2[1269],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[7603],simde_mm256_xor_si256(c2[2688],simde_mm256_xor_si256(c2[9731],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[10450],simde_mm256_xor_si256(c2[10800],simde_mm256_xor_si256(c2[9747],simde_mm256_xor_si256(c2[6245],simde_mm256_xor_si256(c2[3073],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[7665],simde_mm256_xor_si256(c2[5556],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[8738],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[5220],simde_mm256_xor_si256(c2[10517],simde_mm256_xor_si256(c2[9108],simde_mm256_xor_si256(c2[7700],s
imde_mm256_xor_si256(c2[3492],simde_mm256_xor_si256(c2[5600],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[7009],simde_mm256_xor_si256(c2[9141],simde_mm256_xor_si256(c2[8085],c2[2453]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[136]=_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[1637],_mm256_xor_si256(c2[9761],_mm256_xor_si256(c2[624],c2[5972]))));
+     d2[136]=simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[1637],simde_mm256_xor_si256(c2[9761],simde_mm256_xor_si256(c2[624],c2[5972]))));
 
 //row: 18
-     d2[144]=_mm256_xor_si256(c2[3537],_mm256_xor_si256(c2[3367],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[9796],c2[6996]))));
+     d2[144]=simde_mm256_xor_si256(c2[3537],simde_mm256_xor_si256(c2[3367],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[9796],c2[6996]))));
 
 //row: 19
-     d2[152]=_mm256_xor_si256(c2[9857],_mm256_xor_si256(c2[3186],_mm256_xor_si256(c2[2930],_mm256_xor_si256(c2[11043],c2[167]))));
+     d2[152]=simde_mm256_xor_si256(c2[9857],simde_mm256_xor_si256(c2[3186],simde_mm256_xor_si256(c2[2930],simde_mm256_xor_si256(c2[11043],c2[167]))));
 
 //row: 20
-     d2[160]=_mm256_xor_si256(c2[9156],_mm256_xor_si256(c2[709],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[8800],_mm256_xor_si256(c2[8100],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[5296],_mm256_xor_si256(c2[8823],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[5316],_mm256_xor_si256(c2[8834],_mm256_xor_si256(c2[10961],_mm256_xor_si256(c2[7440],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[9205],_mm256_xor_si256(c2[9920],_mm256_xor_si256(c2[10982],_mm256_xor_si256(c2[7109],_mm256_xor_si256(c2[1488],_mm256_xor_si256(c2[2551],_mm256_xor_si256(c2[7472],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[10311],_mm256_xor_si256(c2[7841],_mm256_xor_si256(c2[10675],_mm256_xor_si256(c2[1876],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[6114],_mm256_xor_si256(c2[6820],_mm256_xor_si256(c2[9654],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[11063],_mm256_xor_si256(c2[8950],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[2630],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[5104],_mm256_xor_si256(c2[10035],_mm256_xor_si256(c2[7927],_mm256_xor_si256(c2[5457],_mm256_xor_si256(c2[11106],_mm256_xor_si256(c2[2304],_mm256_xor_si256(c2[7940],_mm256_xor_si256(c2[3381],_mm256_xor_si256(c2[5137],_mm256_xor_si256(c2[9715],_mm256_xor_si256(c2[4800],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[6915],_mm256_xor_si256(c2[1299],_mm256_xor_si256(c2[1649],_mm256_xor_si256(c2[596],_mm256_xor_si256(c2[8357],_mm256_xor_si256(c2[5185],_mm256_xor_si256(c2[5892],_mm256_xor_si256(c2[9777],_mm256_xor_si256(c2[7668],_mm256_xor_si256(c2[4503],_mm256_xor_si256(c2[10850],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[7332],_mm256_xor_si256(c2[1366],_mm256_xor_si256(c2[11220],_mm256_xor_si256(c2[9812],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[7712],_mm256_xor_si256(c2[5953],_mm256_xor_si256(c2[11253],_mm256_xor_si256(c2[10197],c2[4565]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[9156],simde_mm256_xor_si256(c2[709],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[8800],simde_mm256_xor_si256(c2[8100],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[5296],simde_mm256_xor_si256(c2[8823],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[5316],simde_mm256_xor_si256(c2[8834],simde_mm256_xor_si256(c2[10961],simde_mm256_xor_si256(c2[7440],simde_mm256_xor_si256(c2[7095],simde_mm256_xor_si256(c2[9205],simde_mm256_xor_si256(c2[9920],simde_mm256_xor_si256(c2[10982],simde_mm256_xor_si256(c2[7109],simde_mm256_xor_si256(c2[1488],simde_mm256_xor_si256(c2[2551],simde_mm256_xor_si256(c2[7472],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[10311],simde_mm256_xor_si256(c2[7841],simde_mm256_xor_si256(c2[10675],simde_mm256_xor_si256(c2[1876],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[6114],simde_mm256_xor_si256(c2[6820],simde_mm256_xor_si256(c2[9654],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[11063],simde_mm256_xor_si256(c2[8950],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[2630],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[5104],simde_mm256_xor_si256(c2[10035],simde_mm256_xor_si256(c2[7927],simde_mm256_xor_si256(c2[5457],simde_mm256_xor_si256(c2[11106],simde_mm256_xor_si256(c2[2304],simde_mm256_xor_si256(c2[7940],simde_mm256_xor_si256(c2[3381],simde_mm256_xor_si256(c2[5137],simde_mm256_xor_si256(c2[9715],simde_mm256_xor_si256(c2[4800],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[6915],simde_mm256_xor_si256(c2[1299],simde_mm256_xor_si256(c2[1649],simde_mm256_xor_si256(c2[596],simde_mm256_xor_si256(c2[8357],simde_mm256_xor_si256(c2[5185],simde_mm256_xor_si256(c2[5892],simde_mm256_xor_si256(c2[9777],simde_mm256_xor_si256(c2[7668],simde_mm256_xor_si256(c2[4503],simde_mm256_xor_si256(c2[10850],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[7332],simde_mm256_xor_si256(c2[1366],simde_mm256_xor_si256(c2[1122
0],simde_mm256_xor_si256(c2[9812],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[7712],simde_mm256_xor_si256(c2[5953],simde_mm256_xor_si256(c2[11253],simde_mm256_xor_si256(c2[10197],c2[4565]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[168]=_mm256_xor_si256(c2[8823],_mm256_xor_si256(c2[8883],_mm256_xor_si256(c2[4835],_mm256_xor_si256(c2[1380],c2[4213]))));
+     d2[168]=simde_mm256_xor_si256(c2[8823],simde_mm256_xor_si256(c2[8883],simde_mm256_xor_si256(c2[4835],simde_mm256_xor_si256(c2[1380],c2[4213]))));
 
 //row: 22
-     d2[176]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[5124],_mm256_xor_si256(c2[10069],c2[10836])));
+     d2[176]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[5124],simde_mm256_xor_si256(c2[10069],c2[10836])));
 
 //row: 23
-     d2[184]=_mm256_xor_si256(c2[9876],_mm256_xor_si256(c2[6724],_mm256_xor_si256(c2[3685],c2[8740])));
+     d2[184]=simde_mm256_xor_si256(c2[9876],simde_mm256_xor_si256(c2[6724],simde_mm256_xor_si256(c2[3685],c2[8740])));
 
 //row: 24
-     d2[192]=_mm256_xor_si256(c2[8454],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[2818],_mm256_xor_si256(c2[8098],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[1073],_mm256_xor_si256(c2[4594],_mm256_xor_si256(c2[8113],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[4614],_mm256_xor_si256(c2[8132],_mm256_xor_si256(c2[10259],_mm256_xor_si256(c2[6738],_mm256_xor_si256(c2[6385],_mm256_xor_si256(c2[7794],_mm256_xor_si256(c2[9218],_mm256_xor_si256(c2[10272],_mm256_xor_si256(c2[6407],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[786],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[6770],_mm256_xor_si256(c2[2919],_mm256_xor_si256(c2[9601],_mm256_xor_si256(c2[7139],_mm256_xor_si256(c2[9973],_mm256_xor_si256(c2[1174],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[5412],_mm256_xor_si256(c2[6118],_mm256_xor_si256(c2[8944],_mm256_xor_si256(c2[4020],_mm256_xor_si256(c2[10353],_mm256_xor_si256(c2[1222],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[9333],_mm256_xor_si256(c2[7217],_mm256_xor_si256(c2[7219],_mm256_xor_si256(c2[10404],_mm256_xor_si256(c2[1602],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[2679],_mm256_xor_si256(c2[4435],_mm256_xor_si256(c2[9013],_mm256_xor_si256(c2[4098],_mm256_xor_si256(c2[11141],_mm256_xor_si256(c2[6213],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[947],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[7655],_mm256_xor_si256(c2[4483],_mm256_xor_si256(c2[5190],_mm256_xor_si256(c2[9075],_mm256_xor_si256(c2[6966],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[10148],_mm256_xor_si256(c2[4165],_mm256_xor_si256(c2[6630],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[10518],_mm256_xor_si256(c2[9110],_mm256_xor_si256(c2[4902],_mm256_xor_si256(c2[7010],_mm256_xor_si256(c2[5251],_mm256_xor_si256(c2[10551],_mm256_xor_si256(c2[9495],c2[3863]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[192]=simde_mm256_xor_si256(c2[8454],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[2818],simde_mm256_xor_si256(c2[8098],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[1073],simde_mm256_xor_si256(c2[4594],simde_mm256_xor_si256(c2[8113],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[4614],simde_mm256_xor_si256(c2[8132],simde_mm256_xor_si256(c2[10259],simde_mm256_xor_si256(c2[6738],simde_mm256_xor_si256(c2[6385],simde_mm256_xor_si256(c2[7794],simde_mm256_xor_si256(c2[9218],simde_mm256_xor_si256(c2[10272],simde_mm256_xor_si256(c2[6407],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[786],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[6770],simde_mm256_xor_si256(c2[2919],simde_mm256_xor_si256(c2[9601],simde_mm256_xor_si256(c2[7139],simde_mm256_xor_si256(c2[9973],simde_mm256_xor_si256(c2[1174],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[5412],simde_mm256_xor_si256(c2[6118],simde_mm256_xor_si256(c2[8944],simde_mm256_xor_si256(c2[4020],simde_mm256_xor_si256(c2[10353],simde_mm256_xor_si256(c2[1222],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[9333],simde_mm256_xor_si256(c2[7217],simde_mm256_xor_si256(c2[7219],simde_mm256_xor_si256(c2[10404],simde_mm256_xor_si256(c2[1602],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[2679],simde_mm256_xor_si256(c2[4435],simde_mm256_xor_si256(c2[9013],simde_mm256_xor_si256(c2[4098],simde_mm256_xor_si256(c2[11141],simde_mm256_xor_si256(c2[6213],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[947],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[7655],simde_mm256_xor_si256(c2[4483],simde_mm256_xor_si256(c2[5190],simde_mm256_xor_si256(c2[9075],simde_mm256_xor_si256(c2[6966],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[10148],simde_mm256_xor_si256(c2[4165],simde_mm256_xor_si256(c2[6630],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[10518],simd
e_mm256_xor_si256(c2[9110],simde_mm256_xor_si256(c2[4902],simde_mm256_xor_si256(c2[7010],simde_mm256_xor_si256(c2[5251],simde_mm256_xor_si256(c2[10551],simde_mm256_xor_si256(c2[9495],c2[3863]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[200]=_mm256_xor_si256(c2[8112],_mm256_xor_si256(c2[2916],_mm256_xor_si256(c2[7155],c2[7973])));
+     d2[200]=simde_mm256_xor_si256(c2[8112],simde_mm256_xor_si256(c2[2916],simde_mm256_xor_si256(c2[7155],c2[7973])));
 
 //row: 26
-     d2[208]=_mm256_xor_si256(c2[1062],_mm256_xor_si256(c2[6727],_mm256_xor_si256(c2[8166],c2[10449])));
+     d2[208]=simde_mm256_xor_si256(c2[1062],simde_mm256_xor_si256(c2[6727],simde_mm256_xor_si256(c2[8166],c2[10449])));
 
 //row: 27
-     d2[216]=_mm256_xor_si256(c2[8816],_mm256_xor_si256(c2[2915],c2[838]));
+     d2[216]=simde_mm256_xor_si256(c2[8816],simde_mm256_xor_si256(c2[2915],c2[838]));
 
 //row: 28
-     d2[224]=_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[7701],c2[11249])));
+     d2[224]=simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[7701],c2[11249])));
 
 //row: 29
-     d2[232]=_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[5987],_mm256_xor_si256(c2[10915],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[4242],_mm256_xor_si256(c2[7763],_mm256_xor_si256(c2[10930],_mm256_xor_si256(c2[19],_mm256_xor_si256(c2[7762],_mm256_xor_si256(c2[3207],_mm256_xor_si256(c2[7783],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[9907],_mm256_xor_si256(c2[9202],_mm256_xor_si256(c2[9554],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[2178],_mm256_xor_si256(c2[9216],_mm256_xor_si256(c2[9568],_mm256_xor_si256(c2[3955],_mm256_xor_si256(c2[5010],_mm256_xor_si256(c2[9939],_mm256_xor_si256(c2[6080],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[9956],_mm256_xor_si256(c2[10308],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[3284],_mm256_xor_si256(c2[3636],_mm256_xor_si256(c2[5411],_mm256_xor_si256(c2[8581],_mm256_xor_si256(c2[8935],_mm256_xor_si256(c2[9287],_mm256_xor_si256(c2[850],_mm256_xor_si256(c2[7189],_mm256_xor_si256(c2[2259],_mm256_xor_si256(c2[4391],_mm256_xor_si256(c2[5089],_mm256_xor_si256(c2[5092],_mm256_xor_si256(c2[5444],_mm256_xor_si256(c2[7571],_mm256_xor_si256(c2[1239],_mm256_xor_si256(c2[10034],_mm256_xor_si256(c2[10386],_mm256_xor_si256(c2[2310],_mm256_xor_si256(c2[4771],_mm256_xor_si256(c2[10055],_mm256_xor_si256(c2[10407],_mm256_xor_si256(c2[5840],_mm256_xor_si256(c2[7604],_mm256_xor_si256(c2[567],_mm256_xor_si256(c2[919],_mm256_xor_si256(c2[7267],_mm256_xor_si256(c2[3047],_mm256_xor_si256(c2[9030],_mm256_xor_si256(c2[9382],_mm256_xor_si256(c2[4455],_mm256_xor_si256(c2[3766],_mm256_xor_si256(c2[4116],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[10816],_mm256_xor_si256(c2[7652],_mm256_xor_si256(c2[8007],_mm256_xor_si256(c2[8359],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[10135],_mm256_xor_si256(c2[6610],_mm256_xor_si256(c2[6962],_mm256_xor_si256(c2[2054],_mm256_xor_si256(c2[7334],_mm256_xor_si256(c2[9447],_mm256_xor_si256(c2[9799],_mm256_xor_si256(c2[7330],_mm256_xor_si
256(c2[3825],_mm256_xor_si256(c2[2416],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[8071],_mm256_xor_si256(c2[10179],_mm256_xor_si256(c2[8068],_mm256_xor_si256(c2[8420],_mm256_xor_si256(c2[2449],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[6672],c2[7024]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[232]=simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[3168],simde_mm256_xor_si256(c2[5987],simde_mm256_xor_si256(c2[10915],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[4242],simde_mm256_xor_si256(c2[7763],simde_mm256_xor_si256(c2[10930],simde_mm256_xor_si256(c2[19],simde_mm256_xor_si256(c2[7762],simde_mm256_xor_si256(c2[3207],simde_mm256_xor_si256(c2[7783],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[9907],simde_mm256_xor_si256(c2[9202],simde_mm256_xor_si256(c2[9554],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[2178],simde_mm256_xor_si256(c2[9216],simde_mm256_xor_si256(c2[9568],simde_mm256_xor_si256(c2[3955],simde_mm256_xor_si256(c2[5010],simde_mm256_xor_si256(c2[9939],simde_mm256_xor_si256(c2[6080],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[9956],simde_mm256_xor_si256(c2[10308],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[3284],simde_mm256_xor_si256(c2[3636],simde_mm256_xor_si256(c2[5411],simde_mm256_xor_si256(c2[8581],simde_mm256_xor_si256(c2[8935],simde_mm256_xor_si256(c2[9287],simde_mm256_xor_si256(c2[850],simde_mm256_xor_si256(c2[7189],simde_mm256_xor_si256(c2[2259],simde_mm256_xor_si256(c2[4391],simde_mm256_xor_si256(c2[5089],simde_mm256_xor_si256(c2[5092],simde_mm256_xor_si256(c2[5444],simde_mm256_xor_si256(c2[7571],simde_mm256_xor_si256(c2[1239],simde_mm256_xor_si256(c2[10034],simde_mm256_xor_si256(c2[10386],simde_mm256_xor_si256(c2[2310],simde_mm256_xor_si256(c2[4771],simde_mm256_xor_si256(c2[10055],simde_mm256_xor_si256(c2[10407],simde_mm256_xor_si256(c2[5840],simde_mm256_xor_si256(c2[7604],simde_mm256_xor_si256(c2[567],simde_mm256_xor_si256(c2[919],simde_mm256_xor_si256(c2[7267],simde_mm256_xor_si256(c2[3047],simde_mm256_xor_si256(c2[9030],simde_mm256_xor_si256(c2[9382],simde_mm256_xor_si256(c2[4455],simde_mm256_xor_si256(c2[3766],simde_mm256_xor_si256(c2[4116],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[10816],simde
_mm256_xor_si256(c2[7652],simde_mm256_xor_si256(c2[8007],simde_mm256_xor_si256(c2[8359],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[10135],simde_mm256_xor_si256(c2[6610],simde_mm256_xor_si256(c2[6962],simde_mm256_xor_si256(c2[2054],simde_mm256_xor_si256(c2[7334],simde_mm256_xor_si256(c2[9447],simde_mm256_xor_si256(c2[9799],simde_mm256_xor_si256(c2[7330],simde_mm256_xor_si256(c2[3825],simde_mm256_xor_si256(c2[2416],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[8071],simde_mm256_xor_si256(c2[10179],simde_mm256_xor_si256(c2[8068],simde_mm256_xor_si256(c2[8420],simde_mm256_xor_si256(c2[2449],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[6672],c2[7024]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[240]=_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[4224],_mm256_xor_si256(c2[6691],_mm256_xor_si256(c2[7043],_mm256_xor_si256(c2[708],_mm256_xor_si256(c2[1060],_mm256_xor_si256(c2[8454],_mm256_xor_si256(c2[5298],_mm256_xor_si256(c2[8467],_mm256_xor_si256(c2[8819],_mm256_xor_si256(c2[723],_mm256_xor_si256(c2[1075],_mm256_xor_si256(c2[4263],_mm256_xor_si256(c2[8839],_mm256_xor_si256(c2[742],_mm256_xor_si256(c2[1094],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[10963],_mm256_xor_si256(c2[10258],_mm256_xor_si256(c2[10610],_mm256_xor_si256(c2[2180],_mm256_xor_si256(c2[2882],_mm256_xor_si256(c2[3234],_mm256_xor_si256(c2[10272],_mm256_xor_si256(c2[10624],_mm256_xor_si256(c2[5011],_mm256_xor_si256(c2[6066],_mm256_xor_si256(c2[10643],_mm256_xor_si256(c2[10995],_mm256_xor_si256(c2[7136],_mm256_xor_si256(c2[2211],_mm256_xor_si256(c2[2563],_mm256_xor_si256(c2[11012],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[2935],_mm256_xor_si256(c2[5047],_mm256_xor_si256(c2[5399],_mm256_xor_si256(c2[4340],_mm256_xor_si256(c2[4692],_mm256_xor_si256(c2[6467],_mm256_xor_si256(c2[9285],_mm256_xor_si256(c2[9637],_mm256_xor_si256(c2[9991],_mm256_xor_si256(c2[10343],_mm256_xor_si256(c2[1906],_mm256_xor_si256(c2[8245],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[5447],_mm256_xor_si256(c2[5793],_mm256_xor_si256(c2[6145],_mm256_xor_si256(c2[6148],_mm256_xor_si256(c2[6500],_mm256_xor_si256(c2[3330],_mm256_xor_si256(c2[8627],_mm256_xor_si256(c2[2295],_mm256_xor_si256(c2[11090],_mm256_xor_si256(c2[179],_mm256_xor_si256(c2[3366],_mm256_xor_si256(c2[5827],_mm256_xor_si256(c2[11111],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[6896],_mm256_xor_si256(c2[8308],_mm256_xor_si256(c2[8660],_mm256_xor_si256(c2[1623],_mm256_xor_si256(c2[1975],_mm256_xor_si256(c2[8659],_mm256_xor_si256(c2[8323],_mm256_xor_si256(c2[3751],_mm256_xor_si256(c2[4103],_mm256_xor_si256(c2[10086],_mm256_xor_si256(c2[10438],_mm256_xor_si256(c2[4822],_mm256_xor_si256(c2[5172],_mm256_xor_si256(c2[3767],_mm
256_xor_si256(c2[4119],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[8708],_mm256_xor_si256(c2[9063],_mm256_xor_si256(c2[9415],_mm256_xor_si256(c2[2037],_mm256_xor_si256(c2[10839],_mm256_xor_si256(c2[11191],_mm256_xor_si256(c2[7666],_mm256_xor_si256(c2[8018],_mm256_xor_si256(c2[3110],_mm256_xor_si256(c2[8038],_mm256_xor_si256(c2[8390],_mm256_xor_si256(c2[10503],_mm256_xor_si256(c2[10855],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[9127],_mm256_xor_si256(c2[10883],_mm256_xor_si256(c2[11235],_mm256_xor_si256(c2[9124],_mm256_xor_si256(c2[9476],_mm256_xor_si256(c2[3505],_mm256_xor_si256(c2[2449],_mm256_xor_si256(c2[7728],c2[8080])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[240]=simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[4224],simde_mm256_xor_si256(c2[6691],simde_mm256_xor_si256(c2[7043],simde_mm256_xor_si256(c2[708],simde_mm256_xor_si256(c2[1060],simde_mm256_xor_si256(c2[8454],simde_mm256_xor_si256(c2[5298],simde_mm256_xor_si256(c2[8467],simde_mm256_xor_si256(c2[8819],simde_mm256_xor_si256(c2[723],simde_mm256_xor_si256(c2[1075],simde_mm256_xor_si256(c2[4263],simde_mm256_xor_si256(c2[8839],simde_mm256_xor_si256(c2[742],simde_mm256_xor_si256(c2[1094],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[10963],simde_mm256_xor_si256(c2[10258],simde_mm256_xor_si256(c2[10610],simde_mm256_xor_si256(c2[2180],simde_mm256_xor_si256(c2[2882],simde_mm256_xor_si256(c2[3234],simde_mm256_xor_si256(c2[10272],simde_mm256_xor_si256(c2[10624],simde_mm256_xor_si256(c2[5011],simde_mm256_xor_si256(c2[6066],simde_mm256_xor_si256(c2[10643],simde_mm256_xor_si256(c2[10995],simde_mm256_xor_si256(c2[7136],simde_mm256_xor_si256(c2[2211],simde_mm256_xor_si256(c2[2563],simde_mm256_xor_si256(c2[11012],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[2935],simde_mm256_xor_si256(c2[5047],simde_mm256_xor_si256(c2[5399],simde_mm256_xor_si256(c2[4340],simde_mm256_xor_si256(c2[4692],simde_mm256_xor_si256(c2[6467],simde_mm256_xor_si256(c2[9285],simde_mm256_xor_si256(c2[9637],simde_mm256_xor_si256(c2[9991],simde_mm256_xor_si256(c2[10343],simde_mm256_xor_si256(c2[1906],simde_mm256_xor_si256(c2[8245],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[5447],simde_mm256_xor_si256(c2[5793],simde_mm256_xor_si256(c2[6145],simde_mm256_xor_si256(c2[6148],simde_mm256_xor_si256(c2[6500],simde_mm256_xor_si256(c2[3330],simde_mm256_xor_si256(c2[8627],simde_mm256_xor_si256(c2[2295],simde_mm256_xor_si256(c2[11090],simde_mm256_xor_si256(c2[179],simde_mm256_xor_si256(c2[3366],simde_mm256_xor_si256(c2[5827],simde_mm256_xor_si256(c2[11111],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[6896],simde_mm256_xor_si256(c2[830
8],simde_mm256_xor_si256(c2[8660],simde_mm256_xor_si256(c2[1623],simde_mm256_xor_si256(c2[1975],simde_mm256_xor_si256(c2[8659],simde_mm256_xor_si256(c2[8323],simde_mm256_xor_si256(c2[3751],simde_mm256_xor_si256(c2[4103],simde_mm256_xor_si256(c2[10086],simde_mm256_xor_si256(c2[10438],simde_mm256_xor_si256(c2[4822],simde_mm256_xor_si256(c2[5172],simde_mm256_xor_si256(c2[3767],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[8708],simde_mm256_xor_si256(c2[9063],simde_mm256_xor_si256(c2[9415],simde_mm256_xor_si256(c2[2037],simde_mm256_xor_si256(c2[10839],simde_mm256_xor_si256(c2[11191],simde_mm256_xor_si256(c2[7666],simde_mm256_xor_si256(c2[8018],simde_mm256_xor_si256(c2[3110],simde_mm256_xor_si256(c2[8038],simde_mm256_xor_si256(c2[8390],simde_mm256_xor_si256(c2[10503],simde_mm256_xor_si256(c2[10855],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[9127],simde_mm256_xor_si256(c2[10883],simde_mm256_xor_si256(c2[11235],simde_mm256_xor_si256(c2[9124],simde_mm256_xor_si256(c2[9476],simde_mm256_xor_si256(c2[3505],simde_mm256_xor_si256(c2[2449],simde_mm256_xor_si256(c2[7728],c2[8080])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[248]=_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[8449],_mm256_xor_si256(c2[4933],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[7744],_mm256_xor_si256(c2[2821],_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[7749],_mm256_xor_si256(c2[8101],_mm256_xor_si256(c2[6007],_mm256_xor_si256(c2[1076],_mm256_xor_si256(c2[9520],_mm256_xor_si256(c2[4597],_mm256_xor_si256(c2[1776],_mm256_xor_si256(c2[7764],_mm256_xor_si256(c2[8116],_mm256_xor_si256(c2[10930],_mm256_xor_si256(c2[4964],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[9540],_mm256_xor_si256(c2[4609],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[8135],_mm256_xor_si256(c2[3922],_mm256_xor_si256(c2[10262],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[6741],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[6036],_mm256_xor_si256(c2[6388],_mm256_xor_si256(c2[2881],_mm256_xor_si256(c2[9221],_mm256_xor_si256(c2[3943],_mm256_xor_si256(c2[10275],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[6050],_mm256_xor_si256(c2[6402],_mm256_xor_si256(c2[5712],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[6775],_mm256_xor_si256(c2[1844],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[6773],_mm256_xor_si256(c2[7845],_mm256_xor_si256(c2[2914],_mm256_xor_si256(c2[3264],_mm256_xor_si256(c2[9604],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[6790],_mm256_xor_si256(c2[7142],_mm256_xor_si256(c2[3636],_mm256_xor_si256(c2[9968],_mm256_xor_si256(c2[6100],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[5393],_mm256_xor_si256(c2[118],_mm256_xor_si256(c2[470],_mm256_xor_si256(c2[6101],_mm256_xor_si256(c2[7168],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[10338],_mm256_xor_si256(c2[5415],_mm256_xor_si256(c2[11044],_mm256_xor_si256(c2[5761],_mm256_xor_si256(c2[6113],_mm256_xor_si256(c2[2615],_mm256_xor_si256(c2[8947],_mm256_xor_si256(c2[8946],_mm256_xor_si256(c2[4023],_mm256_xor_si256(c2[4016],_mm256_xor_si256(c2[10356],_mm256_xor_si256(c2[6148],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[6854],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[7201],_mm256_xor_si256(c2
[1926],_mm256_xor_si256(c2[2278],_mm256_xor_si256(c2[9328],_mm256_xor_si256(c2[4405],_mm256_xor_si256(c2[2996],_mm256_xor_si256(c2[9328],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[6868],_mm256_xor_si256(c2[7220],_mm256_xor_si256(c2[4067],_mm256_xor_si256(c2[10407],_mm256_xor_si256(c2[6528],_mm256_xor_si256(c2[1605],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[6881],_mm256_xor_si256(c2[7233],_mm256_xor_si256(c2[7605],_mm256_xor_si256(c2[2674],_mm256_xor_si256(c2[9361],_mm256_xor_si256(c2[4438],_mm256_xor_si256(c2[2676],_mm256_xor_si256(c2[8656],_mm256_xor_si256(c2[9008],_mm256_xor_si256(c2[9024],_mm256_xor_si256(c2[4101],_mm256_xor_si256(c2[4804],_mm256_xor_si256(c2[11136],_mm256_xor_si256(c2[11139],_mm256_xor_si256(c2[5856],_mm256_xor_si256(c2[6208],_mm256_xor_si256(c2[5523],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[5873],_mm256_xor_si256(c2[950],_mm256_xor_si256(c2[4820],_mm256_xor_si256(c2[11152],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[7650],_mm256_xor_si256(c2[9409],_mm256_xor_si256(c2[4486],_mm256_xor_si256(c2[10116],_mm256_xor_si256(c2[4833],_mm256_xor_si256(c2[5185],_mm256_xor_si256(c2[2738],_mm256_xor_si256(c2[9078],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[6961],_mm256_xor_si256(c2[8727],_mm256_xor_si256(c2[3444],_mm256_xor_si256(c2[3796],_mm256_xor_si256(c2[3811],_mm256_xor_si256(c2[10151],_mm256_xor_si256(c2[9091],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[6273],_mm256_xor_si256(c2[6625],_mm256_xor_si256(c2[5590],_mm256_xor_si256(c2[659],_mm256_xor_si256(c2[4181],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[2773],_mm256_xor_si256(c2[9105],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[4897],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[7013],_mm256_xor_si256(c2[10177],_mm256_xor_si256(c2[4902],_mm256_xor_si256(c2[5254],_mm256_xor_si256(c2[4214],_mm256_xor_si256(c2[10546],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[9490],_mm256_xor_si256(c2[8789],_mm256_xor_si256(c2[3506],c2[3858]))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[248]=simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[8449],simde_mm256_xor_si256(c2[4933],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[7744],simde_mm256_xor_si256(c2[2821],simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[7749],simde_mm256_xor_si256(c2[8101],simde_mm256_xor_si256(c2[6007],simde_mm256_xor_si256(c2[1076],simde_mm256_xor_si256(c2[9520],simde_mm256_xor_si256(c2[4597],simde_mm256_xor_si256(c2[1776],simde_mm256_xor_si256(c2[7764],simde_mm256_xor_si256(c2[8116],simde_mm256_xor_si256(c2[10930],simde_mm256_xor_si256(c2[4964],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[9540],simde_mm256_xor_si256(c2[4609],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[8135],simde_mm256_xor_si256(c2[3922],simde_mm256_xor_si256(c2[10262],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[6741],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[6036],simde_mm256_xor_si256(c2[6388],simde_mm256_xor_si256(c2[2881],simde_mm256_xor_si256(c2[9221],simde_mm256_xor_si256(c2[3943],simde_mm256_xor_si256(c2[10275],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[6050],simde_mm256_xor_si256(c2[6402],simde_mm256_xor_si256(c2[5712],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[6775],simde_mm256_xor_si256(c2[1844],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[6773],simde_mm256_xor_si256(c2[7845],simde_mm256_xor_si256(c2[2914],simde_mm256_xor_si256(c2[3264],simde_mm256_xor_si256(c2[9604],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[6790],simde_mm256_xor_si256(c2[7142],simde_mm256_xor_si256(c2[3636],simde_mm256_xor_si256(c2[9968],simde_mm256_xor_si256(c2[6100],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[5393],simde_mm256_xor_si256(c2[118],simde_mm256_xor_si256(c2[470],simde_mm256_xor_si256(c2[6101],simde_mm256_xor_si256(c2[7168],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[10338],simde_mm256_xor_si256(c2[5415],simde_mm256_xor_si256(c2[11044],simde_mm256_xor_si256(c2[5761],simde_mm256_
xor_si256(c2[6113],simde_mm256_xor_si256(c2[2615],simde_mm256_xor_si256(c2[8947],simde_mm256_xor_si256(c2[8946],simde_mm256_xor_si256(c2[4023],simde_mm256_xor_si256(c2[4016],simde_mm256_xor_si256(c2[10356],simde_mm256_xor_si256(c2[6148],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[6854],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[7201],simde_mm256_xor_si256(c2[1926],simde_mm256_xor_si256(c2[2278],simde_mm256_xor_si256(c2[9328],simde_mm256_xor_si256(c2[4405],simde_mm256_xor_si256(c2[2996],simde_mm256_xor_si256(c2[9328],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[6868],simde_mm256_xor_si256(c2[7220],simde_mm256_xor_si256(c2[4067],simde_mm256_xor_si256(c2[10407],simde_mm256_xor_si256(c2[6528],simde_mm256_xor_si256(c2[1605],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[6881],simde_mm256_xor_si256(c2[7233],simde_mm256_xor_si256(c2[7605],simde_mm256_xor_si256(c2[2674],simde_mm256_xor_si256(c2[9361],simde_mm256_xor_si256(c2[4438],simde_mm256_xor_si256(c2[2676],simde_mm256_xor_si256(c2[8656],simde_mm256_xor_si256(c2[9008],simde_mm256_xor_si256(c2[9024],simde_mm256_xor_si256(c2[4101],simde_mm256_xor_si256(c2[4804],simde_mm256_xor_si256(c2[11136],simde_mm256_xor_si256(c2[11139],simde_mm256_xor_si256(c2[5856],simde_mm256_xor_si256(c2[6208],simde_mm256_xor_si256(c2[5523],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[5873],simde_mm256_xor_si256(c2[950],simde_mm256_xor_si256(c2[4820],simde_mm256_xor_si256(c2[11152],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[7650],simde_mm256_xor_si256(c2[9409],simde_mm256_xor_si256(c2[4486],simde_mm256_xor_si256(c2[10116],simde_mm256_xor_si256(c2[4833],simde_mm256_xor_si256(c2[5185],simde_mm256_xor_si256(c2[2738],simde_mm256_xor_si256(c2[9078],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[6961],simde_mm256_xor_si256(c2[8727],simde_mm256_xor_si256(c2[3444],simde_mm256_xor_si256(c2[3796],simde_mm256_xor_si256(c2[3811],simde_mm256_xor_si256(c2[10151],simde_mm256_xor_si256(c2[9
091],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[6273],simde_mm256_xor_si256(c2[6625],simde_mm256_xor_si256(c2[5590],simde_mm256_xor_si256(c2[659],simde_mm256_xor_si256(c2[4181],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[2773],simde_mm256_xor_si256(c2[9105],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[4897],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[7013],simde_mm256_xor_si256(c2[10177],simde_mm256_xor_si256(c2[4902],simde_mm256_xor_si256(c2[5254],simde_mm256_xor_si256(c2[4214],simde_mm256_xor_si256(c2[10546],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[9490],simde_mm256_xor_si256(c2[8789],simde_mm256_xor_si256(c2[3506],c2[3858]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[256]=_mm256_xor_si256(c2[7043],_mm256_xor_si256(c2[9859],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[1415],_mm256_xor_si256(c2[6343],_mm256_xor_si256(c2[6695],_mm256_xor_si256(c2[10214],_mm256_xor_si256(c2[10933],_mm256_xor_si256(c2[2839],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[6358],_mm256_xor_si256(c2[6710],_mm256_xor_si256(c2[9890],_mm256_xor_si256(c2[3203],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[6721],_mm256_xor_si256(c2[8848],_mm256_xor_si256(c2[5335],_mm256_xor_si256(c2[4630],_mm256_xor_si256(c2[4982],_mm256_xor_si256(c2[7815],_mm256_xor_si256(c2[8517],_mm256_xor_si256(c2[8869],_mm256_xor_si256(c2[4644],_mm256_xor_si256(c2[4996],_mm256_xor_si256(c2[10646],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[5015],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[1508],_mm256_xor_si256(c2[7846],_mm256_xor_si256(c2[8198],_mm256_xor_si256(c2[5376],_mm256_xor_si256(c2[5728],_mm256_xor_si256(c2[8562],_mm256_xor_si256(c2[10674],_mm256_xor_si256(c2[11026],_mm256_xor_si256(c2[9975],_mm256_xor_si256(c2[10327],_mm256_xor_si256(c2[839],_mm256_xor_si256(c2[3649],_mm256_xor_si256(c2[4001],_mm256_xor_si256(c2[4355],_mm256_xor_si256(c2[4707],_mm256_xor_si256(c2[7541],_mm256_xor_si256(c2[2609],_mm256_xor_si256(c2[8598],_mm256_xor_si256(c2[8950],_mm256_xor_si256(c2[11074],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[2999],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[5462],_mm256_xor_si256(c2[5814],_mm256_xor_si256(c2[8993],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[5475],_mm256_xor_si256(c2[5827],_mm256_xor_si256(c2[5827],_mm256_xor_si256(c2[1268],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[7250],_mm256_xor_si256(c2[7602],_mm256_xor_si256(c2[2695],_mm256_xor_si256(c2[9378],_mm256_xor_si256(c2[9730],_mm256_xor_si256(c2[4450],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[2694],_mm256_xor_si256(c2[10449],_mm256_xor_si256(c2[10807],_mm256_xor_si256(c2[9394],_mm256_x
or_si256(c2[9746],_mm256_xor_si256(c2[6244],_mm256_xor_si256(c2[3072],_mm256_xor_si256(c2[3427],_mm256_xor_si256(c2[3779],_mm256_xor_si256(c2[7664],_mm256_xor_si256(c2[5203],_mm256_xor_si256(c2[5555],_mm256_xor_si256(c2[2038],_mm256_xor_si256(c2[2390],_mm256_xor_si256(c2[8737],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2754],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[5219],_mm256_xor_si256(c2[10516],_mm256_xor_si256(c2[9107],_mm256_xor_si256(c2[7347],_mm256_xor_si256(c2[7699],_mm256_xor_si256(c2[3491],_mm256_xor_si256(c2[5255],_mm256_xor_si256(c2[5607],_mm256_xor_si256(c2[3488],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[9140],_mm256_xor_si256(c2[8084],_mm256_xor_si256(c2[2100],c2[2452])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[256]=simde_mm256_xor_si256(c2[7043],simde_mm256_xor_si256(c2[9859],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[1415],simde_mm256_xor_si256(c2[6343],simde_mm256_xor_si256(c2[6695],simde_mm256_xor_si256(c2[10214],simde_mm256_xor_si256(c2[10933],simde_mm256_xor_si256(c2[2839],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[6358],simde_mm256_xor_si256(c2[6710],simde_mm256_xor_si256(c2[9890],simde_mm256_xor_si256(c2[3203],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[6721],simde_mm256_xor_si256(c2[8848],simde_mm256_xor_si256(c2[5335],simde_mm256_xor_si256(c2[4630],simde_mm256_xor_si256(c2[4982],simde_mm256_xor_si256(c2[7815],simde_mm256_xor_si256(c2[8517],simde_mm256_xor_si256(c2[8869],simde_mm256_xor_si256(c2[4644],simde_mm256_xor_si256(c2[4996],simde_mm256_xor_si256(c2[10646],simde_mm256_xor_si256(c2[438],simde_mm256_xor_si256(c2[5015],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[1508],simde_mm256_xor_si256(c2[7846],simde_mm256_xor_si256(c2[8198],simde_mm256_xor_si256(c2[5376],simde_mm256_xor_si256(c2[5728],simde_mm256_xor_si256(c2[8562],simde_mm256_xor_si256(c2[10674],simde_mm256_xor_si256(c2[11026],simde_mm256_xor_si256(c2[9975],simde_mm256_xor_si256(c2[10327],simde_mm256_xor_si256(c2[839],simde_mm256_xor_si256(c2[3649],simde_mm256_xor_si256(c2[4001],simde_mm256_xor_si256(c2[4355],simde_mm256_xor_si256(c2[4707],simde_mm256_xor_si256(c2[7541],simde_mm256_xor_si256(c2[2609],simde_mm256_xor_si256(c2[8598],simde_mm256_xor_si256(c2[8950],simde_mm256_xor_si256(c2[11074],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[2999],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[5462],simde_mm256_xor_si256(c2[5814],simde_mm256_xor_si256(c2[8993],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[5475],simde_mm256_xor_si256(c2[5827],simde_mm256_xor_si256(c2[5827],simde_mm256_xor_si256(c2[1268],simde_mm256_xor_si256(c2[2672],si
mde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[7250],simde_mm256_xor_si256(c2[7602],simde_mm256_xor_si256(c2[2695],simde_mm256_xor_si256(c2[9378],simde_mm256_xor_si256(c2[9730],simde_mm256_xor_si256(c2[4450],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[2694],simde_mm256_xor_si256(c2[10449],simde_mm256_xor_si256(c2[10807],simde_mm256_xor_si256(c2[9394],simde_mm256_xor_si256(c2[9746],simde_mm256_xor_si256(c2[6244],simde_mm256_xor_si256(c2[3072],simde_mm256_xor_si256(c2[3427],simde_mm256_xor_si256(c2[3779],simde_mm256_xor_si256(c2[7664],simde_mm256_xor_si256(c2[5203],simde_mm256_xor_si256(c2[5555],simde_mm256_xor_si256(c2[2038],simde_mm256_xor_si256(c2[2390],simde_mm256_xor_si256(c2[8737],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2754],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[5219],simde_mm256_xor_si256(c2[10516],simde_mm256_xor_si256(c2[9107],simde_mm256_xor_si256(c2[7347],simde_mm256_xor_si256(c2[7699],simde_mm256_xor_si256(c2[3491],simde_mm256_xor_si256(c2[5255],simde_mm256_xor_si256(c2[5607],simde_mm256_xor_si256(c2[3488],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[9140],simde_mm256_xor_si256(c2[8084],simde_mm256_xor_si256(c2[2100],c2[2452])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[264]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[9541],_mm256_xor_si256(c2[3345],c2[7030])));
+     d2[264]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[9541],simde_mm256_xor_si256(c2[3345],c2[7030])));
 
 //row: 34
-     d2[272]=_mm256_xor_si256(c2[10915],_mm256_xor_si256(c2[2581],_mm256_xor_si256(c2[1653],c2[11188])));
+     d2[272]=simde_mm256_xor_si256(c2[10915],simde_mm256_xor_si256(c2[2581],simde_mm256_xor_si256(c2[1653],c2[11188])));
 
 //row: 35
-     d2[280]=_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[5990],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[4245],_mm256_xor_si256(c2[7766],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[373],_mm256_xor_si256(c2[3202],_mm256_xor_si256(c2[7778],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[9910],_mm256_xor_si256(c2[9557],_mm256_xor_si256(c2[1127],_mm256_xor_si256(c2[2181],_mm256_xor_si256(c2[9571],_mm256_xor_si256(c2[3958],_mm256_xor_si256(c2[5013],_mm256_xor_si256(c2[9942],_mm256_xor_si256(c2[6083],_mm256_xor_si256(c2[1510],_mm256_xor_si256(c2[10311],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[1874],_mm256_xor_si256(c2[4338],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[5414],_mm256_xor_si256(c2[8576],_mm256_xor_si256(c2[9282],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[7184],_mm256_xor_si256(c2[2262],_mm256_xor_si256(c2[4386],_mm256_xor_si256(c2[5092],_mm256_xor_si256(c2[5447],_mm256_xor_si256(c2[7574],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[10389],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[10402],_mm256_xor_si256(c2[5478],_mm256_xor_si256(c2[5843],_mm256_xor_si256(c2[7607],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[7270],_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[9377],_mm256_xor_si256(c2[3761],_mm256_xor_si256(c2[4119],_mm256_xor_si256(c2[3058],_mm256_xor_si256(c2[10819],_mm256_xor_si256(c2[7655],_mm256_xor_si256(c2[8354],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[10130],_mm256_xor_si256(c2[6965],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[7329],_mm256_xor_si256(c2[9794],_mm256_xor_si256(c2[3828],_mm256_xor_si256(c2[2419],_mm256_xor_si256(c2[1011],_mm256_xor_si256(c2[8066],_mm256_xor_si256(c2[10182],_mm256_xor_si256(c2[8423],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[1396],c2[7027])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[280]=simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[5990],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[4245],simde_mm256_xor_si256(c2[7766],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[373],simde_mm256_xor_si256(c2[3202],simde_mm256_xor_si256(c2[7778],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[9910],simde_mm256_xor_si256(c2[9557],simde_mm256_xor_si256(c2[1127],simde_mm256_xor_si256(c2[2181],simde_mm256_xor_si256(c2[9571],simde_mm256_xor_si256(c2[3958],simde_mm256_xor_si256(c2[5013],simde_mm256_xor_si256(c2[9942],simde_mm256_xor_si256(c2[6083],simde_mm256_xor_si256(c2[1510],simde_mm256_xor_si256(c2[10311],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[1874],simde_mm256_xor_si256(c2[4338],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[5414],simde_mm256_xor_si256(c2[8576],simde_mm256_xor_si256(c2[9282],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[7184],simde_mm256_xor_si256(c2[2262],simde_mm256_xor_si256(c2[4386],simde_mm256_xor_si256(c2[5092],simde_mm256_xor_si256(c2[5447],simde_mm256_xor_si256(c2[7574],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[10389],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si256(c2[10402],simde_mm256_xor_si256(c2[5478],simde_mm256_xor_si256(c2[5843],simde_mm256_xor_si256(c2[7607],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[7270],simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[9377],simde_mm256_xor_si256(c2[3761],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[3058],simde_mm256_xor_si256(c2[10819],simde_mm256_xor_si256(c2[7655],simde_mm256_xor_si256(c2[8354],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[10130],simde_mm256_xor_si256(c2[6965],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[7329],simde_mm256_xor_si256(c2[9794],simde_mm256_xor_si256(c2[3828],simde_mm256_xor_si256(c2[2419],simde_mm256_xor_si256(c2[1011],simde_mm2
56_xor_si256(c2[8066],simde_mm256_xor_si256(c2[10182],simde_mm256_xor_si256(c2[8423],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[1396],c2[7027])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[288]=_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[3395],_mm256_xor_si256(c2[6929],c2[8739])));
+     d2[288]=simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[3395],simde_mm256_xor_si256(c2[6929],c2[8739])));
 
 //row: 37
-     d2[296]=_mm256_xor_si256(c2[7043],_mm256_xor_si256(c2[7395],_mm256_xor_si256(c2[10211],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[7047],_mm256_xor_si256(c2[10933],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[7062],_mm256_xor_si256(c2[2134],_mm256_xor_si256(c2[9890],_mm256_xor_si256(c2[10242],_mm256_xor_si256(c2[3555],_mm256_xor_si256(c2[7073],_mm256_xor_si256(c2[8848],_mm256_xor_si256(c2[9200],_mm256_xor_si256(c2[5687],_mm256_xor_si256(c2[5334],_mm256_xor_si256(c2[8167],_mm256_xor_si256(c2[9221],_mm256_xor_si256(c2[5348],_mm256_xor_si256(c2[10646],_mm256_xor_si256(c2[10998],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[5719],_mm256_xor_si256(c2[1508],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[8550],_mm256_xor_si256(c2[6080],_mm256_xor_si256(c2[8914],_mm256_xor_si256(c2[115],_mm256_xor_si256(c2[10679],_mm256_xor_si256(c2[1191],_mm256_xor_si256(c2[4353],_mm256_xor_si256(c2[5059],_mm256_xor_si256(c2[7541],_mm256_xor_si256(c2[7893],_mm256_xor_si256(c2[2961],_mm256_xor_si256(c2[9302],_mm256_xor_si256(c2[11074],_mm256_xor_si256(c2[163],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[1216],_mm256_xor_si256(c2[2999],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[8274],_mm256_xor_si256(c2[6166],_mm256_xor_si256(c2[8993],_mm256_xor_si256(c2[9345],_mm256_xor_si256(c2[551],_mm256_xor_si256(c2[6179],_mm256_xor_si256(c2[1268],_mm256_xor_si256(c2[1620],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[7954],_mm256_xor_si256(c2[10070],_mm256_xor_si256(c2[3047],_mm256_xor_si256(c2[10082],_mm256_xor_si256(c2[5154],_mm256_xor_si256(c2[10449],_mm256_xor_si256(c2[10801],_mm256_xor_si256(c2[11159],_mm256_xor_si256(c2[10098],_mm256_xor_si256(c2[6244],_mm256_xor_si256(c2[6596],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[4131],_mm256_xor_si256(c2[8016],_mm256_xor_si256(c2[5907],_mm256_xor_si256(c2[2742],_mm256_xor_si256(c2[8737],_mm256_xor_si256(c2[9089],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[5571],_mm256_xor_si256(c2[10516],_mm256_xor_si256(c2[10868],_m
m256_xor_si256(c2[9459],_mm256_xor_si256(c2[8051],_mm256_xor_si256(c2[3491],_mm256_xor_si256(c2[3843],_mm256_xor_si256(c2[5959],_mm256_xor_si256(c2[4192],_mm256_xor_si256(c2[9140],_mm256_xor_si256(c2[9492],_mm256_xor_si256(c2[8436],c2[2804])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[296]=simde_mm256_xor_si256(c2[7043],simde_mm256_xor_si256(c2[7395],simde_mm256_xor_si256(c2[10211],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[7047],simde_mm256_xor_si256(c2[10933],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[7062],simde_mm256_xor_si256(c2[2134],simde_mm256_xor_si256(c2[9890],simde_mm256_xor_si256(c2[10242],simde_mm256_xor_si256(c2[3555],simde_mm256_xor_si256(c2[7073],simde_mm256_xor_si256(c2[8848],simde_mm256_xor_si256(c2[9200],simde_mm256_xor_si256(c2[5687],simde_mm256_xor_si256(c2[5334],simde_mm256_xor_si256(c2[8167],simde_mm256_xor_si256(c2[9221],simde_mm256_xor_si256(c2[5348],simde_mm256_xor_si256(c2[10646],simde_mm256_xor_si256(c2[10998],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[5719],simde_mm256_xor_si256(c2[1508],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[8550],simde_mm256_xor_si256(c2[6080],simde_mm256_xor_si256(c2[8914],simde_mm256_xor_si256(c2[115],simde_mm256_xor_si256(c2[10679],simde_mm256_xor_si256(c2[1191],simde_mm256_xor_si256(c2[4353],simde_mm256_xor_si256(c2[5059],simde_mm256_xor_si256(c2[7541],simde_mm256_xor_si256(c2[7893],simde_mm256_xor_si256(c2[2961],simde_mm256_xor_si256(c2[9302],simde_mm256_xor_si256(c2[11074],simde_mm256_xor_si256(c2[163],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[1216],simde_mm256_xor_si256(c2[2999],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[8274],simde_mm256_xor_si256(c2[6166],simde_mm256_xor_si256(c2[8993],simde_mm256_xor_si256(c2[9345],simde_mm256_xor_si256(c2[551],simde_mm256_xor_si256(c2[6179],simde_mm256_xor_si256(c2[1268],simde_mm256_xor_si256(c2[1620],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[7954],simde_mm256_xor_si256(c2[10070],simde_mm256_xor_si256(c2[3047],simde_mm256_xor_si256(c2[10082],simde_mm256_xor_si256(c2[5154],simde_mm256_xor_si256(c2[10449],simde_mm256_xor_si256(c2[10801],simde_mm256_xor_si256(c2[11159],simde_mm256_xor_si256(c2[10098],simde_mm256_xor_si256(c2[62
44],simde_mm256_xor_si256(c2[6596],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[4131],simde_mm256_xor_si256(c2[8016],simde_mm256_xor_si256(c2[5907],simde_mm256_xor_si256(c2[2742],simde_mm256_xor_si256(c2[8737],simde_mm256_xor_si256(c2[9089],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[5571],simde_mm256_xor_si256(c2[10516],simde_mm256_xor_si256(c2[10868],simde_mm256_xor_si256(c2[9459],simde_mm256_xor_si256(c2[8051],simde_mm256_xor_si256(c2[3491],simde_mm256_xor_si256(c2[3843],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[4192],simde_mm256_xor_si256(c2[9140],simde_mm256_xor_si256(c2[9492],simde_mm256_xor_si256(c2[8436],c2[2804])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[304]=_mm256_xor_si256(c2[2469],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[10372],c2[1253])));
+     d2[304]=simde_mm256_xor_si256(c2[2469],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[10372],c2[1253])));
 
 //row: 39
-     d2[312]=_mm256_xor_si256(c2[4597],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[7508],c2[304])));
+     d2[312]=simde_mm256_xor_si256(c2[4597],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[7508],c2[304])));
 
 //row: 40
-     d2[320]=_mm256_xor_si256(c2[10212],_mm256_xor_si256(c2[3300],c2[7668]));
+     d2[320]=simde_mm256_xor_si256(c2[10212],simde_mm256_xor_si256(c2[3300],c2[7668]));
 
 //row: 41
-     d2[328]=_mm256_xor_si256(c2[2485],_mm256_xor_si256(c2[4629],_mm256_xor_si256(c2[4020],c2[8388])));
+     d2[328]=simde_mm256_xor_si256(c2[2485],simde_mm256_xor_si256(c2[4629],simde_mm256_xor_si256(c2[4020],c2[8388])));
 
 //row: 42
-     d2[336]=_mm256_xor_si256(c2[1412],_mm256_xor_si256(c2[4228],_mm256_xor_si256(c2[6695],_mm256_xor_si256(c2[7047],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[1056],_mm256_xor_si256(c2[7396],_mm256_xor_si256(c2[5302],_mm256_xor_si256(c2[8471],_mm256_xor_si256(c2[8823],_mm256_xor_si256(c2[727],_mm256_xor_si256(c2[1079],_mm256_xor_si256(c2[4259],_mm256_xor_si256(c2[8835],_mm256_xor_si256(c2[738],_mm256_xor_si256(c2[1090],_mm256_xor_si256(c2[3217],_mm256_xor_si256(c2[10967],_mm256_xor_si256(c2[10262],_mm256_xor_si256(c2[10614],_mm256_xor_si256(c2[2176],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[3238],_mm256_xor_si256(c2[10276],_mm256_xor_si256(c2[10628],_mm256_xor_si256(c2[10276],_mm256_xor_si256(c2[5015],_mm256_xor_si256(c2[6070],_mm256_xor_si256(c2[10647],_mm256_xor_si256(c2[10999],_mm256_xor_si256(c2[7140],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[2567],_mm256_xor_si256(c2[11008],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[5395],_mm256_xor_si256(c2[4336],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[6471],_mm256_xor_si256(c2[9281],_mm256_xor_si256(c2[9633],_mm256_xor_si256(c2[9987],_mm256_xor_si256(c2[10339],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[8241],_mm256_xor_si256(c2[2967],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[5443],_mm256_xor_si256(c2[5797],_mm256_xor_si256(c2[6149],_mm256_xor_si256(c2[6144],_mm256_xor_si256(c2[6496],_mm256_xor_si256(c2[8631],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[11094],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[3362],_mm256_xor_si256(c2[5831],_mm256_xor_si256(c2[11107],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[6900],_mm256_xor_si256(c2[8304],_mm256_xor_si256(c2[8656],_mm256_xor_si256(c2[1619],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[8327],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[4099],_mm256_xor_si256(c2[10082],_mm256_xor_si256(c2[10434],_mm256_xor_si256(c2[4818],_mm256_xor_si256(c2[5168],_mm256_xor_si256(c2[3763],_mm256_xor_si256(c2[4115],_mm
256_xor_si256(c2[613],_mm256_xor_si256(c2[8704],_mm256_xor_si256(c2[9059],_mm256_xor_si256(c2[9411],_mm256_xor_si256(c2[2033],_mm256_xor_si256(c2[10835],_mm256_xor_si256(c2[11187],_mm256_xor_si256(c2[7670],_mm256_xor_si256(c2[8022],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[8034],_mm256_xor_si256(c2[8386],_mm256_xor_si256(c2[10499],_mm256_xor_si256(c2[10851],_mm256_xor_si256(c2[4885],_mm256_xor_si256(c2[3476],_mm256_xor_si256(c2[1716],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[9123],_mm256_xor_si256(c2[10887],_mm256_xor_si256(c2[11239],_mm256_xor_si256(c2[9120],_mm256_xor_si256(c2[9472],_mm256_xor_si256(c2[3509],_mm256_xor_si256(c2[2453],_mm256_xor_si256(c2[7732],c2[8084]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[336]=simde_mm256_xor_si256(c2[1412],simde_mm256_xor_si256(c2[4228],simde_mm256_xor_si256(c2[6695],simde_mm256_xor_si256(c2[7047],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[1056],simde_mm256_xor_si256(c2[7396],simde_mm256_xor_si256(c2[5302],simde_mm256_xor_si256(c2[8471],simde_mm256_xor_si256(c2[8823],simde_mm256_xor_si256(c2[727],simde_mm256_xor_si256(c2[1079],simde_mm256_xor_si256(c2[4259],simde_mm256_xor_si256(c2[8835],simde_mm256_xor_si256(c2[738],simde_mm256_xor_si256(c2[1090],simde_mm256_xor_si256(c2[3217],simde_mm256_xor_si256(c2[10967],simde_mm256_xor_si256(c2[10262],simde_mm256_xor_si256(c2[10614],simde_mm256_xor_si256(c2[2176],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[3238],simde_mm256_xor_si256(c2[10276],simde_mm256_xor_si256(c2[10628],simde_mm256_xor_si256(c2[10276],simde_mm256_xor_si256(c2[5015],simde_mm256_xor_si256(c2[6070],simde_mm256_xor_si256(c2[10647],simde_mm256_xor_si256(c2[10999],simde_mm256_xor_si256(c2[7140],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[2567],simde_mm256_xor_si256(c2[11008],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[5395],simde_mm256_xor_si256(c2[4336],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[6471],simde_mm256_xor_si256(c2[9281],simde_mm256_xor_si256(c2[9633],simde_mm256_xor_si256(c2[9987],simde_mm256_xor_si256(c2[10339],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[8241],simde_mm256_xor_si256(c2[2967],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[5443],simde_mm256_xor_si256(c2[5797],simde_mm256_xor_si256(c2[6149],simde_mm256_xor_si256(c2[6144],simde_mm256_xor_si256(c2[6496],simde_mm256_xor_si256(c2[8631],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[11094],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[3362],simde_mm256_xor_si256(c2[5831],simde_mm256_xor_si256(c2[11107],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[6900],simde_mm256_xor_si256(c2[830
4],simde_mm256_xor_si256(c2[8656],simde_mm256_xor_si256(c2[1619],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[8327],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[4099],simde_mm256_xor_si256(c2[10082],simde_mm256_xor_si256(c2[10434],simde_mm256_xor_si256(c2[4818],simde_mm256_xor_si256(c2[5168],simde_mm256_xor_si256(c2[3763],simde_mm256_xor_si256(c2[4115],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[8704],simde_mm256_xor_si256(c2[9059],simde_mm256_xor_si256(c2[9411],simde_mm256_xor_si256(c2[2033],simde_mm256_xor_si256(c2[10835],simde_mm256_xor_si256(c2[11187],simde_mm256_xor_si256(c2[7670],simde_mm256_xor_si256(c2[8022],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[8034],simde_mm256_xor_si256(c2[8386],simde_mm256_xor_si256(c2[10499],simde_mm256_xor_si256(c2[10851],simde_mm256_xor_si256(c2[4885],simde_mm256_xor_si256(c2[3476],simde_mm256_xor_si256(c2[1716],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[9123],simde_mm256_xor_si256(c2[10887],simde_mm256_xor_si256(c2[11239],simde_mm256_xor_si256(c2[9120],simde_mm256_xor_si256(c2[9472],simde_mm256_xor_si256(c2[3509],simde_mm256_xor_si256(c2[2453],simde_mm256_xor_si256(c2[7732],c2[8084]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[344]=_mm256_xor_si256(c2[2116],_mm256_xor_si256(c2[4932],_mm256_xor_si256(c2[7751],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[6006],_mm256_xor_si256(c2[9527],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[8116],_mm256_xor_si256(c2[4963],_mm256_xor_si256(c2[9539],_mm256_xor_si256(c2[1794],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[10966],_mm256_xor_si256(c2[55],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[10980],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[5719],_mm256_xor_si256(c2[6774],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[7844],_mm256_xor_si256(c2[3271],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[3635],_mm256_xor_si256(c2[6099],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[5392],_mm256_xor_si256(c2[7175],_mm256_xor_si256(c2[10337],_mm256_xor_si256(c2[10691],_mm256_xor_si256(c2[11043],_mm256_xor_si256(c2[2614],_mm256_xor_si256(c2[8945],_mm256_xor_si256(c2[4023],_mm256_xor_si256(c2[6147],_mm256_xor_si256(c2[6853],_mm256_xor_si256(c2[6848],_mm256_xor_si256(c2[7200],_mm256_xor_si256(c2[9335],_mm256_xor_si256(c2[2995],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[887],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[6535],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[7604],_mm256_xor_si256(c2[9360],_mm256_xor_si256(c2[2323],_mm256_xor_si256(c2[2675],_mm256_xor_si256(c2[9031],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[10786],_mm256_xor_si256(c2[11138],_mm256_xor_si256(c2[5522],_mm256_xor_si256(c2[5872],_mm256_xor_si256(c2[4819],_mm256_xor_si256(c2[1317],_mm256_xor_si256(c2[9408],_mm256_xor_si256(c2[9763],_mm256_xor_si256(c2[10115],_mm256_xor_si256(c2[1317],_mm256_xor_si256(c2[2737],_mm256_xor_si256(c2[628],_mm256_xor_si256(c2[8374],_mm256_xor_si256(c2[8726],_mm256_xor_si256(c2[3810],_mm256_xor_si256(c2[9090],_mm256_xor_si256(c2[11203],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[4869],_mm256_xor_si25
6(c2[5589],_mm256_xor_si256(c2[4180],_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[9827],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[9824],_mm256_xor_si256(c2[10176],_mm256_xor_si256(c2[4213],_mm256_xor_si256(c2[3157],_mm256_xor_si256(c2[8436],c2[8788]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[344]=simde_mm256_xor_si256(c2[2116],simde_mm256_xor_si256(c2[4932],simde_mm256_xor_si256(c2[7751],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[6006],simde_mm256_xor_si256(c2[9527],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[8116],simde_mm256_xor_si256(c2[4963],simde_mm256_xor_si256(c2[9539],simde_mm256_xor_si256(c2[1794],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[10966],simde_mm256_xor_si256(c2[55],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[10980],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[5719],simde_mm256_xor_si256(c2[6774],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[7844],simde_mm256_xor_si256(c2[3271],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[3635],simde_mm256_xor_si256(c2[6099],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[5392],simde_mm256_xor_si256(c2[7175],simde_mm256_xor_si256(c2[10337],simde_mm256_xor_si256(c2[10691],simde_mm256_xor_si256(c2[11043],simde_mm256_xor_si256(c2[2614],simde_mm256_xor_si256(c2[8945],simde_mm256_xor_si256(c2[4023],simde_mm256_xor_si256(c2[6147],simde_mm256_xor_si256(c2[6853],simde_mm256_xor_si256(c2[6848],simde_mm256_xor_si256(c2[7200],simde_mm256_xor_si256(c2[9335],simde_mm256_xor_si256(c2[2995],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[887],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[6535],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[7604],simde_mm256_xor_si256(c2[9360],simde_mm256_xor_si256(c2[2323],simde_mm256_xor_si256(c2[2675],simde_mm256_xor_si256(c2[9031],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[10786],simde_mm256_xor_si256(c2[11138],simde_mm256_xor_si256(c2[5522],simde_mm256_xor_si256(c2[5872],simde_mm256_xor_si256(c2[4819],simde_mm256_xor_si256(c2[1317],simde_mm256_xor_si256(c2[9408],simde_m
m256_xor_si256(c2[9763],simde_mm256_xor_si256(c2[10115],simde_mm256_xor_si256(c2[1317],simde_mm256_xor_si256(c2[2737],simde_mm256_xor_si256(c2[628],simde_mm256_xor_si256(c2[8374],simde_mm256_xor_si256(c2[8726],simde_mm256_xor_si256(c2[3810],simde_mm256_xor_si256(c2[9090],simde_mm256_xor_si256(c2[11203],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[4869],simde_mm256_xor_si256(c2[5589],simde_mm256_xor_si256(c2[4180],simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[9827],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[9824],simde_mm256_xor_si256(c2[10176],simde_mm256_xor_si256(c2[4213],simde_mm256_xor_si256(c2[3157],simde_mm256_xor_si256(c2[8436],c2[8788]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[352]=_mm256_xor_si256(c2[2469],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[8096],_mm256_xor_si256(c2[2113],_mm256_xor_si256(c2[3876],_mm256_xor_si256(c2[6359],_mm256_xor_si256(c2[9872],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[5316],_mm256_xor_si256(c2[9892],_mm256_xor_si256(c2[2147],_mm256_xor_si256(c2[4274],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[3233],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[422],_mm256_xor_si256(c2[6064],_mm256_xor_si256(c2[7127],_mm256_xor_si256(c2[785],_mm256_xor_si256(c2[8197],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[3988],_mm256_xor_si256(c2[6452],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[10324],_mm256_xor_si256(c2[7520],_mm256_xor_si256(c2[10690],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[2967],_mm256_xor_si256(c2[9298],_mm256_xor_si256(c2[4368],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[6500],_mm256_xor_si256(c2[7206],_mm256_xor_si256(c2[7553],_mm256_xor_si256(c2[9680],_mm256_xor_si256(c2[3348],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[6880],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[7957],_mm256_xor_si256(c2[9713],_mm256_xor_si256(c2[3028],_mm256_xor_si256(c2[9376],_mm256_xor_si256(c2[5156],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[5875],_mm256_xor_si256(c2[6225],_mm256_xor_si256(c2[5172],_mm256_xor_si256(c2[1670],_mm256_xor_si256(c2[9761],_mm256_xor_si256(c2[10468],_mm256_xor_si256(c2[3090],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[9079],_mm256_xor_si256(c2[4163],_mm256_xor_si256(c2[9443],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[5942],_mm256_xor_si256(c2[4533],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[10180],_mm256_xor_si256(c2[1025],_mm256_xor_si256(c2[10529],_mm256_xor_si256(c2[4566],_mm256_xor_si256(c2[3510],c2[9141])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[352]=simde_mm256_xor_si256(c2[2469],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[8096],simde_mm256_xor_si256(c2[2113],simde_mm256_xor_si256(c2[3876],simde_mm256_xor_si256(c2[6359],simde_mm256_xor_si256(c2[9872],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[5316],simde_mm256_xor_si256(c2[9892],simde_mm256_xor_si256(c2[2147],simde_mm256_xor_si256(c2[4274],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[3233],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[422],simde_mm256_xor_si256(c2[6064],simde_mm256_xor_si256(c2[7127],simde_mm256_xor_si256(c2[785],simde_mm256_xor_si256(c2[8197],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[3988],simde_mm256_xor_si256(c2[6452],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[10324],simde_mm256_xor_si256(c2[7520],simde_mm256_xor_si256(c2[10690],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[2967],simde_mm256_xor_si256(c2[9298],simde_mm256_xor_si256(c2[4368],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[6500],simde_mm256_xor_si256(c2[7206],simde_mm256_xor_si256(c2[7553],simde_mm256_xor_si256(c2[9680],simde_mm256_xor_si256(c2[3348],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[6880],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[7957],simde_mm256_xor_si256(c2[9713],simde_mm256_xor_si256(c2[3028],simde_mm256_xor_si256(c2[9376],simde_mm256_xor_si256(c2[5156],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[5875],simde_mm256_xor_si256(c2[6225],simde_mm256_xor_si256(c2[5172],simde_mm256_xor_si256(c2[1670],simde_mm256_xor_si256(c2[9761],simde_mm256_xor_si256(c2[10468],simde_mm256_xor_si256(c2[3090],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[9079],simde_mm256_xor_si256(c2[4163],simde_mm256_xor_si256(c2[9443],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[5942],simde_mm256_xor_si256(c2[4533],simde_mm256_xor_si256(c2[3125],simde_m
m256_xor_si256(c2[10180],simde_mm256_xor_si256(c2[1025],simde_mm256_xor_si256(c2[10529],simde_mm256_xor_si256(c2[4566],simde_mm256_xor_si256(c2[3510],c2[9141])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[360]=_mm256_xor_si256(c2[7412],_mm256_xor_si256(c2[8196],c2[2629]));
+     d2[360]=simde_mm256_xor_si256(c2[7412],simde_mm256_xor_si256(c2[8196],c2[2629]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc288_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc288_byte.c
index 239a6e955d949d75a5ec63071b59154a6b320408..92ef1d48f4dbdd6f835f5171993ec9bfa48f526b 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc288_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc288_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc288_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[7530],_mm256_xor_si256(c2[7529],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[10697],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[11129],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[11130],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[12336],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[4827],_mm256_xor_si256(c2[10788],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[7621],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[4880],_mm256_xor_si256(c2[10428],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[4110],_mm256_xor_si256(c2[12421],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[8481],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[6520],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[5349],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[6156],_mm256_xor_si256(c2[10117],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[4196],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[2239],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[9778],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[12172],_mm256_xor_si256(c2[7423],_mm256_xor_si256(c2[7834],_mm256_xor_si256(c2[9816],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[9852],_mm256_xor_si256(c2[11434],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[7530],simde_mm256_xor_si256(c2[7529],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[10697],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[11129],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[11130],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[12336],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[4827],simde_mm256_xor_si256(c2[10788],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[7621],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[4880],simde_mm256_xor_si256(c2[10428],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[4110],simde_mm256_xor_si256(c2[12421],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[8481],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[6520],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[5349],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[6156],simde_mm256_xor_si256(c2[10117],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[4196],simde_mm256_xor_si256(c2[1819],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[2239],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[9778],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[12172],simde_mm256_xor_si256(c2[7423],simde_mm256_xor_si256(c2[7834],simde_mm256_xor_si256(c2[9816],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[9852],simde_mm256_xor_si256(c2[11434],simde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[7095],s
imde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[9]=_mm256_xor_si256(c2[7530],_mm256_xor_si256(c2[7926],_mm256_xor_si256(c2[7925],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[11093],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[7150],_mm256_xor_si256(c2[11129],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[1229],_mm256_xor_si256(c2[11526],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[10751],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[10788],_mm256_xor_si256(c2[11184],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[8017],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[10824],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[4506],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[2526],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[12042],_mm256_xor_si256(c2[8877],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[6520],_mm256_xor_si256(c2[6916],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[8517],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[10122],_mm256_xor_si256(c2[6552],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[8157],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[8175],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[4237],_mm256_xor_si256(c2[10174],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[7024],_mm256_xor_si256(c2[12568],_mm256_xor_si256(c2[7819],_mm256_xor_si256(c2[8230],_mm256_xor_si256(c2[10212],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[9852],_mm256_xor_si256(c2[10248],_mm256_xor_si256(c2[11830],_mm256_xor_si256(c2[3912],_m
m256_xor_si256(c2[3926],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[7491],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[9885],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[9]=simde_mm256_xor_si256(c2[7530],simde_mm256_xor_si256(c2[7926],simde_mm256_xor_si256(c2[7925],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[11093],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[7150],simde_mm256_xor_si256(c2[11129],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[1229],simde_mm256_xor_si256(c2[11526],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[10751],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[10788],simde_mm256_xor_si256(c2[11184],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[8017],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[4470],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[10824],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[4506],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[2526],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[12042],simde_mm256_xor_si256(c2[8877],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[6520],simde_mm256_xor_si256(c2[6916],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[8517],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[10122],simde_mm256_xor_si256(c2[6552],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[8157],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[8175],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[4237],simde_mm256_xor_si256(c2[10174],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[7024],simde_mm256_xor_si256(c2[125
68],simde_mm256_xor_si256(c2[7819],simde_mm256_xor_si256(c2[8230],simde_mm256_xor_si256(c2[10212],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[9852],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[11830],simde_mm256_xor_si256(c2[3912],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[7491],simde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[9885],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[18]=_mm256_xor_si256(c2[7926],_mm256_xor_si256(c2[7925],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[10697],_mm256_xor_si256(c2[11093],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[7150],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[1229],_mm256_xor_si256(c2[11130],_mm256_xor_si256(c2[11526],_mm256_xor_si256(c2[10751],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[4827],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[11184],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[7621],_mm256_xor_si256(c2[8017],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[10428],_mm256_xor_si256(c2[10824],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[4506],_mm256_xor_si256(c2[12421],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[2526],_mm256_xor_si256(c2[12042],_mm256_xor_si256(c2[8877],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[6916],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[8517],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[10122],_mm256_xor_si256(c2[6552],_mm256_xor_si256(c2[10117],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[8157],_mm256_xor_si256(c2[4196],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[2239],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[8175],_mm256_xor_si256(c2[4237],_mm256_xor_si256(c2[10174],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[7024],_mm256_xor_si256(c2[12568],_mm256
_xor_si256(c2[7423],_mm256_xor_si256(c2[7819],_mm256_xor_si256(c2[8230],_mm256_xor_si256(c2[9816],_mm256_xor_si256(c2[10212],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[10248],_mm256_xor_si256(c2[11830],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3912],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[7491],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[5133],c2[5529]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[7926],simde_mm256_xor_si256(c2[7925],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[10697],simde_mm256_xor_si256(c2[11093],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[7150],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[1229],simde_mm256_xor_si256(c2[11130],simde_mm256_xor_si256(c2[11526],simde_mm256_xor_si256(c2[10751],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[4827],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[11184],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[7621],simde_mm256_xor_si256(c2[8017],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[4470],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[10428],simde_mm256_xor_si256(c2[10824],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[4506],simde_mm256_xor_si256(c2[12421],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[2526],simde_mm256_xor_si256(c2[12042],simde_mm256_xor_si256(c2[8877],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[6916],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[8517],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[10122],simde_mm256_xor_si256(c2[6552],simde_mm256_xor_si256(c2[10117],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[8157],simde_mm256_xor_si256(c2[4196],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[1819
],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[2239],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[8175],simde_mm256_xor_si256(c2[4237],simde_mm256_xor_si256(c2[10174],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[7024],simde_mm256_xor_si256(c2[12568],simde_mm256_xor_si256(c2[7423],simde_mm256_xor_si256(c2[7819],simde_mm256_xor_si256(c2[8230],simde_mm256_xor_si256(c2[9816],simde_mm256_xor_si256(c2[10212],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[11830],simde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[3912],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[7095],simde_mm256_xor_si256(c2[7491],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[5133],c2[5529]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[27]=_mm256_xor_si256(c2[7926],_mm256_xor_si256(c2[7925],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[10697],_mm256_xor_si256(c2[11093],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[7150],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[1229],_mm256_xor_si256(c2[11526],_mm256_xor_si256(c2[10751],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[4827],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[11184],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[8017],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[10824],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[4506],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[2526],_mm256_xor_si256(c2[12042],_mm256_xor_si256(c2[8877],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[6916],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[8517],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[10122],_mm256_xor_si256(c2[6552],_mm256_xor_si256(c2[10117],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[8157],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[8175],_mm256_xor_si256(c2[4237],_mm256_xor_si256(c2[10174],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[7024],_mm256_xor_si256(c2[12568],_mm256_xor_si256(c2[7423],_mm256_xor_si256(c2[7819],_mm256_xor_si256(c2[8230],_mm256_xor_si256(c2[10212],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[10248],_mm256_xor_si256(c2[11830],_mm256_xor_si256(c2[3912],_mm256
_xor_si256(c2[4322],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[7491],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[5133],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[27]=simde_mm256_xor_si256(c2[7926],simde_mm256_xor_si256(c2[7925],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[10697],simde_mm256_xor_si256(c2[11093],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[7150],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[1229],simde_mm256_xor_si256(c2[11526],simde_mm256_xor_si256(c2[10751],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[4827],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[11184],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[8017],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[4470],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[10824],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[4506],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[2526],simde_mm256_xor_si256(c2[12042],simde_mm256_xor_si256(c2[8877],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[6916],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[8517],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[10122],simde_mm256_xor_si256(c2[6552],simde_mm256_xor_si256(c2[10117],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[8157],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[1819],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[8175],simde_mm256_xor_si256(c2[4237],simde_mm256_xor_si256(c2[10174],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[7024],simde_mm256_xor_si256(c2[12568],simde_mm256_xor_si256(c2[7423
],simde_mm256_xor_si256(c2[7819],simde_mm256_xor_si256(c2[8230],simde_mm256_xor_si256(c2[10212],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[11830],simde_mm256_xor_si256(c2[3912],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[7095],simde_mm256_xor_si256(c2[7491],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[5133],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[36]=_mm256_xor_si256(c2[8719],c2[4381]);
+     d2[36]=simde_mm256_xor_si256(c2[8719],c2[4381]);
 
 //row: 5
-     d2[45]=_mm256_xor_si256(c2[1190],_mm256_xor_si256(c2[1189],_mm256_xor_si256(c2[7131],_mm256_xor_si256(c2[4357],_mm256_xor_si256(c2[1988],_mm256_xor_si256(c2[8731],_mm256_xor_si256(c2[9126],_mm256_xor_si256(c2[414],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[4789],_mm256_xor_si256(c2[7164],_mm256_xor_si256(c2[4790],_mm256_xor_si256(c2[4015],_mm256_xor_si256(c2[5996],_mm256_xor_si256(c2[8766],_mm256_xor_si256(c2[3224],_mm256_xor_si256(c2[6411],_mm256_xor_si256(c2[8396],_mm256_xor_si256(c2[11167],_mm256_xor_si256(c2[4448],_mm256_xor_si256(c2[6426],_mm256_xor_si256(c2[1281],_mm256_xor_si256(c2[8032],_mm256_xor_si256(c2[10405],_mm256_xor_si256(c2[907],_mm256_xor_si256(c2[11220],_mm256_xor_si256(c2[4088],_mm256_xor_si256(c2[8054],_mm256_xor_si256(c2[10441],_mm256_xor_si256(c2[6090],_mm256_xor_si256(c2[8461],_mm256_xor_si256(c2[5315],_mm256_xor_si256(c2[2150],_mm256_xor_si256(c2[6502],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[7708],_mm256_xor_si256(c2[10085],_mm256_xor_si256(c2[1790],_mm256_xor_si256(c2[11689],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[3386],_mm256_xor_si256(c2[12496],_mm256_xor_si256(c2[3786],_mm256_xor_si256(c2[10916],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[10536],_mm256_xor_si256(c2[8159],_mm256_xor_si256(c2[9762],_mm256_xor_si256(c2[8570],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[10172],_mm256_xor_si256(c2[3438],_mm256_xor_si256(c2[6610],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[5832],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[12175],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[3476],_mm256_xor_si256(c2[8622],_mm256_xor_si256(c2[6271],_mm256_xor_si256(c2[11811],_mm256_xor_si256(c2[6664],_mm256_xor_si256(c2[3512],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[9847],_mm256_xor_si256(c2[10266],_mm256_xor_si256(c2[10658],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[8697],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[11473],c2[6320]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[45]=simde_mm256_xor_si256(c2[1190],simde_mm256_xor_si256(c2[1189],simde_mm256_xor_si256(c2[7131],simde_mm256_xor_si256(c2[4357],simde_mm256_xor_si256(c2[1988],simde_mm256_xor_si256(c2[8731],simde_mm256_xor_si256(c2[9126],simde_mm256_xor_si256(c2[414],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[4789],simde_mm256_xor_si256(c2[7164],simde_mm256_xor_si256(c2[4790],simde_mm256_xor_si256(c2[4015],simde_mm256_xor_si256(c2[5996],simde_mm256_xor_si256(c2[8766],simde_mm256_xor_si256(c2[3224],simde_mm256_xor_si256(c2[6411],simde_mm256_xor_si256(c2[8396],simde_mm256_xor_si256(c2[11167],simde_mm256_xor_si256(c2[4448],simde_mm256_xor_si256(c2[6426],simde_mm256_xor_si256(c2[1281],simde_mm256_xor_si256(c2[8032],simde_mm256_xor_si256(c2[10405],simde_mm256_xor_si256(c2[907],simde_mm256_xor_si256(c2[11220],simde_mm256_xor_si256(c2[4088],simde_mm256_xor_si256(c2[8054],simde_mm256_xor_si256(c2[10441],simde_mm256_xor_si256(c2[6090],simde_mm256_xor_si256(c2[8461],simde_mm256_xor_si256(c2[5315],simde_mm256_xor_si256(c2[2150],simde_mm256_xor_si256(c2[6502],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[7708],simde_mm256_xor_si256(c2[10085],simde_mm256_xor_si256(c2[1790],simde_mm256_xor_si256(c2[11689],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[3386],simde_mm256_xor_si256(c2[12496],simde_mm256_xor_si256(c2[3786],simde_mm256_xor_si256(c2[10916],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[10536],simde_mm256_xor_si256(c2[8159],simde_mm256_xor_si256(c2[9762],simde_mm256_xor_si256(c2[8570],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[10172],simde_mm256_xor_si256(c2[3438],simde_mm256_xor_si256(c2[6610],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[5832],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[12175],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[3476],simde_mm256_xor_si256(c2[8622],simde_mm256_xor_si256(c2[6271],simde_mm256_xor_si256(c2[11811],simde_mm256_xor_si256(c2[6664],simde_mm256_xor_si256(c2[3
512],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[9847],simde_mm256_xor_si256(c2[10266],simde_mm256_xor_si256(c2[10658],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[8697],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[11473],c2[6320]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[54]=_mm256_xor_si256(c2[6338],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[10496],_mm256_xor_si256(c2[10927],_mm256_xor_si256(c2[7043],_mm256_xor_si256(c2[9436],c2[5115])))))));
+     d2[54]=simde_mm256_xor_si256(c2[6338],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[10496],simde_mm256_xor_si256(c2[10927],simde_mm256_xor_si256(c2[7043],simde_mm256_xor_si256(c2[9436],c2[5115])))))));
 
 //row: 7
-     d2[63]=_mm256_xor_si256(c2[3569],_mm256_xor_si256(c2[11507],_mm256_xor_si256(c2[10372],_mm256_xor_si256(c2[9635],_mm256_xor_si256(c2[3315],c2[1841])))));
+     d2[63]=simde_mm256_xor_si256(c2[3569],simde_mm256_xor_si256(c2[11507],simde_mm256_xor_si256(c2[10372],simde_mm256_xor_si256(c2[9635],simde_mm256_xor_si256(c2[3315],c2[1841])))));
 
 //row: 8
-     d2[72]=_mm256_xor_si256(c2[9904],_mm256_xor_si256(c2[3968],_mm256_xor_si256(c2[9903],_mm256_xor_si256(c2[3967],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[9504],_mm256_xor_si256(c2[9900],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[6739],_mm256_xor_si256(c2[7135],_mm256_xor_si256(c2[8713],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[11509],_mm256_xor_si256(c2[5169],_mm256_xor_si256(c2[11508],_mm256_xor_si256(c2[11904],_mm256_xor_si256(c2[9128],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3192],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[7567],_mm256_xor_si256(c2[3207],_mm256_xor_si256(c2[9942],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[7172],_mm256_xor_si256(c2[7568],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[6793],_mm256_xor_si256(c2[2039],_mm256_xor_si256(c2[8774],_mm256_xor_si256(c2[4809],_mm256_xor_si256(c2[11148],_mm256_xor_si256(c2[11544],_mm256_xor_si256(c2[11149],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[9180],_mm256_xor_si256(c2[4430],_mm256_xor_si256(c2[10769],_mm256_xor_si256(c2[11165],_mm256_xor_si256(c2[7201],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[1265],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[7226],_mm256_xor_si256(c2[2469],_mm256_xor_si256(c2[9204],_mm256_xor_si256(c2[9995],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[4050],_mm256_xor_si256(c2[4075],_mm256_xor_si256(c2[10801],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[9612],_mm256_xor_si256(c2[3280],_mm256_xor_si256(c2[3676],_mm256_xor_si256(c2[7254],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[6470],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[4088],_mm256_xor_si256(c2[10427],_mm256_xor_si256(c2[10823],_mm256_xor_si256(c2[6484],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[2124],_mm256_xor_si256(c2[8463],_mm256_xor_si256(c2[8859],_mm256_xor_si256(c2[4504],_mm256_xor_si256(c2[10843],_mm256_xor_si256(c2[11239],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[8084],_mm256_xor_
si256(c2[10855],_mm256_xor_si256(c2[4919],_mm256_xor_si256(c2[2545],_mm256_xor_si256(c2[8875],_mm256_xor_si256(c2[9271],_mm256_xor_si256(c2[8894],_mm256_xor_si256(c2[2958],_mm256_xor_si256(c2[3751],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[10477],_mm256_xor_si256(c2[6128],_mm256_xor_si256(c2[12458],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[10495],_mm256_xor_si256(c2[4559],_mm256_xor_si256(c2[7723],_mm256_xor_si256(c2[1787],_mm256_xor_si256(c2[3369],_mm256_xor_si256(c2[9708],_mm256_xor_si256(c2[10104],_mm256_xor_si256(c2[12100],_mm256_xor_si256(c2[6164],_mm256_xor_si256(c2[8539],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[12500],_mm256_xor_si256(c2[6159],_mm256_xor_si256(c2[6555],_mm256_xor_si256(c2[3781],_mm256_xor_si256(c2[10135],_mm256_xor_si256(c2[4199],_mm256_xor_si256(c2[6570],_mm256_xor_si256(c2[238],_mm256_xor_si256(c2[634],_mm256_xor_si256(c2[4202],_mm256_xor_si256(c2[10532],_mm256_xor_si256(c2[10928],_mm256_xor_si256(c2[5796],_mm256_xor_si256(c2[12531],_mm256_xor_si256(c2[4613],_mm256_xor_si256(c2[10952],_mm256_xor_si256(c2[11348],_mm256_xor_si256(c2[10153],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[4217],_mm256_xor_si256(c2[6215],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[12152],_mm256_xor_si256(c2[6216],_mm256_xor_si256(c2[2653],_mm256_xor_si256(c2[8983],_mm256_xor_si256(c2[9379],_mm256_xor_si256(c2[9002],_mm256_xor_si256(c2[3066],_mm256_xor_si256(c2[1875],_mm256_xor_si256(c2[8610],_mm256_xor_si256(c2[9797],_mm256_xor_si256(c2[3456],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[10208],_mm256_xor_si256(c2[4272],_mm256_xor_si256(c2[12190],_mm256_xor_si256(c2[5858],_mm256_xor_si256(c2[6254],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[11004],_mm256_xor_si256(c2[11400],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[9040],_mm256_xor_si256(c2[7854],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[1909],_mm256_xor_si256(c2[2707],_mm256_xor_si256(c2[9037],_mm256_xor_si256(c2[9433],_mm256_xor_si256(c2[12226],_mm256_xor_si256(c
2[6290],_mm256_xor_si256(c2[1137],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[5890],_mm256_xor_si256(c2[12229],_mm256_xor_si256(c2[12625],_mm256_xor_si256(c2[5887],_mm256_xor_si256(c2[6300],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[6701],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[756],_mm256_xor_si256(c2[9469],_mm256_xor_si256(c2[3137],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[4740],_mm256_xor_si256(c2[11466],_mm256_xor_si256(c2[11863],_mm256_xor_si256(c2[5927],_mm256_xor_si256(c2[7507],_mm256_xor_si256(c2[1175],_mm256_xor_si256(c2[1571],c2[6318]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[72]=simde_mm256_xor_si256(c2[9904],simde_mm256_xor_si256(c2[3968],simde_mm256_xor_si256(c2[9903],simde_mm256_xor_si256(c2[3967],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[9504],simde_mm256_xor_si256(c2[9900],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[6739],simde_mm256_xor_si256(c2[7135],simde_mm256_xor_si256(c2[8713],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si256(c2[11509],simde_mm256_xor_si256(c2[5169],simde_mm256_xor_si256(c2[11508],simde_mm256_xor_si256(c2[11904],simde_mm256_xor_si256(c2[9128],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3192],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[7567],simde_mm256_xor_si256(c2[3207],simde_mm256_xor_si256(c2[9942],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[7172],simde_mm256_xor_si256(c2[7568],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[6793],simde_mm256_xor_si256(c2[2039],simde_mm256_xor_si256(c2[8774],simde_mm256_xor_si256(c2[4809],simde_mm256_xor_si256(c2[11148],simde_mm256_xor_si256(c2[11544],simde_mm256_xor_si256(c2[11149],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[9180],simde_mm256_xor_si256(c2[4430],simde_mm256_xor_si256(c2[10769],simde_mm256_xor_si256(c2[11165],simde_mm256_xor_si256(c2[7201],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[1265],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[7226],simde_mm256_xor_si256(c2[2469],simde_mm256_xor_si256(c2[9204],simde_mm256_xor_si256(c2[9995],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[4050],simde_mm256_xor_si256(c2[4075],simde_mm256_xor_si256(c2[10801],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[9612],simde_mm256_xor_si256(c2[3280],simde_mm256_xor_si256(c2[3676],simde_mm256_xor_si256(c2[7254],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[6470],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[4088],simde_
mm256_xor_si256(c2[10427],simde_mm256_xor_si256(c2[10823],simde_mm256_xor_si256(c2[6484],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[2124],simde_mm256_xor_si256(c2[8463],simde_mm256_xor_si256(c2[8859],simde_mm256_xor_si256(c2[4504],simde_mm256_xor_si256(c2[10843],simde_mm256_xor_si256(c2[11239],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[8084],simde_mm256_xor_si256(c2[10855],simde_mm256_xor_si256(c2[4919],simde_mm256_xor_si256(c2[2545],simde_mm256_xor_si256(c2[8875],simde_mm256_xor_si256(c2[9271],simde_mm256_xor_si256(c2[8894],simde_mm256_xor_si256(c2[2958],simde_mm256_xor_si256(c2[3751],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[10477],simde_mm256_xor_si256(c2[6128],simde_mm256_xor_si256(c2[12458],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[10495],simde_mm256_xor_si256(c2[4559],simde_mm256_xor_si256(c2[7723],simde_mm256_xor_si256(c2[1787],simde_mm256_xor_si256(c2[3369],simde_mm256_xor_si256(c2[9708],simde_mm256_xor_si256(c2[10104],simde_mm256_xor_si256(c2[12100],simde_mm256_xor_si256(c2[6164],simde_mm256_xor_si256(c2[8539],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[12500],simde_mm256_xor_si256(c2[6159],simde_mm256_xor_si256(c2[6555],simde_mm256_xor_si256(c2[3781],simde_mm256_xor_si256(c2[10135],simde_mm256_xor_si256(c2[4199],simde_mm256_xor_si256(c2[6570],simde_mm256_xor_si256(c2[238],simde_mm256_xor_si256(c2[634],simde_mm256_xor_si256(c2[4202],simde_mm256_xor_si256(c2[10532],simde_mm256_xor_si256(c2[10928],simde_mm256_xor_si256(c2[5796],simde_mm256_xor_si256(c2[12531],simde_mm256_xor_si256(c2[4613],simde_mm256_xor_si256(c2[10952],simde_mm256_xor_si256(c2[11348],simde_mm256_xor_si256(c2[10153],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[4217],simde_mm256_xor_si256(c2[6215],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[12152],simde_mm256_xor_si256(c2[6216],simde_mm256_xor_si256(c2[2653],simde_mm256_xor_si256(c2[8983],simde_mm256_xor_si256(c2[9379],simde_mm256_xor_si256(c2[9002],simde_m
m256_xor_si256(c2[3066],simde_mm256_xor_si256(c2[1875],simde_mm256_xor_si256(c2[8610],simde_mm256_xor_si256(c2[9797],simde_mm256_xor_si256(c2[3456],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[10208],simde_mm256_xor_si256(c2[4272],simde_mm256_xor_si256(c2[12190],simde_mm256_xor_si256(c2[5858],simde_mm256_xor_si256(c2[6254],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[11004],simde_mm256_xor_si256(c2[11400],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[9040],simde_mm256_xor_si256(c2[7854],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[1909],simde_mm256_xor_si256(c2[2707],simde_mm256_xor_si256(c2[9037],simde_mm256_xor_si256(c2[9433],simde_mm256_xor_si256(c2[12226],simde_mm256_xor_si256(c2[6290],simde_mm256_xor_si256(c2[1137],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[5890],simde_mm256_xor_si256(c2[12229],simde_mm256_xor_si256(c2[12625],simde_mm256_xor_si256(c2[5887],simde_mm256_xor_si256(c2[6300],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[6701],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[756],simde_mm256_xor_si256(c2[9469],simde_mm256_xor_si256(c2[3137],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[4740],simde_mm256_xor_si256(c2[11466],simde_mm256_xor_si256(c2[11863],simde_mm256_xor_si256(c2[5927],simde_mm256_xor_si256(c2[7507],simde_mm256_xor_si256(c2[1175],simde_mm256_xor_si256(c2[1571],c2[6318]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[81]=_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[12298],_mm256_xor_si256(c2[11666],_mm256_xor_si256(c2[2578],_mm256_xor_si256(c2[5383],_mm256_xor_si256(c2[1894],_mm256_xor_si256(c2[5078],c2[7098])))))));
+     d2[81]=simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[12298],simde_mm256_xor_si256(c2[11666],simde_mm256_xor_si256(c2[2578],simde_mm256_xor_si256(c2[5383],simde_mm256_xor_si256(c2[1894],simde_mm256_xor_si256(c2[5078],c2[7098])))))));
 
 //row: 10
-     d2[90]=_mm256_xor_si256(c2[7151],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[11160],_mm256_xor_si256(c2[4490],_mm256_xor_si256(c2[4111],c2[3822])))));
+     d2[90]=simde_mm256_xor_si256(c2[7151],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[11160],simde_mm256_xor_si256(c2[4490],simde_mm256_xor_si256(c2[4111],c2[3822])))));
 
 //row: 11
-     d2[99]=_mm256_xor_si256(c2[10303],_mm256_xor_si256(c2[5155],_mm256_xor_si256(c2[5551],_mm256_xor_si256(c2[10302],_mm256_xor_si256(c2[5550],_mm256_xor_si256(c2[3564],_mm256_xor_si256(c2[11492],_mm256_xor_si256(c2[799],_mm256_xor_si256(c2[8718],_mm256_xor_si256(c2[9109],_mm256_xor_si256(c2[5173],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[5568],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[9527],_mm256_xor_si256(c2[4775],_mm256_xor_si256(c2[9126],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[8754],_mm256_xor_si256(c2[9150],_mm256_xor_si256(c2[3606],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[9151],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[7980],_mm256_xor_si256(c2[8376],_mm256_xor_si256(c2[2438],_mm256_xor_si256(c2[10357],_mm256_xor_si256(c2[5208],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[10772],_mm256_xor_si256(c2[4829],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[7600],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[890],_mm256_xor_si256(c2[8413],_mm256_xor_si256(c2[8809],_mm256_xor_si256(c2[2868],_mm256_xor_si256(c2[10787],_mm256_xor_si256(c2[10394],_mm256_xor_si256(c2[5642],_mm256_xor_si256(c2[4465],_mm256_xor_si256(c2[11988],_mm256_xor_si256(c2[12384],_mm256_xor_si256(c2[6847],_mm256_xor_si256(c2[2095],_mm256_xor_si256(c2[10011],_mm256_xor_si256(c2[5259],_mm256_xor_si256(c2[7653],_mm256_xor_si256(c2[2901],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[8449],_mm256_xor_si256(c2[4487],_mm256_xor_si256(c2[12406],_mm256_xor_si256(c2[6883],_mm256_xor_si256(c2[2131],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[10442],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[1748],_mm256_xor_si256(c2[9271],_mm256_xor_si256(c2[9667],_mm256_xor_si256(c2[11254],_mm256_xor_si256(c2[6502],_mm256_xor_si256(c2[2935],_mm256_xor_si256(c2[10854],_mm256_xor_si256(c2[9293],_mm256_xor_si256(c2[4145],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[12060],_mm256_
xor_si256(c2[6518],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[10894],_mm256_xor_si256(c2[5746],_mm256_xor_si256(c2[6142],_mm256_xor_si256(c2[8122],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[3768],_mm256_xor_si256(c2[11687],_mm256_xor_si256(c2[12499],_mm256_xor_si256(c2[7351],_mm256_xor_si256(c2[7747],_mm256_xor_si256(c2[8929],_mm256_xor_si256(c2[4177],_mm256_xor_si256(c2[219],_mm256_xor_si256(c2[8138],_mm256_xor_si256(c2[7352],_mm256_xor_si256(c2[10534],_mm256_xor_si256(c2[5386],_mm256_xor_si256(c2[5782],_mm256_xor_si256(c2[6969],_mm256_xor_si256(c2[2217],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[12511],_mm256_xor_si256(c2[6195],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[5012],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[10552],_mm256_xor_si256(c2[5800],_mm256_xor_si256(c2[6614],_mm256_xor_si256(c2[1466],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[12551],_mm256_xor_si256(c2[7799],_mm256_xor_si256(c2[3043],_mm256_xor_si256(c2[10962],_mm256_xor_si256(c2[9401],_mm256_xor_si256(c2[4253],_mm256_xor_si256(c2[4649],_mm256_xor_si256(c2[2274],_mm256_xor_si256(c2[10193],_mm256_xor_si256(c2[10196],_mm256_xor_si256(c2[5444],_mm256_xor_si256(c2[8609],_mm256_xor_si256(c2[10607],_mm256_xor_si256(c2[5855],_mm256_xor_si256(c2[12589],_mm256_xor_si256(c2[7837],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[10227],_mm256_xor_si256(c2[10623],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[3492],_mm256_xor_si256(c2[3097],_mm256_xor_si256(c2[11016],_mm256_xor_si256(c2[12625],_mm256_xor_si256(c2[7477],_mm256_xor_si256(c2[7873],_mm256_xor_si256(c2[1536],_mm256_xor_si256(c2[9455],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[6699],_mm256_xor_si256(c2[1551],_mm256_xor_si256(c2[1947],_mm256_xor_si256(c2[7100],_mm256_xor_si256(c2[2348],_mm256_xor_si256(c2[9868],_mm256_xor_si256(c2[5116],_mm256_xor_si256(c2[5130],_mm256_xor_si256(c2[12662],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[12262],_mm256_xor_si256(
c2[7510],_mm256_xor_si256(c2[7906],_mm256_xor_si256(c2[3154],c2[7118])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[99]=simde_mm256_xor_si256(c2[10303],simde_mm256_xor_si256(c2[5155],simde_mm256_xor_si256(c2[5551],simde_mm256_xor_si256(c2[10302],simde_mm256_xor_si256(c2[5550],simde_mm256_xor_si256(c2[3564],simde_mm256_xor_si256(c2[11492],simde_mm256_xor_si256(c2[799],simde_mm256_xor_si256(c2[8718],simde_mm256_xor_si256(c2[9109],simde_mm256_xor_si256(c2[5173],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[5568],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[9527],simde_mm256_xor_si256(c2[4775],simde_mm256_xor_si256(c2[9126],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[8754],simde_mm256_xor_si256(c2[9150],simde_mm256_xor_si256(c2[3606],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[9151],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[7980],simde_mm256_xor_si256(c2[8376],simde_mm256_xor_si256(c2[2438],simde_mm256_xor_si256(c2[10357],simde_mm256_xor_si256(c2[5208],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[10772],simde_mm256_xor_si256(c2[4829],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[7600],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[890],simde_mm256_xor_si256(c2[8413],simde_mm256_xor_si256(c2[8809],simde_mm256_xor_si256(c2[2868],simde_mm256_xor_si256(c2[10787],simde_mm256_xor_si256(c2[10394],simde_mm256_xor_si256(c2[5642],simde_mm256_xor_si256(c2[4465],simde_mm256_xor_si256(c2[11988],simde_mm256_xor_si256(c2[12384],simde_mm256_xor_si256(c2[6847],simde_mm256_xor_si256(c2[2095],simde_mm256_xor_si256(c2[10011],simde_mm256_xor_si256(c2[5259],simde_mm256_xor_si256(c2[7653],simde_mm256_xor_si256(c2[2901],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[8449],simde_mm256_xor_si256(c2[4487],simde_mm256_xor_si256(c2[12406],simde_mm256_xor_si256(c2[6883],simde_mm256_xor_si256(c2[2131],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[10442],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[151],si
mde_mm256_xor_si256(c2[1748],simde_mm256_xor_si256(c2[9271],simde_mm256_xor_si256(c2[9667],simde_mm256_xor_si256(c2[11254],simde_mm256_xor_si256(c2[6502],simde_mm256_xor_si256(c2[2935],simde_mm256_xor_si256(c2[10854],simde_mm256_xor_si256(c2[9293],simde_mm256_xor_si256(c2[4145],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[12060],simde_mm256_xor_si256(c2[6518],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[10894],simde_mm256_xor_si256(c2[5746],simde_mm256_xor_si256(c2[6142],simde_mm256_xor_si256(c2[8122],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[3768],simde_mm256_xor_si256(c2[11687],simde_mm256_xor_si256(c2[12499],simde_mm256_xor_si256(c2[7351],simde_mm256_xor_si256(c2[7747],simde_mm256_xor_si256(c2[8929],simde_mm256_xor_si256(c2[4177],simde_mm256_xor_si256(c2[219],simde_mm256_xor_si256(c2[8138],simde_mm256_xor_si256(c2[7352],simde_mm256_xor_si256(c2[10534],simde_mm256_xor_si256(c2[5386],simde_mm256_xor_si256(c2[5782],simde_mm256_xor_si256(c2[6969],simde_mm256_xor_si256(c2[2217],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[12511],simde_mm256_xor_si256(c2[6195],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[5012],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[10552],simde_mm256_xor_si256(c2[5800],simde_mm256_xor_si256(c2[6614],simde_mm256_xor_si256(c2[1466],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[12551],simde_mm256_xor_si256(c2[7799],simde_mm256_xor_si256(c2[3043],simde_mm256_xor_si256(c2[10962],simde_mm256_xor_si256(c2[9401],simde_mm256_xor_si256(c2[4253],simde_mm256_xor_si256(c2[4649],simde_mm256_xor_si256(c2[2274],simde_mm256_xor_si256(c2[10193],simde_mm256_xor_si256(c2[10196],simde_mm256_xor_si256(c2[5444],simde_mm256_xor_si256(c2[8609],simde_mm256_xor_si256(c2[10607],simde_mm256_xor_si256(c2[5855],simde_mm256_xor_si256(c2[12589],simde_mm256_xor_si256(c2[7837],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[2704],simde_
mm256_xor_si256(c2[10227],simde_mm256_xor_si256(c2[10623],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[3492],simde_mm256_xor_si256(c2[3097],simde_mm256_xor_si256(c2[11016],simde_mm256_xor_si256(c2[12625],simde_mm256_xor_si256(c2[7477],simde_mm256_xor_si256(c2[7873],simde_mm256_xor_si256(c2[1536],simde_mm256_xor_si256(c2[9455],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[6699],simde_mm256_xor_si256(c2[1551],simde_mm256_xor_si256(c2[1947],simde_mm256_xor_si256(c2[7100],simde_mm256_xor_si256(c2[2348],simde_mm256_xor_si256(c2[9868],simde_mm256_xor_si256(c2[5116],simde_mm256_xor_si256(c2[5130],simde_mm256_xor_si256(c2[12662],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[12262],simde_mm256_xor_si256(c2[7510],simde_mm256_xor_si256(c2[7906],simde_mm256_xor_si256(c2[3154],c2[7118])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[108]=_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[815],_mm256_xor_si256(c2[8107],_mm256_xor_si256(c2[9310],_mm256_xor_si256(c2[5785],c2[7855])))));
+     d2[108]=simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[815],simde_mm256_xor_si256(c2[8107],simde_mm256_xor_si256(c2[9310],simde_mm256_xor_si256(c2[5785],c2[7855])))));
 
 //row: 13
-     d2[117]=_mm256_xor_si256(c2[9115],_mm256_xor_si256(c2[9511],_mm256_xor_si256(c2[9510],_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[2779],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[4381],_mm256_xor_si256(c2[4776],_mm256_xor_si256(c2[8735],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[439],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[440],_mm256_xor_si256(c2[11940],_mm256_xor_si256(c2[12336],_mm256_xor_si256(c2[1646],_mm256_xor_si256(c2[4416],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[2052],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[6808],_mm256_xor_si256(c2[12373],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[2076],_mm256_xor_si256(c2[9602],_mm256_xor_si256(c2[3277],_mm256_xor_si256(c2[3673],_mm256_xor_si256(c2[6055],_mm256_xor_si256(c2[9219],_mm256_xor_si256(c2[6861],_mm256_xor_si256(c2[12409],_mm256_xor_si256(c2[3695],_mm256_xor_si256(c2[9636],_mm256_xor_si256(c2[6091],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[4111],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[956],_mm256_xor_si256(c2[10462],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[8105],_mm256_xor_si256(c2[8501],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[5726],_mm256_xor_si256(c2[9706],_mm256_xor_si256(c2[10102],_mm256_xor_si256(c2[7330],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[11311],_mm256_xor_si256(c2[11707],_mm256_xor_si256(c2[8137],_mm256_xor_si256(c2[12098],_mm256_xor_si256(c2[9346],_mm256_xor_si256(c2[9742],_mm256_xor_si256(c2[6177],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[5403],_mm256_xor_si256(c2[4220],_mm256_xor_si256(c2[9760],_mm256_xor_si256(c2[5426],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[11759],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[8213],_mm256_xor_si256(c2[8609],_mm256_xor_si256(c2[1482],_mm256_xor_si256(c2[9404],_mm256_xor_si256(c2[9815],_mm256_xor_si256(c2[11797],_mm256_xor_si256(c2[4272],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[1912],_mm256_xor_si256(c2[7452],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[11437],_mm256_xor_
si256(c2[11833],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[5497],_mm256_xor_si256(c2[5511],_mm256_xor_si256(c2[5907],_mm256_xor_si256(c2[6308],_mm256_xor_si256(c2[9076],_mm256_xor_si256(c2[6301],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[4338],_mm256_xor_si256(c2[11470],c2[7114])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[117]=simde_mm256_xor_si256(c2[9115],simde_mm256_xor_si256(c2[9511],simde_mm256_xor_si256(c2[9510],simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[2779],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[4381],simde_mm256_xor_si256(c2[4776],simde_mm256_xor_si256(c2[8735],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[439],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[440],simde_mm256_xor_si256(c2[11940],simde_mm256_xor_si256(c2[12336],simde_mm256_xor_si256(c2[1646],simde_mm256_xor_si256(c2[4416],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[2052],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[6808],simde_mm256_xor_si256(c2[12373],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[2076],simde_mm256_xor_si256(c2[9602],simde_mm256_xor_si256(c2[3277],simde_mm256_xor_si256(c2[3673],simde_mm256_xor_si256(c2[6055],simde_mm256_xor_si256(c2[9219],simde_mm256_xor_si256(c2[6861],simde_mm256_xor_si256(c2[12409],simde_mm256_xor_si256(c2[3695],simde_mm256_xor_si256(c2[9636],simde_mm256_xor_si256(c2[6091],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[4111],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[956],simde_mm256_xor_si256(c2[10462],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[8105],simde_mm256_xor_si256(c2[8501],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[5726],simde_mm256_xor_si256(c2[9706],simde_mm256_xor_si256(c2[10102],simde_mm256_xor_si256(c2[7330],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[11311],simde_mm256_xor_si256(c2[11707],simde_mm256_xor_si256(c2[8137],simde_mm256_xor_si256(c2[12098],simde_mm256_xor_si256(c2[9346],simde_mm256_xor_si256(c2[9742],simde_mm256_xor_si256(c2[6177],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[5403],simde_mm256_xor_si256(c2[4220],simde_mm256_xor_si256(c2[9760],simde_mm256_xor_si256(c2[5426],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[11759],simde_mm256_xor_si256(c2[2251],simde
_mm256_xor_si256(c2[8213],simde_mm256_xor_si256(c2[8609],simde_mm256_xor_si256(c2[1482],simde_mm256_xor_si256(c2[9404],simde_mm256_xor_si256(c2[9815],simde_mm256_xor_si256(c2[11797],simde_mm256_xor_si256(c2[4272],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[1912],simde_mm256_xor_si256(c2[7452],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[11437],simde_mm256_xor_si256(c2[11833],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[5497],simde_mm256_xor_si256(c2[5511],simde_mm256_xor_si256(c2[5907],simde_mm256_xor_si256(c2[6308],simde_mm256_xor_si256(c2[9076],simde_mm256_xor_si256(c2[6301],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[4338],simde_mm256_xor_si256(c2[11470],c2[7114])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[126]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[10517],_mm256_xor_si256(c2[11761],_mm256_xor_si256(c2[6624],_mm256_xor_si256(c2[306],c2[6320])))));
+     d2[126]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[10517],simde_mm256_xor_si256(c2[11761],simde_mm256_xor_si256(c2[6624],simde_mm256_xor_si256(c2[306],c2[6320])))));
 
 //row: 15
-     d2[135]=_mm256_xor_si256(c2[7928],_mm256_xor_si256(c2[7927],_mm256_xor_si256(c2[1189],_mm256_xor_si256(c2[10699],_mm256_xor_si256(c2[11095],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[3193],_mm256_xor_si256(c2[6756],_mm256_xor_si256(c2[7152],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[11527],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[11528],_mm256_xor_si256(c2[10753],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[2833],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[4829],_mm256_xor_si256(c2[5225],_mm256_xor_si256(c2[11186],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[8010],_mm256_xor_si256(c2[2090],_mm256_xor_si256(c2[4472],_mm256_xor_si256(c2[7240],_mm256_xor_si256(c2[7636],_mm256_xor_si256(c2[5278],_mm256_xor_si256(c2[10826],_mm256_xor_si256(c2[1716],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[4508],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[2528],_mm256_xor_si256(c2[12044],_mm256_xor_si256(c2[8879],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[6918],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[4143],_mm256_xor_si256(c2[9293],_mm256_xor_si256(c2[8519],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[10124],_mm256_xor_si256(c2[6554],_mm256_xor_si256(c2[10119],_mm256_xor_si256(c2[10515],_mm256_xor_si256(c2[8159],_mm256_xor_si256(c2[4594],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[2217],_mm256_xor_si256(c2[4989],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[2628],_mm256_xor_si256(c2[7781],_mm256_xor_si256(c2[8177],_mm256_xor_si256(c2[4230],_mm256_xor_si256(c2[10176],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[7026],_mm256_xor_si256(c2[12570],_mm256_xor_si256(c2[7416],_mm256_xor_si256(c2[7812],_mm256_xor_si256(c2[8232],_mm256_xor_si256(c2[10214],_mm256_xor_si256(c2[2293],_mm256_xor_si256(c2[2689],_mm256_xor_si256(c2[329],_mm256_xor_si256(c2[5869],_mm256_xor_si256(c2[326],_mm256_x
or_si256(c2[722],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[10250],_mm256_xor_si256(c2[11832],_mm256_xor_si256(c2[3914],_mm256_xor_si256(c2[4324],_mm256_xor_si256(c2[4716],_mm256_xor_si256(c2[7097],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[9887],_mm256_xor_si256(c2[5135],c2[5531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[135]=simde_mm256_xor_si256(c2[7928],simde_mm256_xor_si256(c2[7927],simde_mm256_xor_si256(c2[1189],simde_mm256_xor_si256(c2[10699],simde_mm256_xor_si256(c2[11095],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[3193],simde_mm256_xor_si256(c2[6756],simde_mm256_xor_si256(c2[7152],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[11527],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[11528],simde_mm256_xor_si256(c2[10753],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[2833],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[4829],simde_mm256_xor_si256(c2[5225],simde_mm256_xor_si256(c2[11186],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[8010],simde_mm256_xor_si256(c2[2090],simde_mm256_xor_si256(c2[4472],simde_mm256_xor_si256(c2[7240],simde_mm256_xor_si256(c2[7636],simde_mm256_xor_si256(c2[5278],simde_mm256_xor_si256(c2[10826],simde_mm256_xor_si256(c2[1716],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[4508],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[2528],simde_mm256_xor_si256(c2[12044],simde_mm256_xor_si256(c2[8879],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[6918],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[4143],simde_mm256_xor_si256(c2[9293],simde_mm256_xor_si256(c2[8519],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[10124],simde_mm256_xor_si256(c2[6554],simde_mm256_xor_si256(c2[10119],simde_mm256_xor_si256(c2[10515],simde_mm256_xor_si256(c2[8159],simde_mm256_xor_si256(c2[4594],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[2217],simde_mm256_xor_si256(c2[4989],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[2628],simde_mm256_xor_si256(c2[7781],simde_mm256_xor_si256(c2[8177],simde_mm256_xor_si256(c2[4230],simde_mm256_xor_si256(c2[10176]
,simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[7026],simde_mm256_xor_si256(c2[12570],simde_mm256_xor_si256(c2[7416],simde_mm256_xor_si256(c2[7812],simde_mm256_xor_si256(c2[8232],simde_mm256_xor_si256(c2[10214],simde_mm256_xor_si256(c2[2293],simde_mm256_xor_si256(c2[2689],simde_mm256_xor_si256(c2[329],simde_mm256_xor_si256(c2[5869],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[10250],simde_mm256_xor_si256(c2[11832],simde_mm256_xor_si256(c2[3914],simde_mm256_xor_si256(c2[4324],simde_mm256_xor_si256(c2[4716],simde_mm256_xor_si256(c2[7097],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[9887],simde_mm256_xor_si256(c2[5135],c2[5531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[144]=_mm256_xor_si256(c2[7530],_mm256_xor_si256(c2[7529],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[10697],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[11129],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[11130],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[12336],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[5202],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[4827],_mm256_xor_si256(c2[10788],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[7621],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[4880],_mm256_xor_si256(c2[10428],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[4110],_mm256_xor_si256(c2[12421],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[8481],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[6520],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[5349],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[1389],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[6156],_mm256_xor_si256(c2[10117],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[4196],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[2239],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[9778],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[12172],_mm256_xor_si256(c2[7423],_mm256_xor_si256(c2[7834],_mm256_xor_si256(c2[9816],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[9852],_mm256_xor_si256(c2[11434],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[9073],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[144]=simde_mm256_xor_si256(c2[7530],simde_mm256_xor_si256(c2[7529],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[10697],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[11129],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[11130],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[12336],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[5202],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[4827],simde_mm256_xor_si256(c2[10788],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[7621],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[4880],simde_mm256_xor_si256(c2[10428],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[4110],simde_mm256_xor_si256(c2[12421],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[8481],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[6520],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[5349],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[1389],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[6156],simde_mm256_xor_si256(c2[10117],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[4196],simde_mm256_xor_si256(c2[1819],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[2239],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[9778],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[12172],simde_mm256_xor_si256(c2[7423],simde_mm256_xor_si256(c2[7834],simde_mm256_xor_si256(c2[9816],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[9852],simde_mm256_xor_si256(c2[11434],simde_mm256_xor_si256(c2[3516]
,simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[7095],simde_mm256_xor_si256(c2[9073],simde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[153]=_mm256_xor_si256(c2[9904],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[306],c2[1175]))));
+     d2[153]=simde_mm256_xor_si256(c2[9904],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[306],c2[1175]))));
 
 //row: 18
-     d2[162]=_mm256_xor_si256(c2[419],_mm256_xor_si256(c2[9328],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[7063],c2[6682]))));
+     d2[162]=simde_mm256_xor_si256(c2[419],simde_mm256_xor_si256(c2[9328],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[7063],c2[6682]))));
 
 //row: 19
-     d2[171]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[8841],_mm256_xor_si256(c2[6484],c2[180]))));
+     d2[171]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[8841],simde_mm256_xor_si256(c2[6484],c2[180]))));
 
 //row: 20
-     d2[180]=_mm256_xor_si256(c2[9112],_mm256_xor_si256(c2[9111],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[12279],_mm256_xor_si256(c2[3572],_mm256_xor_si256(c2[3982],_mm256_xor_si256(c2[4377],_mm256_xor_si256(c2[8336],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[11937],_mm256_xor_si256(c2[1247],_mm256_xor_si256(c2[4017],_mm256_xor_si256(c2[6788],_mm256_xor_si256(c2[1662],_mm256_xor_si256(c2[3638],_mm256_xor_si256(c2[6409],_mm256_xor_si256(c2[12370],_mm256_xor_si256(c2[1677],_mm256_xor_si256(c2[9203],_mm256_xor_si256(c2[3283],_mm256_xor_si256(c2[5656],_mm256_xor_si256(c2[8820],_mm256_xor_si256(c2[6462],_mm256_xor_si256(c2[12010],_mm256_xor_si256(c2[3296],_mm256_xor_si256(c2[5692],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[10063],_mm256_xor_si256(c2[1753],_mm256_xor_si256(c2[10460],_mm256_xor_si256(c2[8102],_mm256_xor_si256(c2[2959],_mm256_xor_si256(c2[5336],_mm256_xor_si256(c2[9703],_mm256_xor_si256(c2[6931],_mm256_xor_si256(c2[2577],_mm256_xor_si256(c2[6538],_mm256_xor_si256(c2[11308],_mm256_xor_si256(c2[7747],_mm256_xor_si256(c2[11708],_mm256_xor_si256(c2[9343],_mm256_xor_si256(c2[5778],_mm256_xor_si256(c2[3410],_mm256_xor_si256(c2[5004],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[9361],_mm256_xor_si256(c2[5423],_mm256_xor_si256(c2[11360],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[8210],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[9005],_mm256_xor_si256(c2[9416],_mm256_xor_si256(c2[11398],_mm256_xor_si256(c2[3873],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[7062],_mm256_xor_si256(c2[1915],_mm256_xor_si256(c2[11434],_mm256_xor_si256(c2[345],_mm256_xor_si256(c2[5098],_mm256_xor_si256(c2[5508],_mm256_xor_si256(c2[5909],_mm256_xor_si256(c2[8677],_mm256_xor_si256(c2[3948],_mm256_xor_si256(c2[11071],c2[6715]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[180]=simde_mm256_xor_si256(c2[9112],simde_mm256_xor_si256(c2[9111],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[12279],simde_mm256_xor_si256(c2[3572],simde_mm256_xor_si256(c2[3982],simde_mm256_xor_si256(c2[4377],simde_mm256_xor_si256(c2[8336],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[11937],simde_mm256_xor_si256(c2[1247],simde_mm256_xor_si256(c2[4017],simde_mm256_xor_si256(c2[6788],simde_mm256_xor_si256(c2[1662],simde_mm256_xor_si256(c2[3638],simde_mm256_xor_si256(c2[6409],simde_mm256_xor_si256(c2[12370],simde_mm256_xor_si256(c2[1677],simde_mm256_xor_si256(c2[9203],simde_mm256_xor_si256(c2[3283],simde_mm256_xor_si256(c2[5656],simde_mm256_xor_si256(c2[8820],simde_mm256_xor_si256(c2[6462],simde_mm256_xor_si256(c2[12010],simde_mm256_xor_si256(c2[3296],simde_mm256_xor_si256(c2[5692],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[10063],simde_mm256_xor_si256(c2[1753],simde_mm256_xor_si256(c2[10460],simde_mm256_xor_si256(c2[8102],simde_mm256_xor_si256(c2[2959],simde_mm256_xor_si256(c2[5336],simde_mm256_xor_si256(c2[9703],simde_mm256_xor_si256(c2[6931],simde_mm256_xor_si256(c2[2577],simde_mm256_xor_si256(c2[6538],simde_mm256_xor_si256(c2[11308],simde_mm256_xor_si256(c2[7747],simde_mm256_xor_si256(c2[11708],simde_mm256_xor_si256(c2[9343],simde_mm256_xor_si256(c2[5778],simde_mm256_xor_si256(c2[3410],simde_mm256_xor_si256(c2[5004],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[9361],simde_mm256_xor_si256(c2[5423],simde_mm256_xor_si256(c2[11360],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[8210],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[9005],simde_mm256_xor_si256(c2[9416],simde_mm256_xor_si256(c2[11398],simde_mm256_xor_si256(c2[3873],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[7062],simde_mm256_xor_si256(c2[1915],simde_mm256_xor_si256(c2[11434],simde_mm256_xor_si256(c2[34
5],simde_mm256_xor_si256(c2[5098],simde_mm256_xor_si256(c2[5508],simde_mm256_xor_si256(c2[5909],simde_mm256_xor_si256(c2[8677],simde_mm256_xor_si256(c2[3948],simde_mm256_xor_si256(c2[11071],c2[6715]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[189]=_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[4256],_mm256_xor_si256(c2[3924],c2[7506]))));
+     d2[189]=simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[4256],simde_mm256_xor_si256(c2[3924],c2[7506]))));
 
 //row: 22
-     d2[198]=_mm256_xor_si256(c2[3170],_mm256_xor_si256(c2[11705],_mm256_xor_si256(c2[3404],c2[710])));
+     d2[198]=simde_mm256_xor_si256(c2[3170],simde_mm256_xor_si256(c2[11705],simde_mm256_xor_si256(c2[3404],c2[710])));
 
 //row: 23
-     d2[207]=_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[180],c2[2309])));
+     d2[207]=simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[180],c2[2309])));
 
 //row: 24
-     d2[216]=_mm256_xor_si256(c2[7530],_mm256_xor_si256(c2[7529],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[10697],_mm256_xor_si256(c2[4759],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[11129],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[11130],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[12336],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[4827],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[10788],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[7621],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[7238],_mm256_xor_si256(c2[4880],_mm256_xor_si256(c2[10428],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[4110],_mm256_xor_si256(c2[12421],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[8481],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[6520],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[5349],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[4166],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[6156],_mm256_xor_si256(c2[10117],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[4196],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[2239],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[9778],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[12172],_mm256_xor_si256(c2[7423],_mm256_xor_si256(c2[7834],_mm256_xor_si256(c2[9816],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[9852],_mm256_xor_si256(c2[11434],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[7095],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[216]=simde_mm256_xor_si256(c2[7530],simde_mm256_xor_si256(c2[7529],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[10697],simde_mm256_xor_si256(c2[4759],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[11129],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[11130],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[12336],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[4827],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[10788],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[7621],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[7238],simde_mm256_xor_si256(c2[4880],simde_mm256_xor_si256(c2[10428],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[4110],simde_mm256_xor_si256(c2[12421],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[8481],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[6520],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[5349],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[4166],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[6156],simde_mm256_xor_si256(c2[10117],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[4196],simde_mm256_xor_si256(c2[1819],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[2239],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[9778],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[12172],simde_mm256_xor_si256(c2[7423],simde_mm256_xor_si256(c2[7834],simde_mm256_xor_si256(c2[9816],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[9852],simde_mm256_xor_si256(c2[11434],s
imde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[7095],simde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[9489],c2[5133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[225]=_mm256_xor_si256(c2[5172],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[126],c2[9365])));
+     d2[225]=simde_mm256_xor_si256(c2[5172],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[126],c2[9365])));
 
 //row: 26
-     d2[234]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[72],c2[8594])));
+     d2[234]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[72],c2[8594])));
 
 //row: 27
-     d2[243]=_mm256_xor_si256(c2[5167],_mm256_xor_si256(c2[1693],c2[3314]));
+     d2[243]=simde_mm256_xor_si256(c2[5167],simde_mm256_xor_si256(c2[1693],c2[3314]));
 
 //row: 28
-     d2[252]=_mm256_xor_si256(c2[7532],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[11038],c2[12259])));
+     d2[252]=simde_mm256_xor_si256(c2[7532],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[11038],c2[12259])));
 
 //row: 29
-     d2[261]=_mm256_xor_si256(c2[11887],_mm256_xor_si256(c2[11886],_mm256_xor_si256(c2[5148],_mm256_xor_si256(c2[1987],_mm256_xor_si256(c2[2383],_mm256_xor_si256(c2[6757],_mm256_xor_si256(c2[7152],_mm256_xor_si256(c2[10715],_mm256_xor_si256(c2[11111],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[2815],_mm256_xor_si256(c2[5190],_mm256_xor_si256(c2[2816],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[6396],_mm256_xor_si256(c2[6792],_mm256_xor_si256(c2[4428],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[8788],_mm256_xor_si256(c2[9184],_mm256_xor_si256(c2[2474],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[11978],_mm256_xor_si256(c2[6049],_mm256_xor_si256(c2[8431],_mm256_xor_si256(c2[11199],_mm256_xor_si256(c2[11595],_mm256_xor_si256(c2[9237],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[5675],_mm256_xor_si256(c2[6071],_mm256_xor_si256(c2[8467],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[6091],_mm256_xor_si256(c2[6487],_mm256_xor_si256(c2[3332],_mm256_xor_si256(c2[167],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[10877],_mm256_xor_si256(c2[5725],_mm256_xor_si256(c2[7706],_mm256_xor_si256(c2[8102],_mm256_xor_si256(c2[12478],_mm256_xor_si256(c2[9706],_mm256_xor_si256(c2[4956],_mm256_xor_si256(c2[5352],_mm256_xor_si256(c2[1412],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[1407],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[12118],_mm256_xor_si256(c2[8553],_mm256_xor_si256(c2[5780],_mm256_xor_si256(c2[6176],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[6596],_mm256_xor_si256(c2[11740],_mm256_xor_si256(c2[12136],_mm256_xor_si256(c2[8177],_mm256_xor_si256(c2[8198],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[4627],_mm256_xor_si256(c2[10985],_mm256_xor_si256(c2[3858],_mm256_xor_si256(c2[11384],_mm256_xor_si256(c2[11780],_mm256_xor_si256(c2[12191],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[6252],_mm256_xor_si256(c2[6648],_mm256_xor_si256(c2[4288],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[4285],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[32
4],_mm256_xor_si256(c2[1538],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[7873],_mm256_xor_si256(c2[8283],_mm256_xor_si256(c2[8684],_mm256_xor_si256(c2[11056],_mm256_xor_si256(c2[11452],_mm256_xor_si256(c2[6714],_mm256_xor_si256(c2[1175],_mm256_xor_si256(c2[9094],c2[9490]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[261]=simde_mm256_xor_si256(c2[11887],simde_mm256_xor_si256(c2[11886],simde_mm256_xor_si256(c2[5148],simde_mm256_xor_si256(c2[1987],simde_mm256_xor_si256(c2[2383],simde_mm256_xor_si256(c2[6757],simde_mm256_xor_si256(c2[7152],simde_mm256_xor_si256(c2[10715],simde_mm256_xor_si256(c2[11111],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[2815],simde_mm256_xor_si256(c2[5190],simde_mm256_xor_si256(c2[2816],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[6396],simde_mm256_xor_si256(c2[6792],simde_mm256_xor_si256(c2[4428],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[8788],simde_mm256_xor_si256(c2[9184],simde_mm256_xor_si256(c2[2474],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[11978],simde_mm256_xor_si256(c2[6049],simde_mm256_xor_si256(c2[8431],simde_mm256_xor_si256(c2[11199],simde_mm256_xor_si256(c2[11595],simde_mm256_xor_si256(c2[9237],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[5675],simde_mm256_xor_si256(c2[6071],simde_mm256_xor_si256(c2[8467],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[6091],simde_mm256_xor_si256(c2[6487],simde_mm256_xor_si256(c2[3332],simde_mm256_xor_si256(c2[167],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[10877],simde_mm256_xor_si256(c2[5725],simde_mm256_xor_si256(c2[7706],simde_mm256_xor_si256(c2[8102],simde_mm256_xor_si256(c2[12478],simde_mm256_xor_si256(c2[9706],simde_mm256_xor_si256(c2[4956],simde_mm256_xor_si256(c2[5352],simde_mm256_xor_si256(c2[1412],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[1407],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[12118],simde_mm256_xor_si256(c2[8553],simde_mm256_xor_si256(c2[5780],simde_mm256_xor_si256(c2[6176],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[6596],simde_mm256_xor_si256(c2[11740],simde_mm256_xor_si256(c2[12136],simde_mm256_xor_si256(c2[8177],simde_mm256_xor_si256(c2[8198],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[4627],simde_mm256_xor_si256(c
2[10985],simde_mm256_xor_si256(c2[3858],simde_mm256_xor_si256(c2[11384],simde_mm256_xor_si256(c2[11780],simde_mm256_xor_si256(c2[12191],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[6252],simde_mm256_xor_si256(c2[6648],simde_mm256_xor_si256(c2[4288],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[4285],simde_mm256_xor_si256(c2[4681],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[1538],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[7873],simde_mm256_xor_si256(c2[8283],simde_mm256_xor_si256(c2[8684],simde_mm256_xor_si256(c2[11056],simde_mm256_xor_si256(c2[11452],simde_mm256_xor_si256(c2[6714],simde_mm256_xor_si256(c2[1175],simde_mm256_xor_si256(c2[9094],c2[9490]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[270]=_mm256_xor_si256(c2[399],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[6340],_mm256_xor_si256(c2[3170],_mm256_xor_si256(c2[3566],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[7940],_mm256_xor_si256(c2[7939],_mm256_xor_si256(c2[8335],_mm256_xor_si256(c2[11898],_mm256_xor_si256(c2[12294],_mm256_xor_si256(c2[3998],_mm256_xor_si256(c2[6373],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[3999],_mm256_xor_si256(c2[3224],_mm256_xor_si256(c2[5205],_mm256_xor_si256(c2[7579],_mm256_xor_si256(c2[7975],_mm256_xor_si256(c2[5620],_mm256_xor_si256(c2[7200],_mm256_xor_si256(c2[7596],_mm256_xor_si256(c2[9980],_mm256_xor_si256(c2[10376],_mm256_xor_si256(c2[3657],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[7241],_mm256_xor_si256(c2[9218],_mm256_xor_si256(c2[9614],_mm256_xor_si256(c2[12391],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[10429],_mm256_xor_si256(c2[2901],_mm256_xor_si256(c2[3297],_mm256_xor_si256(c2[6858],_mm256_xor_si256(c2[7254],_mm256_xor_si256(c2[9650],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[5299],_mm256_xor_si256(c2[7274],_mm256_xor_si256(c2[7670],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[1350],_mm256_xor_si256(c2[5315],_mm256_xor_si256(c2[5711],_mm256_xor_si256(c2[12060],_mm256_xor_si256(c2[6521],_mm256_xor_si256(c2[6917],_mm256_xor_si256(c2[8898],_mm256_xor_si256(c2[9294],_mm256_xor_si256(c2[10478],_mm256_xor_si256(c2[990],_mm256_xor_si256(c2[10898],_mm256_xor_si256(c2[6139],_mm256_xor_si256(c2[6535],_mm256_xor_si256(c2[2595],_mm256_xor_si256(c2[11705],_mm256_xor_si256(c2[2599],_mm256_xor_si256(c2[2995],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[9349],_mm256_xor_si256(c2[9745],_mm256_xor_si256(c2[6972],_mm256_xor_si256(c2[7368],_mm256_xor_si256(c2[11329],_mm256_xor_si256(c2[8971],_mm256_xor_si256(c2[7383],_mm256_xor_si256(c2[7779],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[9381],_mm256_xor_si256(c2[2647],_mm256_xor_si256(c2[5423],_mm256_xor_si
256(c2[5819],_mm256_xor_si256(c2[12168],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[12567],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[703],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[2685],_mm256_xor_si256(c2[7435],_mm256_xor_si256(c2[7831],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[10624],_mm256_xor_si256(c2[11020],_mm256_xor_si256(c2[5477],_mm256_xor_si256(c2[5873],_mm256_xor_si256(c2[2721],_mm256_xor_si256(c2[4303],_mm256_xor_si256(c2[8660],_mm256_xor_si256(c2[9056],_mm256_xor_si256(c2[9475],_mm256_xor_si256(c2[9471],_mm256_xor_si256(c2[9867],_mm256_xor_si256(c2[12248],_mm256_xor_si256(c2[12644],_mm256_xor_si256(c2[7906],_mm256_xor_si256(c2[2358],_mm256_xor_si256(c2[10286],c2[10682])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[270]=simde_mm256_xor_si256(c2[399],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[6340],simde_mm256_xor_si256(c2[3170],simde_mm256_xor_si256(c2[3566],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[7940],simde_mm256_xor_si256(c2[7939],simde_mm256_xor_si256(c2[8335],simde_mm256_xor_si256(c2[11898],simde_mm256_xor_si256(c2[12294],simde_mm256_xor_si256(c2[3998],simde_mm256_xor_si256(c2[6373],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[3999],simde_mm256_xor_si256(c2[3224],simde_mm256_xor_si256(c2[5205],simde_mm256_xor_si256(c2[7579],simde_mm256_xor_si256(c2[7975],simde_mm256_xor_si256(c2[5620],simde_mm256_xor_si256(c2[7200],simde_mm256_xor_si256(c2[7596],simde_mm256_xor_si256(c2[9980],simde_mm256_xor_si256(c2[10376],simde_mm256_xor_si256(c2[3657],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[7241],simde_mm256_xor_si256(c2[9218],simde_mm256_xor_si256(c2[9614],simde_mm256_xor_si256(c2[12391],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[10429],simde_mm256_xor_si256(c2[2901],simde_mm256_xor_si256(c2[3297],simde_mm256_xor_si256(c2[6858],simde_mm256_xor_si256(c2[7254],simde_mm256_xor_si256(c2[9650],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[5299],simde_mm256_xor_si256(c2[7274],simde_mm256_xor_si256(c2[7670],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[1350],simde_mm256_xor_si256(c2[5315],simde_mm256_xor_si256(c2[5711],simde_mm256_xor_si256(c2[12060],simde_mm256_xor_si256(c2[6521],simde_mm256_xor_si256(c2[6917],simde_mm256_xor_si256(c2[8898],simde_mm256_xor_si256(c2[9294],simde_mm256_xor_si256(c2[10478],simde_mm256_xor_si256(c2[990],simde_mm256_xor_si256(c2[10898],simde_mm256_xor_si256(c2[6139],simde_mm256_xor_si256(c2[6535],simde_mm256_xor_si256(c2[2595],simde_mm256_xor_si256(c2[11705],simde_mm256_xor_si256(c2[2599],simde_mm256_xor_si256(c2[2995],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[9349],simd
e_mm256_xor_si256(c2[9745],simde_mm256_xor_si256(c2[6972],simde_mm256_xor_si256(c2[7368],simde_mm256_xor_si256(c2[11329],simde_mm256_xor_si256(c2[8971],simde_mm256_xor_si256(c2[7383],simde_mm256_xor_si256(c2[7779],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[9381],simde_mm256_xor_si256(c2[2647],simde_mm256_xor_si256(c2[5423],simde_mm256_xor_si256(c2[5819],simde_mm256_xor_si256(c2[12168],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[12567],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[703],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[2685],simde_mm256_xor_si256(c2[7435],simde_mm256_xor_si256(c2[7831],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[10624],simde_mm256_xor_si256(c2[11020],simde_mm256_xor_si256(c2[5477],simde_mm256_xor_si256(c2[5873],simde_mm256_xor_si256(c2[2721],simde_mm256_xor_si256(c2[4303],simde_mm256_xor_si256(c2[8660],simde_mm256_xor_si256(c2[9056],simde_mm256_xor_si256(c2[9475],simde_mm256_xor_si256(c2[9471],simde_mm256_xor_si256(c2[9867],simde_mm256_xor_si256(c2[12248],simde_mm256_xor_si256(c2[12644],simde_mm256_xor_si256(c2[7906],simde_mm256_xor_si256(c2[2358],simde_mm256_xor_si256(c2[10286],c2[10682])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[279]=_mm256_xor_si256(c2[9902],_mm256_xor_si256(c2[2779],_mm256_xor_si256(c2[9901],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[3172],_mm256_xor_si256(c2[8720],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[5550],_mm256_xor_si256(c2[5946],_mm256_xor_si256(c2[4772],_mm256_xor_si256(c2[10320],_mm256_xor_si256(c2[5167],_mm256_xor_si256(c2[10715],_mm256_xor_si256(c2[9126],_mm256_xor_si256(c2[1607],_mm256_xor_si256(c2[2003],_mm256_xor_si256(c2[6358],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[6378],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[8753],_mm256_xor_si256(c2[831],_mm256_xor_si256(c2[6379],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[2037],_mm256_xor_si256(c2[7585],_mm256_xor_si256(c2[4807],_mm256_xor_si256(c2[9959],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[8000],_mm256_xor_si256(c2[4428],_mm256_xor_si256(c2[9976],_mm256_xor_si256(c2[7208],_mm256_xor_si256(c2[12351],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[8015],_mm256_xor_si256(c2[9993],_mm256_xor_si256(c2[2870],_mm256_xor_si256(c2[4073],_mm256_xor_si256(c2[9612],_mm256_xor_si256(c2[6446],_mm256_xor_si256(c2[11994],_mm256_xor_si256(c2[9619],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[2487],_mm256_xor_si256(c2[7261],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[5677],_mm256_xor_si256(c2[4086],_mm256_xor_si256(c2[9238],_mm256_xor_si256(c2[9634],_mm256_xor_si256(c2[6466],_mm256_xor_si256(c2[6482],_mm256_xor_si256(c2[12030],_mm256_xor_si256(c2[2131],_mm256_xor_si256(c2[7670],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[9654],_mm256_xor_si256(c2[10050],_mm256_xor_si256(c2[1356],_mm256_xor_si256(c2[6895],_mm256_xor_si256(c2[10862],_mm256_xor_si256(c2[3730],_mm256_xor_si256(c2[2543],_mm256_xor_si256(c2[8082],_mm256_xor_si256(c2[8892],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[9288],_mm256_xor_si256(c2[6126],_mm256_xor_s
i256(c2[11269],_mm256_xor_si256(c2[11665],_mm256_xor_si256(c2[10502],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[7730],_mm256_xor_si256(c2[598],_mm256_xor_si256(c2[3367],_mm256_xor_si256(c2[8519],_mm256_xor_si256(c2[8915],_mm256_xor_si256(c2[12098],_mm256_xor_si256(c2[4975],_mm256_xor_si256(c2[8537],_mm256_xor_si256(c2[1405],_mm256_xor_si256(c2[12498],_mm256_xor_si256(c2[4970],_mm256_xor_si256(c2[5366],_mm256_xor_si256(c2[10142],_mm256_xor_si256(c2[3010],_mm256_xor_si256(c2[6577],_mm256_xor_si256(c2[12116],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[9343],_mm256_xor_si256(c2[9739],_mm256_xor_si256(c2[5803],_mm256_xor_si256(c2[11342],_mm256_xor_si256(c2[4611],_mm256_xor_si256(c2[10159],_mm256_xor_si256(c2[10160],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[3028],_mm256_xor_si256(c2[6213],_mm256_xor_si256(c2[11761],_mm256_xor_si256(c2[12150],_mm256_xor_si256(c2[5027],_mm256_xor_si256(c2[2651],_mm256_xor_si256(c2[8190],_mm256_xor_si256(c2[9000],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[7421],_mm256_xor_si256(c2[9795],_mm256_xor_si256(c2[2276],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[10206],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[12188],_mm256_xor_si256(c2[5065],_mm256_xor_si256(c2[4663],_mm256_xor_si256(c2[9815],_mm256_xor_si256(c2[10211],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[7851],_mm256_xor_si256(c2[7852],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[7848],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[12224],_mm256_xor_si256(c2[5101],_mm256_xor_si256(c2[1135],_mm256_xor_si256(c2[6683],_mm256_xor_si256(c2[5888],_mm256_xor_si256(c2[11436],_mm256_xor_si256(c2[6307],_mm256_xor_si256(c2[11846],_mm256_xor_si256(c2[6699],_mm256_xor_si256(c2[12247],_mm256_xor_si256(c2[9476],_mm256_xor_si256(c2[1948],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[4738],_mm256_xor_si256(c2[10286],_mm256_xor_si256(c2[11870],_mm256_xor_si256(c2[4738],_mm256_xor_si256(c2[7514],_mm256_xor_si256(c2[12657],c2[382])))))))))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[279]=simde_mm256_xor_si256(c2[9902],simde_mm256_xor_si256(c2[2779],simde_mm256_xor_si256(c2[9901],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[3172],simde_mm256_xor_si256(c2[8720],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[5550],simde_mm256_xor_si256(c2[5946],simde_mm256_xor_si256(c2[4772],simde_mm256_xor_si256(c2[10320],simde_mm256_xor_si256(c2[5167],simde_mm256_xor_si256(c2[10715],simde_mm256_xor_si256(c2[9126],simde_mm256_xor_si256(c2[1607],simde_mm256_xor_si256(c2[2003],simde_mm256_xor_si256(c2[6358],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[6378],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[8753],simde_mm256_xor_si256(c2[831],simde_mm256_xor_si256(c2[6379],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[2037],simde_mm256_xor_si256(c2[7585],simde_mm256_xor_si256(c2[4807],simde_mm256_xor_si256(c2[9959],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[8000],simde_mm256_xor_si256(c2[4428],simde_mm256_xor_si256(c2[9976],simde_mm256_xor_si256(c2[7208],simde_mm256_xor_si256(c2[12351],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[8015],simde_mm256_xor_si256(c2[9993],simde_mm256_xor_si256(c2[2870],simde_mm256_xor_si256(c2[4073],simde_mm256_xor_si256(c2[9612],simde_mm256_xor_si256(c2[6446],simde_mm256_xor_si256(c2[11994],simde_mm256_xor_si256(c2[9619],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[2487],simde_mm256_xor_si256(c2[7261],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[5677],simde_mm256_xor_si256(c2[4086],simde_mm256_xor_si256(c2[9238],simde_mm256_xor_si256(c2[9634],simde_mm256_xor_si256(c2[6466],simde_mm256_xor_si256(c2[6482],simde_mm256_xor_si256(c2[12030],simde_mm256_xor_si256(c2[2131],simde_mm256_xor_si256(c2[7670],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[9654],simde_
mm256_xor_si256(c2[10050],simde_mm256_xor_si256(c2[1356],simde_mm256_xor_si256(c2[6895],simde_mm256_xor_si256(c2[10862],simde_mm256_xor_si256(c2[3730],simde_mm256_xor_si256(c2[2543],simde_mm256_xor_si256(c2[8082],simde_mm256_xor_si256(c2[8892],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[9288],simde_mm256_xor_si256(c2[6126],simde_mm256_xor_si256(c2[11269],simde_mm256_xor_si256(c2[11665],simde_mm256_xor_si256(c2[10502],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[7730],simde_mm256_xor_si256(c2[598],simde_mm256_xor_si256(c2[3367],simde_mm256_xor_si256(c2[8519],simde_mm256_xor_si256(c2[8915],simde_mm256_xor_si256(c2[12098],simde_mm256_xor_si256(c2[4975],simde_mm256_xor_si256(c2[8537],simde_mm256_xor_si256(c2[1405],simde_mm256_xor_si256(c2[12498],simde_mm256_xor_si256(c2[4970],simde_mm256_xor_si256(c2[5366],simde_mm256_xor_si256(c2[10142],simde_mm256_xor_si256(c2[3010],simde_mm256_xor_si256(c2[6577],simde_mm256_xor_si256(c2[12116],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[9343],simde_mm256_xor_si256(c2[9739],simde_mm256_xor_si256(c2[5803],simde_mm256_xor_si256(c2[11342],simde_mm256_xor_si256(c2[4611],simde_mm256_xor_si256(c2[10159],simde_mm256_xor_si256(c2[10160],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[3028],simde_mm256_xor_si256(c2[6213],simde_mm256_xor_si256(c2[11761],simde_mm256_xor_si256(c2[12150],simde_mm256_xor_si256(c2[5027],simde_mm256_xor_si256(c2[2651],simde_mm256_xor_si256(c2[8190],simde_mm256_xor_si256(c2[9000],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[7421],simde_mm256_xor_si256(c2[9795],simde_mm256_xor_si256(c2[2276],simde_mm256_xor_si256(c2[2672],simde_mm256_xor_si256(c2[10206],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[12188],simde_mm256_xor_si256(c2[5065],simde_mm256_xor_si256(c2[4663],simde_mm256_xor_si256(c2[9815],simde_mm256_xor_si256(c2[10211],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[7851],simde_
mm256_xor_si256(c2[7852],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[7848],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[12224],simde_mm256_xor_si256(c2[5101],simde_mm256_xor_si256(c2[1135],simde_mm256_xor_si256(c2[6683],simde_mm256_xor_si256(c2[5888],simde_mm256_xor_si256(c2[11436],simde_mm256_xor_si256(c2[6307],simde_mm256_xor_si256(c2[11846],simde_mm256_xor_si256(c2[6699],simde_mm256_xor_si256(c2[12247],simde_mm256_xor_si256(c2[9476],simde_mm256_xor_si256(c2[1948],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[4738],simde_mm256_xor_si256(c2[10286],simde_mm256_xor_si256(c2[11870],simde_mm256_xor_si256(c2[4738],simde_mm256_xor_si256(c2[7514],simde_mm256_xor_si256(c2[12657],c2[382]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[288]=_mm256_xor_si256(c2[8714],_mm256_xor_si256(c2[8713],_mm256_xor_si256(c2[1588],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[11485],_mm256_xor_si256(c2[11881],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3584],_mm256_xor_si256(c2[3583],_mm256_xor_si256(c2[3979],_mm256_xor_si256(c2[7542],_mm256_xor_si256(c2[7938],_mm256_xor_si256(c2[12313],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[11918],_mm256_xor_si256(c2[12314],_mm256_xor_si256(c2[11539],_mm256_xor_si256(c2[849],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[3619],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5624],_mm256_xor_si256(c2[6020],_mm256_xor_si256(c2[11972],_mm256_xor_si256(c2[1279],_mm256_xor_si256(c2[8409],_mm256_xor_si256(c2[8805],_mm256_xor_si256(c2[2885],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[5258],_mm256_xor_si256(c2[8035],_mm256_xor_si256(c2[8431],_mm256_xor_si256(c2[6073],_mm256_xor_si256(c2[11216],_mm256_xor_si256(c2[11612],_mm256_xor_si256(c2[2502],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[5294],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[943],_mm256_xor_si256(c2[2918],_mm256_xor_si256(c2[3314],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[9674],_mm256_xor_si256(c2[959],_mm256_xor_si256(c2[1355],_mm256_xor_si256(c2[7704],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[2561],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[4938],_mm256_xor_si256(c2[9314],_mm256_xor_si256(c2[6542],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[2179],_mm256_xor_si256(c2[10910],_mm256_xor_si256(c2[7349],_mm256_xor_si256(c2[10914],_mm256_xor_si256(c2[11310],_mm256_xor_si256(c2[7746],_mm256_xor_si256(c2[8954],_mm256_xor_si256(c2[4993],_mm256_xor_si256(c2[5389],_mm256_xor_si256(c2[2616],_mm256_xor_si256(c2[3012],_mm256_xor_si256(c2[4615],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[3423],_mm256_xor_si256(c2[8576],_mm256_xor_si256(c2[8972],_mm256_xor_si256(c2[1837],_mm256_xor_si256(c2[5025],_mm256_xor_si256(c2[10962],_mm256_xor_si256(c2[1067],_mm25
6_xor_si256(c2[1463],_mm256_xor_si256(c2[7812],_mm256_xor_si256(c2[685],_mm256_xor_si256(c2[8211],_mm256_xor_si256(c2[8607],_mm256_xor_si256(c2[9018],_mm256_xor_si256(c2[10604],_mm256_xor_si256(c2[11000],_mm256_xor_si256(c2[3079],_mm256_xor_si256(c2[3475],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[6268],_mm256_xor_si256(c2[6664],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[11036],_mm256_xor_si256(c2[12618],_mm256_xor_si256(c2[4304],_mm256_xor_si256(c2[4700],_mm256_xor_si256(c2[5119],_mm256_xor_si256(c2[5115],_mm256_xor_si256(c2[5511],_mm256_xor_si256(c2[7892],_mm256_xor_si256(c2[8288],_mm256_xor_si256(c2[3550],_mm256_xor_si256(c2[10682],_mm256_xor_si256(c2[5930],c2[6326])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[288]=simde_mm256_xor_si256(c2[8714],simde_mm256_xor_si256(c2[8713],simde_mm256_xor_si256(c2[1588],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[11485],simde_mm256_xor_si256(c2[11881],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3584],simde_mm256_xor_si256(c2[3583],simde_mm256_xor_si256(c2[3979],simde_mm256_xor_si256(c2[7542],simde_mm256_xor_si256(c2[7938],simde_mm256_xor_si256(c2[12313],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[11918],simde_mm256_xor_si256(c2[12314],simde_mm256_xor_si256(c2[11539],simde_mm256_xor_si256(c2[849],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[3619],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5624],simde_mm256_xor_si256(c2[6020],simde_mm256_xor_si256(c2[11972],simde_mm256_xor_si256(c2[1279],simde_mm256_xor_si256(c2[8409],simde_mm256_xor_si256(c2[8805],simde_mm256_xor_si256(c2[2885],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[5258],simde_mm256_xor_si256(c2[8035],simde_mm256_xor_si256(c2[8431],simde_mm256_xor_si256(c2[6073],simde_mm256_xor_si256(c2[11216],simde_mm256_xor_si256(c2[11612],simde_mm256_xor_si256(c2[2502],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[5294],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[943],simde_mm256_xor_si256(c2[2918],simde_mm256_xor_si256(c2[3314],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[9674],simde_mm256_xor_si256(c2[959],simde_mm256_xor_si256(c2[1355],simde_mm256_xor_si256(c2[7704],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[2561],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[4938],simde_mm256_xor_si256(c2[9314],simde_mm256_xor_si256(c2[6542],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[2179],simde_mm256_xor_si256(c2[10910],simde_mm256_xor_si256(c2[7349],simde_mm256_xor_si256(c2[10914],simde_mm256_xor_si256(c2[11310],simde_mm256_xor_si256(c2[7746],simde_mm256_xor_si256(c2[8954],simde_mm256_xor_si256(c2[4993
],simde_mm256_xor_si256(c2[5389],simde_mm256_xor_si256(c2[2616],simde_mm256_xor_si256(c2[3012],simde_mm256_xor_si256(c2[4615],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[3423],simde_mm256_xor_si256(c2[8576],simde_mm256_xor_si256(c2[8972],simde_mm256_xor_si256(c2[1837],simde_mm256_xor_si256(c2[5025],simde_mm256_xor_si256(c2[10962],simde_mm256_xor_si256(c2[1067],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[7812],simde_mm256_xor_si256(c2[685],simde_mm256_xor_si256(c2[8211],simde_mm256_xor_si256(c2[8607],simde_mm256_xor_si256(c2[9018],simde_mm256_xor_si256(c2[10604],simde_mm256_xor_si256(c2[11000],simde_mm256_xor_si256(c2[3079],simde_mm256_xor_si256(c2[3475],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[6268],simde_mm256_xor_si256(c2[6664],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[11036],simde_mm256_xor_si256(c2[12618],simde_mm256_xor_si256(c2[4304],simde_mm256_xor_si256(c2[4700],simde_mm256_xor_si256(c2[5119],simde_mm256_xor_si256(c2[5115],simde_mm256_xor_si256(c2[5511],simde_mm256_xor_si256(c2[7892],simde_mm256_xor_si256(c2[8288],simde_mm256_xor_si256(c2[3550],simde_mm256_xor_si256(c2[10682],simde_mm256_xor_si256(c2[5930],c2[6326])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[297]=_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[4952],c2[7506])));
+     d2[297]=simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[4952],c2[7506])));
 
 //row: 34
-     d2[306]=_mm256_xor_si256(c2[1986],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[5025],c2[306])));
+     d2[306]=simde_mm256_xor_si256(c2[1986],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[5025],c2[306])));
 
 //row: 35
-     d2[315]=_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[5945],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[7545],_mm256_xor_si256(c2[7940],_mm256_xor_si256(c2[11899],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[5978],_mm256_xor_si256(c2[3604],_mm256_xor_si256(c2[2829],_mm256_xor_si256(c2[4810],_mm256_xor_si256(c2[7580],_mm256_xor_si256(c2[5225],_mm256_xor_si256(c2[7201],_mm256_xor_si256(c2[9972],_mm256_xor_si256(c2[3262],_mm256_xor_si256(c2[5240],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[6846],_mm256_xor_si256(c2[9219],_mm256_xor_si256(c2[12392],_mm256_xor_si256(c2[8828],_mm256_xor_si256(c2[10034],_mm256_xor_si256(c2[2902],_mm256_xor_si256(c2[6859],_mm256_xor_si256(c2[9255],_mm256_xor_si256(c2[4904],_mm256_xor_si256(c2[7275],_mm256_xor_si256(c2[4129],_mm256_xor_si256(c2[955],_mm256_xor_si256(c2[5316],_mm256_xor_si256(c2[11665],_mm256_xor_si256(c2[6522],_mm256_xor_si256(c2[8899],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[10494],_mm256_xor_si256(c2[6140],_mm256_xor_si256(c2[2200],_mm256_xor_si256(c2[11310],_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[235],_mm256_xor_si256(c2[9350],_mm256_xor_si256(c2[6973],_mm256_xor_si256(c2[8576],_mm256_xor_si256(c2[7384],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[8986],_mm256_xor_si256(c2[2252],_mm256_xor_si256(c2[5424],_mm256_xor_si256(c2[11773],_mm256_xor_si256(c2[4646],_mm256_xor_si256(c2[12568],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[2290],_mm256_xor_si256(c2[7436],_mm256_xor_si256(c2[5076],_mm256_xor_si256(c2[10625],_mm256_xor_si256(c2[5478],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[3908],_mm256_xor_si256(c2[8661],_mm256_xor_si256(c2[9080],_mm256_xor_si256(c2[9472],_mm256_xor_si256(c2[12240],_mm256_xor_si256(c2[7511],_mm256_xor_si256(c2[1963],c2[10278])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[315]=simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[5945],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[7545],simde_mm256_xor_si256(c2[7940],simde_mm256_xor_si256(c2[11899],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[5978],simde_mm256_xor_si256(c2[3604],simde_mm256_xor_si256(c2[2829],simde_mm256_xor_si256(c2[4810],simde_mm256_xor_si256(c2[7580],simde_mm256_xor_si256(c2[5225],simde_mm256_xor_si256(c2[7201],simde_mm256_xor_si256(c2[9972],simde_mm256_xor_si256(c2[3262],simde_mm256_xor_si256(c2[5240],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[6846],simde_mm256_xor_si256(c2[9219],simde_mm256_xor_si256(c2[12392],simde_mm256_xor_si256(c2[8828],simde_mm256_xor_si256(c2[10034],simde_mm256_xor_si256(c2[2902],simde_mm256_xor_si256(c2[6859],simde_mm256_xor_si256(c2[9255],simde_mm256_xor_si256(c2[4904],simde_mm256_xor_si256(c2[7275],simde_mm256_xor_si256(c2[4129],simde_mm256_xor_si256(c2[955],simde_mm256_xor_si256(c2[5316],simde_mm256_xor_si256(c2[11665],simde_mm256_xor_si256(c2[6522],simde_mm256_xor_si256(c2[8899],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[10494],simde_mm256_xor_si256(c2[6140],simde_mm256_xor_si256(c2[2200],simde_mm256_xor_si256(c2[11310],simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[235],simde_mm256_xor_si256(c2[9350],simde_mm256_xor_si256(c2[6973],simde_mm256_xor_si256(c2[8576],simde_mm256_xor_si256(c2[7384],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[8986],simde_mm256_xor_si256(c2[2252],simde_mm256_xor_si256(c2[5424],simde_mm256_xor_si256(c2[11773],simde_mm256_xor_si256(c2[4646],simde_mm256_xor_si256(c2[12568],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[2290],simde_mm256_xor_si256(c2[7436],simde_mm256_xor_si256(c2[5076],simde_mm256_xor_si256(c2[10625],simde_mm256_xor_si256(c2[5478],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[3908],simde_mm256_xor_si256(c2[8661],simde_m
m256_xor_si256(c2[9080],simde_mm256_xor_si256(c2[9472],simde_mm256_xor_si256(c2[12240],simde_mm256_xor_si256(c2[7511],simde_mm256_xor_si256(c2[1963],c2[10278])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[324]=_mm256_xor_si256(c2[9510],_mm256_xor_si256(c2[6588],_mm256_xor_si256(c2[270],c2[324])));
+     d2[324]=simde_mm256_xor_si256(c2[9510],simde_mm256_xor_si256(c2[6588],simde_mm256_xor_si256(c2[270],c2[324])));
 
 //row: 37
-     d2[333]=_mm256_xor_si256(c2[7530],_mm256_xor_si256(c2[7926],_mm256_xor_si256(c2[7925],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[11093],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[7150],_mm256_xor_si256(c2[3188],_mm256_xor_si256(c2[11129],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[1229],_mm256_xor_si256(c2[11526],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[10751],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[10788],_mm256_xor_si256(c2[11184],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[8017],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[10824],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[4506],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[2526],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[12042],_mm256_xor_si256(c2[8877],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[6520],_mm256_xor_si256(c2[6916],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[8121],_mm256_xor_si256(c2[8517],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[10122],_mm256_xor_si256(c2[6552],_mm256_xor_si256(c2[10513],_mm256_xor_si256(c2[7761],_mm256_xor_si256(c2[8157],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[6574],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[8175],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[4237],_mm256_xor_si256(c2[10174],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[7024],_mm256_xor_si256(c2[12568],_mm256_xor_si256(c2[7819],_mm256_xor_si256(c2[8230],_mm256_xor_si256(c2[10212],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[9852],_mm256_xor_si256(c2[10248],_
mm256_xor_si256(c2[11830],_mm256_xor_si256(c2[3912],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[7491],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[9885],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[333]=simde_mm256_xor_si256(c2[7530],simde_mm256_xor_si256(c2[7926],simde_mm256_xor_si256(c2[7925],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[11093],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[7150],simde_mm256_xor_si256(c2[3188],simde_mm256_xor_si256(c2[11129],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[1229],simde_mm256_xor_si256(c2[11526],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[10751],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[10788],simde_mm256_xor_si256(c2[11184],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[8017],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[4470],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[10824],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[4506],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[2526],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[12042],simde_mm256_xor_si256(c2[8877],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[6520],simde_mm256_xor_si256(c2[6916],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[8121],simde_mm256_xor_si256(c2[8517],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[10122],simde_mm256_xor_si256(c2[6552],simde_mm256_xor_si256(c2[10513],simde_mm256_xor_si256(c2[7761],simde_mm256_xor_si256(c2[8157],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[6574],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[8175],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[4237],simde_mm256_xor_si256(c2[10174],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[6
628],simde_mm256_xor_si256(c2[7024],simde_mm256_xor_si256(c2[12568],simde_mm256_xor_si256(c2[7819],simde_mm256_xor_si256(c2[8230],simde_mm256_xor_si256(c2[10212],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[9852],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[11830],simde_mm256_xor_si256(c2[3912],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[7491],simde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[9885],c2[5529])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[342]=_mm256_xor_si256(c2[11885],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[180],c2[216])));
+     d2[342]=simde_mm256_xor_si256(c2[11885],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[180],c2[216])));
 
 //row: 39
-     d2[351]=_mm256_xor_si256(c2[9922],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[2111],c2[8661])));
+     d2[351]=simde_mm256_xor_si256(c2[9922],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[2111],c2[8661])));
 
 //row: 40
-     d2[360]=_mm256_xor_si256(c2[9510],_mm256_xor_si256(c2[6484],c2[1098]));
+     d2[360]=simde_mm256_xor_si256(c2[9510],simde_mm256_xor_si256(c2[6484],c2[1098]));
 
 //row: 41
-     d2[369]=_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[162],c2[9437])));
+     d2[369]=simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[162],c2[9437])));
 
 //row: 42
-     d2[378]=_mm256_xor_si256(c2[9115],_mm256_xor_si256(c2[9114],_mm256_xor_si256(c2[1980],_mm256_xor_si256(c2[2376],_mm256_xor_si256(c2[11886],_mm256_xor_si256(c2[12282],_mm256_xor_si256(c2[10692],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[3984],_mm256_xor_si256(c2[4380],_mm256_xor_si256(c2[7943],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[12319],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[11940],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[4020],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[3245],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[6016],_mm256_xor_si256(c2[6412],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[12373],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[8810],_mm256_xor_si256(c2[9206],_mm256_xor_si256(c2[3277],_mm256_xor_si256(c2[5263],_mm256_xor_si256(c2[5659],_mm256_xor_si256(c2[8427],_mm256_xor_si256(c2[8823],_mm256_xor_si256(c2[6465],_mm256_xor_si256(c2[11617],_mm256_xor_si256(c2[12013],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[3299],_mm256_xor_si256(c2[5695],_mm256_xor_si256(c2[939],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[3715],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[10066],_mm256_xor_si256(c2[1351],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[8105],_mm256_xor_si256(c2[2557],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[5330],_mm256_xor_si256(c2[9706],_mm256_xor_si256(c2[6934],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[11311],_mm256_xor_si256(c2[7741],_mm256_xor_si256(c2[11306],_mm256_xor_si256(c2[11702],_mm256_xor_si256(c2[9346],_mm256_xor_si256(c2[5385],_mm256_xor_si256(c2[5781],_mm256_xor_si256(c2[3008],_mm256_xor_si256(c2[3404],_mm256_xor_si256(c2[5007],_mm256_xor_si256(c2[3428],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[8968],_mm256_xor_si256(c2[9364],_mm256_xor_si256(c2[5426],_mm256_xor_si256(c2[11363],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[1855],_mm25
6_xor_si256(c2[8213],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[8612],_mm256_xor_si256(c2[9008],_mm256_xor_si256(c2[9419],_mm256_xor_si256(c2[11005],_mm256_xor_si256(c2[11401],_mm256_xor_si256(c2[3480],_mm256_xor_si256(c2[3876],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[6660],_mm256_xor_si256(c2[7056],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[1909],_mm256_xor_si256(c2[11437],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[5101],_mm256_xor_si256(c2[5511],_mm256_xor_si256(c2[5516],_mm256_xor_si256(c2[5912],_mm256_xor_si256(c2[8284],_mm256_xor_si256(c2[8680],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[11074],_mm256_xor_si256(c2[6322],c2[6718]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[378]=simde_mm256_xor_si256(c2[9115],simde_mm256_xor_si256(c2[9114],simde_mm256_xor_si256(c2[1980],simde_mm256_xor_si256(c2[2376],simde_mm256_xor_si256(c2[11886],simde_mm256_xor_si256(c2[12282],simde_mm256_xor_si256(c2[10692],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[3984],simde_mm256_xor_si256(c2[4380],simde_mm256_xor_si256(c2[7943],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[12319],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[11940],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[3624],simde_mm256_xor_si256(c2[4020],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[3245],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[6016],simde_mm256_xor_si256(c2[6412],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[12373],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[8810],simde_mm256_xor_si256(c2[9206],simde_mm256_xor_si256(c2[3277],simde_mm256_xor_si256(c2[5263],simde_mm256_xor_si256(c2[5659],simde_mm256_xor_si256(c2[8427],simde_mm256_xor_si256(c2[8823],simde_mm256_xor_si256(c2[6465],simde_mm256_xor_si256(c2[11617],simde_mm256_xor_si256(c2[12013],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[3299],simde_mm256_xor_si256(c2[5695],simde_mm256_xor_si256(c2[939],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[3715],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[10066],simde_mm256_xor_si256(c2[1351],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[8105],simde_mm256_xor_si256(c2[2557],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[5330],simde_mm256_xor_si256(c2[9706],simde_mm256_xor_si256(c2[6934],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[11311],simde_mm256_xor_si256(c2[7741],simde_mm256_xor_si256(c2[11306],simde_mm256_xor_si256(c2[11702],simde_mm256_xor_si256(c2[9346],simde_mm256_xor_si256(c2[5385
],simde_mm256_xor_si256(c2[5781],simde_mm256_xor_si256(c2[3008],simde_mm256_xor_si256(c2[3404],simde_mm256_xor_si256(c2[5007],simde_mm256_xor_si256(c2[3428],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[8968],simde_mm256_xor_si256(c2[9364],simde_mm256_xor_si256(c2[5426],simde_mm256_xor_si256(c2[11363],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[1855],simde_mm256_xor_si256(c2[8213],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[8612],simde_mm256_xor_si256(c2[9008],simde_mm256_xor_si256(c2[9419],simde_mm256_xor_si256(c2[11005],simde_mm256_xor_si256(c2[11401],simde_mm256_xor_si256(c2[3480],simde_mm256_xor_si256(c2[3876],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[6660],simde_mm256_xor_si256(c2[7056],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[1909],simde_mm256_xor_si256(c2[11437],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[5101],simde_mm256_xor_si256(c2[5511],simde_mm256_xor_si256(c2[5516],simde_mm256_xor_si256(c2[5912],simde_mm256_xor_si256(c2[8284],simde_mm256_xor_si256(c2[8680],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[11074],simde_mm256_xor_si256(c2[6322],c2[6718]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[387]=_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[6739],_mm256_xor_si256(c2[3569],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[8734],_mm256_xor_si256(c2[12297],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[7939],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[6772],_mm256_xor_si256(c2[4398],_mm256_xor_si256(c2[3623],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[7978],_mm256_xor_si256(c2[8374],_mm256_xor_si256(c2[6019],_mm256_xor_si256(c2[7995],_mm256_xor_si256(c2[10370],_mm256_xor_si256(c2[10766],_mm256_xor_si256(c2[4056],_mm256_xor_si256(c2[6034],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[7640],_mm256_xor_si256(c2[10013],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[10819],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[7257],_mm256_xor_si256(c2[7653],_mm256_xor_si256(c2[10049],_mm256_xor_si256(c2[5689],_mm256_xor_si256(c2[7673],_mm256_xor_si256(c2[8069],_mm256_xor_si256(c2[4914],_mm256_xor_si256(c2[1749],_mm256_xor_si256(c2[6110],_mm256_xor_si256(c2[12459],_mm256_xor_si256(c2[7316],_mm256_xor_si256(c2[9288],_mm256_xor_si256(c2[9684],_mm256_xor_si256(c2[1389],_mm256_xor_si256(c2[11288],_mm256_xor_si256(c2[6538],_mm256_xor_si256(c2[6934],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[12104],_mm256_xor_si256(c2[2989],_mm256_xor_si256(c2[3385],_mm256_xor_si256(c2[1029],_mm256_xor_si256(c2[10135],_mm256_xor_si256(c2[7362],_mm256_xor_si256(c2[7758],_mm256_xor_si256(c2[9361],_mm256_xor_si256(c2[8178],_mm256_xor_si256(c2[651],_mm256_xor_si256(c2[1047],_mm256_xor_si256(c2[9780],_mm256_xor_si256(c2[3046],_mm256_xor_si256(c2[6218],_mm256_xor_si256(c2[12567],_mm256_xor_si256(c2[5440],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[7819],_mm256_xor_si256(c2[1102],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[7834],_mm256_xor_si256(c2[8230],_mm256_xor_si256(c2[5870],_mm256_xor_si256(c2[11419],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[6272],_mm256_xor_si256(c2[324],_mm256_xo
r_si256(c2[3120],_mm256_xor_si256(c2[4702],_mm256_xor_si256(c2[9455],_mm256_xor_si256(c2[9865],_mm256_xor_si256(c2[10266],_mm256_xor_si256(c2[12638],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[8305],_mm256_xor_si256(c2[2757],_mm256_xor_si256(c2[10676],c2[11072]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[387]=simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[6739],simde_mm256_xor_si256(c2[3569],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[8734],simde_mm256_xor_si256(c2[12297],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[7939],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[6772],simde_mm256_xor_si256(c2[4398],simde_mm256_xor_si256(c2[3623],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[7978],simde_mm256_xor_si256(c2[8374],simde_mm256_xor_si256(c2[6019],simde_mm256_xor_si256(c2[7995],simde_mm256_xor_si256(c2[10370],simde_mm256_xor_si256(c2[10766],simde_mm256_xor_si256(c2[4056],simde_mm256_xor_si256(c2[6034],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[7640],simde_mm256_xor_si256(c2[10013],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[10819],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[7257],simde_mm256_xor_si256(c2[7653],simde_mm256_xor_si256(c2[10049],simde_mm256_xor_si256(c2[5689],simde_mm256_xor_si256(c2[7673],simde_mm256_xor_si256(c2[8069],simde_mm256_xor_si256(c2[4914],simde_mm256_xor_si256(c2[1749],simde_mm256_xor_si256(c2[6110],simde_mm256_xor_si256(c2[12459],simde_mm256_xor_si256(c2[7316],simde_mm256_xor_si256(c2[9288],simde_mm256_xor_si256(c2[9684],simde_mm256_xor_si256(c2[1389],simde_mm256_xor_si256(c2[11288],simde_mm256_xor_si256(c2[6538],simde_mm256_xor_si256(c2[6934],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[12104],simde_mm256_xor_si256(c2[2989],simde_mm256_xor_si256(c2[3385],simde_mm256_xor_si256(c2[1029],simde_mm256_xor_si256(c2[10135],simde_mm256_xor_si256(c2[7362],simde_mm256_xor_si256(c2[7758],simde_mm256_xor_si256(c2[9361],simde_mm256_xor_si256(c2[8178],simde_mm256_xor_si256(c2[651],simde_mm256_xor_si256(c2[1047],simde_mm256_xor_si256(c2[9780],simde_mm256_xor_si256(c2[3046],simde_mm256_xor_si256(c2[6218],simde_mm256_xor_si256(c2[12567],simde_mm256_xor_si256(c2[5440]
,simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[7819],simde_mm256_xor_si256(c2[1102],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[7834],simde_mm256_xor_si256(c2[8230],simde_mm256_xor_si256(c2[5870],simde_mm256_xor_si256(c2[11419],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[6272],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[4702],simde_mm256_xor_si256(c2[9455],simde_mm256_xor_si256(c2[9865],simde_mm256_xor_si256(c2[10266],simde_mm256_xor_si256(c2[12638],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[8305],simde_mm256_xor_si256(c2[2757],simde_mm256_xor_si256(c2[10676],c2[11072]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[396]=_mm256_xor_si256(c2[4760],_mm256_xor_si256(c2[4759],_mm256_xor_si256(c2[10692],_mm256_xor_si256(c2[7927],_mm256_xor_si256(c2[7128],_mm256_xor_si256(c2[12301],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[3984],_mm256_xor_si256(c2[8359],_mm256_xor_si256(c2[10734],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[7585],_mm256_xor_si256(c2[9566],_mm256_xor_si256(c2[12336],_mm256_xor_si256(c2[9972],_mm256_xor_si256(c2[11957],_mm256_xor_si256(c2[2057],_mm256_xor_si256(c2[8018],_mm256_xor_si256(c2[9996],_mm256_xor_si256(c2[4842],_mm256_xor_si256(c2[11593],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[7658],_mm256_xor_si256(c2[11615],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[1340],_mm256_xor_si256(c2[9651],_mm256_xor_si256(c2[12031],_mm256_xor_si256(c2[8876],_mm256_xor_si256(c2[5711],_mm256_xor_si256(c2[10063],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[3750],_mm256_xor_si256(c2[11269],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[5351],_mm256_xor_si256(c2[2579],_mm256_xor_si256(c2[10896],_mm256_xor_si256(c2[6956],_mm256_xor_si256(c2[3386],_mm256_xor_si256(c2[7347],_mm256_xor_si256(c2[4991],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[11720],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[12140],_mm256_xor_si256(c2[5009],_mm256_xor_si256(c2[1062],_mm256_xor_si256(c2[7008],_mm256_xor_si256(c2[10171],_mm256_xor_si256(c2[3858],_mm256_xor_si256(c2[9402],_mm256_xor_si256(c2[4644],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[7046],_mm256_xor_si256(c2[12192],_mm256_xor_si256(c2[9832],_mm256_xor_si256(c2[2701],_mm256_xor_si256(c2[10225],_mm256_xor_si256(c2[7082],_mm256_xor_si256(c2[8664],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[1156],_mm256_xor_si256(c2[1548],_mm256_xor_si256(c2[4325],_mm256_xor_si256(c2[12258],_mm256_xor_si256(c2[6719],c2[2363])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[396]=simde_mm256_xor_si256(c2[4760],simde_mm256_xor_si256(c2[4759],simde_mm256_xor_si256(c2[10692],simde_mm256_xor_si256(c2[7927],simde_mm256_xor_si256(c2[7128],simde_mm256_xor_si256(c2[12301],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[3984],simde_mm256_xor_si256(c2[8359],simde_mm256_xor_si256(c2[10734],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[7585],simde_mm256_xor_si256(c2[9566],simde_mm256_xor_si256(c2[12336],simde_mm256_xor_si256(c2[9972],simde_mm256_xor_si256(c2[11957],simde_mm256_xor_si256(c2[2057],simde_mm256_xor_si256(c2[8018],simde_mm256_xor_si256(c2[9996],simde_mm256_xor_si256(c2[4842],simde_mm256_xor_si256(c2[11593],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[4468],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[7658],simde_mm256_xor_si256(c2[11615],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[1340],simde_mm256_xor_si256(c2[9651],simde_mm256_xor_si256(c2[12031],simde_mm256_xor_si256(c2[8876],simde_mm256_xor_si256(c2[5711],simde_mm256_xor_si256(c2[10063],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[3750],simde_mm256_xor_si256(c2[11269],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[5351],simde_mm256_xor_si256(c2[2579],simde_mm256_xor_si256(c2[10896],simde_mm256_xor_si256(c2[6956],simde_mm256_xor_si256(c2[3386],simde_mm256_xor_si256(c2[7347],simde_mm256_xor_si256(c2[4991],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[11720],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[12140],simde_mm256_xor_si256(c2[5009],simde_mm256_xor_si256(c2[1062],simde_mm256_xor_si256(c2[7008],simde_mm256_xor_si256(c2[10171],simde_mm256_xor_si256(c2[3858],simde_mm256_xor_si256(c2[9402],simde_mm256_xor_si256(c2[4644],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[7046],simde_mm256_xor_si256(c2[12192],simde_mm256_xor_si256(c2[9832],simde_mm256_xor_si256(c2[2701],simde_mm256_xor_si256(c2[10225],simde_mm256_xor_si256(c2[7082],simde_mm256_xor_si256(c2[8664],simde_mm256_xor_si256(c
2[746],simde_mm256_xor_si256(c2[1156],simde_mm256_xor_si256(c2[1548],simde_mm256_xor_si256(c2[4325],simde_mm256_xor_si256(c2[12258],simde_mm256_xor_si256(c2[6719],c2[2363])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[405]=_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[108],c2[6520]));
+     d2[405]=simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[108],c2[6520]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc320_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc320_byte.c
index 9d8e05607474ddf8e2903f1965a7d4503cdf3cb7..be5cbbdad8f2d213b31a031d3722d9dd4c301acf 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc320_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc320_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc320_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[3962],_mm256_xor_si256(c2[6609],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[12326],_mm256_xor_si256(c2[6620],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[7066],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[2689],_mm256_xor_si256(c2[7082],_mm256_xor_si256(c2[7541],_mm256_xor_si256(c2[11940],_mm256_xor_si256(c2[13260],_mm256_xor_si256(c2[2288],_mm256_xor_si256(c2[10648],_mm256_xor_si256(c2[2286],_mm256_xor_si256(c2[7147],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[3201],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[12881],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[4546],_mm256_xor_si256(c2[6745],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[6762],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[7222],_mm256_xor_si256(c2[3262],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[4603],_mm256_xor_si256(c2[11208],_mm256_xor_si256(c2[10346],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[6829],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[5529],_mm256_xor_si256(c2[13007],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[5562],_mm256_xor_si256(c2[12168],_mm256_xor_si256(c2[9521],_mm256_xor_si256(c2[4709],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[6029],_mm256_xor_si256(c2[6483],_mm256_xor_si256(c2[3842],_mm256_xor_si256(c2[5601],_mm256_xor_si256(c2[8702],_mm256_xor_si256(c2[10905],_mm256_xor_si256(c2[12660],_mm256_xor_si256(c2[7843],_mm256_xor_si256(c2[10047],_mm256_xor_si256(c2[6082],_mm256_xor_si256(c2[7420],_mm256_xor_si256(c2[2148],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[13165],_mm256_xor_si256(c2[9208],_mm256_xor_si256(c2[14049],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[6149],c2[2182]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[3962],simde_mm256_xor_si256(c2[6609],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[12326],simde_mm256_xor_si256(c2[6620],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[7066],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[2689],simde_mm256_xor_si256(c2[7082],simde_mm256_xor_si256(c2[7541],simde_mm256_xor_si256(c2[11940],simde_mm256_xor_si256(c2[13260],simde_mm256_xor_si256(c2[2288],simde_mm256_xor_si256(c2[10648],simde_mm256_xor_si256(c2[2286],simde_mm256_xor_si256(c2[7147],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[3201],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[12881],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[4546],simde_mm256_xor_si256(c2[6745],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[6762],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[7222],simde_mm256_xor_si256(c2[3262],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[4603],simde_mm256_xor_si256(c2[11208],simde_mm256_xor_si256(c2[10346],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[6829],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[5529],simde_mm256_xor_si256(c2[13007],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[5562],simde_mm256_xor_si256(c2[12168],simde_mm256_xor_si256(c2[9521],simde_mm256_xor_si256(c2[4709],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[6029],simde_mm256_xor_si256(c2[6483],simde_mm256_xor_si256(c2[3842],simde_mm256_xor_si256(c2[5601],simde_mm256_xor_si256(c2[8702],simde_mm256_xor_si256(c2[10905],simde_mm256_xor_si256(c2[12660],simde_mm256_xor_si256(c2[7843],simde_mm256_xor_si256(c2[10047],simde_mm256_xor_si256(c2[6082],simde_mm256_xor_si256(c2[7420],simde_mm256_xor_si256(c2[2148],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[13165],simde_mm256_xor_si256(c2[9208],simde_mm256_xor_si256(c2
[14049],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[6149],c2[2182]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[10]=_mm256_xor_si256(c2[3962],_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[7049],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[12766],_mm256_xor_si256(c2[6620],_mm256_xor_si256(c2[7060],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[7506],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[3563],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[7522],_mm256_xor_si256(c2[7541],_mm256_xor_si256(c2[7981],_mm256_xor_si256(c2[12380],_mm256_xor_si256(c2[13700],_mm256_xor_si256(c2[2728],_mm256_xor_si256(c2[11088],_mm256_xor_si256(c2[2726],_mm256_xor_si256(c2[7147],_mm256_xor_si256(c2[7587],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[3201],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[1887],_mm256_xor_si256(c2[13321],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[4986],_mm256_xor_si256(c2[7185],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[4126],_mm256_xor_si256(c2[7202],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[7220],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[1525],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[11648],_mm256_xor_si256(c2[10346],_mm256_xor_si256(c2[10786],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[7269],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[2445],_mm256_xor_si256(c2[5969],_mm256_xor_si256(c2[13447],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[2465],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[12608],_mm256_xor_si256(c2[9961],_mm256_xor_si256(c2[4709],_mm256_xor_si256(c2[5149],_mm256_xor_si256(c2[5148],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[6483],_mm256_xor_si256(c2[6923],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[6041],_mm256_xor_si256(c2[9142],_mm256_xor_si256(c2[11345],_mm256_xor_si256(c2[13100],_mm256_xor_si256(c2[7843],_mm256_xor_si256(c2[8283],_mm256_xor_si256(c2[10487],_mm256_xor_si256(c2[6522],_mm256_xor_si256(c2[7420],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[2588],_mm256_xor_si256(c2[3467]
,_mm256_xor_si256(c2[13165],_mm256_xor_si256(c2[13605],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[861],_mm256_xor_si256(c2[6589],c2[2622])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[10]=simde_mm256_xor_si256(c2[3962],simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[7049],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[12766],simde_mm256_xor_si256(c2[6620],simde_mm256_xor_si256(c2[7060],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[7506],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[3563],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[7522],simde_mm256_xor_si256(c2[7541],simde_mm256_xor_si256(c2[7981],simde_mm256_xor_si256(c2[12380],simde_mm256_xor_si256(c2[13700],simde_mm256_xor_si256(c2[2728],simde_mm256_xor_si256(c2[11088],simde_mm256_xor_si256(c2[2726],simde_mm256_xor_si256(c2[7147],simde_mm256_xor_si256(c2[7587],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[3201],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[1887],simde_mm256_xor_si256(c2[13321],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[4986],simde_mm256_xor_si256(c2[7185],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[4126],simde_mm256_xor_si256(c2[7202],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[7220],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[1525],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[11648],simde_mm256_xor_si256(c2[10346],simde_mm256_xor_si256(c2[10786],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[7269],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[2445],simde_mm256_xor_si256(c2[5969],simde_mm256_xor_si256(c2[13447],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256(c2[2465],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[12608],simde_mm256_xor_si256(c2[9961],simde_mm256_xor_si256(c2[4709],simde_mm256_xor_si256(c2[5149],simde_mm256_xor_si256(c2[5148],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[6483],simde_mm256_xor_si256(c2[6923],simde_mm256_xor_si256(c2[
4282],simde_mm256_xor_si256(c2[6041],simde_mm256_xor_si256(c2[9142],simde_mm256_xor_si256(c2[11345],simde_mm256_xor_si256(c2[13100],simde_mm256_xor_si256(c2[7843],simde_mm256_xor_si256(c2[8283],simde_mm256_xor_si256(c2[10487],simde_mm256_xor_si256(c2[6522],simde_mm256_xor_si256(c2[7420],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[2588],simde_mm256_xor_si256(c2[3467],simde_mm256_xor_si256(c2[13165],simde_mm256_xor_si256(c2[13605],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[861],simde_mm256_xor_si256(c2[6589],c2[2622])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[20]=_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[7049],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[12326],_mm256_xor_si256(c2[12766],_mm256_xor_si256(c2[7060],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[7066],_mm256_xor_si256(c2[7506],_mm256_xor_si256(c2[3563],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[7082],_mm256_xor_si256(c2[7522],_mm256_xor_si256(c2[7981],_mm256_xor_si256(c2[12380],_mm256_xor_si256(c2[13260],_mm256_xor_si256(c2[13700],_mm256_xor_si256(c2[2728],_mm256_xor_si256(c2[10648],_mm256_xor_si256(c2[11088],_mm256_xor_si256(c2[2286],_mm256_xor_si256(c2[2726],_mm256_xor_si256(c2[7587],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[1887],_mm256_xor_si256(c2[12881],_mm256_xor_si256(c2[13321],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[4546],_mm256_xor_si256(c2[4986],_mm256_xor_si256(c2[6745],_mm256_xor_si256(c2[7185],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[4126],_mm256_xor_si256(c2[6762],_mm256_xor_si256(c2[7202],_mm256_xor_si256(c2[7220],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[3262],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[1525],_mm256_xor_si256(c2[4603],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[11208],_mm256_xor_si256(c2[11648],_mm256_xor_si256(c2[10786],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[6829],_mm256_xor_si256(c2[7269],_mm256_xor_si256(c2[2445],_mm256_xor_si256(c2[5969],_mm256_xor_si256(c2[13007],_mm256_xor_si256(c2[13447],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[2465],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[12168],_mm256_xor_si256(c2[12608],_mm256_xor_si256(c2[9521],_mm256_xor_si256(c2[9961],_mm256_xor_si256(c2[5149],_mm256_xor_si256(c2[5148],_mm256_xor_si256(c2[6029],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[6923],_mm256_xor_si256(c2[42
82],_mm256_xor_si256(c2[5601],_mm256_xor_si256(c2[6041],_mm256_xor_si256(c2[9142],_mm256_xor_si256(c2[10905],_mm256_xor_si256(c2[11345],_mm256_xor_si256(c2[12660],_mm256_xor_si256(c2[13100],_mm256_xor_si256(c2[8283],_mm256_xor_si256(c2[10047],_mm256_xor_si256(c2[10487],_mm256_xor_si256(c2[6082],_mm256_xor_si256(c2[6522],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[2588],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[3467],_mm256_xor_si256(c2[13605],_mm256_xor_si256(c2[9208],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[14049],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[861],_mm256_xor_si256(c2[6589],_mm256_xor_si256(c2[2182],c2[2622]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[20]=simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[7049],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[12326],simde_mm256_xor_si256(c2[12766],simde_mm256_xor_si256(c2[7060],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[7066],simde_mm256_xor_si256(c2[7506],simde_mm256_xor_si256(c2[3563],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[7082],simde_mm256_xor_si256(c2[7522],simde_mm256_xor_si256(c2[7981],simde_mm256_xor_si256(c2[12380],simde_mm256_xor_si256(c2[13260],simde_mm256_xor_si256(c2[13700],simde_mm256_xor_si256(c2[2728],simde_mm256_xor_si256(c2[10648],simde_mm256_xor_si256(c2[11088],simde_mm256_xor_si256(c2[2286],simde_mm256_xor_si256(c2[2726],simde_mm256_xor_si256(c2[7587],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[1887],simde_mm256_xor_si256(c2[12881],simde_mm256_xor_si256(c2[13321],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[4546],simde_mm256_xor_si256(c2[4986],simde_mm256_xor_si256(c2[6745],simde_mm256_xor_si256(c2[7185],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[4126],simde_mm256_xor_si256(c2[6762],simde_mm256_xor_si256(c2[7202],simde_mm256_xor_si256(c2[7220],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[3262],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[1525],simde_mm256_xor_si256(c2[4603],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[11208],simde_mm256_xor_si256(c2[11648],simde_mm256_xor_si256(c2[10786],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[6829],simde_mm256_xor_si256(c2[7269],simde_mm256_xor_si256(c2[2445],simde_mm256_xor_si256(c2[5969],simde_mm256_xor_si256(c2[13007],simde_mm256_xor_si256(c2[13447],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256
(c2[2025],simde_mm256_xor_si256(c2[2465],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[12168],simde_mm256_xor_si256(c2[12608],simde_mm256_xor_si256(c2[9521],simde_mm256_xor_si256(c2[9961],simde_mm256_xor_si256(c2[5149],simde_mm256_xor_si256(c2[5148],simde_mm256_xor_si256(c2[6029],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[6923],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[5601],simde_mm256_xor_si256(c2[6041],simde_mm256_xor_si256(c2[9142],simde_mm256_xor_si256(c2[10905],simde_mm256_xor_si256(c2[11345],simde_mm256_xor_si256(c2[12660],simde_mm256_xor_si256(c2[13100],simde_mm256_xor_si256(c2[8283],simde_mm256_xor_si256(c2[10047],simde_mm256_xor_si256(c2[10487],simde_mm256_xor_si256(c2[6082],simde_mm256_xor_si256(c2[6522],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[2588],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[3467],simde_mm256_xor_si256(c2[13605],simde_mm256_xor_si256(c2[9208],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[14049],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[861],simde_mm256_xor_si256(c2[6589],simde_mm256_xor_si256(c2[2182],c2[2622]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[30]=_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[7049],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[12326],_mm256_xor_si256(c2[12766],_mm256_xor_si256(c2[7060],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[7066],_mm256_xor_si256(c2[7506],_mm256_xor_si256(c2[3563],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[7522],_mm256_xor_si256(c2[7981],_mm256_xor_si256(c2[12380],_mm256_xor_si256(c2[13260],_mm256_xor_si256(c2[13700],_mm256_xor_si256(c2[2728],_mm256_xor_si256(c2[11088],_mm256_xor_si256(c2[2286],_mm256_xor_si256(c2[2726],_mm256_xor_si256(c2[7587],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[1887],_mm256_xor_si256(c2[12881],_mm256_xor_si256(c2[13321],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[4986],_mm256_xor_si256(c2[6745],_mm256_xor_si256(c2[7185],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[4126],_mm256_xor_si256(c2[6762],_mm256_xor_si256(c2[7202],_mm256_xor_si256(c2[7220],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[1525],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[11208],_mm256_xor_si256(c2[11648],_mm256_xor_si256(c2[10786],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[6829],_mm256_xor_si256(c2[7269],_mm256_xor_si256(c2[2445],_mm256_xor_si256(c2[5969],_mm256_xor_si256(c2[13007],_mm256_xor_si256(c2[13447],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[2465],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[12608],_mm256_xor_si256(c2[9521],_mm256_xor_si256(c2[9961],_mm256_xor_si256(c2[5149],_mm256_xor_si256(c2[5148],_mm256_xor_si256(c2[6469],_mm256_xor_si256(c2[6923],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[5601],_mm256_xor_si256(c2[6041],_mm256_xor_si256(c2[9142],_mm256_xor_si256(c2[11345],_mm256_xor_si256(c2[12660],_mm256_xor_si256(c2[13100],_mm256_xor_si256(c2[8283],_mm256_xor_si256(c2[10487],_mm256_xor_si256(c2[6082],_mm256_xor_si256(c2[6522],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[2588],_mm256_xor_si256(c2[
3467],_mm256_xor_si256(c2[13605],_mm256_xor_si256(c2[9648],_mm256_xor_si256(c2[14049],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[861],_mm256_xor_si256(c2[6589],_mm256_xor_si256(c2[2182],c2[2622])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[7049],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[12326],simde_mm256_xor_si256(c2[12766],simde_mm256_xor_si256(c2[7060],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[7066],simde_mm256_xor_si256(c2[7506],simde_mm256_xor_si256(c2[3563],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[7522],simde_mm256_xor_si256(c2[7981],simde_mm256_xor_si256(c2[12380],simde_mm256_xor_si256(c2[13260],simde_mm256_xor_si256(c2[13700],simde_mm256_xor_si256(c2[2728],simde_mm256_xor_si256(c2[11088],simde_mm256_xor_si256(c2[2286],simde_mm256_xor_si256(c2[2726],simde_mm256_xor_si256(c2[7587],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[1887],simde_mm256_xor_si256(c2[12881],simde_mm256_xor_si256(c2[13321],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[4986],simde_mm256_xor_si256(c2[6745],simde_mm256_xor_si256(c2[7185],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[4126],simde_mm256_xor_si256(c2[6762],simde_mm256_xor_si256(c2[7202],simde_mm256_xor_si256(c2[7220],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[1525],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[11208],simde_mm256_xor_si256(c2[11648],simde_mm256_xor_si256(c2[10786],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[6829],simde_mm256_xor_si256(c2[7269],simde_mm256_xor_si256(c2[2445],simde_mm256_xor_si256(c2[5969],simde_mm256_xor_si256(c2[13007],simde_mm256_xor_si256(c2[13447],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[2465],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[12608],simde_mm256_xor_si256(c2[9521],simde_mm256_xor_si256(c2[9961],simde_mm256_xor_si256(c2[5149],simde_mm256_xor_si256(c2[5148],simde_mm256_xor_si256(c2[6469],simde_mm256_xor_si256(c2[6923],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256
(c2[5601],simde_mm256_xor_si256(c2[6041],simde_mm256_xor_si256(c2[9142],simde_mm256_xor_si256(c2[11345],simde_mm256_xor_si256(c2[12660],simde_mm256_xor_si256(c2[13100],simde_mm256_xor_si256(c2[8283],simde_mm256_xor_si256(c2[10487],simde_mm256_xor_si256(c2[6082],simde_mm256_xor_si256(c2[6522],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[2588],simde_mm256_xor_si256(c2[3467],simde_mm256_xor_si256(c2[13605],simde_mm256_xor_si256(c2[9648],simde_mm256_xor_si256(c2[14049],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[861],simde_mm256_xor_si256(c2[6589],simde_mm256_xor_si256(c2[2182],c2[2622])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[40]=_mm256_xor_si256(c2[3967],c2[5746]);
+     d2[40]=simde_mm256_xor_si256(c2[3967],c2[5746]);
 
 //row: 5
-     d2[50]=_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[2648],_mm256_xor_si256(c2[11880],_mm256_xor_si256(c2[8365],_mm256_xor_si256(c2[8362],_mm256_xor_si256(c2[2669],_mm256_xor_si256(c2[13228],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[1789],_mm256_xor_si256(c2[13241],_mm256_xor_si256(c2[12807],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[3580],_mm256_xor_si256(c2[7989],_mm256_xor_si256(c2[9309],_mm256_xor_si256(c2[7981],_mm256_xor_si256(c2[12406],_mm256_xor_si256(c2[6687],_mm256_xor_si256(c2[12404],_mm256_xor_si256(c2[3186],_mm256_xor_si256(c2[10663],_mm256_xor_si256(c2[12869],_mm256_xor_si256(c2[13329],_mm256_xor_si256(c2[11565],_mm256_xor_si256(c2[8920],_mm256_xor_si256(c2[12462],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[12048],_mm256_xor_si256(c2[13804],_mm256_xor_si256(c2[2801],_mm256_xor_si256(c2[2829],_mm256_xor_si256(c2[3261],_mm256_xor_si256(c2[13380],_mm256_xor_si256(c2[11203],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[7247],_mm256_xor_si256(c2[6385],_mm256_xor_si256(c2[10782],_mm256_xor_si256(c2[2868],_mm256_xor_si256(c2[12123],_mm256_xor_si256(c2[1568],_mm256_xor_si256(c2[9046],_mm256_xor_si256(c2[13449],_mm256_xor_si256(c2[12582],_mm256_xor_si256(c2[13467],_mm256_xor_si256(c2[12143],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[8207],_mm256_xor_si256(c2[5560],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[747],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[2522],_mm256_xor_si256(c2[13960],_mm256_xor_si256(c2[1640],_mm256_xor_si256(c2[4286],_mm256_xor_si256(c2[4741],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[8709],_mm256_xor_si256(c2[3882],_mm256_xor_si256(c2[6086],_mm256_xor_si256(c2[2121],_mm256_xor_si256(c2[3469],_mm256_xor_si256(c2[12266],_mm256_xor_si256(c2[13145],_mm256_xor_si256(c2[9204],_mm256_xor_si256(c2[5247],_mm256_xor_si256(c2[10088],_mm256_xor_si256(c2[10549],_mm256_xor_si256(c2[2188],_mm256_xor_si256(c2[12300],c2[5268]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[50]=simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[2648],simde_mm256_xor_si256(c2[11880],simde_mm256_xor_si256(c2[8365],simde_mm256_xor_si256(c2[8362],simde_mm256_xor_si256(c2[2669],simde_mm256_xor_si256(c2[13228],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[1789],simde_mm256_xor_si256(c2[13241],simde_mm256_xor_si256(c2[12807],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[3580],simde_mm256_xor_si256(c2[7989],simde_mm256_xor_si256(c2[9309],simde_mm256_xor_si256(c2[7981],simde_mm256_xor_si256(c2[12406],simde_mm256_xor_si256(c2[6687],simde_mm256_xor_si256(c2[12404],simde_mm256_xor_si256(c2[3186],simde_mm256_xor_si256(c2[10663],simde_mm256_xor_si256(c2[12869],simde_mm256_xor_si256(c2[13329],simde_mm256_xor_si256(c2[11565],simde_mm256_xor_si256(c2[8920],simde_mm256_xor_si256(c2[12462],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[12048],simde_mm256_xor_si256(c2[13804],simde_mm256_xor_si256(c2[2801],simde_mm256_xor_si256(c2[2829],simde_mm256_xor_si256(c2[3261],simde_mm256_xor_si256(c2[13380],simde_mm256_xor_si256(c2[11203],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[7247],simde_mm256_xor_si256(c2[6385],simde_mm256_xor_si256(c2[10782],simde_mm256_xor_si256(c2[2868],simde_mm256_xor_si256(c2[12123],simde_mm256_xor_si256(c2[1568],simde_mm256_xor_si256(c2[9046],simde_mm256_xor_si256(c2[13449],simde_mm256_xor_si256(c2[12582],simde_mm256_xor_si256(c2[13467],simde_mm256_xor_si256(c2[12143],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[8207],simde_mm256_xor_si256(c2[5560],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[747],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[2522],simde_mm256_xor_si256(c2[13960],simde_mm256_xor_si256(c2[1640],simde_mm256_xor_si256(c2[4286],simde_mm256_xor_si256(c2[4741],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[8709],simde_mm256_xor_si256(c2[3882],simde_mm256_xor_si256(c2[6086],simde_mm256_xor_si256(c2[2121],simde_mm256_xor_si2
56(c2[3469],simde_mm256_xor_si256(c2[12266],simde_mm256_xor_si256(c2[13145],simde_mm256_xor_si256(c2[9204],simde_mm256_xor_si256(c2[5247],simde_mm256_xor_si256(c2[10088],simde_mm256_xor_si256(c2[10549],simde_mm256_xor_si256(c2[2188],simde_mm256_xor_si256(c2[12300],c2[5268]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[60]=_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[9360],_mm256_xor_si256(c2[2409],_mm256_xor_si256(c2[5940],_mm256_xor_si256(c2[3787],_mm256_xor_si256(c2[6509],_mm256_xor_si256(c2[4764],c2[5247])))))));
+     d2[60]=simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[9360],simde_mm256_xor_si256(c2[2409],simde_mm256_xor_si256(c2[5940],simde_mm256_xor_si256(c2[3787],simde_mm256_xor_si256(c2[6509],simde_mm256_xor_si256(c2[4764],c2[5247])))))));
 
 //row: 7
-     d2[70]=_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[10582],_mm256_xor_si256(c2[6686],_mm256_xor_si256(c2[8061],_mm256_xor_si256(c2[11160],c2[5562])))));
+     d2[70]=simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[10582],simde_mm256_xor_si256(c2[6686],simde_mm256_xor_si256(c2[8061],simde_mm256_xor_si256(c2[11160],c2[5562])))));
 
 //row: 8
-     d2[80]=_mm256_xor_si256(c2[13209],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[4404],_mm256_xor_si256(c2[11009],_mm256_xor_si256(c2[13206],_mm256_xor_si256(c2[13646],_mm256_xor_si256(c2[7484],_mm256_xor_si256(c2[9681],_mm256_xor_si256(c2[10121],_mm256_xor_si256(c2[3089],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[12347],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[2224],_mm256_xor_si256(c2[4421],_mm256_xor_si256(c2[4861],_mm256_xor_si256(c2[2224],_mm256_xor_si256(c2[12360],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[11926],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[2240],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[2709],_mm256_xor_si256(c2[5346],_mm256_xor_si256(c2[7108],_mm256_xor_si256(c2[9745],_mm256_xor_si256(c2[8428],_mm256_xor_si256(c2[10625],_mm256_xor_si256(c2[11065],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[11525],_mm256_xor_si256(c2[83],_mm256_xor_si256(c2[5806],_mm256_xor_si256(c2[8003],_mm256_xor_si256(c2[8443],_mm256_xor_si256(c2[11523],_mm256_xor_si256(c2[13720],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[4942],_mm256_xor_si256(c2[9782],_mm256_xor_si256(c2[12429],_mm256_xor_si256(c2[11988],_mm256_xor_si256(c2[106],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[12448],_mm256_xor_si256(c2[1006],_mm256_xor_si256(c2[10684],_mm256_xor_si256(c2[12881],_mm256_xor_si256(c2[13321],_mm256_xor_si256(c2[8049],_mm256_xor_si256(c2[10246],_mm256_xor_si256(c2[10686],_mm256_xor_si256(c2[11581],_mm256_xor_si256(c2[149],_mm256_xor_si256(c2[13783],_mm256_xor_si256(c2[1901],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[4100],_mm256_xor_si256(c2[4540],_mm256_xor_si256(c2[11167],_mm256_xor_si256(c2[13804],_mm256_xor_si256(c2[12923],_mm256_xor_si256(c2[1041],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[4127],_mm256_xor_si256(c2[4567],_mm256_xor_si256(c2[1948],_mm256_xor_si256(c2[458
5],_mm256_xor_si256(c2[2380],_mm256_xor_si256(c2[5027],_mm256_xor_si256(c2[12509],_mm256_xor_si256(c2[627],_mm256_xor_si256(c2[1067],_mm256_xor_si256(c2[10322],_mm256_xor_si256(c2[12969],_mm256_xor_si256(c2[13840],_mm256_xor_si256(c2[1968],_mm256_xor_si256(c2[2408],_mm256_xor_si256(c2[6366],_mm256_xor_si256(c2[8563],_mm256_xor_si256(c2[9003],_mm256_xor_si256(c2[5504],_mm256_xor_si256(c2[8141],_mm256_xor_si256(c2[9901],_mm256_xor_si256(c2[12548],_mm256_xor_si256(c2[1987],_mm256_xor_si256(c2[4184],_mm256_xor_si256(c2[4624],_mm256_xor_si256(c2[11242],_mm256_xor_si256(c2[13889],_mm256_xor_si256(c2[687],_mm256_xor_si256(c2[3324],_mm256_xor_si256(c2[8165],_mm256_xor_si256(c2[10362],_mm256_xor_si256(c2[10802],_mm256_xor_si256(c2[3327],_mm256_xor_si256(c2[11701],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[12586],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[1144],_mm256_xor_si256(c2[11262],_mm256_xor_si256(c2[13469],_mm256_xor_si256(c2[13909],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[3367],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[9523],_mm256_xor_si256(c2[9963],_mm256_xor_si256(c2[4689],_mm256_xor_si256(c2[6886],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[13946],_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[13945],_mm256_xor_si256(c2[2503],_mm256_xor_si256(c2[1187],_mm256_xor_si256(c2[3384],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[4288],_mm256_xor_si256(c2[13089],_mm256_xor_si256(c2[1647],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[3849],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[6507],_mm256_xor_si256(c2[6063],_mm256_xor_si256(c2[8260],_mm256_xor_si256(c2[8700],_mm256_xor_si256(c2[7828],_mm256_xor_si256(c2[10025],_mm256_xor_si256(c2[10465],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[5648],_mm256_xor_si256(c2[5205],_mm256_xor_si256(c2[7402],_mm256_xor_si256(c2[7842],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[3447],_mm256_xor_si256(c2[3887],_mm256_xor_si256(c2[2588],_mm256_x
or_si256(c2[5225],_mm256_xor_si256(c2[11385],_mm256_xor_si256(c2[14022],_mm256_xor_si256(c2[12264],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[822],_mm256_xor_si256(c2[6543],_mm256_xor_si256(c2[8323],_mm256_xor_si256(c2[10960],_mm256_xor_si256(c2[4366],_mm256_xor_si256(c2[6563],_mm256_xor_si256(c2[7003],_mm256_xor_si256(c2[9207],_mm256_xor_si256(c2[11404],_mm256_xor_si256(c2[11844],_mm256_xor_si256(c2[9668],_mm256_xor_si256(c2[12305],_mm256_xor_si256(c2[1307],_mm256_xor_si256(c2[3944],_mm256_xor_si256(c2[11429],_mm256_xor_si256(c2[13626],_mm256_xor_si256(c2[14066],c2[6148]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[80]=simde_mm256_xor_si256(c2[13209],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[4404],simde_mm256_xor_si256(c2[11009],simde_mm256_xor_si256(c2[13206],simde_mm256_xor_si256(c2[13646],simde_mm256_xor_si256(c2[7484],simde_mm256_xor_si256(c2[9681],simde_mm256_xor_si256(c2[10121],simde_mm256_xor_si256(c2[3089],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[12347],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[2224],simde_mm256_xor_si256(c2[4421],simde_mm256_xor_si256(c2[4861],simde_mm256_xor_si256(c2[2224],simde_mm256_xor_si256(c2[12360],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[11926],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[2240],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[2709],simde_mm256_xor_si256(c2[5346],simde_mm256_xor_si256(c2[7108],simde_mm256_xor_si256(c2[9745],simde_mm256_xor_si256(c2[8428],simde_mm256_xor_si256(c2[10625],simde_mm256_xor_si256(c2[11065],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[11525],simde_mm256_xor_si256(c2[83],simde_mm256_xor_si256(c2[5806],simde_mm256_xor_si256(c2[8003],simde_mm256_xor_si256(c2[8443],simde_mm256_xor_si256(c2[11523],simde_mm256_xor_si256(c2[13720],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[4942],simde_mm256_xor_si256(c2[9782],simde_mm256_xor_si256(c2[12429],simde_mm256_xor_si256(c2[11988],simde_mm256_xor_si256(c2[106],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[12448],simde_mm256_xor_si256(c2[1006],simde_mm256_xor_si256(c2[10684],simde_mm256_xor_si256(c2[12881],simde_mm256_xor_si256(c2[13321],simde_mm256_xor_si256(c2[8049],simde_mm256_xor_si256(c2[10246],simde_mm256_xor_si256(c2[10686],simde_mm256_xor_si256(c2[11581],simde_mm256_xor_si256(c2[149],simde_mm256_xor_si256(c2[13783],simde_mm256_xor_si256(c2[1901],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c
2[1903],simde_mm256_xor_si256(c2[4100],simde_mm256_xor_si256(c2[4540],simde_mm256_xor_si256(c2[11167],simde_mm256_xor_si256(c2[13804],simde_mm256_xor_si256(c2[12923],simde_mm256_xor_si256(c2[1041],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[4127],simde_mm256_xor_si256(c2[4567],simde_mm256_xor_si256(c2[1948],simde_mm256_xor_si256(c2[4585],simde_mm256_xor_si256(c2[2380],simde_mm256_xor_si256(c2[5027],simde_mm256_xor_si256(c2[12509],simde_mm256_xor_si256(c2[627],simde_mm256_xor_si256(c2[1067],simde_mm256_xor_si256(c2[10322],simde_mm256_xor_si256(c2[12969],simde_mm256_xor_si256(c2[13840],simde_mm256_xor_si256(c2[1968],simde_mm256_xor_si256(c2[2408],simde_mm256_xor_si256(c2[6366],simde_mm256_xor_si256(c2[8563],simde_mm256_xor_si256(c2[9003],simde_mm256_xor_si256(c2[5504],simde_mm256_xor_si256(c2[8141],simde_mm256_xor_si256(c2[9901],simde_mm256_xor_si256(c2[12548],simde_mm256_xor_si256(c2[1987],simde_mm256_xor_si256(c2[4184],simde_mm256_xor_si256(c2[4624],simde_mm256_xor_si256(c2[11242],simde_mm256_xor_si256(c2[13889],simde_mm256_xor_si256(c2[687],simde_mm256_xor_si256(c2[3324],simde_mm256_xor_si256(c2[8165],simde_mm256_xor_si256(c2[10362],simde_mm256_xor_si256(c2[10802],simde_mm256_xor_si256(c2[3327],simde_mm256_xor_si256(c2[11701],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[12586],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[1144],simde_mm256_xor_si256(c2[11262],simde_mm256_xor_si256(c2[13469],simde_mm256_xor_si256(c2[13909],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[3367],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[9523],simde_mm256_xor_si256(c2[9963],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[6886],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[13946],simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[13945],simde_mm256_xor_si256(c2[2503],simde_mm256_xor_si256(c2[1187],simde_mm256_xor_si256(c2[3384],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[
1641],simde_mm256_xor_si256(c2[4288],simde_mm256_xor_si256(c2[13089],simde_mm256_xor_si256(c2[1647],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[3849],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[6507],simde_mm256_xor_si256(c2[6063],simde_mm256_xor_si256(c2[8260],simde_mm256_xor_si256(c2[8700],simde_mm256_xor_si256(c2[7828],simde_mm256_xor_si256(c2[10025],simde_mm256_xor_si256(c2[10465],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[5648],simde_mm256_xor_si256(c2[5205],simde_mm256_xor_si256(c2[7402],simde_mm256_xor_si256(c2[7842],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[3447],simde_mm256_xor_si256(c2[3887],simde_mm256_xor_si256(c2[2588],simde_mm256_xor_si256(c2[5225],simde_mm256_xor_si256(c2[11385],simde_mm256_xor_si256(c2[14022],simde_mm256_xor_si256(c2[12264],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[822],simde_mm256_xor_si256(c2[6543],simde_mm256_xor_si256(c2[8323],simde_mm256_xor_si256(c2[10960],simde_mm256_xor_si256(c2[4366],simde_mm256_xor_si256(c2[6563],simde_mm256_xor_si256(c2[7003],simde_mm256_xor_si256(c2[9207],simde_mm256_xor_si256(c2[11404],simde_mm256_xor_si256(c2[11844],simde_mm256_xor_si256(c2[9668],simde_mm256_xor_si256(c2[12305],simde_mm256_xor_si256(c2[1307],simde_mm256_xor_si256(c2[3944],simde_mm256_xor_si256(c2[11429],simde_mm256_xor_si256(c2[13626],simde_mm256_xor_si256(c2[14066],c2[6148]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[90]=_mm256_xor_si256(c2[12765],_mm256_xor_si256(c2[8827],_mm256_xor_si256(c2[1961],_mm256_xor_si256(c2[13428],_mm256_xor_si256(c2[10384],_mm256_xor_si256(c2[5188],_mm256_xor_si256(c2[3444],c2[7886])))))));
+     d2[90]=simde_mm256_xor_si256(c2[12765],simde_mm256_xor_si256(c2[8827],simde_mm256_xor_si256(c2[1961],simde_mm256_xor_si256(c2[13428],simde_mm256_xor_si256(c2[10384],simde_mm256_xor_si256(c2[5188],simde_mm256_xor_si256(c2[3444],c2[7886])))))));
 
 //row: 10
-     d2[100]=_mm256_xor_si256(c2[6180],_mm256_xor_si256(c2[7082],_mm256_xor_si256(c2[8446],_mm256_xor_si256(c2[4982],_mm256_xor_si256(c2[605],c2[10409])))));
+     d2[100]=simde_mm256_xor_si256(c2[6180],simde_mm256_xor_si256(c2[7082],simde_mm256_xor_si256(c2[8446],simde_mm256_xor_si256(c2[4982],simde_mm256_xor_si256(c2[605],c2[10409])))));
 
 //row: 11
-     d2[110]=_mm256_xor_si256(c2[8803],_mm256_xor_si256(c2[441],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[11440],_mm256_xor_si256(c2[3528],_mm256_xor_si256(c2[6603],_mm256_xor_si256(c2[12760],_mm256_xor_si256(c2[3088],_mm256_xor_si256(c2[9245],_mm256_xor_si256(c2[7040],_mm256_xor_si256(c2[11461],_mm256_xor_si256(c2[3109],_mm256_xor_si256(c2[3549],_mm256_xor_si256(c2[7941],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[11907],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[8384],_mm256_xor_si256(c2[7964],_mm256_xor_si256(c2[13681],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[7520],_mm256_xor_si256(c2[13687],_mm256_xor_si256(c2[11923],_mm256_xor_si256(c2[4001],_mm256_xor_si256(c2[12382],_mm256_xor_si256(c2[4020],_mm256_xor_si256(c2[4460],_mm256_xor_si256(c2[2702],_mm256_xor_si256(c2[8869],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[10189],_mm256_xor_si256(c2[7129],_mm256_xor_si256(c2[13286],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[7567],_mm256_xor_si256(c2[7127],_mm256_xor_si256(c2[13284],_mm256_xor_si256(c2[11988],_mm256_xor_si256(c2[3626],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[5386],_mm256_xor_si256(c2[11543],_mm256_xor_si256(c2[7582],_mm256_xor_si256(c2[13749],_mm256_xor_si256(c2[8042],_mm256_xor_si256(c2[13769],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[6288],_mm256_xor_si256(c2[12445],_mm256_xor_si256(c2[3643],_mm256_xor_si256(c2[9800],_mm256_xor_si256(c2[7185],_mm256_xor_si256(c2[13342],_mm256_xor_si256(c2[9387],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[11586],_mm256_xor_si256(c2[3664],_mm256_xor_si256(c2[6761],_mm256_xor_si256(c2[12928],_mm256_xor_si256(c2[8527],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[11603],_mm256_xor_si256(c2[3681],_mm256_xor_si256(c2[11621],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3709],_mm256_xor_si256(c2[12063],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[8103],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[5926],_mm256_xor_si256(c2[11643],_mm256_xor_si256(c2[12083],_mm256_xor_si256(c2[9444],_mm256_xor_si256(c2[1
522],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[8127],_mm256_xor_si256(c2[1108],_mm256_xor_si256(c2[6825],_mm256_xor_si256(c2[7265],_mm256_xor_si256(c2[5505],_mm256_xor_si256(c2[11662],_mm256_xor_si256(c2[11660],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[6846],_mm256_xor_si256(c2[12563],_mm256_xor_si256(c2[13003],_mm256_xor_si256(c2[10360],_mm256_xor_si256(c2[2448],_mm256_xor_si256(c2[3769],_mm256_xor_si256(c2[9926],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[7305],_mm256_xor_si256(c2[13022],_mm256_xor_si256(c2[13462],_mm256_xor_si256(c2[8180],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[13023],_mm256_xor_si256(c2[10403],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[2920],_mm256_xor_si256(c2[9087],_mm256_xor_si256(c2[283],_mm256_xor_si256(c2[6440],_mm256_xor_si256(c2[9540],_mm256_xor_si256(c2[1188],_mm256_xor_si256(c2[1628],_mm256_xor_si256(c2[9549],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[10860],_mm256_xor_si256(c2[2948],_mm256_xor_si256(c2[11324],_mm256_xor_si256(c2[2962],_mm256_xor_si256(c2[3402],_mm256_xor_si256(c2[8683],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[10442],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[13543],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[1667],_mm256_xor_si256(c2[7824],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[9589],_mm256_xor_si256(c2[12684],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4762],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[6966],_mm256_xor_si256(c2[10923],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[12261],_mm256_xor_si256(c2[3909],_mm256_xor_si256(c2[4349],_mm256_xor_si256(c2[6989],_mm256_xor_si256(c2[13146],_mm256_xor_si256(c2[7868],_mm256_xor_si256(c2[14025],_mm256_xor_si256(c2[3927],_mm256_xor_si256(c2[9644],_mm256_xor_si256(c2[10084],_mm256_xor_si256(c2[14049],_mm256_xor_si256(c2[6127],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[10968],_mm256_xor_si256(c2[5262],_mm256_xor_si256(c2[10989],_mm256_xor_si256(c2[11429],_mm256_xor_si256(c2[10980],
_mm256_xor_si256(c2[3068],_mm256_xor_si256(c2[7023],_mm256_xor_si256(c2[13180],c2[7905])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[110]=simde_mm256_xor_si256(c2[8803],simde_mm256_xor_si256(c2[441],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[11440],simde_mm256_xor_si256(c2[3528],simde_mm256_xor_si256(c2[6603],simde_mm256_xor_si256(c2[12760],simde_mm256_xor_si256(c2[3088],simde_mm256_xor_si256(c2[9245],simde_mm256_xor_si256(c2[7040],simde_mm256_xor_si256(c2[11461],simde_mm256_xor_si256(c2[3109],simde_mm256_xor_si256(c2[3549],simde_mm256_xor_si256(c2[7941],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[11907],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[8384],simde_mm256_xor_si256(c2[7964],simde_mm256_xor_si256(c2[13681],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[7520],simde_mm256_xor_si256(c2[13687],simde_mm256_xor_si256(c2[11923],simde_mm256_xor_si256(c2[4001],simde_mm256_xor_si256(c2[12382],simde_mm256_xor_si256(c2[4020],simde_mm256_xor_si256(c2[4460],simde_mm256_xor_si256(c2[2702],simde_mm256_xor_si256(c2[8869],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[10189],simde_mm256_xor_si256(c2[7129],simde_mm256_xor_si256(c2[13286],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[7567],simde_mm256_xor_si256(c2[7127],simde_mm256_xor_si256(c2[13284],simde_mm256_xor_si256(c2[11988],simde_mm256_xor_si256(c2[3626],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[5386],simde_mm256_xor_si256(c2[11543],simde_mm256_xor_si256(c2[7582],simde_mm256_xor_si256(c2[13749],simde_mm256_xor_si256(c2[8042],simde_mm256_xor_si256(c2[13769],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[6288],simde_mm256_xor_si256(c2[12445],simde_mm256_xor_si256(c2[3643],simde_mm256_xor_si256(c2[9800],simde_mm256_xor_si256(c2[7185],simde_mm256_xor_si256(c2[13342],simde_mm256_xor_si256(c2[9387],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[11586],simde_mm256_xor_si256(c2[3664],simde_mm256_xor_si256(c2[6761],simde_mm256_xor_si256(c2[12928],simde_mm256_xor_si256(c2[8527],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[11603],simde_mm256_xor_si256
(c2[3681],simde_mm256_xor_si256(c2[11621],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[3709],simde_mm256_xor_si256(c2[12063],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[8103],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[5926],simde_mm256_xor_si256(c2[11643],simde_mm256_xor_si256(c2[12083],simde_mm256_xor_si256(c2[9444],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[8127],simde_mm256_xor_si256(c2[1108],simde_mm256_xor_si256(c2[6825],simde_mm256_xor_si256(c2[7265],simde_mm256_xor_si256(c2[5505],simde_mm256_xor_si256(c2[11662],simde_mm256_xor_si256(c2[11660],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[6846],simde_mm256_xor_si256(c2[12563],simde_mm256_xor_si256(c2[13003],simde_mm256_xor_si256(c2[10360],simde_mm256_xor_si256(c2[2448],simde_mm256_xor_si256(c2[3769],simde_mm256_xor_si256(c2[9926],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[7305],simde_mm256_xor_si256(c2[13022],simde_mm256_xor_si256(c2[13462],simde_mm256_xor_si256(c2[8180],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[13023],simde_mm256_xor_si256(c2[10403],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_si256(c2[2920],simde_mm256_xor_si256(c2[9087],simde_mm256_xor_si256(c2[283],simde_mm256_xor_si256(c2[6440],simde_mm256_xor_si256(c2[9540],simde_mm256_xor_si256(c2[1188],simde_mm256_xor_si256(c2[1628],simde_mm256_xor_si256(c2[9549],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[10860],simde_mm256_xor_si256(c2[2948],simde_mm256_xor_si256(c2[11324],simde_mm256_xor_si256(c2[2962],simde_mm256_xor_si256(c2[3402],simde_mm256_xor_si256(c2[8683],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[10442],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[13543],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[1667],simde_mm256_xor_si256(c2[7824],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[9589],simde_mm256_xor_si256(c2[
12684],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4762],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[6966],simde_mm256_xor_si256(c2[10923],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[12261],simde_mm256_xor_si256(c2[3909],simde_mm256_xor_si256(c2[4349],simde_mm256_xor_si256(c2[6989],simde_mm256_xor_si256(c2[13146],simde_mm256_xor_si256(c2[7868],simde_mm256_xor_si256(c2[14025],simde_mm256_xor_si256(c2[3927],simde_mm256_xor_si256(c2[9644],simde_mm256_xor_si256(c2[10084],simde_mm256_xor_si256(c2[14049],simde_mm256_xor_si256(c2[6127],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[10968],simde_mm256_xor_si256(c2[5262],simde_mm256_xor_si256(c2[10989],simde_mm256_xor_si256(c2[11429],simde_mm256_xor_si256(c2[10980],simde_mm256_xor_si256(c2[3068],simde_mm256_xor_si256(c2[7023],simde_mm256_xor_si256(c2[13180],c2[7905])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[120]=_mm256_xor_si256(c2[2207],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[4165],_mm256_xor_si256(c2[7261],_mm256_xor_si256(c2[4223],c2[9161])))));
+     d2[120]=simde_mm256_xor_si256(c2[2207],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[4165],simde_mm256_xor_si256(c2[7261],simde_mm256_xor_si256(c2[4223],c2[9161])))));
 
 //row: 13
-     d2[130]=_mm256_xor_si256(c2[6163],_mm256_xor_si256(c2[6603],_mm256_xor_si256(c2[9240],_mm256_xor_si256(c2[4403],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[8821],_mm256_xor_si256(c2[9261],_mm256_xor_si256(c2[5741],_mm256_xor_si256(c2[9707],_mm256_xor_si256(c2[5324],_mm256_xor_si256(c2[5764],_mm256_xor_si256(c2[5320],_mm256_xor_si256(c2[9723],_mm256_xor_si256(c2[9742],_mm256_xor_si256(c2[10182],_mm256_xor_si256(c2[502],_mm256_xor_si256(c2[1822],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[4929],_mm256_xor_si256(c2[13289],_mm256_xor_si256(c2[4927],_mm256_xor_si256(c2[9348],_mm256_xor_si256(c2[9788],_mm256_xor_si256(c2[3186],_mm256_xor_si256(c2[5382],_mm256_xor_si256(c2[5402],_mm256_xor_si256(c2[5842],_mm256_xor_si256(c2[4088],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[4985],_mm256_xor_si256(c2[7187],_mm256_xor_si256(c2[9386],_mm256_xor_si256(c2[6749],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[6327],_mm256_xor_si256(c2[9403],_mm256_xor_si256(c2[8981],_mm256_xor_si256(c2[9421],_mm256_xor_si256(c2[9863],_mm256_xor_si256(c2[5903],_mm256_xor_si256(c2[3286],_mm256_xor_si256(c2[3726],_mm256_xor_si256(c2[7244],_mm256_xor_si256(c2[13849],_mm256_xor_si256(c2[12547],_mm256_xor_si256(c2[12987],_mm256_xor_si256(c2[3305],_mm256_xor_si256(c2[9460],_mm256_xor_si256(c2[4206],_mm256_xor_si256(c2[4646],_mm256_xor_si256(c2[8160],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[5105],_mm256_xor_si256(c2[5980],_mm256_xor_si256(c2[4666],_mm256_xor_si256(c2[8203],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[12162],_mm256_xor_si256(c2[6900],_mm256_xor_si256(c2[7340],_mm256_xor_si256(c2[7349],_mm256_xor_si256(c2[8660],_mm256_xor_si256(c2[8684],_mm256_xor_si256(c2[9124],_mm256_xor_si256(c2[6483],_mm256_xor_si256(c2[8242],_mm256_xor_si256(c2[11343],_mm256_xor_si256(c2[13546],_mm256_xor_si256(c2[1222],_mm256_xor_si256(c2[10044],_mm256_xor_si256(c2[10484],_mm256_xor_si256(c2[12688],_mm256_xor_si256(c2[8723],_mm256_xor_si256(c2[9621],_m
m256_xor_si256(c2[10061],_mm256_xor_si256(c2[4789],_mm256_xor_si256(c2[5668],_mm256_xor_si256(c2[1287],_mm256_xor_si256(c2[1727],_mm256_xor_si256(c2[11849],_mm256_xor_si256(c2[2601],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[8780],c2[4823])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[130]=simde_mm256_xor_si256(c2[6163],simde_mm256_xor_si256(c2[6603],simde_mm256_xor_si256(c2[9240],simde_mm256_xor_si256(c2[4403],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[8821],simde_mm256_xor_si256(c2[9261],simde_mm256_xor_si256(c2[5741],simde_mm256_xor_si256(c2[9707],simde_mm256_xor_si256(c2[5324],simde_mm256_xor_si256(c2[5764],simde_mm256_xor_si256(c2[5320],simde_mm256_xor_si256(c2[9723],simde_mm256_xor_si256(c2[9742],simde_mm256_xor_si256(c2[10182],simde_mm256_xor_si256(c2[502],simde_mm256_xor_si256(c2[1822],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[4929],simde_mm256_xor_si256(c2[13289],simde_mm256_xor_si256(c2[4927],simde_mm256_xor_si256(c2[9348],simde_mm256_xor_si256(c2[9788],simde_mm256_xor_si256(c2[3186],simde_mm256_xor_si256(c2[5382],simde_mm256_xor_si256(c2[5402],simde_mm256_xor_si256(c2[5842],simde_mm256_xor_si256(c2[4088],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[4985],simde_mm256_xor_si256(c2[7187],simde_mm256_xor_si256(c2[9386],simde_mm256_xor_si256(c2[6749],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[6327],simde_mm256_xor_si256(c2[9403],simde_mm256_xor_si256(c2[8981],simde_mm256_xor_si256(c2[9421],simde_mm256_xor_si256(c2[9863],simde_mm256_xor_si256(c2[5903],simde_mm256_xor_si256(c2[3286],simde_mm256_xor_si256(c2[3726],simde_mm256_xor_si256(c2[7244],simde_mm256_xor_si256(c2[13849],simde_mm256_xor_si256(c2[12547],simde_mm256_xor_si256(c2[12987],simde_mm256_xor_si256(c2[3305],simde_mm256_xor_si256(c2[9460],simde_mm256_xor_si256(c2[4206],simde_mm256_xor_si256(c2[4646],simde_mm256_xor_si256(c2[8160],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[5105],simde_mm256_xor_si256(c2[5980],simde_mm256_xor_si256(c2[4666],simde_mm256_xor_si256(c2[8203],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[12162],simde_mm256_xor_si256(c2[6900],simde_mm256_xor_si256(c2[7340],simde_mm256_xor_si256(c2[7349],simde_mm256_xor_si256(c2[8660]
,simde_mm256_xor_si256(c2[8684],simde_mm256_xor_si256(c2[9124],simde_mm256_xor_si256(c2[6483],simde_mm256_xor_si256(c2[8242],simde_mm256_xor_si256(c2[11343],simde_mm256_xor_si256(c2[13546],simde_mm256_xor_si256(c2[1222],simde_mm256_xor_si256(c2[10044],simde_mm256_xor_si256(c2[10484],simde_mm256_xor_si256(c2[12688],simde_mm256_xor_si256(c2[8723],simde_mm256_xor_si256(c2[9621],simde_mm256_xor_si256(c2[10061],simde_mm256_xor_si256(c2[4789],simde_mm256_xor_si256(c2[5668],simde_mm256_xor_si256(c2[1287],simde_mm256_xor_si256(c2[1727],simde_mm256_xor_si256(c2[11849],simde_mm256_xor_si256(c2[2601],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[8780],c2[4823])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[140]=_mm256_xor_si256(c2[6162],_mm256_xor_si256(c2[5089],_mm256_xor_si256(c2[9981],_mm256_xor_si256(c2[13081],_mm256_xor_si256(c2[8705],c2[1308])))));
+     d2[140]=simde_mm256_xor_si256(c2[6162],simde_mm256_xor_si256(c2[5089],simde_mm256_xor_si256(c2[9981],simde_mm256_xor_si256(c2[13081],simde_mm256_xor_si256(c2[8705],c2[1308])))));
 
 //row: 15
-     d2[150]=_mm256_xor_si256(c2[12763],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[10563],_mm256_xor_si256(c2[6608],_mm256_xor_si256(c2[7048],_mm256_xor_si256(c2[2207],_mm256_xor_si256(c2[1342],_mm256_xor_si256(c2[11901],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[909],_mm256_xor_si256(c2[11924],_mm256_xor_si256(c2[11480],_mm256_xor_si256(c2[1804],_mm256_xor_si256(c2[2263],_mm256_xor_si256(c2[6662],_mm256_xor_si256(c2[7542],_mm256_xor_si256(c2[7982],_mm256_xor_si256(c2[11089],_mm256_xor_si256(c2[5360],_mm256_xor_si256(c2[10647],_mm256_xor_si256(c2[11087],_mm256_xor_si256(c2[1869],_mm256_xor_si256(c2[9346],_mm256_xor_si256(c2[11542],_mm256_xor_si256(c2[12002],_mm256_xor_si256(c2[10248],_mm256_xor_si256(c2[7163],_mm256_xor_si256(c2[7603],_mm256_xor_si256(c2[11145],_mm256_xor_si256(c2[13347],_mm256_xor_si256(c2[1027],_mm256_xor_si256(c2[1467],_mm256_xor_si256(c2[10721],_mm256_xor_si256(c2[12487],_mm256_xor_si256(c2[1044],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[1944],_mm256_xor_si256(c2[12063],_mm256_xor_si256(c2[9886],_mm256_xor_si256(c2[13404],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[5920],_mm256_xor_si256(c2[12521],_mm256_xor_si256(c2[5068],_mm256_xor_si256(c2[9465],_mm256_xor_si256(c2[1101],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[10806],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[7289],_mm256_xor_si256(c2[7729],_mm256_xor_si256(c2[11265],_mm256_xor_si256(c2[12140],_mm256_xor_si256(c2[10386],_mm256_xor_si256(c2[10826],_mm256_xor_si256(c2[1144],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[6880],_mm256_xor_si256(c2[3803],_mm256_xor_si256(c2[4243],_mm256_xor_si256(c2[13500],_mm256_xor_si256(c2[13509],_mm256_xor_si256(c2[741],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[12643],_mm256_xor_si256(c2[13962],_mm256_xor_si256(c2[323],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[5627],_mm256_xor_si256(c2[6942],_mm256_xor_si256(c2[7382],_mm256_xor_si256(c2[2565],_mm256_xor_si256(c2[4769],_mm256_xor_si2
56(c2[364],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[10925],_mm256_xor_si256(c2[2142],_mm256_xor_si256(c2[10949],_mm256_xor_si256(c2[11828],_mm256_xor_si256(c2[7887],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[8321],_mm256_xor_si256(c2[8761],_mm256_xor_si256(c2[9222],_mm256_xor_si256(c2[861],_mm256_xor_si256(c2[10543],c2[10983]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[150]=simde_mm256_xor_si256(c2[12763],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[10563],simde_mm256_xor_si256(c2[6608],simde_mm256_xor_si256(c2[7048],simde_mm256_xor_si256(c2[2207],simde_mm256_xor_si256(c2[1342],simde_mm256_xor_si256(c2[11901],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[909],simde_mm256_xor_si256(c2[11924],simde_mm256_xor_si256(c2[11480],simde_mm256_xor_si256(c2[1804],simde_mm256_xor_si256(c2[2263],simde_mm256_xor_si256(c2[6662],simde_mm256_xor_si256(c2[7542],simde_mm256_xor_si256(c2[7982],simde_mm256_xor_si256(c2[11089],simde_mm256_xor_si256(c2[5360],simde_mm256_xor_si256(c2[10647],simde_mm256_xor_si256(c2[11087],simde_mm256_xor_si256(c2[1869],simde_mm256_xor_si256(c2[9346],simde_mm256_xor_si256(c2[11542],simde_mm256_xor_si256(c2[12002],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[7163],simde_mm256_xor_si256(c2[7603],simde_mm256_xor_si256(c2[11145],simde_mm256_xor_si256(c2[13347],simde_mm256_xor_si256(c2[1027],simde_mm256_xor_si256(c2[1467],simde_mm256_xor_si256(c2[10721],simde_mm256_xor_si256(c2[12487],simde_mm256_xor_si256(c2[1044],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[12063],simde_mm256_xor_si256(c2[9886],simde_mm256_xor_si256(c2[13404],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[5920],simde_mm256_xor_si256(c2[12521],simde_mm256_xor_si256(c2[5068],simde_mm256_xor_si256(c2[9465],simde_mm256_xor_si256(c2[1101],simde_mm256_xor_si256(c2[1541],simde_mm256_xor_si256(c2[10806],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[7289],simde_mm256_xor_si256(c2[7729],simde_mm256_xor_si256(c2[11265],simde_mm256_xor_si256(c2[12140],simde_mm256_xor_si256(c2[10386],simde_mm256_xor_si256(c2[10826],simde_mm256_xor_si256(c2[1144],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[6880],simde_mm256_xor_si256(c2[3803],simde_mm256_xor_si256(c2[4243],simde_mm256_xor_si256(c2[13500],simde_mm256_
xor_si256(c2[13509],simde_mm256_xor_si256(c2[741],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[12643],simde_mm256_xor_si256(c2[13962],simde_mm256_xor_si256(c2[323],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[5627],simde_mm256_xor_si256(c2[6942],simde_mm256_xor_si256(c2[7382],simde_mm256_xor_si256(c2[2565],simde_mm256_xor_si256(c2[4769],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[10925],simde_mm256_xor_si256(c2[2142],simde_mm256_xor_si256(c2[10949],simde_mm256_xor_si256(c2[11828],simde_mm256_xor_si256(c2[7887],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[8321],simde_mm256_xor_si256(c2[8761],simde_mm256_xor_si256(c2[9222],simde_mm256_xor_si256(c2[861],simde_mm256_xor_si256(c2[10543],c2[10983]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[160]=_mm256_xor_si256(c2[9241],_mm256_xor_si256(c2[11888],_mm256_xor_si256(c2[7041],_mm256_xor_si256(c2[3526],_mm256_xor_si256(c2[11909],_mm256_xor_si256(c2[8389],_mm256_xor_si256(c2[12345],_mm256_xor_si256(c2[2222],_mm256_xor_si256(c2[8402],_mm256_xor_si256(c2[7968],_mm256_xor_si256(c2[12361],_mm256_xor_si256(c2[12820],_mm256_xor_si256(c2[3140],_mm256_xor_si256(c2[4460],_mm256_xor_si256(c2[5344],_mm256_xor_si256(c2[7567],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[7565],_mm256_xor_si256(c2[12426],_mm256_xor_si256(c2[5824],_mm256_xor_si256(c2[8020],_mm256_xor_si256(c2[8480],_mm256_xor_si256(c2[6726],_mm256_xor_si256(c2[4081],_mm256_xor_si256(c2[7623],_mm256_xor_si256(c2[9825],_mm256_xor_si256(c2[12024],_mm256_xor_si256(c2[7209],_mm256_xor_si256(c2[8965],_mm256_xor_si256(c2[12041],_mm256_xor_si256(c2[12069],_mm256_xor_si256(c2[12501],_mm256_xor_si256(c2[8541],_mm256_xor_si256(c2[6364],_mm256_xor_si256(c2[9882],_mm256_xor_si256(c2[2408],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[5943],_mm256_xor_si256(c2[12108],_mm256_xor_si256(c2[5941],_mm256_xor_si256(c2[7284],_mm256_xor_si256(c2[10808],_mm256_xor_si256(c2[4207],_mm256_xor_si256(c2[7743],_mm256_xor_si256(c2[8628],_mm256_xor_si256(c2[7304],_mm256_xor_si256(c2[10841],_mm256_xor_si256(c2[3368],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[9988],_mm256_xor_si256(c2[9987],_mm256_xor_si256(c2[11308],_mm256_xor_si256(c2[11762],_mm256_xor_si256(c2[9121],_mm256_xor_si256(c2[10880],_mm256_xor_si256(c2[13981],_mm256_xor_si256(c2[2105],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[13122],_mm256_xor_si256(c2[1247],_mm256_xor_si256(c2[11361],_mm256_xor_si256(c2[12709],_mm256_xor_si256(c2[7427],_mm256_xor_si256(c2[8306],_mm256_xor_si256(c2[4365],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[5249],_mm256_xor_si256(c2[8763],_mm256_xor_si256(c2[5700],_mm256_xor_si256(c2[11428],c2[7461]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[9241],simde_mm256_xor_si256(c2[11888],simde_mm256_xor_si256(c2[7041],simde_mm256_xor_si256(c2[3526],simde_mm256_xor_si256(c2[11909],simde_mm256_xor_si256(c2[8389],simde_mm256_xor_si256(c2[12345],simde_mm256_xor_si256(c2[2222],simde_mm256_xor_si256(c2[8402],simde_mm256_xor_si256(c2[7968],simde_mm256_xor_si256(c2[12361],simde_mm256_xor_si256(c2[12820],simde_mm256_xor_si256(c2[3140],simde_mm256_xor_si256(c2[4460],simde_mm256_xor_si256(c2[5344],simde_mm256_xor_si256(c2[7567],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[7565],simde_mm256_xor_si256(c2[12426],simde_mm256_xor_si256(c2[5824],simde_mm256_xor_si256(c2[8020],simde_mm256_xor_si256(c2[8480],simde_mm256_xor_si256(c2[6726],simde_mm256_xor_si256(c2[4081],simde_mm256_xor_si256(c2[7623],simde_mm256_xor_si256(c2[9825],simde_mm256_xor_si256(c2[12024],simde_mm256_xor_si256(c2[7209],simde_mm256_xor_si256(c2[8965],simde_mm256_xor_si256(c2[12041],simde_mm256_xor_si256(c2[12069],simde_mm256_xor_si256(c2[12501],simde_mm256_xor_si256(c2[8541],simde_mm256_xor_si256(c2[6364],simde_mm256_xor_si256(c2[9882],simde_mm256_xor_si256(c2[2408],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[5943],simde_mm256_xor_si256(c2[12108],simde_mm256_xor_si256(c2[5941],simde_mm256_xor_si256(c2[7284],simde_mm256_xor_si256(c2[10808],simde_mm256_xor_si256(c2[4207],simde_mm256_xor_si256(c2[7743],simde_mm256_xor_si256(c2[8628],simde_mm256_xor_si256(c2[7304],simde_mm256_xor_si256(c2[10841],simde_mm256_xor_si256(c2[3368],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[9988],simde_mm256_xor_si256(c2[9987],simde_mm256_xor_si256(c2[11308],simde_mm256_xor_si256(c2[11762],simde_mm256_xor_si256(c2[9121],simde_mm256_xor_si256(c2[10880],simde_mm256_xor_si256(c2[13981],simde_mm256_xor_si256(c2[2105],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[13122],simde_mm256_xor_si256(c2[1247],simde_mm256_xor_si256(c2[11361],simde_mm256_xor_si256(c2[12709],simde_mm256_xor_si256(c2[7427],simde_mm256_xo
r_si256(c2[8306],simde_mm256_xor_si256(c2[4365],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[5249],simde_mm256_xor_si256(c2[8763],simde_mm256_xor_si256(c2[5700],simde_mm256_xor_si256(c2[11428],c2[7461]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[170]=_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[8644],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[8701],c2[2187]))));
+     d2[170]=simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[8644],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[8701],c2[2187]))));
 
 //row: 18
-     d2[180]=_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[2889],_mm256_xor_si256(c2[1589],_mm256_xor_si256(c2[6084],c2[3469]))));
+     d2[180]=simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[2889],simde_mm256_xor_si256(c2[1589],simde_mm256_xor_si256(c2[6084],c2[3469]))));
 
 //row: 19
-     d2[190]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[9265],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[6328],c2[4161]))));
+     d2[190]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[9265],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[6328],c2[4161]))));
 
 //row: 20
-     d2[200]=_mm256_xor_si256(c2[8362],_mm256_xor_si256(c2[11009],_mm256_xor_si256(c2[6162],_mm256_xor_si256(c2[2647],_mm256_xor_si256(c2[5729],_mm256_xor_si256(c2[11020],_mm256_xor_si256(c2[7500],_mm256_xor_si256(c2[11466],_mm256_xor_si256(c2[7523],_mm256_xor_si256(c2[7089],_mm256_xor_si256(c2[11482],_mm256_xor_si256(c2[11941],_mm256_xor_si256(c2[2261],_mm256_xor_si256(c2[3581],_mm256_xor_si256(c2[945],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[969],_mm256_xor_si256(c2[6686],_mm256_xor_si256(c2[11547],_mm256_xor_si256(c2[4945],_mm256_xor_si256(c2[7141],_mm256_xor_si256(c2[7601],_mm256_xor_si256(c2[5847],_mm256_xor_si256(c2[3202],_mm256_xor_si256(c2[6744],_mm256_xor_si256(c2[8946],_mm256_xor_si256(c2[11145],_mm256_xor_si256(c2[6320],_mm256_xor_si256(c2[8086],_mm256_xor_si256(c2[11162],_mm256_xor_si256(c2[11180],_mm256_xor_si256(c2[11622],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[5485],_mm256_xor_si256(c2[9003],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[667],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[11229],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[6405],_mm256_xor_si256(c2[9929],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[6864],_mm256_xor_si256(c2[7749],_mm256_xor_si256(c2[6425],_mm256_xor_si256(c2[9962],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[13921],_mm256_xor_si256(c2[9109],_mm256_xor_si256(c2[9108],_mm256_xor_si256(c2[10429],_mm256_xor_si256(c2[10883],_mm256_xor_si256(c2[8242],_mm256_xor_si256(c2[10001],_mm256_xor_si256(c2[13102],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[12243],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[10482],_mm256_xor_si256(c2[11820],_mm256_xor_si256(c2[6548],_mm256_xor_si256(c2[7427],_mm256_xor_si256(c2[3486],_mm256_xor_si256(c2[13608],_mm256_xor_si256(c2[4360],_mm256_xor_si256(c2[4821],_mm256_xor_si256(c2[10549],c2[6582]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[200]=simde_mm256_xor_si256(c2[8362],simde_mm256_xor_si256(c2[11009],simde_mm256_xor_si256(c2[6162],simde_mm256_xor_si256(c2[2647],simde_mm256_xor_si256(c2[5729],simde_mm256_xor_si256(c2[11020],simde_mm256_xor_si256(c2[7500],simde_mm256_xor_si256(c2[11466],simde_mm256_xor_si256(c2[7523],simde_mm256_xor_si256(c2[7089],simde_mm256_xor_si256(c2[11482],simde_mm256_xor_si256(c2[11941],simde_mm256_xor_si256(c2[2261],simde_mm256_xor_si256(c2[3581],simde_mm256_xor_si256(c2[945],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[969],simde_mm256_xor_si256(c2[6686],simde_mm256_xor_si256(c2[11547],simde_mm256_xor_si256(c2[4945],simde_mm256_xor_si256(c2[7141],simde_mm256_xor_si256(c2[7601],simde_mm256_xor_si256(c2[5847],simde_mm256_xor_si256(c2[3202],simde_mm256_xor_si256(c2[6744],simde_mm256_xor_si256(c2[8946],simde_mm256_xor_si256(c2[11145],simde_mm256_xor_si256(c2[6320],simde_mm256_xor_si256(c2[8086],simde_mm256_xor_si256(c2[11162],simde_mm256_xor_si256(c2[11180],simde_mm256_xor_si256(c2[11622],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[5485],simde_mm256_xor_si256(c2[9003],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[667],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[11229],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[6405],simde_mm256_xor_si256(c2[9929],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[6864],simde_mm256_xor_si256(c2[7749],simde_mm256_xor_si256(c2[6425],simde_mm256_xor_si256(c2[9962],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[13921],simde_mm256_xor_si256(c2[9109],simde_mm256_xor_si256(c2[9108],simde_mm256_xor_si256(c2[10429],simde_mm256_xor_si256(c2[10883],simde_mm256_xor_si256(c2[8242],simde_mm256_xor_si256(c2[10001],simde_mm256_xor_si256(c2[13102],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[12243],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[10482],simde_mm256_xor_si256(c2[11820],simde_mm256_xor_si
256(c2[6548],simde_mm256_xor_si256(c2[7427],simde_mm256_xor_si256(c2[3486],simde_mm256_xor_si256(c2[13608],simde_mm256_xor_si256(c2[4360],simde_mm256_xor_si256(c2[4821],simde_mm256_xor_si256(c2[10549],c2[6582]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[210]=_mm256_xor_si256(c2[6622],_mm256_xor_si256(c2[6705],_mm256_xor_si256(c2[2084],_mm256_xor_si256(c2[12288],c2[3503]))));
+     d2[210]=simde_mm256_xor_si256(c2[6622],simde_mm256_xor_si256(c2[6705],simde_mm256_xor_si256(c2[2084],simde_mm256_xor_si256(c2[12288],c2[3503]))));
 
 //row: 22
-     d2[220]=_mm256_xor_si256(c2[7485],_mm256_xor_si256(c2[9040],_mm256_xor_si256(c2[10381],c2[12669])));
+     d2[220]=simde_mm256_xor_si256(c2[7485],simde_mm256_xor_si256(c2[9040],simde_mm256_xor_si256(c2[10381],c2[12669])));
 
 //row: 23
-     d2[230]=_mm256_xor_si256(c2[11027],_mm256_xor_si256(c2[7961],_mm256_xor_si256(c2[2404],c2[4323])));
+     d2[230]=simde_mm256_xor_si256(c2[11027],simde_mm256_xor_si256(c2[7961],simde_mm256_xor_si256(c2[2404],c2[4323])));
 
 //row: 24
-     d2[240]=_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[3528],_mm256_xor_si256(c2[12760],_mm256_xor_si256(c2[9245],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[3549],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[13687],_mm256_xor_si256(c2[4001],_mm256_xor_si256(c2[4460],_mm256_xor_si256(c2[8869],_mm256_xor_si256(c2[10189],_mm256_xor_si256(c2[10628],_mm256_xor_si256(c2[13286],_mm256_xor_si256(c2[7567],_mm256_xor_si256(c2[13284],_mm256_xor_si256(c2[6243],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[11543],_mm256_xor_si256(c2[13749],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[12445],_mm256_xor_si256(c2[9800],_mm256_xor_si256(c2[13342],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[3664],_mm256_xor_si256(c2[12928],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[3681],_mm256_xor_si256(c2[3709],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[12083],_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[8127],_mm256_xor_si256(c2[7265],_mm256_xor_si256(c2[11662],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[12105],_mm256_xor_si256(c2[13003],_mm256_xor_si256(c2[2448],_mm256_xor_si256(c2[9926],_mm256_xor_si256(c2[13462],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[13023],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[9087],_mm256_xor_si256(c2[6440],_mm256_xor_si256(c2[1628],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[2948],_mm256_xor_si256(c2[3402],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[7824],_mm256_xor_si256(c2[9589],_mm256_xor_si256(c2[4762],_mm256_xor_si256(c2[6966],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[4349],_mm256_xor_si256(c2[13146],_mm256_xor_si256(c2[14025],_mm256_xor_si256(c2[10084],_mm256_xor_si256(c2[6127],_mm256_xor_si256(c2[10968],_mm256_xor_si256(c2[11429],_mm256_xor_si256(c2[3068],c2[13180]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[240]=simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[3528],simde_mm256_xor_si256(c2[12760],simde_mm256_xor_si256(c2[9245],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[3549],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[13687],simde_mm256_xor_si256(c2[4001],simde_mm256_xor_si256(c2[4460],simde_mm256_xor_si256(c2[8869],simde_mm256_xor_si256(c2[10189],simde_mm256_xor_si256(c2[10628],simde_mm256_xor_si256(c2[13286],simde_mm256_xor_si256(c2[7567],simde_mm256_xor_si256(c2[13284],simde_mm256_xor_si256(c2[6243],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[11543],simde_mm256_xor_si256(c2[13749],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[12445],simde_mm256_xor_si256(c2[9800],simde_mm256_xor_si256(c2[13342],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[3664],simde_mm256_xor_si256(c2[12928],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[3681],simde_mm256_xor_si256(c2[3709],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[12083],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[8127],simde_mm256_xor_si256(c2[7265],simde_mm256_xor_si256(c2[11662],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[12105],simde_mm256_xor_si256(c2[13003],simde_mm256_xor_si256(c2[2448],simde_mm256_xor_si256(c2[9926],simde_mm256_xor_si256(c2[13462],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[13023],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_si256(c2[9087],simde_mm256_xor_si256(c2[6440],simde_mm256_xor_si256(c2[1628],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[2948],simde_mm256_xor_si256(c2[3402],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[7824],simde_mm256_xor_si256(c2[9589],simde_mm256_xor_si256(c2[4762],simde_mm256_xor_si256(c2[6966],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[4349],simde_mm256_xor_si256(c2[13
146],simde_mm256_xor_si256(c2[14025],simde_mm256_xor_si256(c2[10084],simde_mm256_xor_si256(c2[6127],simde_mm256_xor_si256(c2[10968],simde_mm256_xor_si256(c2[11429],simde_mm256_xor_si256(c2[3068],c2[13180]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[250]=_mm256_xor_si256(c2[5305],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[143],c2[6441])));
+     d2[250]=simde_mm256_xor_si256(c2[5305],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[143],c2[6441])));
 
 //row: 26
-     d2[260]=_mm256_xor_si256(c2[6168],_mm256_xor_si256(c2[6203],_mm256_xor_si256(c2[13289],c2[1622])));
+     d2[260]=simde_mm256_xor_si256(c2[6168],simde_mm256_xor_si256(c2[6203],simde_mm256_xor_si256(c2[13289],c2[1622])));
 
 //row: 27
-     d2[270]=_mm256_xor_si256(c2[7946],_mm256_xor_si256(c2[12880],c2[7209]));
+     d2[270]=simde_mm256_xor_si256(c2[7946],simde_mm256_xor_si256(c2[12880],c2[7209]));
 
 //row: 28
-     d2[280]=_mm256_xor_si256(c2[4840],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[8301],c2[4827])));
+     d2[280]=simde_mm256_xor_si256(c2[4840],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[8301],c2[4827])));
 
 //row: 29
-     d2[290]=_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[5720],_mm256_xor_si256(c2[883],_mm256_xor_si256(c2[11007],_mm256_xor_si256(c2[11447],_mm256_xor_si256(c2[5741],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[6187],_mm256_xor_si256(c2[11900],_mm256_xor_si256(c2[2244],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[6203],_mm256_xor_si256(c2[6662],_mm256_xor_si256(c2[11061],_mm256_xor_si256(c2[11941],_mm256_xor_si256(c2[12381],_mm256_xor_si256(c2[1409],_mm256_xor_si256(c2[9769],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[1407],_mm256_xor_si256(c2[6268],_mm256_xor_si256(c2[13745],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[11562],_mm256_xor_si256(c2[12002],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[3667],_mm256_xor_si256(c2[5426],_mm256_xor_si256(c2[5866],_mm256_xor_si256(c2[1041],_mm256_xor_si256(c2[2807],_mm256_xor_si256(c2[5443],_mm256_xor_si256(c2[5883],_mm256_xor_si256(c2[5901],_mm256_xor_si256(c2[6343],_mm256_xor_si256(c2[2383],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[9889],_mm256_xor_si256(c2[10329],_mm256_xor_si256(c2[9467],_mm256_xor_si256(c2[13864],_mm256_xor_si256(c2[5500],_mm256_xor_si256(c2[5940],_mm256_xor_si256(c2[1126],_mm256_xor_si256(c2[4640],_mm256_xor_si256(c2[11688],_mm256_xor_si256(c2[12128],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[2460],_mm256_xor_si256(c2[706],_mm256_xor_si256(c2[1146],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[11289],_mm256_xor_si256(c2[8202],_mm256_xor_si256(c2[8642],_mm256_xor_si256(c2[9089],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[3829],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[7823],_mm256_xor_si256(c2[10026],_mm256_xor_si256(c2[11341],_mm256_xor_si256(c2[11781],_mm256_xor_si256(c2[6964],_mm256_xor_si256(c2[9168],_mm256_xor_si256(c2[4763],_mm256_xor_si256(c2[5203],_mm256_xor_si256(c2[9603
],_mm256_xor_si256(c2[6541],_mm256_xor_si256(c2[1269],_mm256_xor_si256(c2[2148],_mm256_xor_si256(c2[12286],_mm256_xor_si256(c2[8329],_mm256_xor_si256(c2[12720],_mm256_xor_si256(c2[13160],_mm256_xor_si256(c2[13621],_mm256_xor_si256(c2[5260],_mm256_xor_si256(c2[863],c2[1303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[290]=simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[5720],simde_mm256_xor_si256(c2[883],simde_mm256_xor_si256(c2[11007],simde_mm256_xor_si256(c2[11447],simde_mm256_xor_si256(c2[5741],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[6187],simde_mm256_xor_si256(c2[11900],simde_mm256_xor_si256(c2[2244],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[6203],simde_mm256_xor_si256(c2[6662],simde_mm256_xor_si256(c2[11061],simde_mm256_xor_si256(c2[11941],simde_mm256_xor_si256(c2[12381],simde_mm256_xor_si256(c2[1409],simde_mm256_xor_si256(c2[9769],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[1407],simde_mm256_xor_si256(c2[6268],simde_mm256_xor_si256(c2[13745],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[11562],simde_mm256_xor_si256(c2[12002],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[3667],simde_mm256_xor_si256(c2[5426],simde_mm256_xor_si256(c2[5866],simde_mm256_xor_si256(c2[1041],simde_mm256_xor_si256(c2[2807],simde_mm256_xor_si256(c2[5443],simde_mm256_xor_si256(c2[5883],simde_mm256_xor_si256(c2[5901],simde_mm256_xor_si256(c2[6343],simde_mm256_xor_si256(c2[2383],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[9889],simde_mm256_xor_si256(c2[10329],simde_mm256_xor_si256(c2[9467],simde_mm256_xor_si256(c2[13864],simde_mm256_xor_si256(c2[5500],simde_mm256_xor_si256(c2[5940],simde_mm256_xor_si256(c2[1126],simde_mm256_xor_si256(c2[4640],simde_mm256_xor_si256(c2[11688],simde_mm256_xor_si256(c2[12128],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[2460],simde_mm256_xor_si256(c2[706],simde_mm256_xor_si256(c2[1146],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[11289],simde_mm256_xor_si256(c2[8202],simde_mm256_xor_si256(c2[8642],simde_mm256_xor_si256(c2[9089],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[3829],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2
[5604],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[7823],simde_mm256_xor_si256(c2[10026],simde_mm256_xor_si256(c2[11341],simde_mm256_xor_si256(c2[11781],simde_mm256_xor_si256(c2[6964],simde_mm256_xor_si256(c2[9168],simde_mm256_xor_si256(c2[4763],simde_mm256_xor_si256(c2[5203],simde_mm256_xor_si256(c2[9603],simde_mm256_xor_si256(c2[6541],simde_mm256_xor_si256(c2[1269],simde_mm256_xor_si256(c2[2148],simde_mm256_xor_si256(c2[12286],simde_mm256_xor_si256(c2[8329],simde_mm256_xor_si256(c2[12720],simde_mm256_xor_si256(c2[13160],simde_mm256_xor_si256(c2[13621],simde_mm256_xor_si256(c2[5260],simde_mm256_xor_si256(c2[863],c2[1303]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[300]=_mm256_xor_si256(c2[7486],_mm256_xor_si256(c2[10123],_mm256_xor_si256(c2[4846],_mm256_xor_si256(c2[5286],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[11882],_mm256_xor_si256(c2[10144],_mm256_xor_si256(c2[6184],_mm256_xor_si256(c2[6624],_mm256_xor_si256(c2[10140],_mm256_xor_si256(c2[10580],_mm256_xor_si256(c2[6647],_mm256_xor_si256(c2[6203],_mm256_xor_si256(c2[10166],_mm256_xor_si256(c2[10606],_mm256_xor_si256(c2[11065],_mm256_xor_si256(c2[1385],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[5802],_mm256_xor_si256(c2[13722],_mm256_xor_si256(c2[83],_mm256_xor_si256(c2[5360],_mm256_xor_si256(c2[5800],_mm256_xor_si256(c2[10661],_mm256_xor_si256(c2[4069],_mm256_xor_si256(c2[5825],_mm256_xor_si256(c2[6265],_mm256_xor_si256(c2[6725],_mm256_xor_si256(c2[4521],_mm256_xor_si256(c2[4961],_mm256_xor_si256(c2[1886],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[5868],_mm256_xor_si256(c2[7620],_mm256_xor_si256(c2[8060],_mm256_xor_si256(c2[9829],_mm256_xor_si256(c2[10269],_mm256_xor_si256(c2[5444],_mm256_xor_si256(c2[6760],_mm256_xor_si256(c2[7200],_mm256_xor_si256(c2[9846],_mm256_xor_si256(c2[10286],_mm256_xor_si256(c2[10304],_mm256_xor_si256(c2[10746],_mm256_xor_si256(c2[6346],_mm256_xor_si256(c2[6786],_mm256_xor_si256(c2[4609],_mm256_xor_si256(c2[7687],_mm256_xor_si256(c2[8127],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[10320],_mm256_xor_si256(c2[13860],_mm256_xor_si256(c2[4188],_mm256_xor_si256(c2[9903],_mm256_xor_si256(c2[10343],_mm256_xor_si256(c2[5529],_mm256_xor_si256(c2[9043],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[2442],_mm256_xor_si256(c2[5988],_mm256_xor_si256(c2[6423],_mm256_xor_si256(c2[6863],_mm256_xor_si256(c2[5109],_mm256_xor_si256(c2[5549],_mm256_xor_si256(c2[4223],_mm256_xor_si256(c2[9086],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[12605],_mm256_xor_si256(c2[13045],_mm256_xor_si256(c2[8223],_mm256_xor_si256(c2[8222],_mm256_xor_si256(c2[9
103],_mm256_xor_si256(c2[9543],_mm256_xor_si256(c2[10007],_mm256_xor_si256(c2[7366],_mm256_xor_si256(c2[8685],_mm256_xor_si256(c2[9125],_mm256_xor_si256(c2[12226],_mm256_xor_si256(c2[13989],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[1665],_mm256_xor_si256(c2[2105],_mm256_xor_si256(c2[11367],_mm256_xor_si256(c2[13121],_mm256_xor_si256(c2[13561],_mm256_xor_si256(c2[9166],_mm256_xor_si256(c2[9606],_mm256_xor_si256(c2[10944],_mm256_xor_si256(c2[5662],_mm256_xor_si256(c2[6101],_mm256_xor_si256(c2[6541],_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[12282],_mm256_xor_si256(c2[12722],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[3484],_mm256_xor_si256(c2[3945],_mm256_xor_si256(c2[9663],_mm256_xor_si256(c2[5266],c2[5706])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[300]=simde_mm256_xor_si256(c2[7486],simde_mm256_xor_si256(c2[10123],simde_mm256_xor_si256(c2[4846],simde_mm256_xor_si256(c2[5286],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[11882],simde_mm256_xor_si256(c2[10144],simde_mm256_xor_si256(c2[6184],simde_mm256_xor_si256(c2[6624],simde_mm256_xor_si256(c2[10140],simde_mm256_xor_si256(c2[10580],simde_mm256_xor_si256(c2[6647],simde_mm256_xor_si256(c2[6203],simde_mm256_xor_si256(c2[10166],simde_mm256_xor_si256(c2[10606],simde_mm256_xor_si256(c2[11065],simde_mm256_xor_si256(c2[1385],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[5802],simde_mm256_xor_si256(c2[13722],simde_mm256_xor_si256(c2[83],simde_mm256_xor_si256(c2[5360],simde_mm256_xor_si256(c2[5800],simde_mm256_xor_si256(c2[10661],simde_mm256_xor_si256(c2[4069],simde_mm256_xor_si256(c2[5825],simde_mm256_xor_si256(c2[6265],simde_mm256_xor_si256(c2[6725],simde_mm256_xor_si256(c2[4521],simde_mm256_xor_si256(c2[4961],simde_mm256_xor_si256(c2[1886],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[5868],simde_mm256_xor_si256(c2[7620],simde_mm256_xor_si256(c2[8060],simde_mm256_xor_si256(c2[9829],simde_mm256_xor_si256(c2[10269],simde_mm256_xor_si256(c2[5444],simde_mm256_xor_si256(c2[6760],simde_mm256_xor_si256(c2[7200],simde_mm256_xor_si256(c2[9846],simde_mm256_xor_si256(c2[10286],simde_mm256_xor_si256(c2[10304],simde_mm256_xor_si256(c2[10746],simde_mm256_xor_si256(c2[6346],simde_mm256_xor_si256(c2[6786],simde_mm256_xor_si256(c2[4609],simde_mm256_xor_si256(c2[7687],simde_mm256_xor_si256(c2[8127],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[10320],simde_mm256_xor_si256(c2[13860],simde_mm256_xor_si256(c2[4188],simde_mm256_xor_si256(c2[9903],simde_mm256_xor_si256(c2[10343],simde_mm256_xor_si256(c2[5529],simde_mm256_xor_si256(c2[9043],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[2442],simde_mm256_xor_si256(c2[5988],simde_mm256_xor_si25
6(c2[6423],simde_mm256_xor_si256(c2[6863],simde_mm256_xor_si256(c2[5109],simde_mm256_xor_si256(c2[5549],simde_mm256_xor_si256(c2[4223],simde_mm256_xor_si256(c2[9086],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[12605],simde_mm256_xor_si256(c2[13045],simde_mm256_xor_si256(c2[8223],simde_mm256_xor_si256(c2[8222],simde_mm256_xor_si256(c2[9103],simde_mm256_xor_si256(c2[9543],simde_mm256_xor_si256(c2[10007],simde_mm256_xor_si256(c2[7366],simde_mm256_xor_si256(c2[8685],simde_mm256_xor_si256(c2[9125],simde_mm256_xor_si256(c2[12226],simde_mm256_xor_si256(c2[13989],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[1665],simde_mm256_xor_si256(c2[2105],simde_mm256_xor_si256(c2[11367],simde_mm256_xor_si256(c2[13121],simde_mm256_xor_si256(c2[13561],simde_mm256_xor_si256(c2[9166],simde_mm256_xor_si256(c2[9606],simde_mm256_xor_si256(c2[10944],simde_mm256_xor_si256(c2[5662],simde_mm256_xor_si256(c2[6101],simde_mm256_xor_si256(c2[6541],simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[12282],simde_mm256_xor_si256(c2[12722],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[3484],simde_mm256_xor_si256(c2[3945],simde_mm256_xor_si256(c2[9663],simde_mm256_xor_si256(c2[5266],c2[5706])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[310]=_mm256_xor_si256(c2[4844],_mm256_xor_si256(c2[5287],_mm256_xor_si256(c2[7481],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[2644],_mm256_xor_si256(c2[3087],_mm256_xor_si256(c2[13208],_mm256_xor_si256(c2[13201],_mm256_xor_si256(c2[13641],_mm256_xor_si256(c2[7502],_mm256_xor_si256(c2[7945],_mm256_xor_si256(c2[3982],_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[7948],_mm256_xor_si256(c2[7941],_mm256_xor_si256(c2[8381],_mm256_xor_si256(c2[13226],_mm256_xor_si256(c2[4005],_mm256_xor_si256(c2[4448],_mm256_xor_si256(c2[3561],_mm256_xor_si256(c2[4004],_mm256_xor_si256(c2[7964],_mm256_xor_si256(c2[8407],_mm256_xor_si256(c2[8423],_mm256_xor_si256(c2[8866],_mm256_xor_si256(c2[12822],_mm256_xor_si256(c2[13265],_mm256_xor_si256(c2[63],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[3160],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[11520],_mm256_xor_si256(c2[11963],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[3161],_mm256_xor_si256(c2[3601],_mm256_xor_si256(c2[8029],_mm256_xor_si256(c2[8462],_mm256_xor_si256(c2[1427],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[3623],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[4083],_mm256_xor_si256(c2[4526],_mm256_xor_si256(c2[2329],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[13763],_mm256_xor_si256(c2[13766],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[3226],_mm256_xor_si256(c2[3669],_mm256_xor_si256(c2[5428],_mm256_xor_si256(c2[5861],_mm256_xor_si256(c2[7627],_mm256_xor_si256(c2[7620],_mm256_xor_si256(c2[8060],_mm256_xor_si256(c2[8949],_mm256_xor_si256(c2[2802],_mm256_xor_si256(c2[3245],_mm256_xor_si256(c2[4568],_mm256_xor_si256(c2[5001],_mm256_xor_si256(c2[7644],_mm256_xor_si256(c2[7647],_mm256_xor_si256(c2[8087],_mm256_xor_si256(c2[7662],_mm256_xor_si256(c2[8105],_mm256_xor_si256(c2[8104],_mm256_xor_si256(c2[8547],_mm256_xor_si256(c2[4144],_mm256_xor_si256(c2[4587],_mm256_xor_si256(c2[1967],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[5485],_mm256_xor_si256(c2[5928],_mm256_xor_si256(c2[12080],_mm25
6_xor_si256(c2[12083],_mm256_xor_si256(c2[12523],_mm256_xor_si256(c2[11228],_mm256_xor_si256(c2[11661],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[1989],_mm256_xor_si256(c2[7701],_mm256_xor_si256(c2[7704],_mm256_xor_si256(c2[8144],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[6401],_mm256_xor_si256(c2[6844],_mm256_xor_si256(c2[13889],_mm256_xor_si256(c2[13882],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[3346],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[4221],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[2907],_mm256_xor_si256(c2[2900],_mm256_xor_si256(c2[3340],_mm256_xor_si256(c2[6444],_mm256_xor_si256(c2[6887],_mm256_xor_si256(c2[13040],_mm256_xor_si256(c2[13483],_mm256_xor_si256(c2[10403],_mm256_xor_si256(c2[10406],_mm256_xor_si256(c2[10846],_mm256_xor_si256(c2[5581],_mm256_xor_si256(c2[6024],_mm256_xor_si256(c2[5580],_mm256_xor_si256(c2[6023],_mm256_xor_si256(c2[6901],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[7365],_mm256_xor_si256(c2[7808],_mm256_xor_si256(c2[4724],_mm256_xor_si256(c2[5167],_mm256_xor_si256(c2[6483],_mm256_xor_si256(c2[6486],_mm256_xor_si256(c2[6926],_mm256_xor_si256(c2[9584],_mm256_xor_si256(c2[10027],_mm256_xor_si256(c2[11787],_mm256_xor_si256(c2[12220],_mm256_xor_si256(c2[13542],_mm256_xor_si256(c2[13545],_mm256_xor_si256(c2[13985],_mm256_xor_si256(c2[8725],_mm256_xor_si256(c2[9168],_mm256_xor_si256(c2[10929],_mm256_xor_si256(c2[11362],_mm256_xor_si256(c2[6964],_mm256_xor_si256(c2[6967],_mm256_xor_si256(c2[7407],_mm256_xor_si256(c2[8302],_mm256_xor_si256(c2[8745],_mm256_xor_si256(c2[3020],_mm256_xor_si256(c2[3463],_mm256_xor_si256(c2[3909],_mm256_xor_si256(c2[4342],_mm256_xor_si256(c2[14047],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[10080],_mm256_xor_si256(c2[10523],_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[7021],_mm256_xor_si256(c2[7464],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[3067],c2[3507])))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[310]=simde_mm256_xor_si256(c2[4844],simde_mm256_xor_si256(c2[5287],simde_mm256_xor_si256(c2[7481],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[2644],simde_mm256_xor_si256(c2[3087],simde_mm256_xor_si256(c2[13208],simde_mm256_xor_si256(c2[13201],simde_mm256_xor_si256(c2[13641],simde_mm256_xor_si256(c2[7502],simde_mm256_xor_si256(c2[7945],simde_mm256_xor_si256(c2[3982],simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[7948],simde_mm256_xor_si256(c2[7941],simde_mm256_xor_si256(c2[8381],simde_mm256_xor_si256(c2[13226],simde_mm256_xor_si256(c2[4005],simde_mm256_xor_si256(c2[4448],simde_mm256_xor_si256(c2[3561],simde_mm256_xor_si256(c2[4004],simde_mm256_xor_si256(c2[7964],simde_mm256_xor_si256(c2[8407],simde_mm256_xor_si256(c2[8423],simde_mm256_xor_si256(c2[8866],simde_mm256_xor_si256(c2[12822],simde_mm256_xor_si256(c2[13265],simde_mm256_xor_si256(c2[63],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[3160],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[11520],simde_mm256_xor_si256(c2[11963],simde_mm256_xor_si256(c2[3168],simde_mm256_xor_si256(c2[3161],simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[8029],simde_mm256_xor_si256(c2[8462],simde_mm256_xor_si256(c2[1427],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[3623],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[4083],simde_mm256_xor_si256(c2[4526],simde_mm256_xor_si256(c2[2329],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[13763],simde_mm256_xor_si256(c2[13766],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[3226],simde_mm256_xor_si256(c2[3669],simde_mm256_xor_si256(c2[5428],simde_mm256_xor_si256(c2[5861],simde_mm256_xor_si256(c2[7627],simde_mm256_xor_si256(c2[7620],simde_mm256_xor_si256(c2[8060],simde_mm256_xor_si256(c2[8949],simde_mm256_xor_si256(c2[2802],simde_mm256_xor_si256(c2[3245],simde_mm256_xor_si256(c2[4568],simde_mm256_xor_si256(c2[5001],simde_mm256_xor_si256(c2[7644],simde_mm256_xor_si256(c2[7647
],simde_mm256_xor_si256(c2[8087],simde_mm256_xor_si256(c2[7662],simde_mm256_xor_si256(c2[8105],simde_mm256_xor_si256(c2[8104],simde_mm256_xor_si256(c2[8547],simde_mm256_xor_si256(c2[4144],simde_mm256_xor_si256(c2[4587],simde_mm256_xor_si256(c2[1967],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[5485],simde_mm256_xor_si256(c2[5928],simde_mm256_xor_si256(c2[12080],simde_mm256_xor_si256(c2[12083],simde_mm256_xor_si256(c2[12523],simde_mm256_xor_si256(c2[11228],simde_mm256_xor_si256(c2[11661],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[1989],simde_mm256_xor_si256(c2[7701],simde_mm256_xor_si256(c2[7704],simde_mm256_xor_si256(c2[8144],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[6401],simde_mm256_xor_si256(c2[6844],simde_mm256_xor_si256(c2[13889],simde_mm256_xor_si256(c2[13882],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[3346],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256(c2[4221],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[2907],simde_mm256_xor_si256(c2[2900],simde_mm256_xor_si256(c2[3340],simde_mm256_xor_si256(c2[6444],simde_mm256_xor_si256(c2[6887],simde_mm256_xor_si256(c2[13040],simde_mm256_xor_si256(c2[13483],simde_mm256_xor_si256(c2[10403],simde_mm256_xor_si256(c2[10406],simde_mm256_xor_si256(c2[10846],simde_mm256_xor_si256(c2[5581],simde_mm256_xor_si256(c2[6024],simde_mm256_xor_si256(c2[5580],simde_mm256_xor_si256(c2[6023],simde_mm256_xor_si256(c2[6901],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[7365],simde_mm256_xor_si256(c2[7808],simde_mm256_xor_si256(c2[4724],simde_mm256_xor_si256(c2[5167],simde_mm256_xor_si256(c2[6483],simde_mm256_xor_si256(c2[6486],simde_mm256_xor_si256(c2[6926],simde_mm256_xor_si256(c2[9584],simde_mm256_xor_si256(c2[10027],simde_mm256_xor_si256(c2[11787],simde_mm256_xor_si256(c2[12220],simde_mm256_xor_si256(c2[13542],simde_mm256_xor_si256(c2[13545],simde_mm256_xor_si256(c2[13985],simde_mm256_xor_si256(c2[8725],simde_mm256_xor_si256(c2[916
8],simde_mm256_xor_si256(c2[10929],simde_mm256_xor_si256(c2[11362],simde_mm256_xor_si256(c2[6964],simde_mm256_xor_si256(c2[6967],simde_mm256_xor_si256(c2[7407],simde_mm256_xor_si256(c2[8302],simde_mm256_xor_si256(c2[8745],simde_mm256_xor_si256(c2[3020],simde_mm256_xor_si256(c2[3463],simde_mm256_xor_si256(c2[3909],simde_mm256_xor_si256(c2[4342],simde_mm256_xor_si256(c2[14047],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[10080],simde_mm256_xor_si256(c2[10523],simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[7021],simde_mm256_xor_si256(c2[7464],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[3067],c2[3507]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[320]=_mm256_xor_si256(c2[1329],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[12768],_mm256_xor_si256(c2[13208],_mm256_xor_si256(c2[9243],_mm256_xor_si256(c2[9683],_mm256_xor_si256(c2[7926],_mm256_xor_si256(c2[3987],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[3983],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[4009],_mm256_xor_si256(c2[4449],_mm256_xor_si256(c2[4908],_mm256_xor_si256(c2[9307],_mm256_xor_si256(c2[10187],_mm256_xor_si256(c2[10627],_mm256_xor_si256(c2[13724],_mm256_xor_si256(c2[7565],_mm256_xor_si256(c2[8005],_mm256_xor_si256(c2[13282],_mm256_xor_si256(c2[13722],_mm256_xor_si256(c2[4504],_mm256_xor_si256(c2[11981],_mm256_xor_si256(c2[13747],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[12443],_mm256_xor_si256(c2[12883],_mm256_xor_si256(c2[9808],_mm256_xor_si256(c2[10248],_mm256_xor_si256(c2[13780],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[3662],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[13366],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[1043],_mm256_xor_si256(c2[3689],_mm256_xor_si256(c2[4129],_mm256_xor_si256(c2[4147],_mm256_xor_si256(c2[4589],_mm256_xor_si256(c2[189],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[12521],_mm256_xor_si256(c2[1520],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[8125],_mm256_xor_si256(c2[8565],_mm256_xor_si256(c2[7703],_mm256_xor_si256(c2[12100],_mm256_xor_si256(c2[3746],_mm256_xor_si256(c2[4186],_mm256_xor_si256(c2[13441],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[9924],_mm256_xor_si256(c2[10364],_mm256_xor_si256(c2[9920],_mm256_xor_si256(c2[13900],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[706],_mm256_xor_si256(c2[13021],_mm256_xor_si256(c2[13461],_mm256_xor_si256(c2[2929],_mm256_xor_si256(c2[9085],_mm256_xor_si256(c2[9525],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[2066],_mm256_xor_si256(c2[2065],_mm256_xor_si256(c2[2946],_m
m256_xor_si256(c2[3386],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[1209],_mm256_xor_si256(c2[2528],_mm256_xor_si256(c2[2968],_mm256_xor_si256(c2[6069],_mm256_xor_si256(c2[7822],_mm256_xor_si256(c2[8262],_mm256_xor_si256(c2[9587],_mm256_xor_si256(c2[10027],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[6964],_mm256_xor_si256(c2[7404],_mm256_xor_si256(c2[3009],_mm256_xor_si256(c2[3449],_mm256_xor_si256(c2[4787],_mm256_xor_si256(c2[13584],_mm256_xor_si256(c2[14023],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[10522],_mm256_xor_si256(c2[6125],_mm256_xor_si256(c2[6565],_mm256_xor_si256(c2[10966],_mm256_xor_si256(c2[11406],_mm256_xor_si256(c2[11867],_mm256_xor_si256(c2[3506],_mm256_xor_si256(c2[13188],c2[13628])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[320]=simde_mm256_xor_si256(c2[1329],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[12768],simde_mm256_xor_si256(c2[13208],simde_mm256_xor_si256(c2[9243],simde_mm256_xor_si256(c2[9683],simde_mm256_xor_si256(c2[7926],simde_mm256_xor_si256(c2[3987],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[3983],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[4009],simde_mm256_xor_si256(c2[4449],simde_mm256_xor_si256(c2[4908],simde_mm256_xor_si256(c2[9307],simde_mm256_xor_si256(c2[10187],simde_mm256_xor_si256(c2[10627],simde_mm256_xor_si256(c2[13724],simde_mm256_xor_si256(c2[7565],simde_mm256_xor_si256(c2[8005],simde_mm256_xor_si256(c2[13282],simde_mm256_xor_si256(c2[13722],simde_mm256_xor_si256(c2[4504],simde_mm256_xor_si256(c2[11981],simde_mm256_xor_si256(c2[13747],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[12443],simde_mm256_xor_si256(c2[12883],simde_mm256_xor_si256(c2[9808],simde_mm256_xor_si256(c2[10248],simde_mm256_xor_si256(c2[13780],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[1903],simde_mm256_xor_si256(c2[3662],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[13366],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[1043],simde_mm256_xor_si256(c2[3689],simde_mm256_xor_si256(c2[4129],simde_mm256_xor_si256(c2[4147],simde_mm256_xor_si256(c2[4589],simde_mm256_xor_si256(c2[189],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[12521],simde_mm256_xor_si256(c2[1520],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[8125],simde_mm256_xor_si256(c2[8565],simde_mm256_xor_si256(c2[7703],simde_mm256_xor_si256(c2[12100],simde_mm256_xor_si256(c2[3746],simde_mm256_xor_si256(c2[4186],simde_mm256_xor_si256(c2[13441],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[9924],simde_mm256_xor_si256(c2[10364],simde_mm256_xor_si256(c2[9920],simde_mm256_xor_si256(c2[13900],simde_mm256_xor_si256(c2[
266],simde_mm256_xor_si256(c2[706],simde_mm256_xor_si256(c2[13021],simde_mm256_xor_si256(c2[13461],simde_mm256_xor_si256(c2[2929],simde_mm256_xor_si256(c2[9085],simde_mm256_xor_si256(c2[9525],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[2066],simde_mm256_xor_si256(c2[2065],simde_mm256_xor_si256(c2[2946],simde_mm256_xor_si256(c2[3386],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[1209],simde_mm256_xor_si256(c2[2528],simde_mm256_xor_si256(c2[2968],simde_mm256_xor_si256(c2[6069],simde_mm256_xor_si256(c2[7822],simde_mm256_xor_si256(c2[8262],simde_mm256_xor_si256(c2[9587],simde_mm256_xor_si256(c2[10027],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[6964],simde_mm256_xor_si256(c2[7404],simde_mm256_xor_si256(c2[3009],simde_mm256_xor_si256(c2[3449],simde_mm256_xor_si256(c2[4787],simde_mm256_xor_si256(c2[13584],simde_mm256_xor_si256(c2[14023],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[10522],simde_mm256_xor_si256(c2[6125],simde_mm256_xor_si256(c2[6565],simde_mm256_xor_si256(c2[10966],simde_mm256_xor_si256(c2[11406],simde_mm256_xor_si256(c2[11867],simde_mm256_xor_si256(c2[3506],simde_mm256_xor_si256(c2[13188],c2[13628])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[330]=_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[8840],_mm256_xor_si256(c2[5504],c2[861])));
+     d2[330]=simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[8840],simde_mm256_xor_si256(c2[5504],c2[861])));
 
 //row: 34
-     d2[340]=_mm256_xor_si256(c2[11885],_mm256_xor_si256(c2[3669],_mm256_xor_si256(c2[2500],c2[5621])));
+     d2[340]=simde_mm256_xor_si256(c2[11885],simde_mm256_xor_si256(c2[3669],simde_mm256_xor_si256(c2[2500],c2[5621])));
 
 //row: 35
-     d2[350]=_mm256_xor_si256(c2[2641],_mm256_xor_si256(c2[5288],_mm256_xor_si256(c2[441],_mm256_xor_si256(c2[11005],_mm256_xor_si256(c2[5309],_mm256_xor_si256(c2[1789],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[6626],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[5761],_mm256_xor_si256(c2[6220],_mm256_xor_si256(c2[10629],_mm256_xor_si256(c2[11949],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[9327],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[5826],_mm256_xor_si256(c2[13303],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[11560],_mm256_xor_si256(c2[13324],_mm256_xor_si256(c2[1023],_mm256_xor_si256(c2[3225],_mm256_xor_si256(c2[5424],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[2365],_mm256_xor_si256(c2[5441],_mm256_xor_si256(c2[5469],_mm256_xor_si256(c2[5901],_mm256_xor_si256(c2[1941],_mm256_xor_si256(c2[13843],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[9887],_mm256_xor_si256(c2[9025],_mm256_xor_si256(c2[13422],_mm256_xor_si256(c2[5508],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[4208],_mm256_xor_si256(c2[11686],_mm256_xor_si256(c2[10361],_mm256_xor_si256(c2[1143],_mm256_xor_si256(c2[2028],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[4241],_mm256_xor_si256(c2[10847],_mm256_xor_si256(c2[8200],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[3387],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[5162],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[4280],_mm256_xor_si256(c2[7381],_mm256_xor_si256(c2[9584],_mm256_xor_si256(c2[11349],_mm256_xor_si256(c2[6522],_mm256_xor_si256(c2[8726],_mm256_xor_si256(c2[4761],_mm256_xor_si256(c2[6109],_mm256_xor_si256(c2[827],_mm256_xor_si256(c2[1706],_mm256_xor_si256(c2[11844],_mm256_xor_si256(c2[7887],_mm256_xor_si256(c2[12728],_mm256_xor_si256(c2[13189],_mm256_xor_si256(c2[4828],c2[861])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[350]=simde_mm256_xor_si256(c2[2641],simde_mm256_xor_si256(c2[5288],simde_mm256_xor_si256(c2[441],simde_mm256_xor_si256(c2[11005],simde_mm256_xor_si256(c2[5309],simde_mm256_xor_si256(c2[1789],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[6626],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[5761],simde_mm256_xor_si256(c2[6220],simde_mm256_xor_si256(c2[10629],simde_mm256_xor_si256(c2[11949],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[9327],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[5826],simde_mm256_xor_si256(c2[13303],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[11560],simde_mm256_xor_si256(c2[13324],simde_mm256_xor_si256(c2[1023],simde_mm256_xor_si256(c2[3225],simde_mm256_xor_si256(c2[5424],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[2365],simde_mm256_xor_si256(c2[5441],simde_mm256_xor_si256(c2[5469],simde_mm256_xor_si256(c2[5901],simde_mm256_xor_si256(c2[1941],simde_mm256_xor_si256(c2[13843],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[9887],simde_mm256_xor_si256(c2[9025],simde_mm256_xor_si256(c2[13422],simde_mm256_xor_si256(c2[5508],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[4208],simde_mm256_xor_si256(c2[11686],simde_mm256_xor_si256(c2[10361],simde_mm256_xor_si256(c2[1143],simde_mm256_xor_si256(c2[2028],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[4241],simde_mm256_xor_si256(c2[10847],simde_mm256_xor_si256(c2[8200],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[3387],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[5162],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[4280],simde_mm256_xor_si256(c2[7381],simde_mm256_xor_si256(c2[9584],simde_mm256_xor_si256(c2[11349],simde_mm256_xor_si256(c2[6522],simde_mm256_xor_si256(c2[8726],simde_mm256_xor_si256(c2[4761],simde_mm256_xor_si256(c2[6109],simde_mm256_xor_si256(c2[827],simde_mm256_xor_si256(c2[1706
],simde_mm256_xor_si256(c2[11844],simde_mm256_xor_si256(c2[7887],simde_mm256_xor_si256(c2[12728],simde_mm256_xor_si256(c2[13189],simde_mm256_xor_si256(c2[4828],c2[861])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[360]=_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[8645],_mm256_xor_si256(c2[8225],c2[365])));
+     d2[360]=simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[8645],simde_mm256_xor_si256(c2[8225],c2[365])));
 
 //row: 37
-     d2[370]=_mm256_xor_si256(c2[12325],_mm256_xor_si256(c2[12765],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[10565],_mm256_xor_si256(c2[7040],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[11903],_mm256_xor_si256(c2[1780],_mm256_xor_si256(c2[4429],_mm256_xor_si256(c2[11486],_mm256_xor_si256(c2[11926],_mm256_xor_si256(c2[11482],_mm256_xor_si256(c2[1806],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[6664],_mm256_xor_si256(c2[7984],_mm256_xor_si256(c2[11081],_mm256_xor_si256(c2[5362],_mm256_xor_si256(c2[11089],_mm256_xor_si256(c2[1421],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[9348],_mm256_xor_si256(c2[11544],_mm256_xor_si256(c2[11564],_mm256_xor_si256(c2[12004],_mm256_xor_si256(c2[10240],_mm256_xor_si256(c2[7605],_mm256_xor_si256(c2[11147],_mm256_xor_si256(c2[13349],_mm256_xor_si256(c2[1469],_mm256_xor_si256(c2[10723],_mm256_xor_si256(c2[12489],_mm256_xor_si256(c2[1486],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1946],_mm256_xor_si256(c2[12065],_mm256_xor_si256(c2[9448],_mm256_xor_si256(c2[9888],_mm256_xor_si256(c2[13406],_mm256_xor_si256(c2[5922],_mm256_xor_si256(c2[4620],_mm256_xor_si256(c2[5060],_mm256_xor_si256(c2[9467],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[10368],_mm256_xor_si256(c2[10808],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[7721],_mm256_xor_si256(c2[10827],_mm256_xor_si256(c2[11267],_mm256_xor_si256(c2[12142],_mm256_xor_si256(c2[10828],_mm256_xor_si256(c2[6860],_mm256_xor_si256(c2[286],_mm256_xor_si256(c2[6882],_mm256_xor_si256(c2[4245],_mm256_xor_si256(c2[13062],_mm256_xor_si256(c2[13502],_mm256_xor_si256(c2[13501],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[767],_mm256_xor_si256(c2[1207],_mm256_xor_si256(c2[12645],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[3426],_mm256_xor_si256(c2[5629],_mm256_xor_si256(c2[7384],_mm256_xor_si256(c2[2127],_mm256_xor_si256(c2[2567],_mm256_xor_si256(c2[4761],_mm256_xor_si256(c2[806],_mm256_xor_si256(c2[1704],_mm256_xor_si2
56(c2[2144],_mm256_xor_si256(c2[10941],_mm256_xor_si256(c2[11820],_mm256_xor_si256(c2[7449],_mm256_xor_si256(c2[7889],_mm256_xor_si256(c2[3922],_mm256_xor_si256(c2[8763],_mm256_xor_si256(c2[8784],_mm256_xor_si256(c2[9224],_mm256_xor_si256(c2[863],c2[10985])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[370]=simde_mm256_xor_si256(c2[12325],simde_mm256_xor_si256(c2[12765],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[10565],simde_mm256_xor_si256(c2[7040],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[11903],simde_mm256_xor_si256(c2[1780],simde_mm256_xor_si256(c2[4429],simde_mm256_xor_si256(c2[11486],simde_mm256_xor_si256(c2[11926],simde_mm256_xor_si256(c2[11482],simde_mm256_xor_si256(c2[1806],simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[6664],simde_mm256_xor_si256(c2[7984],simde_mm256_xor_si256(c2[11081],simde_mm256_xor_si256(c2[5362],simde_mm256_xor_si256(c2[11089],simde_mm256_xor_si256(c2[1421],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[9348],simde_mm256_xor_si256(c2[11544],simde_mm256_xor_si256(c2[11564],simde_mm256_xor_si256(c2[12004],simde_mm256_xor_si256(c2[10240],simde_mm256_xor_si256(c2[7605],simde_mm256_xor_si256(c2[11147],simde_mm256_xor_si256(c2[13349],simde_mm256_xor_si256(c2[1469],simde_mm256_xor_si256(c2[10723],simde_mm256_xor_si256(c2[12489],simde_mm256_xor_si256(c2[1486],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1946],simde_mm256_xor_si256(c2[12065],simde_mm256_xor_si256(c2[9448],simde_mm256_xor_si256(c2[9888],simde_mm256_xor_si256(c2[13406],simde_mm256_xor_si256(c2[5922],simde_mm256_xor_si256(c2[4620],simde_mm256_xor_si256(c2[5060],simde_mm256_xor_si256(c2[9467],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[10368],simde_mm256_xor_si256(c2[10808],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[7721],simde_mm256_xor_si256(c2[10827],simde_mm256_xor_si256(c2[11267],simde_mm256_xor_si256(c2[12142],simde_mm256_xor_si256(c2[10828],simde_mm256_xor_si256(c2[6860],simde_mm256_xor_si256(c2[286],simde_mm256_xor_si256(c2[6882],simde_mm256_xor_si256(c2[4245],simde_mm256_xor_si256(c2[13062],simde_mm256_xor_si256(c2[13502],simde_mm256_xor_si256(c2[13501],simde_mm256_xor_si256(c2[743],simde_mm2
56_xor_si256(c2[767],simde_mm256_xor_si256(c2[1207],simde_mm256_xor_si256(c2[12645],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[3426],simde_mm256_xor_si256(c2[5629],simde_mm256_xor_si256(c2[7384],simde_mm256_xor_si256(c2[2127],simde_mm256_xor_si256(c2[2567],simde_mm256_xor_si256(c2[4761],simde_mm256_xor_si256(c2[806],simde_mm256_xor_si256(c2[1704],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[10941],simde_mm256_xor_si256(c2[11820],simde_mm256_xor_si256(c2[7449],simde_mm256_xor_si256(c2[7889],simde_mm256_xor_si256(c2[3922],simde_mm256_xor_si256(c2[8763],simde_mm256_xor_si256(c2[8784],simde_mm256_xor_si256(c2[9224],simde_mm256_xor_si256(c2[863],c2[10985])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[380]=_mm256_xor_si256(c2[10124],_mm256_xor_si256(c2[8545],_mm256_xor_si256(c2[202],c2[9485])));
+     d2[380]=simde_mm256_xor_si256(c2[10124],simde_mm256_xor_si256(c2[8545],simde_mm256_xor_si256(c2[202],c2[9485])));
 
 //row: 39
-     d2[390]=_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[5782],_mm256_xor_si256(c2[146],c2[7426])));
+     d2[390]=simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[5782],simde_mm256_xor_si256(c2[146],c2[7426])));
 
 //row: 40
-     d2[400]=_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[7202],c2[2546]));
+     d2[400]=simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[7202],c2[2546]));
 
 //row: 41
-     d2[410]=_mm256_xor_si256(c2[11464],_mm256_xor_si256(c2[6661],_mm256_xor_si256(c2[12503],c2[6966])));
+     d2[410]=simde_mm256_xor_si256(c2[11464],simde_mm256_xor_si256(c2[6661],simde_mm256_xor_si256(c2[12503],c2[6966])));
 
 //row: 42
-     d2[420]=_mm256_xor_si256(c2[3526],_mm256_xor_si256(c2[6163],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[1326],_mm256_xor_si256(c2[11440],_mm256_xor_si256(c2[11880],_mm256_xor_si256(c2[887],_mm256_xor_si256(c2[6184],_mm256_xor_si256(c2[2224],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[6180],_mm256_xor_si256(c2[6620],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[6206],_mm256_xor_si256(c2[6646],_mm256_xor_si256(c2[7105],_mm256_xor_si256(c2[11504],_mm256_xor_si256(c2[12384],_mm256_xor_si256(c2[12824],_mm256_xor_si256(c2[1842],_mm256_xor_si256(c2[9762],_mm256_xor_si256(c2[10202],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[522],_mm256_xor_si256(c2[6701],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[2765],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[1001],_mm256_xor_si256(c2[12005],_mm256_xor_si256(c2[12445],_mm256_xor_si256(c2[1908],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[4100],_mm256_xor_si256(c2[5869],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[2800],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5886],_mm256_xor_si256(c2[6326],_mm256_xor_si256(c2[6344],_mm256_xor_si256(c2[6786],_mm256_xor_si256(c2[2386],_mm256_xor_si256(c2[2826],_mm256_xor_si256(c2[649],_mm256_xor_si256(c2[3727],_mm256_xor_si256(c2[4167],_mm256_xor_si256(c2[10322],_mm256_xor_si256(c2[10762],_mm256_xor_si256(c2[9900],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[5943],_mm256_xor_si256(c2[6383],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[5083],_mm256_xor_si256(c2[12121],_mm256_xor_si256(c2[12561],_mm256_xor_si256(c2[2028],_mm256_xor_si256(c2[2463],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[1149],_mm256_xor_si256(c2[1589],_mm256_xor_si256(c2[5126],_mm256_xor_si256(c2[11282],_mm256_xor_si256(c2[11722],_mm256_xor_si256(c2[8645],_mm256_xor_si256(c2[9085],_mm256_xor_si256(c2[4263],_mm256_xor_si256(c2[4262],_mm256_xor_si256(c2[5143],_mm256_xor_si256(c2[5583],_mm
256_xor_si256(c2[6047],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[4725],_mm256_xor_si256(c2[5165],_mm256_xor_si256(c2[8266],_mm256_xor_si256(c2[10029],_mm256_xor_si256(c2[10469],_mm256_xor_si256(c2[11784],_mm256_xor_si256(c2[12224],_mm256_xor_si256(c2[7407],_mm256_xor_si256(c2[9161],_mm256_xor_si256(c2[9601],_mm256_xor_si256(c2[5206],_mm256_xor_si256(c2[5646],_mm256_xor_si256(c2[6984],_mm256_xor_si256(c2[1702],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[2581],_mm256_xor_si256(c2[12729],_mm256_xor_si256(c2[8322],_mm256_xor_si256(c2[8762],_mm256_xor_si256(c2[13163],_mm256_xor_si256(c2[13603],_mm256_xor_si256(c2[14064],_mm256_xor_si256(c2[5703],_mm256_xor_si256(c2[1306],c2[1746]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[420]=simde_mm256_xor_si256(c2[3526],simde_mm256_xor_si256(c2[6163],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[1326],simde_mm256_xor_si256(c2[11440],simde_mm256_xor_si256(c2[11880],simde_mm256_xor_si256(c2[887],simde_mm256_xor_si256(c2[6184],simde_mm256_xor_si256(c2[2224],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[6180],simde_mm256_xor_si256(c2[6620],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[6206],simde_mm256_xor_si256(c2[6646],simde_mm256_xor_si256(c2[7105],simde_mm256_xor_si256(c2[11504],simde_mm256_xor_si256(c2[12384],simde_mm256_xor_si256(c2[12824],simde_mm256_xor_si256(c2[1842],simde_mm256_xor_si256(c2[9762],simde_mm256_xor_si256(c2[10202],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[522],simde_mm256_xor_si256(c2[6701],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[2765],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[1001],simde_mm256_xor_si256(c2[12005],simde_mm256_xor_si256(c2[12445],simde_mm256_xor_si256(c2[1908],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[4100],simde_mm256_xor_si256(c2[5869],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[2800],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5886],simde_mm256_xor_si256(c2[6326],simde_mm256_xor_si256(c2[6344],simde_mm256_xor_si256(c2[6786],simde_mm256_xor_si256(c2[2386],simde_mm256_xor_si256(c2[2826],simde_mm256_xor_si256(c2[649],simde_mm256_xor_si256(c2[3727],simde_mm256_xor_si256(c2[4167],simde_mm256_xor_si256(c2[10322],simde_mm256_xor_si256(c2[10762],simde_mm256_xor_si256(c2[9900],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[5943],simde_mm256_xor_si256(c2[6383],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[5083],simde_mm256_xor_si256(c2[12121],simde_mm256_xor_si256(c2[12561],simde_mm256_xor_si256(c2[2028],simde_mm256_xor_si256(c2[246
3],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[1149],simde_mm256_xor_si256(c2[1589],simde_mm256_xor_si256(c2[5126],simde_mm256_xor_si256(c2[11282],simde_mm256_xor_si256(c2[11722],simde_mm256_xor_si256(c2[8645],simde_mm256_xor_si256(c2[9085],simde_mm256_xor_si256(c2[4263],simde_mm256_xor_si256(c2[4262],simde_mm256_xor_si256(c2[5143],simde_mm256_xor_si256(c2[5583],simde_mm256_xor_si256(c2[6047],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[4725],simde_mm256_xor_si256(c2[5165],simde_mm256_xor_si256(c2[8266],simde_mm256_xor_si256(c2[10029],simde_mm256_xor_si256(c2[10469],simde_mm256_xor_si256(c2[11784],simde_mm256_xor_si256(c2[12224],simde_mm256_xor_si256(c2[7407],simde_mm256_xor_si256(c2[9161],simde_mm256_xor_si256(c2[9601],simde_mm256_xor_si256(c2[5206],simde_mm256_xor_si256(c2[5646],simde_mm256_xor_si256(c2[6984],simde_mm256_xor_si256(c2[1702],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2[2581],simde_mm256_xor_si256(c2[12729],simde_mm256_xor_si256(c2[8322],simde_mm256_xor_si256(c2[8762],simde_mm256_xor_si256(c2[13163],simde_mm256_xor_si256(c2[13603],simde_mm256_xor_si256(c2[14064],simde_mm256_xor_si256(c2[5703],simde_mm256_xor_si256(c2[1306],c2[1746]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[430]=_mm256_xor_si256(c2[7045],_mm256_xor_si256(c2[9682],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[9703],_mm256_xor_si256(c2[6183],_mm256_xor_si256(c2[9709],_mm256_xor_si256(c2[10149],_mm256_xor_si256(c2[1787],_mm256_xor_si256(c2[6206],_mm256_xor_si256(c2[5762],_mm256_xor_si256(c2[10165],_mm256_xor_si256(c2[10624],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[5361],_mm256_xor_si256(c2[13721],_mm256_xor_si256(c2[4929],_mm256_xor_si256(c2[5369],_mm256_xor_si256(c2[10220],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[5824],_mm256_xor_si256(c2[6284],_mm256_xor_si256(c2[4520],_mm256_xor_si256(c2[1445],_mm256_xor_si256(c2[1885],_mm256_xor_si256(c2[5427],_mm256_xor_si256(c2[7629],_mm256_xor_si256(c2[9388],_mm256_xor_si256(c2[9828],_mm256_xor_si256(c2[5003],_mm256_xor_si256(c2[6769],_mm256_xor_si256(c2[9405],_mm256_xor_si256(c2[9845],_mm256_xor_si256(c2[9863],_mm256_xor_si256(c2[10305],_mm256_xor_si256(c2[6345],_mm256_xor_si256(c2[4168],_mm256_xor_si256(c2[7686],_mm256_xor_si256(c2[13841],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[13429],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[9462],_mm256_xor_si256(c2[9902],_mm256_xor_si256(c2[5088],_mm256_xor_si256(c2[8602],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[2001],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[6422],_mm256_xor_si256(c2[4668],_mm256_xor_si256(c2[5108],_mm256_xor_si256(c2[8645],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[12164],_mm256_xor_si256(c2[12604],_mm256_xor_si256(c2[7782],_mm256_xor_si256(c2[7781],_mm256_xor_si256(c2[9102],_mm256_xor_si256(c2[9566],_mm256_xor_si256(c2[6925],_mm256_xor_si256(c2[8244],_mm256_xor_si256(c2[8684],_mm256_xor_si256(c2[2522],_mm256_xor_si256(c2[11785],_mm256_xor_si256(c2[13988],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[10926],_mm256_xor_si256(c2[13120],_mm256_xor_si256(c2[8725],_mm256_xor_si256(c2[9165],_mm256_xor_si256(c2[7405]
,_mm256_xor_si256(c2[10503],_mm256_xor_si256(c2[5221],_mm256_xor_si256(c2[6100],_mm256_xor_si256(c2[2169],_mm256_xor_si256(c2[12281],_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[3043],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[9222],_mm256_xor_si256(c2[4825],c2[5265]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[430]=simde_mm256_xor_si256(c2[7045],simde_mm256_xor_si256(c2[9682],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[9703],simde_mm256_xor_si256(c2[6183],simde_mm256_xor_si256(c2[9709],simde_mm256_xor_si256(c2[10149],simde_mm256_xor_si256(c2[1787],simde_mm256_xor_si256(c2[6206],simde_mm256_xor_si256(c2[5762],simde_mm256_xor_si256(c2[10165],simde_mm256_xor_si256(c2[10624],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[5361],simde_mm256_xor_si256(c2[13721],simde_mm256_xor_si256(c2[4929],simde_mm256_xor_si256(c2[5369],simde_mm256_xor_si256(c2[10220],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[5824],simde_mm256_xor_si256(c2[6284],simde_mm256_xor_si256(c2[4520],simde_mm256_xor_si256(c2[1445],simde_mm256_xor_si256(c2[1885],simde_mm256_xor_si256(c2[5427],simde_mm256_xor_si256(c2[7629],simde_mm256_xor_si256(c2[9388],simde_mm256_xor_si256(c2[9828],simde_mm256_xor_si256(c2[5003],simde_mm256_xor_si256(c2[6769],simde_mm256_xor_si256(c2[9405],simde_mm256_xor_si256(c2[9845],simde_mm256_xor_si256(c2[9863],simde_mm256_xor_si256(c2[10305],simde_mm256_xor_si256(c2[6345],simde_mm256_xor_si256(c2[4168],simde_mm256_xor_si256(c2[7686],simde_mm256_xor_si256(c2[13841],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[13429],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[9462],simde_mm256_xor_si256(c2[9902],simde_mm256_xor_si256(c2[5088],simde_mm256_xor_si256(c2[8602],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[2001],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[6422],simde_mm256_xor_si256(c2[4668],simde_mm256_xor_si256(c2[5108],simde_mm256_xor_si256(c2[8645],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[12164],simde_mm256_xor_si256(c2[12604],simde_mm256_xor_si256(c2[7782],simde_mm256_xor_si256(c2[7781],simde_mm256_xor_si256(c2[9102],simde_mm256_xor_si256(c2[9566],simde_mm256_xor_si256(c2[6
925],simde_mm256_xor_si256(c2[8244],simde_mm256_xor_si256(c2[8684],simde_mm256_xor_si256(c2[2522],simde_mm256_xor_si256(c2[11785],simde_mm256_xor_si256(c2[13988],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[10926],simde_mm256_xor_si256(c2[13120],simde_mm256_xor_si256(c2[8725],simde_mm256_xor_si256(c2[9165],simde_mm256_xor_si256(c2[7405],simde_mm256_xor_si256(c2[10503],simde_mm256_xor_si256(c2[5221],simde_mm256_xor_si256(c2[6100],simde_mm256_xor_si256(c2[2169],simde_mm256_xor_si256(c2[12281],simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[3043],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[9222],simde_mm256_xor_si256(c2[4825],c2[5265]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[440]=_mm256_xor_si256(c2[5720],_mm256_xor_si256(c2[8367],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[4407],_mm256_xor_si256(c2[8388],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[8824],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[8840],_mm256_xor_si256(c2[9309],_mm256_xor_si256(c2[13708],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[4046],_mm256_xor_si256(c2[12406],_mm256_xor_si256(c2[4044],_mm256_xor_si256(c2[8905],_mm256_xor_si256(c2[2303],_mm256_xor_si256(c2[4509],_mm256_xor_si256(c2[4969],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[6304],_mm256_xor_si256(c2[8503],_mm256_xor_si256(c2[1467],_mm256_xor_si256(c2[3688],_mm256_xor_si256(c2[5444],_mm256_xor_si256(c2[8520],_mm256_xor_si256(c2[8548],_mm256_xor_si256(c2[8980],_mm256_xor_si256(c2[5020],_mm256_xor_si256(c2[1508],_mm256_xor_si256(c2[2843],_mm256_xor_si256(c2[6361],_mm256_xor_si256(c2[12966],_mm256_xor_si256(c2[12104],_mm256_xor_si256(c2[2422],_mm256_xor_si256(c2[8587],_mm256_xor_si256(c2[3763],_mm256_xor_si256(c2[7287],_mm256_xor_si256(c2[686],_mm256_xor_si256(c2[4222],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[3783],_mm256_xor_si256(c2[7320],_mm256_xor_si256(c2[13926],_mm256_xor_si256(c2[11289],_mm256_xor_si256(c2[6467],_mm256_xor_si256(c2[6466],_mm256_xor_si256(c2[7787],_mm256_xor_si256(c2[8241],_mm256_xor_si256(c2[5600],_mm256_xor_si256(c2[7369],_mm256_xor_si256(c2[10460],_mm256_xor_si256(c2[12663],_mm256_xor_si256(c2[349],_mm256_xor_si256(c2[9601],_mm256_xor_si256(c2[11805],_mm256_xor_si256(c2[7840],_mm256_xor_si256(c2[9188],_mm256_xor_si256(c2[3906],_mm256_xor_si256(c2[4785],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[10966],_mm256_xor_si256(c2[1728],_mm256_xor_si256(c2[2189],_mm256_xor_si256(c2[7907],c2[3940])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[440]=simde_mm256_xor_si256(c2[5720],simde_mm256_xor_si256(c2[8367],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[4407],simde_mm256_xor_si256(c2[8388],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[8824],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[8840],simde_mm256_xor_si256(c2[9309],simde_mm256_xor_si256(c2[13708],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[4046],simde_mm256_xor_si256(c2[12406],simde_mm256_xor_si256(c2[4044],simde_mm256_xor_si256(c2[8905],simde_mm256_xor_si256(c2[2303],simde_mm256_xor_si256(c2[4509],simde_mm256_xor_si256(c2[4969],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[6304],simde_mm256_xor_si256(c2[8503],simde_mm256_xor_si256(c2[1467],simde_mm256_xor_si256(c2[3688],simde_mm256_xor_si256(c2[5444],simde_mm256_xor_si256(c2[8520],simde_mm256_xor_si256(c2[8548],simde_mm256_xor_si256(c2[8980],simde_mm256_xor_si256(c2[5020],simde_mm256_xor_si256(c2[1508],simde_mm256_xor_si256(c2[2843],simde_mm256_xor_si256(c2[6361],simde_mm256_xor_si256(c2[12966],simde_mm256_xor_si256(c2[12104],simde_mm256_xor_si256(c2[2422],simde_mm256_xor_si256(c2[8587],simde_mm256_xor_si256(c2[3763],simde_mm256_xor_si256(c2[7287],simde_mm256_xor_si256(c2[686],simde_mm256_xor_si256(c2[4222],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[3783],simde_mm256_xor_si256(c2[7320],simde_mm256_xor_si256(c2[13926],simde_mm256_xor_si256(c2[11289],simde_mm256_xor_si256(c2[6467],simde_mm256_xor_si256(c2[6466],simde_mm256_xor_si256(c2[7787],simde_mm256_xor_si256(c2[8241],simde_mm256_xor_si256(c2[5600],simde_mm256_xor_si256(c2[7369],simde_mm256_xor_si256(c2[10460],simde_mm256_xor_si256(c2[12663],simde_mm256_xor_si256(c2[349],simde_mm256_xor_si256(c2[9601],simde_mm256_xor_si256(c2[11805],simde_mm256_xor_si256(c2[7840],simde_mm256_xor_si256(c2[9188],simde_mm256_xor_si256(c2[3906],simde_mm256_xor_si256(c2[4785],
simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[10966],simde_mm256_xor_si256(c2[1728],simde_mm256_xor_si256(c2[2189],simde_mm256_xor_si256(c2[7907],c2[3940])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[450]=_mm256_xor_si256(c2[2223],_mm256_xor_si256(c2[1887],c2[13403]));
+     d2[450]=simde_mm256_xor_si256(c2[2223],simde_mm256_xor_si256(c2[1887],c2[13403]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc352_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc352_byte.c
index 022f43aa4f1dc2c2aa59a3a24b2ecd7a60168438..271070b71972499d7527a91afea584119452672e 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc352_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc352_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc352_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[2913],_mm256_xor_si256(c2[6294],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[10673],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[14544],_mm256_xor_si256(c2[3437],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[10214],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[8810],_mm256_xor_si256(c2[6873],_mm256_xor_si256(c2[11228],_mm256_xor_si256(c2[7376],_mm256_xor_si256(c2[5926],_mm256_xor_si256(c2[7373],_mm256_xor_si256(c2[2557],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[5460],_mm256_xor_si256(c2[7901],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[6961],_mm256_xor_si256(c2[6475],_mm256_xor_si256(c2[13246],_mm256_xor_si256(c2[9401],_mm256_xor_si256(c2[8911],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[12811],_mm256_xor_si256(c2[15227],_mm256_xor_si256(c2[5063],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[3635],_mm256_xor_si256(c2[10897],_mm256_xor_si256(c2[9470],_mm256_xor_si256(c2[14307],_mm256_xor_si256(c2[12374],_mm256_xor_si256(c2[4648],_mm256_xor_si256(c2[7065],_mm256_xor_si256(c2[6586],_mm256_xor_si256(c2[7094],_mm256_xor_si256(c2[14836],_mm256_xor_si256(c2[12413],_mm256_xor_si256(c2[10495],_mm256_xor_si256(c2[9049],_mm256_xor_si256(c2[14369],_mm256_xor_si256(c2[13430],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[12459],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[11512],_mm256_xor_si256(c2[8149],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[7660],_mm256_xor_si256(c2[3811],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[9136],_mm256_xor_si256(c2[7701],_mm256_xor_si256(c2[3828],_mm256_xor_si256(c2[12543],_mm256_xor_si256(c2[14019],_mm256_xor_si256(c2[7247],c2[12565]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[2913],simde_mm256_xor_si256(c2[6294],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[10673],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[14544],simde_mm256_xor_si256(c2[3437],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[10214],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[8810],simde_mm256_xor_si256(c2[6873],simde_mm256_xor_si256(c2[11228],simde_mm256_xor_si256(c2[7376],simde_mm256_xor_si256(c2[5926],simde_mm256_xor_si256(c2[7373],simde_mm256_xor_si256(c2[2557],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[5460],simde_mm256_xor_si256(c2[7901],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[6961],simde_mm256_xor_si256(c2[6475],simde_mm256_xor_si256(c2[13246],simde_mm256_xor_si256(c2[9401],simde_mm256_xor_si256(c2[8911],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[12811],simde_mm256_xor_si256(c2[15227],simde_mm256_xor_si256(c2[5063],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[3635],simde_mm256_xor_si256(c2[10897],simde_mm256_xor_si256(c2[9470],simde_mm256_xor_si256(c2[14307],simde_mm256_xor_si256(c2[12374],simde_mm256_xor_si256(c2[4648],simde_mm256_xor_si256(c2[7065],simde_mm256_xor_si256(c2[6586],simde_mm256_xor_si256(c2[7094],simde_mm256_xor_si256(c2[14836],simde_mm256_xor_si256(c2[12413],simde_mm256_xor_si256(c2[10495],simde_mm256_xor_si256(c2[9049],simde_mm256_xor_si256(c2[14369],simde_mm256_xor_si256(c2[13430],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[12459],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[11512],simde_mm256_xor_si256(c2[8149],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[7660],simde_mm256_xor_si256(c2[3811],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si256(c2[9136],simde_mm256_xor_si256(c2[7701],simde_mm256_xor_si256(c2[3828],simde_mm256_xor_si256(c2[1
2543],simde_mm256_xor_si256(c2[14019],simde_mm256_xor_si256(c2[7247],c2[12565]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[11]=_mm256_xor_si256(c2[2913],_mm256_xor_si256(c2[3397],_mm256_xor_si256(c2[6778],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[10673],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[1959],_mm256_xor_si256(c2[15028],_mm256_xor_si256(c2[3437],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[1019],_mm256_xor_si256(c2[10698],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[8779],_mm256_xor_si256(c2[9294],_mm256_xor_si256(c2[7357],_mm256_xor_si256(c2[11712],_mm256_xor_si256(c2[7376],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[6410],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[2557],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[4980],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[8385],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[3547],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[6959],_mm256_xor_si256(c2[13730],_mm256_xor_si256(c2[9401],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[9395],_mm256_xor_si256(c2[3595],_mm256_xor_si256(c2[12811],_mm256_xor_si256(c2[13295],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[4119],_mm256_xor_si256(c2[11381],_mm256_xor_si256(c2[9470],_mm256_xor_si256(c2[9954],_mm256_xor_si256(c2[14791],_mm256_xor_si256(c2[12858],_mm256_xor_si256(c2[4648],_mm256_xor_si256(c2[5132],_mm256_xor_si256(c2[7549],_mm256_xor_si256(c2[7070],_mm256_xor_si256(c2[7578],_mm256_xor_si256(c2[15320],_mm256_xor_si256(c2[12897],_mm256_xor_si256(c2[10495],_mm256_xor_si256(c2[10979],_mm256_xor_si256(c2[9533],_mm256_xor_si256(c2[14853],_mm256_xor_si256(c2[13430],_mm256_xor_si256(c2[13914],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[12943],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[2314],_mm256_xor_si256(c2[11996],_mm256_xor_si256(c2[8149],_mm256_xor_si256(c2[8633],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[8144],_mm256_xor_si256(c2[3811],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[5258],_mm256_xor_si256(c2[96
20],_mm256_xor_si256(c2[7701],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[13027],_mm256_xor_si256(c2[14019],_mm256_xor_si256(c2[14503],_mm256_xor_si256(c2[7731],c2[13049])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[11]=simde_mm256_xor_si256(c2[2913],simde_mm256_xor_si256(c2[3397],simde_mm256_xor_si256(c2[6778],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[10673],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[1959],simde_mm256_xor_si256(c2[15028],simde_mm256_xor_si256(c2[3437],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[1019],simde_mm256_xor_si256(c2[10698],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[8779],simde_mm256_xor_si256(c2[9294],simde_mm256_xor_si256(c2[7357],simde_mm256_xor_si256(c2[11712],simde_mm256_xor_si256(c2[7376],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[6410],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[2557],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[4980],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[8385],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[3547],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[6959],simde_mm256_xor_si256(c2[13730],simde_mm256_xor_si256(c2[9401],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[9395],simde_mm256_xor_si256(c2[3595],simde_mm256_xor_si256(c2[12811],simde_mm256_xor_si256(c2[13295],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[11381],simde_mm256_xor_si256(c2[9470],simde_mm256_xor_si256(c2[9954],simde_mm256_xor_si256(c2[14791],simde_mm256_xor_si256(c2[12858],simde_mm256_xor_si256(c2[4648],simde_mm256_xor_si256(c2[5132],simde_mm256_xor_si256(c2[7549],simde_mm256_xor_si256(c2[7070],simde_mm256_xor_si256(c2[7578],simde_mm256_xor_si256(c2[15320],simde_mm256_xor_si256(c2[12897],simde_mm256_xor_si256(c2[10495],simde_mm256_xor_si256(c2[10979],simde_mm256_xor_si256(c2[9533],simde_mm256_xor_si256(c2[14853],simde_mm256_xor_si256(c2[13430],simde_mm256_xor_si256(c2[13914],simde_mm256_xor_si256
(c2[1328],simde_mm256_xor_si256(c2[12943],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[2314],simde_mm256_xor_si256(c2[11996],simde_mm256_xor_si256(c2[8149],simde_mm256_xor_si256(c2[8633],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[8144],simde_mm256_xor_si256(c2[3811],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[5258],simde_mm256_xor_si256(c2[9620],simde_mm256_xor_si256(c2[7701],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[13027],simde_mm256_xor_si256(c2[14019],simde_mm256_xor_si256(c2[14503],simde_mm256_xor_si256(c2[7731],c2[13049])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[22]=_mm256_xor_si256(c2[3397],_mm256_xor_si256(c2[6778],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[1959],_mm256_xor_si256(c2[14544],_mm256_xor_si256(c2[15028],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[1019],_mm256_xor_si256(c2[10214],_mm256_xor_si256(c2[10698],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[8779],_mm256_xor_si256(c2[9294],_mm256_xor_si256(c2[6873],_mm256_xor_si256(c2[7357],_mm256_xor_si256(c2[11228],_mm256_xor_si256(c2[11712],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[6410],_mm256_xor_si256(c2[7373],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[4980],_mm256_xor_si256(c2[5460],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[8385],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[3547],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[6475],_mm256_xor_si256(c2[6959],_mm256_xor_si256(c2[13246],_mm256_xor_si256(c2[13730],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[9395],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[3595],_mm256_xor_si256(c2[13295],_mm256_xor_si256(c2[15227],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[5063],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[4119],_mm256_xor_si256(c2[10897],_mm256_xor_si256(c2[11381],_mm256_xor_si256(c2[9954],_mm256_xor_si256(c2[14791],_mm256_xor_si256(c2[12374],_mm256_xor_si256(c2[12858],_mm256_xor_si256(c2[5132],_mm256_xor_si256(c2[7065],_mm256_xor_si256(c2[7549],_mm256_xor_si256(c2[6586],_mm256_xor_si256(c2[7070],_mm256_xor_si256(c2[7578],_mm256_xor_si256(c2[14836],_mm256_xor_si256(c2[15320],_mm256_xor_si256(c2[12413],_mm256_xor_si256(c2[12897],_mm256_xor_si256(c2[10979],_mm256_xor_si256(c2[9533],_mm256_xor_si256(c2[14369],_mm256_xor_si256(c2[14853],_mm256_xor_si256(c2[13914],_mm256_xor_si256(c2[
1328],_mm256_xor_si256(c2[12459],_mm256_xor_si256(c2[12943],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[2314],_mm256_xor_si256(c2[11512],_mm256_xor_si256(c2[11996],_mm256_xor_si256(c2[8633],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[7660],_mm256_xor_si256(c2[8144],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[5258],_mm256_xor_si256(c2[9136],_mm256_xor_si256(c2[9620],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[3828],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[12543],_mm256_xor_si256(c2[13027],_mm256_xor_si256(c2[14503],_mm256_xor_si256(c2[7731],_mm256_xor_si256(c2[12565],c2[13049]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[22]=simde_mm256_xor_si256(c2[3397],simde_mm256_xor_si256(c2[6778],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[1959],simde_mm256_xor_si256(c2[14544],simde_mm256_xor_si256(c2[15028],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[1019],simde_mm256_xor_si256(c2[10214],simde_mm256_xor_si256(c2[10698],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[8779],simde_mm256_xor_si256(c2[9294],simde_mm256_xor_si256(c2[6873],simde_mm256_xor_si256(c2[7357],simde_mm256_xor_si256(c2[11228],simde_mm256_xor_si256(c2[11712],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[6410],simde_mm256_xor_si256(c2[7373],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[4980],simde_mm256_xor_si256(c2[5460],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[8385],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[3547],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[6475],simde_mm256_xor_si256(c2[6959],simde_mm256_xor_si256(c2[13246],simde_mm256_xor_si256(c2[13730],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[9395],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[3595],simde_mm256_xor_si256(c2[13295],simde_mm256_xor_si256(c2[15227],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[5063],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[10897],simde_mm256_xor_si256(c2[11381],simde_mm256_xor_si256(c2[9954],simde_mm256_xor_si256(c2[14791],simde_mm256_xor_si256(c2[12374],simde_mm256_xor_si256(c2[12858],simde_mm256_xor_si256(c2[5132],simde_mm256_xor_si256(c2[7065],simde_mm256_xor_si256(c2[7549],simde_mm256_xor_si256(c2[
6586],simde_mm256_xor_si256(c2[7070],simde_mm256_xor_si256(c2[7578],simde_mm256_xor_si256(c2[14836],simde_mm256_xor_si256(c2[15320],simde_mm256_xor_si256(c2[12413],simde_mm256_xor_si256(c2[12897],simde_mm256_xor_si256(c2[10979],simde_mm256_xor_si256(c2[9533],simde_mm256_xor_si256(c2[14369],simde_mm256_xor_si256(c2[14853],simde_mm256_xor_si256(c2[13914],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[12459],simde_mm256_xor_si256(c2[12943],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[2314],simde_mm256_xor_si256(c2[11512],simde_mm256_xor_si256(c2[11996],simde_mm256_xor_si256(c2[8633],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[7660],simde_mm256_xor_si256(c2[8144],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[5258],simde_mm256_xor_si256(c2[9136],simde_mm256_xor_si256(c2[9620],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[3828],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[12543],simde_mm256_xor_si256(c2[13027],simde_mm256_xor_si256(c2[14503],simde_mm256_xor_si256(c2[7731],simde_mm256_xor_si256(c2[12565],c2[13049]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[33]=_mm256_xor_si256(c2[3397],_mm256_xor_si256(c2[6778],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[1959],_mm256_xor_si256(c2[14544],_mm256_xor_si256(c2[15028],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[1019],_mm256_xor_si256(c2[10698],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[8295],_mm256_xor_si256(c2[8779],_mm256_xor_si256(c2[9294],_mm256_xor_si256(c2[7357],_mm256_xor_si256(c2[11228],_mm256_xor_si256(c2[11712],_mm256_xor_si256(c2[7860],_mm256_xor_si256(c2[6410],_mm256_xor_si256(c2[7857],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[4980],_mm256_xor_si256(c2[5460],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[8385],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[3547],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[6959],_mm256_xor_si256(c2[13246],_mm256_xor_si256(c2[13730],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[9395],_mm256_xor_si256(c2[3595],_mm256_xor_si256(c2[13295],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[5063],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[4119],_mm256_xor_si256(c2[10897],_mm256_xor_si256(c2[11381],_mm256_xor_si256(c2[9954],_mm256_xor_si256(c2[14791],_mm256_xor_si256(c2[12374],_mm256_xor_si256(c2[12858],_mm256_xor_si256(c2[5132],_mm256_xor_si256(c2[7549],_mm256_xor_si256(c2[6586],_mm256_xor_si256(c2[7070],_mm256_xor_si256(c2[7578],_mm256_xor_si256(c2[15320],_mm256_xor_si256(c2[12413],_mm256_xor_si256(c2[12897],_mm256_xor_si256(c2[10979],_mm256_xor_si256(c2[9533],_mm256_xor_si256(c2[14853],_mm256_xor_si256(c2[13914],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[12459],_mm256_xor_si256(c2[12943],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[2314],_mm256_xor_si256(c2[11512],_mm256_xor_si256(c2[11996],_mm256_xor_si256(c2[8633],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[7660],_mm256_xor_si256(c2[8144],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[5258],_mm256_xor_si256(c
2[9620],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[12543],_mm256_xor_si256(c2[13027],_mm256_xor_si256(c2[14503],_mm256_xor_si256(c2[7731],_mm256_xor_si256(c2[12565],c2[13049])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[33]=simde_mm256_xor_si256(c2[3397],simde_mm256_xor_si256(c2[6778],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[1959],simde_mm256_xor_si256(c2[14544],simde_mm256_xor_si256(c2[15028],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[1019],simde_mm256_xor_si256(c2[10698],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[8295],simde_mm256_xor_si256(c2[8779],simde_mm256_xor_si256(c2[9294],simde_mm256_xor_si256(c2[7357],simde_mm256_xor_si256(c2[11228],simde_mm256_xor_si256(c2[11712],simde_mm256_xor_si256(c2[7860],simde_mm256_xor_si256(c2[6410],simde_mm256_xor_si256(c2[7857],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[4980],simde_mm256_xor_si256(c2[5460],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[8385],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[3547],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[6959],simde_mm256_xor_si256(c2[13246],simde_mm256_xor_si256(c2[13730],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[9395],simde_mm256_xor_si256(c2[3595],simde_mm256_xor_si256(c2[13295],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[5063],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[10897],simde_mm256_xor_si256(c2[11381],simde_mm256_xor_si256(c2[9954],simde_mm256_xor_si256(c2[14791],simde_mm256_xor_si256(c2[12374],simde_mm256_xor_si256(c2[12858],simde_mm256_xor_si256(c2[5132],simde_mm256_xor_si256(c2[7549],simde_mm256_xor_si256(c2[6586],simde_mm256_xor_si256(c2[7070],simde_mm256_xor_si256(c2[7578],simde_mm256_xor_si256(c2[15320],simde_mm256_xor_si256(c2[12413],simde_mm256_xor_si256(c2[12897],simde_mm256_xor_si256(c2[10979],simde_mm256_xor_si256(c2[9533],simde_mm256_xor_si256(c2[14853],simde_mm256_xor_si256(c2[13914],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si2
56(c2[12459],simde_mm256_xor_si256(c2[12943],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[2314],simde_mm256_xor_si256(c2[11512],simde_mm256_xor_si256(c2[11996],simde_mm256_xor_si256(c2[8633],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[7660],simde_mm256_xor_si256(c2[8144],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[5258],simde_mm256_xor_si256(c2[9620],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[12543],simde_mm256_xor_si256(c2[13027],simde_mm256_xor_si256(c2[14503],simde_mm256_xor_si256(c2[7731],simde_mm256_xor_si256(c2[12565],c2[13049])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[44]=_mm256_xor_si256(c2[4841],c2[30]);
+     d2[44]=simde_mm256_xor_si256(c2[4841],c2[30]);
 
 //row: 5
-     d2[55]=_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[3872],_mm256_xor_si256(c2[14040],_mm256_xor_si256(c2[13552],_mm256_xor_si256(c2[13074],_mm256_xor_si256(c2[8251],_mm256_xor_si256(c2[14551],_mm256_xor_si256(c2[12122],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[1015],_mm256_xor_si256(c2[13600],_mm256_xor_si256(c2[7792],_mm256_xor_si256(c2[2494],_mm256_xor_si256(c2[13134],_mm256_xor_si256(c2[5884],_mm256_xor_si256(c2[13141],_mm256_xor_si256(c2[6388],_mm256_xor_si256(c2[4451],_mm256_xor_si256(c2[8806],_mm256_xor_si256(c2[4954],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[4951],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[2074],_mm256_xor_si256(c2[3038],_mm256_xor_si256(c2[5479],_mm256_xor_si256(c2[13223],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[4539],_mm256_xor_si256(c2[4053],_mm256_xor_si256(c2[10824],_mm256_xor_si256(c2[6979],_mm256_xor_si256(c2[6500],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[10389],_mm256_xor_si256(c2[12805],_mm256_xor_si256(c2[2641],_mm256_xor_si256(c2[13802],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[8475],_mm256_xor_si256(c2[7048],_mm256_xor_si256(c2[11885],_mm256_xor_si256(c2[9952],_mm256_xor_si256(c2[1242],_mm256_xor_si256(c2[2226],_mm256_xor_si256(c2[4643],_mm256_xor_si256(c2[4164],_mm256_xor_si256(c2[4672],_mm256_xor_si256(c2[12414],_mm256_xor_si256(c2[9991],_mm256_xor_si256(c2[8084],_mm256_xor_si256(c2[6627],_mm256_xor_si256(c2[11947],_mm256_xor_si256(c2[11008],_mm256_xor_si256(c2[13909],_mm256_xor_si256(c2[10037],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[14895],_mm256_xor_si256(c2[9090],_mm256_xor_si256(c2[5727],_mm256_xor_si256(c2[14918],_mm256_xor_si256(c2[5238],_mm256_xor_si256(c2[1389],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[6714],_mm256_xor_si256(c2[5290],_mm256_xor_si256(c2[1417],_mm256_xor_si256(c2[10121],_mm256_xor_si256(c2[11597],_mm256_xor_si256(c2[4825],_mm256_xor_si256(c2[10143],c2[1438]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[55]=simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[3872],simde_mm256_xor_si256(c2[14040],simde_mm256_xor_si256(c2[13552],simde_mm256_xor_si256(c2[13074],simde_mm256_xor_si256(c2[8251],simde_mm256_xor_si256(c2[14551],simde_mm256_xor_si256(c2[12122],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[1015],simde_mm256_xor_si256(c2[13600],simde_mm256_xor_si256(c2[7792],simde_mm256_xor_si256(c2[2494],simde_mm256_xor_si256(c2[13134],simde_mm256_xor_si256(c2[5884],simde_mm256_xor_si256(c2[13141],simde_mm256_xor_si256(c2[6388],simde_mm256_xor_si256(c2[4451],simde_mm256_xor_si256(c2[8806],simde_mm256_xor_si256(c2[4954],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[4951],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[2074],simde_mm256_xor_si256(c2[3038],simde_mm256_xor_si256(c2[5479],simde_mm256_xor_si256(c2[13223],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[4539],simde_mm256_xor_si256(c2[4053],simde_mm256_xor_si256(c2[10824],simde_mm256_xor_si256(c2[6979],simde_mm256_xor_si256(c2[6500],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[10389],simde_mm256_xor_si256(c2[12805],simde_mm256_xor_si256(c2[2641],simde_mm256_xor_si256(c2[13802],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[8475],simde_mm256_xor_si256(c2[7048],simde_mm256_xor_si256(c2[11885],simde_mm256_xor_si256(c2[9952],simde_mm256_xor_si256(c2[1242],simde_mm256_xor_si256(c2[2226],simde_mm256_xor_si256(c2[4643],simde_mm256_xor_si256(c2[4164],simde_mm256_xor_si256(c2[4672],simde_mm256_xor_si256(c2[12414],simde_mm256_xor_si256(c2[9991],simde_mm256_xor_si256(c2[8084],simde_mm256_xor_si256(c2[6627],simde_mm256_xor_si256(c2[11947],simde_mm256_xor_si256(c2[11008],simde_mm256_xor_si256(c2[13909],simde_mm256_xor_si256(c2[10037],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[14895],simde_mm256_xor_si256(c2[9090],simde_mm256_xor_si256(c2[5727],simde_mm256_xor_si256(c2[14918],simde_mm256_xor_si256(c2[5238],simde_mm256_xor_si2
56(c2[1389],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[6714],simde_mm256_xor_si256(c2[5290],simde_mm256_xor_si256(c2[1417],simde_mm256_xor_si256(c2[10121],simde_mm256_xor_si256(c2[11597],simde_mm256_xor_si256(c2[4825],simde_mm256_xor_si256(c2[10143],c2[1438]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[66]=_mm256_xor_si256(c2[2913],_mm256_xor_si256(c2[4490],_mm256_xor_si256(c2[5070],_mm256_xor_si256(c2[1697],_mm256_xor_si256(c2[6099],_mm256_xor_si256(c2[11026],_mm256_xor_si256(c2[13956],c2[8677])))))));
+     d2[66]=simde_mm256_xor_si256(c2[2913],simde_mm256_xor_si256(c2[4490],simde_mm256_xor_si256(c2[5070],simde_mm256_xor_si256(c2[1697],simde_mm256_xor_si256(c2[6099],simde_mm256_xor_si256(c2[11026],simde_mm256_xor_si256(c2[13956],c2[8677])))))));
 
 //row: 7
-     d2[77]=_mm256_xor_si256(c2[1452],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[11314],c2[3697])))));
+     d2[77]=simde_mm256_xor_si256(c2[1452],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[11314],c2[3697])))));
 
 //row: 8
-     d2[88]=_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[13073],_mm256_xor_si256(c2[3389],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[13557],_mm256_xor_si256(c2[10651],_mm256_xor_si256(c2[11135],_mm256_xor_si256(c2[13069],_mm256_xor_si256(c2[10174],_mm256_xor_si256(c2[10658],_mm256_xor_si256(c2[13562],_mm256_xor_si256(c2[7768],_mm256_xor_si256(c2[5346],_mm256_xor_si256(c2[14068],_mm256_xor_si256(c2[11162],_mm256_xor_si256(c2[11646],_mm256_xor_si256(c2[11639],_mm256_xor_si256(c2[8744],_mm256_xor_si256(c2[9228],_mm256_xor_si256(c2[5348],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[13597],_mm256_xor_si256(c2[13117],_mm256_xor_si256(c2[10695],_mm256_xor_si256(c2[7309],_mm256_xor_si256(c2[4403],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[2011],_mm256_xor_si256(c2[15076],_mm256_xor_si256(c2[12651],_mm256_xor_si256(c2[10240],_mm256_xor_si256(c2[5390],_mm256_xor_si256(c2[2495],_mm256_xor_si256(c2[2979],_mm256_xor_si256(c2[10714],_mm256_xor_si256(c2[5905],_mm256_xor_si256(c2[3483],_mm256_xor_si256(c2[3968],_mm256_xor_si256(c2[1062],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[8323],_mm256_xor_si256(c2[5417],_mm256_xor_si256(c2[5901],_mm256_xor_si256(c2[4471],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[3021],_mm256_xor_si256(c2[599],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[1562],_mm256_xor_si256(c2[2046],_mm256_xor_si256(c2[15139],_mm256_xor_si256(c2[12717],_mm256_xor_si256(c2[1591],_mm256_xor_si256(c2[14172],_mm256_xor_si256(c2[14656],_mm256_xor_si256(c2[2555],_mm256_xor_si256(c2[15136],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[4996],_mm256_xor_si256(c2[2574],_mm256_xor_si256(c2[12740],_mm256_xor_si256(c2[9834],_mm256_xor_si256(c2[10318],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[12739],_mm256_xor_si256(c2[13223],_mm256_xor_si256(c2[4056],_mm256_xor_si256(c2[1634],_mm256_xor_si256(c2[3570],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[1148],_mm256_xor_si256(c2[10341],_mm256_xor_si256(c2[7446],_mm256_xor_si256(c2[7930],_mm256_xor_si256(c2[6496],_mm256_xor_si256(
c2[4074],_mm256_xor_si256(c2[6006],_mm256_xor_si256(c2[3595],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[12787],_mm256_xor_si256(c2[13271],_mm256_xor_si256(c2[9906],_mm256_xor_si256(c2[7484],_mm256_xor_si256(c2[12322],_mm256_xor_si256(c2[9416],_mm256_xor_si256(c2[9900],_mm256_xor_si256(c2[2158],_mm256_xor_si256(c2[14750],_mm256_xor_si256(c2[15234],_mm256_xor_si256(c2[13319],_mm256_xor_si256(c2[10897],_mm256_xor_si256(c2[730],_mm256_xor_si256(c2[13795],_mm256_xor_si256(c2[7992],_mm256_xor_si256(c2[5086],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[6565],_mm256_xor_si256(c2[4143],_mm256_xor_si256(c2[11402],_mm256_xor_si256(c2[8980],_mm256_xor_si256(c2[9469],_mm256_xor_si256(c2[6563],_mm256_xor_si256(c2[7047],_mm256_xor_si256(c2[11889],_mm256_xor_si256(c2[1743],_mm256_xor_si256(c2[14808],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[1254],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[3681],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[1259],_mm256_xor_si256(c2[4189],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[11931],_mm256_xor_si256(c2[9025],_mm256_xor_si256(c2[9509],_mm256_xor_si256(c2[9508],_mm256_xor_si256(c2[6602],_mm256_xor_si256(c2[7086],_mm256_xor_si256(c2[7590],_mm256_xor_si256(c2[5179],_mm256_xor_si256(c2[6144],_mm256_xor_si256(c2[3722],_mm256_xor_si256(c2[11464],_mm256_xor_si256(c2[8558],_mm256_xor_si256(c2[9042],_mm256_xor_si256(c2[10525],_mm256_xor_si256(c2[8103],_mm256_xor_si256(c2[13426],_mm256_xor_si256(c2[11004],_mm256_xor_si256(c2[9554],_mm256_xor_si256(c2[6648],_mm256_xor_si256(c2[7132],_mm256_xor_si256(c2[359],_mm256_xor_si256(c2[15380],_mm256_xor_si256(c2[12958],_mm256_xor_si256(c2[14412],_mm256_xor_si256(c2[11506],_mm256_xor_si256(c2[11990],_mm256_xor_si256(c2[8607],_mm256_xor_si256(c2[5701],_mm256_xor_si256(c2[6185],_mm256_xor_si256(c2[5244],_mm256_xor_si256(c2[2822],_mm256_xor_si256(c2[14435],_mm256_xor_si256(c2[11529],_mm256_xor_si256(c2[12013],_mm256_xor_si256(c2[4755],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[2333],_mm256_xor_si256(c2[9
06],_mm256_xor_si256(c2[13971],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[14945],_mm256_xor_si256(c2[6231],_mm256_xor_si256(c2[3325],_mm256_xor_si256(c2[3809],_mm256_xor_si256(c2[8646],_mm256_xor_si256(c2[4796],_mm256_xor_si256(c2[2385],_mm256_xor_si256(c2[934],_mm256_xor_si256(c2[13515],_mm256_xor_si256(c2[13999],_mm256_xor_si256(c2[9638],_mm256_xor_si256(c2[6732],_mm256_xor_si256(c2[7216],_mm256_xor_si256(c2[11114],_mm256_xor_si256(c2[8692],_mm256_xor_si256(c2[4342],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[9660],_mm256_xor_si256(c2[6754],_mm256_xor_si256(c2[7238],c2[13531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[88]=simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[13073],simde_mm256_xor_si256(c2[3389],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[13557],simde_mm256_xor_si256(c2[10651],simde_mm256_xor_si256(c2[11135],simde_mm256_xor_si256(c2[13069],simde_mm256_xor_si256(c2[10174],simde_mm256_xor_si256(c2[10658],simde_mm256_xor_si256(c2[13562],simde_mm256_xor_si256(c2[7768],simde_mm256_xor_si256(c2[5346],simde_mm256_xor_si256(c2[14068],simde_mm256_xor_si256(c2[11162],simde_mm256_xor_si256(c2[11646],simde_mm256_xor_si256(c2[11639],simde_mm256_xor_si256(c2[8744],simde_mm256_xor_si256(c2[9228],simde_mm256_xor_si256(c2[5348],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[13597],simde_mm256_xor_si256(c2[13117],simde_mm256_xor_si256(c2[10695],simde_mm256_xor_si256(c2[7309],simde_mm256_xor_si256(c2[4403],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[2011],simde_mm256_xor_si256(c2[15076],simde_mm256_xor_si256(c2[12651],simde_mm256_xor_si256(c2[10240],simde_mm256_xor_si256(c2[5390],simde_mm256_xor_si256(c2[2495],simde_mm256_xor_si256(c2[2979],simde_mm256_xor_si256(c2[10714],simde_mm256_xor_si256(c2[5905],simde_mm256_xor_si256(c2[3483],simde_mm256_xor_si256(c2[3968],simde_mm256_xor_si256(c2[1062],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[8323],simde_mm256_xor_si256(c2[5417],simde_mm256_xor_si256(c2[5901],simde_mm256_xor_si256(c2[4471],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[3021],simde_mm256_xor_si256(c2[599],simde_mm256_xor_si256(c2[4468],simde_mm256_xor_si256(c2[1562],simde_mm256_xor_si256(c2[2046],simde_mm256_xor_si256(c2[15139],simde_mm256_xor_si256(c2[12717],simde_mm256_xor_si256(c2[1591],simde_mm256_xor_si256(c2[14172],simde_mm256_xor_si256(c2[14656],simde_mm256_xor_si256(c2[2555],simde_mm256_xor_si256(c2[15136],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[4996],simde_mm256_xor_si256(c2[2574],simde_mm256_xor_si256(c2[12740],simde_mm256_xor_si256(c2[9834],simde_mm256_xor_si256(c2[10318],simde_mm256_xor
_si256(c2[158],simde_mm256_xor_si256(c2[12739],simde_mm256_xor_si256(c2[13223],simde_mm256_xor_si256(c2[4056],simde_mm256_xor_si256(c2[1634],simde_mm256_xor_si256(c2[3570],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[1148],simde_mm256_xor_si256(c2[10341],simde_mm256_xor_si256(c2[7446],simde_mm256_xor_si256(c2[7930],simde_mm256_xor_si256(c2[6496],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[6006],simde_mm256_xor_si256(c2[3595],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[12787],simde_mm256_xor_si256(c2[13271],simde_mm256_xor_si256(c2[9906],simde_mm256_xor_si256(c2[7484],simde_mm256_xor_si256(c2[12322],simde_mm256_xor_si256(c2[9416],simde_mm256_xor_si256(c2[9900],simde_mm256_xor_si256(c2[2158],simde_mm256_xor_si256(c2[14750],simde_mm256_xor_si256(c2[15234],simde_mm256_xor_si256(c2[13319],simde_mm256_xor_si256(c2[10897],simde_mm256_xor_si256(c2[730],simde_mm256_xor_si256(c2[13795],simde_mm256_xor_si256(c2[7992],simde_mm256_xor_si256(c2[5086],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[6565],simde_mm256_xor_si256(c2[4143],simde_mm256_xor_si256(c2[11402],simde_mm256_xor_si256(c2[8980],simde_mm256_xor_si256(c2[9469],simde_mm256_xor_si256(c2[6563],simde_mm256_xor_si256(c2[7047],simde_mm256_xor_si256(c2[11889],simde_mm256_xor_si256(c2[1743],simde_mm256_xor_si256(c2[14808],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[1254],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[3681],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[1259],simde_mm256_xor_si256(c2[4189],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[11931],simde_mm256_xor_si256(c2[9025],simde_mm256_xor_si256(c2[9509],simde_mm256_xor_si256(c2[9508],simde_mm256_xor_si256(c2[6602],simde_mm256_xor_si256(c2[7086],simde_mm256_xor_si256(c2[7590],simde_mm256_xor_si256(c2[5179],simde_mm256_xor_si256(c2[6144],simde_mm256_xor_si256(c2[3722],simde_mm256_xor_si256(c2[11464],simde_mm256_xor_si256(c2[8558],simde_mm256_xor_si256(c2[9042],simde_mm256_xor_si25
6(c2[10525],simde_mm256_xor_si256(c2[8103],simde_mm256_xor_si256(c2[13426],simde_mm256_xor_si256(c2[11004],simde_mm256_xor_si256(c2[9554],simde_mm256_xor_si256(c2[6648],simde_mm256_xor_si256(c2[7132],simde_mm256_xor_si256(c2[359],simde_mm256_xor_si256(c2[15380],simde_mm256_xor_si256(c2[12958],simde_mm256_xor_si256(c2[14412],simde_mm256_xor_si256(c2[11506],simde_mm256_xor_si256(c2[11990],simde_mm256_xor_si256(c2[8607],simde_mm256_xor_si256(c2[5701],simde_mm256_xor_si256(c2[6185],simde_mm256_xor_si256(c2[5244],simde_mm256_xor_si256(c2[2822],simde_mm256_xor_si256(c2[14435],simde_mm256_xor_si256(c2[11529],simde_mm256_xor_si256(c2[12013],simde_mm256_xor_si256(c2[4755],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[2333],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[13971],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[14945],simde_mm256_xor_si256(c2[6231],simde_mm256_xor_si256(c2[3325],simde_mm256_xor_si256(c2[3809],simde_mm256_xor_si256(c2[8646],simde_mm256_xor_si256(c2[4796],simde_mm256_xor_si256(c2[2385],simde_mm256_xor_si256(c2[934],simde_mm256_xor_si256(c2[13515],simde_mm256_xor_si256(c2[13999],simde_mm256_xor_si256(c2[9638],simde_mm256_xor_si256(c2[6732],simde_mm256_xor_si256(c2[7216],simde_mm256_xor_si256(c2[11114],simde_mm256_xor_si256(c2[8692],simde_mm256_xor_si256(c2[4342],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[9660],simde_mm256_xor_si256(c2[6754],simde_mm256_xor_si256(c2[7238],c2[13531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[99]=_mm256_xor_si256(c2[13556],_mm256_xor_si256(c2[11640],_mm256_xor_si256(c2[2649],_mm256_xor_si256(c2[7505],_mm256_xor_si256(c2[13840],_mm256_xor_si256(c2[11994],_mm256_xor_si256(c2[11528],c2[8678])))))));
+     d2[99]=simde_mm256_xor_si256(c2[13556],simde_mm256_xor_si256(c2[11640],simde_mm256_xor_si256(c2[2649],simde_mm256_xor_si256(c2[7505],simde_mm256_xor_si256(c2[13840],simde_mm256_xor_si256(c2[11994],simde_mm256_xor_si256(c2[11528],c2[8678])))))));
 
 //row: 10
-     d2[110]=_mm256_xor_si256(c2[7287],_mm256_xor_si256(c2[14087],_mm256_xor_si256(c2[13156],_mm256_xor_si256(c2[3549],_mm256_xor_si256(c2[8405],c2[5640])))));
+     d2[110]=simde_mm256_xor_si256(c2[7287],simde_mm256_xor_si256(c2[14087],simde_mm256_xor_si256(c2[13156],simde_mm256_xor_si256(c2[3549],simde_mm256_xor_si256(c2[8405],c2[5640])))));
 
 //row: 11
-     d2[121]=_mm256_xor_si256(c2[11624],_mm256_xor_si256(c2[2908],_mm256_xor_si256(c2[3392],_mm256_xor_si256(c2[15005],_mm256_xor_si256(c2[6784],_mm256_xor_si256(c2[9686],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[9198],_mm256_xor_si256(c2[977],_mm256_xor_si256(c2[12100],_mm256_xor_si256(c2[3897],_mm256_xor_si256(c2[10679],_mm256_xor_si256(c2[11163],_mm256_xor_si256(c2[10186],_mm256_xor_si256(c2[1965],_mm256_xor_si256(c2[7768],_mm256_xor_si256(c2[15034],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[12148],_mm256_xor_si256(c2[3432],_mm256_xor_si256(c2[3916],_mm256_xor_si256(c2[9246],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[3438],_mm256_xor_si256(c2[10693],_mm256_xor_si256(c2[13627],_mm256_xor_si256(c2[4911],_mm256_xor_si256(c2[5395],_mm256_xor_si256(c2[8780],_mm256_xor_si256(c2[559],_mm256_xor_si256(c2[1519],_mm256_xor_si256(c2[8785],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[9289],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[7352],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[11707],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[7371],_mm256_xor_si256(c2[7855],_mm256_xor_si256(c2[14637],_mm256_xor_si256(c2[6405],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[7863],_mm256_xor_si256(c2[11268],_mm256_xor_si256(c2[2552],_mm256_xor_si256(c2[3036],_mm256_xor_si256(c2[13207],_mm256_xor_si256(c2[4975],_mm256_xor_si256(c2[14171],_mm256_xor_si256(c2[5950],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[8391],_mm256_xor_si256(c2[8869],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[11774],_mm256_xor_si256(c2[3542],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[7440],_mm256_xor_si256(c2[15186],_mm256_xor_si256(c2[6954],_mm256_xor_si256(c2[6470],_mm256_xor_si256(c2[13736],_mm256_xor_si256(c2[2625],_mm256_xor_si256(c2[9396],_mm256_xor_si256(c2[9880],_mm256_xor_si256(c2[2135],_mm256_xor_si256(c2[9401],_mm256_xor_si256(c2[11822],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[6035],_mm256_xor_si256(c2[12806],_mm256_xor_si256(c2[13290],_mm256_xor_si256(c2[8451],_mm256_xor_si256(c2[230
],_mm256_xor_si256(c2[13774],_mm256_xor_si256(c2[5553],_mm256_xor_si256(c2[9448],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[1216],_mm256_xor_si256(c2[12346],_mm256_xor_si256(c2[4114],_mm256_xor_si256(c2[4121],_mm256_xor_si256(c2[11376],_mm256_xor_si256(c2[2694],_mm256_xor_si256(c2[9465],_mm256_xor_si256(c2[9949],_mm256_xor_si256(c2[7531],_mm256_xor_si256(c2[14786],_mm256_xor_si256(c2[5598],_mm256_xor_si256(c2[12853],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[13359],_mm256_xor_si256(c2[4643],_mm256_xor_si256(c2[5127],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[7555],_mm256_xor_si256(c2[15297],_mm256_xor_si256(c2[7065],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[7573],_mm256_xor_si256(c2[8060],_mm256_xor_si256(c2[15315],_mm256_xor_si256(c2[5637],_mm256_xor_si256(c2[12892],_mm256_xor_si256(c2[3719],_mm256_xor_si256(c2[10501],_mm256_xor_si256(c2[10985],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[9528],_mm256_xor_si256(c2[7593],_mm256_xor_si256(c2[14859],_mm256_xor_si256(c2[6654],_mm256_xor_si256(c2[13425],_mm256_xor_si256(c2[13909],_mm256_xor_si256(c2[9555],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[5683],_mm256_xor_si256(c2[12938],_mm256_xor_si256(c2[15366],_mm256_xor_si256(c2[11509],_mm256_xor_si256(c2[3288],_mm256_xor_si256(c2[10541],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[4736],_mm256_xor_si256(c2[11991],_mm256_xor_si256(c2[1373],_mm256_xor_si256(c2[8144],_mm256_xor_si256(c2[8628],_mm256_xor_si256(c2[10564],_mm256_xor_si256(c2[2332],_mm256_xor_si256(c2[884],_mm256_xor_si256(c2[8150],_mm256_xor_si256(c2[12522],_mm256_xor_si256(c2[3806],_mm256_xor_si256(c2[4290],_mm256_xor_si256(c2[13496],_mm256_xor_si256(c2[5264],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[9615],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[7707],_mm256_xor_si256(c2[8191],_mm256_xor_si256(c2[12550],_mm256_xor_si256(c2[4318],_mm256_xor_si256(c2[5767],_mm256_xor_si256(c2[13033],_mm256_xor_si256(c2[7243],_mm256_xor_si256(c2[14014],_mm256_xor_si256(c2[14498],_mm256_xor_si256(c2[471],_m
m256_xor_si256(c2[7726],_mm256_xor_si256(c2[5789],_mm256_xor_si256(c2[13055],c2[3371])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[121]=simde_mm256_xor_si256(c2[11624],simde_mm256_xor_si256(c2[2908],simde_mm256_xor_si256(c2[3392],simde_mm256_xor_si256(c2[15005],simde_mm256_xor_si256(c2[6784],simde_mm256_xor_si256(c2[9686],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[9198],simde_mm256_xor_si256(c2[977],simde_mm256_xor_si256(c2[12100],simde_mm256_xor_si256(c2[3897],simde_mm256_xor_si256(c2[10679],simde_mm256_xor_si256(c2[11163],simde_mm256_xor_si256(c2[10186],simde_mm256_xor_si256(c2[1965],simde_mm256_xor_si256(c2[7768],simde_mm256_xor_si256(c2[15034],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[12148],simde_mm256_xor_si256(c2[3432],simde_mm256_xor_si256(c2[3916],simde_mm256_xor_si256(c2[9246],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[3438],simde_mm256_xor_si256(c2[10693],simde_mm256_xor_si256(c2[13627],simde_mm256_xor_si256(c2[4911],simde_mm256_xor_si256(c2[5395],simde_mm256_xor_si256(c2[8780],simde_mm256_xor_si256(c2[559],simde_mm256_xor_si256(c2[1519],simde_mm256_xor_si256(c2[8785],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[9289],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[7352],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[11707],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[7371],simde_mm256_xor_si256(c2[7855],simde_mm256_xor_si256(c2[14637],simde_mm256_xor_si256(c2[6405],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[7863],simde_mm256_xor_si256(c2[11268],simde_mm256_xor_si256(c2[2552],simde_mm256_xor_si256(c2[3036],simde_mm256_xor_si256(c2[13207],simde_mm256_xor_si256(c2[4975],simde_mm256_xor_si256(c2[14171],simde_mm256_xor_si256(c2[5950],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[8391],simde_mm256_xor_si256(c2[8869],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[11774],simde_mm256_xor_si256(c2[3542],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[7440],simde_mm256_xor_si256(c2[15186],simde_mm256_xor_si256(c2[6954],simde_mm256_xor_si256(c2[6470],simde_mm256_xor_si256(c2
[13736],simde_mm256_xor_si256(c2[2625],simde_mm256_xor_si256(c2[9396],simde_mm256_xor_si256(c2[9880],simde_mm256_xor_si256(c2[2135],simde_mm256_xor_si256(c2[9401],simde_mm256_xor_si256(c2[11822],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[6035],simde_mm256_xor_si256(c2[12806],simde_mm256_xor_si256(c2[13290],simde_mm256_xor_si256(c2[8451],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[13774],simde_mm256_xor_si256(c2[5553],simde_mm256_xor_si256(c2[9448],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[1216],simde_mm256_xor_si256(c2[12346],simde_mm256_xor_si256(c2[4114],simde_mm256_xor_si256(c2[4121],simde_mm256_xor_si256(c2[11376],simde_mm256_xor_si256(c2[2694],simde_mm256_xor_si256(c2[9465],simde_mm256_xor_si256(c2[9949],simde_mm256_xor_si256(c2[7531],simde_mm256_xor_si256(c2[14786],simde_mm256_xor_si256(c2[5598],simde_mm256_xor_si256(c2[12853],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[13359],simde_mm256_xor_si256(c2[4643],simde_mm256_xor_si256(c2[5127],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[7555],simde_mm256_xor_si256(c2[15297],simde_mm256_xor_si256(c2[7065],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[7573],simde_mm256_xor_si256(c2[8060],simde_mm256_xor_si256(c2[15315],simde_mm256_xor_si256(c2[5637],simde_mm256_xor_si256(c2[12892],simde_mm256_xor_si256(c2[3719],simde_mm256_xor_si256(c2[10501],simde_mm256_xor_si256(c2[10985],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[9528],simde_mm256_xor_si256(c2[7593],simde_mm256_xor_si256(c2[14859],simde_mm256_xor_si256(c2[6654],simde_mm256_xor_si256(c2[13425],simde_mm256_xor_si256(c2[13909],simde_mm256_xor_si256(c2[9555],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[5683],simde_mm256_xor_si256(c2[12938],simde_mm256_xor_si256(c2[15366],simde_mm256_xor_si256(c2[11509],simde_mm256_xor_si256(c2[3288],simde_mm256_xor_si256(c2[10541],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[4736],simde_mm256_xor_si256(c2[11991],simde_mm256_xor_si256
(c2[1373],simde_mm256_xor_si256(c2[8144],simde_mm256_xor_si256(c2[8628],simde_mm256_xor_si256(c2[10564],simde_mm256_xor_si256(c2[2332],simde_mm256_xor_si256(c2[884],simde_mm256_xor_si256(c2[8150],simde_mm256_xor_si256(c2[12522],simde_mm256_xor_si256(c2[3806],simde_mm256_xor_si256(c2[4290],simde_mm256_xor_si256(c2[13496],simde_mm256_xor_si256(c2[5264],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[9615],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[7707],simde_mm256_xor_si256(c2[8191],simde_mm256_xor_si256(c2[12550],simde_mm256_xor_si256(c2[4318],simde_mm256_xor_si256(c2[5767],simde_mm256_xor_si256(c2[13033],simde_mm256_xor_si256(c2[7243],simde_mm256_xor_si256(c2[14014],simde_mm256_xor_si256(c2[14498],simde_mm256_xor_si256(c2[471],simde_mm256_xor_si256(c2[7726],simde_mm256_xor_si256(c2[5789],simde_mm256_xor_si256(c2[13055],c2[3371])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[132]=_mm256_xor_si256(c2[13071],_mm256_xor_si256(c2[12128],_mm256_xor_si256(c2[7000],_mm256_xor_si256(c2[7021],_mm256_xor_si256(c2[8035],c2[6206])))));
+     d2[132]=simde_mm256_xor_si256(c2[13071],simde_mm256_xor_si256(c2[12128],simde_mm256_xor_si256(c2[7000],simde_mm256_xor_si256(c2[7021],simde_mm256_xor_si256(c2[8035],c2[6206])))));
 
 //row: 13
-     d2[143]=_mm256_xor_si256(c2[11132],_mm256_xor_si256(c2[11616],_mm256_xor_si256(c2[15008],_mm256_xor_si256(c2[9689],_mm256_xor_si256(c2[9201],_mm256_xor_si256(c2[11141],_mm256_xor_si256(c2[3416],_mm256_xor_si256(c2[3900],_mm256_xor_si256(c2[10189],_mm256_xor_si256(c2[7771],_mm256_xor_si256(c2[11667],_mm256_xor_si256(c2[12151],_mm256_xor_si256(c2[9249],_mm256_xor_si256(c2[3441],_mm256_xor_si256(c2[13135],_mm256_xor_si256(c2[13619],_mm256_xor_si256(c2[8783],_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[13141],_mm256_xor_si256(c2[2026],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[4444],_mm256_xor_si256(c2[119],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[14640],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[10787],_mm256_xor_si256(c2[11271],_mm256_xor_si256(c2[13210],_mm256_xor_si256(c2[14174],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[8872],_mm256_xor_si256(c2[11777],_mm256_xor_si256(c2[4518],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[15189],_mm256_xor_si256(c2[6473],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[2628],_mm256_xor_si256(c2[2138],_mm256_xor_si256(c2[11814],_mm256_xor_si256(c2[5554],_mm256_xor_si256(c2[6038],_mm256_xor_si256(c2[8454],_mm256_xor_si256(c2[13777],_mm256_xor_si256(c2[8956],_mm256_xor_si256(c2[9440],_mm256_xor_si256(c2[12349],_mm256_xor_si256(c2[4124],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[2686],_mm256_xor_si256(c2[7534],_mm256_xor_si256(c2[5590],_mm256_xor_si256(c2[12878],_mm256_xor_si256(c2[13362],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[15300],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[8052],_mm256_xor_si256(c2[5640],_mm256_xor_si256(c2[3238],_mm256_xor_si256(c2[3722],_mm256_xor_si256(c2[2276],_mm256_xor_si256(c2[7596],_mm256_xor_si256(c2[6162],_mm256_xor_si256(c2[6646],_mm256_xor_si256(c2[9558],_mm256_xor_si256(c2[5686],_mm256_xor_si256(c2[11512],_mm256_xor_si256(c2[10544],_mm256_xor_si256(c2[4739],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[1365],_mm256_xor_si256(c2[10567],_mm256_xor_si256(c2[887],_mm256_xor_si256(c2[
12041],_mm256_xor_si256(c2[12525],_mm256_xor_si256(c2[13488],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[12542],_mm256_xor_si256(c2[5770],_mm256_xor_si256(c2[14962],_mm256_xor_si256(c2[6762],_mm256_xor_si256(c2[7246],_mm256_xor_si256(c2[463],c2[5792])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[143]=simde_mm256_xor_si256(c2[11132],simde_mm256_xor_si256(c2[11616],simde_mm256_xor_si256(c2[15008],simde_mm256_xor_si256(c2[9689],simde_mm256_xor_si256(c2[9201],simde_mm256_xor_si256(c2[11141],simde_mm256_xor_si256(c2[3416],simde_mm256_xor_si256(c2[3900],simde_mm256_xor_si256(c2[10189],simde_mm256_xor_si256(c2[7771],simde_mm256_xor_si256(c2[11667],simde_mm256_xor_si256(c2[12151],simde_mm256_xor_si256(c2[9249],simde_mm256_xor_si256(c2[3441],simde_mm256_xor_si256(c2[13135],simde_mm256_xor_si256(c2[13619],simde_mm256_xor_si256(c2[8783],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[13141],simde_mm256_xor_si256(c2[2026],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[4444],simde_mm256_xor_si256(c2[119],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[14640],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[10787],simde_mm256_xor_si256(c2[11271],simde_mm256_xor_si256(c2[13210],simde_mm256_xor_si256(c2[14174],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[8872],simde_mm256_xor_si256(c2[11777],simde_mm256_xor_si256(c2[4518],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[15189],simde_mm256_xor_si256(c2[6473],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[2628],simde_mm256_xor_si256(c2[2138],simde_mm256_xor_si256(c2[11814],simde_mm256_xor_si256(c2[5554],simde_mm256_xor_si256(c2[6038],simde_mm256_xor_si256(c2[8454],simde_mm256_xor_si256(c2[13777],simde_mm256_xor_si256(c2[8956],simde_mm256_xor_si256(c2[9440],simde_mm256_xor_si256(c2[12349],simde_mm256_xor_si256(c2[4124],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[2686],simde_mm256_xor_si256(c2[7534],simde_mm256_xor_si256(c2[5590],simde_mm256_xor_si256(c2[12878],simde_mm256_xor_si256(c2[13362],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[15300],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[8052],simde_mm256_xor_si256(c2[5640],simde_mm256_xor_si256(c2[3238],simde_mm256_xor_si256(c2[3722],simde_mm256_xor_si256(c2[2276],simde_mm256_xor_si
256(c2[7596],simde_mm256_xor_si256(c2[6162],simde_mm256_xor_si256(c2[6646],simde_mm256_xor_si256(c2[9558],simde_mm256_xor_si256(c2[5686],simde_mm256_xor_si256(c2[11512],simde_mm256_xor_si256(c2[10544],simde_mm256_xor_si256(c2[4739],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[1365],simde_mm256_xor_si256(c2[10567],simde_mm256_xor_si256(c2[887],simde_mm256_xor_si256(c2[12041],simde_mm256_xor_si256(c2[12525],simde_mm256_xor_si256(c2[13488],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[12542],simde_mm256_xor_si256(c2[5770],simde_mm256_xor_si256(c2[14962],simde_mm256_xor_si256(c2[6762],simde_mm256_xor_si256(c2[7246],simde_mm256_xor_si256(c2[463],c2[5792])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[154]=_mm256_xor_si256(c2[10648],_mm256_xor_si256(c2[1242],_mm256_xor_si256(c2[10502],_mm256_xor_si256(c2[13908],_mm256_xor_si256(c2[1344],c2[7240])))));
+     d2[154]=simde_mm256_xor_si256(c2[10648],simde_mm256_xor_si256(c2[1242],simde_mm256_xor_si256(c2[10502],simde_mm256_xor_si256(c2[13908],simde_mm256_xor_si256(c2[1344],c2[7240])))));
 
 //row: 15
-     d2[165]=_mm256_xor_si256(c2[11617],_mm256_xor_si256(c2[15009],_mm256_xor_si256(c2[9690],_mm256_xor_si256(c2[8718],_mm256_xor_si256(c2[9202],_mm256_xor_si256(c2[7749],_mm256_xor_si256(c2[3901],_mm256_xor_si256(c2[10190],_mm256_xor_si256(c2[7288],_mm256_xor_si256(c2[7772],_mm256_xor_si256(c2[13584],_mm256_xor_si256(c2[12152],_mm256_xor_si256(c2[9250],_mm256_xor_si256(c2[3442],_mm256_xor_si256(c2[13620],_mm256_xor_si256(c2[8784],_mm256_xor_si256(c2[1039],_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[2027],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[3961],_mm256_xor_si256(c2[4445],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[14630],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[11272],_mm256_xor_si256(c2[13200],_mm256_xor_si256(c2[13691],_mm256_xor_si256(c2[14175],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[8873],_mm256_xor_si256(c2[11294],_mm256_xor_si256(c2[11778],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[15190],_mm256_xor_si256(c2[5990],_mm256_xor_si256(c2[6474],_mm256_xor_si256(c2[2618],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[11815],_mm256_xor_si256(c2[6028],_mm256_xor_si256(c2[8455],_mm256_xor_si256(c2[13294],_mm256_xor_si256(c2[13778],_mm256_xor_si256(c2[7480],_mm256_xor_si256(c2[9441],_mm256_xor_si256(c2[12350],_mm256_xor_si256(c2[3630],_mm256_xor_si256(c2[4114],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[7524],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[5591],_mm256_xor_si256(c2[13363],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[14806],_mm256_xor_si256(c2[15290],_mm256_xor_si256(c2[8516],_mm256_xor_si256(c2[311],_mm256_xor_si256(c2[8053],_mm256_xor_si256(c2[5157],_mm256_xor_si256(c2[5641],_mm256_xor_si256(c2[3723],_mm256_xor_si256(c2[2266],_mm256_xor_si256(c2[7597],_mm256_xor_si256(c2[6647],_mm256_xor_si256(c2[9548],_mm256_xor_si256(c2[5192],_mm256_xor_si256(c2[5676],_mm256_xor_si256(c2[11513],_mm256_xor_si256(c2[10545],_mm256_xor_si256(c2[4256],_mm256_xor_si256(c2[4740],_mm256_xor_si256(c2[1366],_mm256_xor_si256(c2[10568],_mm256_xor_si256(c2
[404],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[8145],_mm256_xor_si256(c2[12526],_mm256_xor_si256(c2[13489],_mm256_xor_si256(c2[2364],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[12543],_mm256_xor_si256(c2[5287],_mm256_xor_si256(c2[5771],_mm256_xor_si256(c2[7247],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[5309],c2[5793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[165]=simde_mm256_xor_si256(c2[11617],simde_mm256_xor_si256(c2[15009],simde_mm256_xor_si256(c2[9690],simde_mm256_xor_si256(c2[8718],simde_mm256_xor_si256(c2[9202],simde_mm256_xor_si256(c2[7749],simde_mm256_xor_si256(c2[3901],simde_mm256_xor_si256(c2[10190],simde_mm256_xor_si256(c2[7288],simde_mm256_xor_si256(c2[7772],simde_mm256_xor_si256(c2[13584],simde_mm256_xor_si256(c2[12152],simde_mm256_xor_si256(c2[9250],simde_mm256_xor_si256(c2[3442],simde_mm256_xor_si256(c2[13620],simde_mm256_xor_si256(c2[8784],simde_mm256_xor_si256(c2[1039],simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[2027],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[3961],simde_mm256_xor_si256(c2[4445],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[14630],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[11272],simde_mm256_xor_si256(c2[13200],simde_mm256_xor_si256(c2[13691],simde_mm256_xor_si256(c2[14175],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[8873],simde_mm256_xor_si256(c2[11294],simde_mm256_xor_si256(c2[11778],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[15190],simde_mm256_xor_si256(c2[5990],simde_mm256_xor_si256(c2[6474],simde_mm256_xor_si256(c2[2618],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[11815],simde_mm256_xor_si256(c2[6028],simde_mm256_xor_si256(c2[8455],simde_mm256_xor_si256(c2[13294],simde_mm256_xor_si256(c2[13778],simde_mm256_xor_si256(c2[7480],simde_mm256_xor_si256(c2[9441],simde_mm256_xor_si256(c2[12350],simde_mm256_xor_si256(c2[3630],simde_mm256_xor_si256(c2[4114],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[7524],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[5591],simde_mm256_xor_si256(c2[13363],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[14806],simde_mm256_xor_si256(c2[15290],simde_mm256_xor_si256(c2[8516],simde_mm256_xor_si256(c2[311],simde_mm256_xor_si256(c2[8053],simde_mm256_xor_si256(c2[5157],simde_mm256_xor_si256(c2[5641],simde_mm256_xor_si256(c2[3723],simde_mm256_xor_si2
56(c2[2266],simde_mm256_xor_si256(c2[7597],simde_mm256_xor_si256(c2[6647],simde_mm256_xor_si256(c2[9548],simde_mm256_xor_si256(c2[5192],simde_mm256_xor_si256(c2[5676],simde_mm256_xor_si256(c2[11513],simde_mm256_xor_si256(c2[10545],simde_mm256_xor_si256(c2[4256],simde_mm256_xor_si256(c2[4740],simde_mm256_xor_si256(c2[1366],simde_mm256_xor_si256(c2[10568],simde_mm256_xor_si256(c2[404],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[8145],simde_mm256_xor_si256(c2[12526],simde_mm256_xor_si256(c2[13489],simde_mm256_xor_si256(c2[2364],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[12543],simde_mm256_xor_si256(c2[5287],simde_mm256_xor_si256(c2[5771],simde_mm256_xor_si256(c2[7247],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[5309],c2[5793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[176]=_mm256_xor_si256(c2[11617],_mm256_xor_si256(c2[15009],_mm256_xor_si256(c2[9690],_mm256_xor_si256(c2[9202],_mm256_xor_si256(c2[3901],_mm256_xor_si256(c2[10190],_mm256_xor_si256(c2[7772],_mm256_xor_si256(c2[14547],_mm256_xor_si256(c2[12152],_mm256_xor_si256(c2[9250],_mm256_xor_si256(c2[3442],_mm256_xor_si256(c2[13620],_mm256_xor_si256(c2[8784],_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[2495],_mm256_xor_si256(c2[2027],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[4445],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[14630],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[11272],_mm256_xor_si256(c2[13200],_mm256_xor_si256(c2[14175],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[8873],_mm256_xor_si256(c2[11778],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[15190],_mm256_xor_si256(c2[6474],_mm256_xor_si256(c2[2618],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[11815],_mm256_xor_si256(c2[6028],_mm256_xor_si256(c2[8455],_mm256_xor_si256(c2[13778],_mm256_xor_si256(c2[9441],_mm256_xor_si256(c2[12350],_mm256_xor_si256(c2[4114],_mm256_xor_si256(c2[6060],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[7524],_mm256_xor_si256(c2[5591],_mm256_xor_si256(c2[13363],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[15290],_mm256_xor_si256(c2[311],_mm256_xor_si256(c2[8053],_mm256_xor_si256(c2[5641],_mm256_xor_si256(c2[3723],_mm256_xor_si256(c2[2266],_mm256_xor_si256(c2[7597],_mm256_xor_si256(c2[6647],_mm256_xor_si256(c2[9548],_mm256_xor_si256(c2[5676],_mm256_xor_si256(c2[11513],_mm256_xor_si256(c2[10545],_mm256_xor_si256(c2[4740],_mm256_xor_si256(c2[1366],_mm256_xor_si256(c2[10568],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[12526],_mm256_xor_si256(c2[13489],_mm256_xor_si256(c2[2364],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[12543],_mm256_xor_si256(c2[5771],_mm256_xor_si256(c2[5774],_mm256_xor_si256(c2[7247],_mm256_xor_si256(c2[464],c2[5793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[176]=simde_mm256_xor_si256(c2[11617],simde_mm256_xor_si256(c2[15009],simde_mm256_xor_si256(c2[9690],simde_mm256_xor_si256(c2[9202],simde_mm256_xor_si256(c2[3901],simde_mm256_xor_si256(c2[10190],simde_mm256_xor_si256(c2[7772],simde_mm256_xor_si256(c2[14547],simde_mm256_xor_si256(c2[12152],simde_mm256_xor_si256(c2[9250],simde_mm256_xor_si256(c2[3442],simde_mm256_xor_si256(c2[13620],simde_mm256_xor_si256(c2[8784],simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[2495],simde_mm256_xor_si256(c2[2027],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[4445],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[14630],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[11272],simde_mm256_xor_si256(c2[13200],simde_mm256_xor_si256(c2[14175],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[8873],simde_mm256_xor_si256(c2[11778],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[15190],simde_mm256_xor_si256(c2[6474],simde_mm256_xor_si256(c2[2618],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[11815],simde_mm256_xor_si256(c2[6028],simde_mm256_xor_si256(c2[8455],simde_mm256_xor_si256(c2[13778],simde_mm256_xor_si256(c2[9441],simde_mm256_xor_si256(c2[12350],simde_mm256_xor_si256(c2[4114],simde_mm256_xor_si256(c2[6060],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[7524],simde_mm256_xor_si256(c2[5591],simde_mm256_xor_si256(c2[13363],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[15290],simde_mm256_xor_si256(c2[311],simde_mm256_xor_si256(c2[8053],simde_mm256_xor_si256(c2[5641],simde_mm256_xor_si256(c2[3723],simde_mm256_xor_si256(c2[2266],simde_mm256_xor_si256(c2[7597],simde_mm256_xor_si256(c2[6647],simde_mm256_xor_si256(c2[9548],simde_mm256_xor_si256(c2[5676],simde_mm256_xor_si256(c2[11513],simde_mm256_xor_si256(c2[10545],simde_mm256_xor_si256(c2[4740],simde_mm256_xor_si256(c2[1366],simde_mm256_xor_si256(c2[10568],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[12526],simde_mm256_xor_si256(c2[13489],simde_mm256_xor_si2
56(c2[2364],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[12543],simde_mm256_xor_si256(c2[5771],simde_mm256_xor_si256(c2[5774],simde_mm256_xor_si256(c2[7247],simde_mm256_xor_si256(c2[464],c2[5793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[187]=_mm256_xor_si256(c2[6779],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[11491],_mm256_xor_si256(c2[10057],c2[14987]))));
+     d2[187]=simde_mm256_xor_si256(c2[6779],simde_mm256_xor_si256(c2[2251],simde_mm256_xor_si256(c2[11491],simde_mm256_xor_si256(c2[10057],c2[14987]))));
 
 //row: 18
-     d2[198]=_mm256_xor_si256(c2[7283],_mm256_xor_si256(c2[14792],_mm256_xor_si256(c2[10941],_mm256_xor_si256(c2[10565],c2[4776]))));
+     d2[198]=simde_mm256_xor_si256(c2[7283],simde_mm256_xor_si256(c2[14792],simde_mm256_xor_si256(c2[10941],simde_mm256_xor_si256(c2[10565],c2[4776]))));
 
 //row: 19
-     d2[209]=_mm256_xor_si256(c2[11134],_mm256_xor_si256(c2[6801],_mm256_xor_si256(c2[9354],_mm256_xor_si256(c2[1152],c2[6034]))));
+     d2[209]=simde_mm256_xor_si256(c2[11134],simde_mm256_xor_si256(c2[6801],simde_mm256_xor_si256(c2[9354],simde_mm256_xor_si256(c2[1152],c2[6034]))));
 
 //row: 20
-     d2[220]=_mm256_xor_si256(c2[1946],_mm256_xor_si256(c2[5327],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[15007],_mm256_xor_si256(c2[12102],_mm256_xor_si256(c2[9706],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[13577],_mm256_xor_si256(c2[2470],_mm256_xor_si256(c2[15055],_mm256_xor_si256(c2[9247],_mm256_xor_si256(c2[3938],_mm256_xor_si256(c2[14589],_mm256_xor_si256(c2[7328],_mm256_xor_si256(c2[552],_mm256_xor_si256(c2[7832],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[10261],_mm256_xor_si256(c2[6409],_mm256_xor_si256(c2[4959],_mm256_xor_si256(c2[6406],_mm256_xor_si256(c2[1590],_mm256_xor_si256(c2[3529],_mm256_xor_si256(c2[4493],_mm256_xor_si256(c2[6934],_mm256_xor_si256(c2[14678],_mm256_xor_si256(c2[2096],_mm256_xor_si256(c2[5994],_mm256_xor_si256(c2[5508],_mm256_xor_si256(c2[12279],_mm256_xor_si256(c2[8434],_mm256_xor_si256(c2[7944],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[13270],_mm256_xor_si256(c2[11844],_mm256_xor_si256(c2[14260],_mm256_xor_si256(c2[4096],_mm256_xor_si256(c2[15246],_mm256_xor_si256(c2[2668],_mm256_xor_si256(c2[9930],_mm256_xor_si256(c2[9929],_mm256_xor_si256(c2[8492],_mm256_xor_si256(c2[13340],_mm256_xor_si256(c2[11396],_mm256_xor_si256(c2[3681],_mm256_xor_si256(c2[6098],_mm256_xor_si256(c2[5619],_mm256_xor_si256(c2[6116],_mm256_xor_si256(c2[13869],_mm256_xor_si256(c2[11446],_mm256_xor_si256(c2[9528],_mm256_xor_si256(c2[8082],_mm256_xor_si256(c2[13402],_mm256_xor_si256(c2[12452],_mm256_xor_si256(c2[15364],_mm256_xor_si256(c2[11492],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[863],_mm256_xor_si256(c2[10545],_mm256_xor_si256(c2[7182],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[6693],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[3807],_mm256_xor_si256(c2[8169],_mm256_xor_si256(c2[6734],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[11576],_mm256_xor_si256(c2[13052],_mm256_xor_si256(c2[6280],c2[11598]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[220]=simde_mm256_xor_si256(c2[1946],simde_mm256_xor_si256(c2[5327],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[15007],simde_mm256_xor_si256(c2[12102],simde_mm256_xor_si256(c2[9706],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[13577],simde_mm256_xor_si256(c2[2470],simde_mm256_xor_si256(c2[15055],simde_mm256_xor_si256(c2[9247],simde_mm256_xor_si256(c2[3938],simde_mm256_xor_si256(c2[14589],simde_mm256_xor_si256(c2[7328],simde_mm256_xor_si256(c2[552],simde_mm256_xor_si256(c2[7832],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[10261],simde_mm256_xor_si256(c2[6409],simde_mm256_xor_si256(c2[4959],simde_mm256_xor_si256(c2[6406],simde_mm256_xor_si256(c2[1590],simde_mm256_xor_si256(c2[3529],simde_mm256_xor_si256(c2[4493],simde_mm256_xor_si256(c2[6934],simde_mm256_xor_si256(c2[14678],simde_mm256_xor_si256(c2[2096],simde_mm256_xor_si256(c2[5994],simde_mm256_xor_si256(c2[5508],simde_mm256_xor_si256(c2[12279],simde_mm256_xor_si256(c2[8434],simde_mm256_xor_si256(c2[7944],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[13270],simde_mm256_xor_si256(c2[11844],simde_mm256_xor_si256(c2[14260],simde_mm256_xor_si256(c2[4096],simde_mm256_xor_si256(c2[15246],simde_mm256_xor_si256(c2[2668],simde_mm256_xor_si256(c2[9930],simde_mm256_xor_si256(c2[9929],simde_mm256_xor_si256(c2[8492],simde_mm256_xor_si256(c2[13340],simde_mm256_xor_si256(c2[11396],simde_mm256_xor_si256(c2[3681],simde_mm256_xor_si256(c2[6098],simde_mm256_xor_si256(c2[5619],simde_mm256_xor_si256(c2[6116],simde_mm256_xor_si256(c2[13869],simde_mm256_xor_si256(c2[11446],simde_mm256_xor_si256(c2[9528],simde_mm256_xor_si256(c2[8082],simde_mm256_xor_si256(c2[13402],simde_mm256_xor_si256(c2[12452],simde_mm256_xor_si256(c2[15364],simde_mm256_xor_si256(c2[11492],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[863],simde_mm256_xor_si256(c2[10545],simde_mm256_xor_si256(c2[7182],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[6693],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si2
56(c2[3807],simde_mm256_xor_si256(c2[8169],simde_mm256_xor_si256(c2[6734],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[11576],simde_mm256_xor_si256(c2[13052],simde_mm256_xor_si256(c2[6280],c2[11598]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[231]=_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[3990],_mm256_xor_si256(c2[12946],_mm256_xor_si256(c2[7704],c2[6757]))));
+     d2[231]=simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[3990],simde_mm256_xor_si256(c2[12946],simde_mm256_xor_si256(c2[7704],c2[6757]))));
 
 //row: 22
-     d2[242]=_mm256_xor_si256(c2[11624],_mm256_xor_si256(c2[14304],_mm256_xor_si256(c2[6101],c2[8605])));
+     d2[242]=simde_mm256_xor_si256(c2[11624],simde_mm256_xor_si256(c2[14304],simde_mm256_xor_si256(c2[6101],c2[8605])));
 
 //row: 23
-     d2[253]=_mm256_xor_si256(c2[8734],_mm256_xor_si256(c2[2948],_mm256_xor_si256(c2[10389],c2[8149])));
+     d2[253]=simde_mm256_xor_si256(c2[8734],simde_mm256_xor_si256(c2[2948],simde_mm256_xor_si256(c2[10389],c2[8149])));
 
 //row: 24
-     d2[264]=_mm256_xor_si256(c2[3876],_mm256_xor_si256(c2[7268],_mm256_xor_si256(c2[1938],_mm256_xor_si256(c2[1461],_mm256_xor_si256(c2[2905],_mm256_xor_si256(c2[11647],_mm256_xor_si256(c2[2449],_mm256_xor_si256(c2[31],_mm256_xor_si256(c2[4400],_mm256_xor_si256(c2[1498],_mm256_xor_si256(c2[11177],_mm256_xor_si256(c2[5879],_mm256_xor_si256(c2[1043],_mm256_xor_si256(c2[9269],_mm256_xor_si256(c2[4911],_mm256_xor_si256(c2[9773],_mm256_xor_si256(c2[7836],_mm256_xor_si256(c2[12191],_mm256_xor_si256(c2[12195],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[6889],_mm256_xor_si256(c2[8347],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[6434],_mm256_xor_si256(c2[8875],_mm256_xor_si256(c2[1132],_mm256_xor_si256(c2[4026],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[7438],_mm256_xor_si256(c2[14220],_mm256_xor_si256(c2[10364],_mm256_xor_si256(c2[9885],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[13774],_mm256_xor_si256(c2[714],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[4598],_mm256_xor_si256(c2[11860],_mm256_xor_si256(c2[251],_mm256_xor_si256(c2[10433],_mm256_xor_si256(c2[15270],_mm256_xor_si256(c2[13337],_mm256_xor_si256(c2[5611],_mm256_xor_si256(c2[8039],_mm256_xor_si256(c2[7549],_mm256_xor_si256(c2[8057],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[13376],_mm256_xor_si256(c2[11469],_mm256_xor_si256(c2[10012],_mm256_xor_si256(c2[15343],_mm256_xor_si256(c2[14393],_mm256_xor_si256(c2[1807],_mm256_xor_si256(c2[13422],_mm256_xor_si256(c2[3772],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[12475],_mm256_xor_si256(c2[9112],_mm256_xor_si256(c2[2816],_mm256_xor_si256(c2[8634],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[5748],_mm256_xor_si256(c2[10099],_mm256_xor_si256(c2[8675],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[13517],_mm256_xor_si256(c2[14982],_mm256_xor_si256(c2[8210],c2[13539]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[264]=simde_mm256_xor_si256(c2[3876],simde_mm256_xor_si256(c2[7268],simde_mm256_xor_si256(c2[1938],simde_mm256_xor_si256(c2[1461],simde_mm256_xor_si256(c2[2905],simde_mm256_xor_si256(c2[11647],simde_mm256_xor_si256(c2[2449],simde_mm256_xor_si256(c2[31],simde_mm256_xor_si256(c2[4400],simde_mm256_xor_si256(c2[1498],simde_mm256_xor_si256(c2[11177],simde_mm256_xor_si256(c2[5879],simde_mm256_xor_si256(c2[1043],simde_mm256_xor_si256(c2[9269],simde_mm256_xor_si256(c2[4911],simde_mm256_xor_si256(c2[9773],simde_mm256_xor_si256(c2[7836],simde_mm256_xor_si256(c2[12191],simde_mm256_xor_si256(c2[12195],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[6889],simde_mm256_xor_si256(c2[8347],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[6434],simde_mm256_xor_si256(c2[8875],simde_mm256_xor_si256(c2[1132],simde_mm256_xor_si256(c2[4026],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[7438],simde_mm256_xor_si256(c2[14220],simde_mm256_xor_si256(c2[10364],simde_mm256_xor_si256(c2[9885],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[13774],simde_mm256_xor_si256(c2[714],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[4598],simde_mm256_xor_si256(c2[11860],simde_mm256_xor_si256(c2[251],simde_mm256_xor_si256(c2[10433],simde_mm256_xor_si256(c2[15270],simde_mm256_xor_si256(c2[13337],simde_mm256_xor_si256(c2[5611],simde_mm256_xor_si256(c2[8039],simde_mm256_xor_si256(c2[7549],simde_mm256_xor_si256(c2[8057],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[13376],simde_mm256_xor_si256(c2[11469],simde_mm256_xor_si256(c2[10012],simde_mm256_xor_si256(c2[15343],simde_mm256_xor_si256(c2[14393],simde_mm256_xor_si256(c2[1807],simde_mm256_xor_si256(c2[13422],simde_mm256_xor_si256(c2[3772],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[12475],simde_mm256_xor_si256(c2[9112],simde_mm256_xor_si256(c2[2816],simde_mm256_xor_si256(c2[8634],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si25
6(c2[5748],simde_mm256_xor_si256(c2[10099],simde_mm256_xor_si256(c2[8675],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[13517],simde_mm256_xor_si256(c2[14982],simde_mm256_xor_si256(c2[8210],c2[13539]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[275]=_mm256_xor_si256(c2[11162],_mm256_xor_si256(c2[15143],_mm256_xor_si256(c2[7417],c2[10957])));
+     d2[275]=simde_mm256_xor_si256(c2[11162],simde_mm256_xor_si256(c2[15143],simde_mm256_xor_si256(c2[7417],c2[10957])));
 
 //row: 26
-     d2[286]=_mm256_xor_si256(c2[2430],_mm256_xor_si256(c2[2958],_mm256_xor_si256(c2[1063],c2[1785])));
+     d2[286]=simde_mm256_xor_si256(c2[2430],simde_mm256_xor_si256(c2[2958],simde_mm256_xor_si256(c2[1063],c2[1785])));
 
 //row: 27
-     d2[297]=_mm256_xor_si256(c2[13092],_mm256_xor_si256(c2[3046],c2[5992]));
+     d2[297]=simde_mm256_xor_si256(c2[13092],simde_mm256_xor_si256(c2[3046],c2[5992]));
 
 //row: 28
-     d2[308]=_mm256_xor_si256(c2[2907],_mm256_xor_si256(c2[572],_mm256_xor_si256(c2[4291],c2[3855])));
+     d2[308]=simde_mm256_xor_si256(c2[2907],simde_mm256_xor_si256(c2[572],simde_mm256_xor_si256(c2[4291],c2[3855])));
 
 //row: 29
-     d2[319]=_mm256_xor_si256(c2[8713],_mm256_xor_si256(c2[12105],_mm256_xor_si256(c2[6786],_mm256_xor_si256(c2[5814],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[7286],_mm256_xor_si256(c2[4384],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[8258],_mm256_xor_si256(c2[9248],_mm256_xor_si256(c2[6346],_mm256_xor_si256(c2[538],_mm256_xor_si256(c2[10716],_mm256_xor_si256(c2[5880],_mm256_xor_si256(c2[13622],_mm256_xor_si256(c2[14106],_mm256_xor_si256(c2[14610],_mm256_xor_si256(c2[12673],_mm256_xor_si256(c2[1057],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[13187],_mm256_xor_si256(c2[11726],_mm256_xor_si256(c2[13184],_mm256_xor_si256(c2[8368],_mm256_xor_si256(c2[10296],_mm256_xor_si256(c2[10787],_mm256_xor_si256(c2[11271],_mm256_xor_si256(c2[13712],_mm256_xor_si256(c2[5969],_mm256_xor_si256(c2[8390],_mm256_xor_si256(c2[8874],_mm256_xor_si256(c2[12761],_mm256_xor_si256(c2[12286],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[3570],_mm256_xor_si256(c2[15212],_mm256_xor_si256(c2[14722],_mm256_xor_si256(c2[8911],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[5551],_mm256_xor_si256(c2[10390],_mm256_xor_si256(c2[10874],_mm256_xor_si256(c2[6537],_mm256_xor_si256(c2[9446],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[15270],_mm256_xor_si256(c2[4620],_mm256_xor_si256(c2[2203],_mm256_xor_si256(c2[2687],_mm256_xor_si256(c2[10459],_mm256_xor_si256(c2[12876],_mm256_xor_si256(c2[11902],_mm256_xor_si256(c2[12386],_mm256_xor_si256(c2[12894],_mm256_xor_si256(c2[5149],_mm256_xor_si256(c2[2253],_mm256_xor_si256(c2[2737],_mm256_xor_si256(c2[4183],_mm256_xor_si256(c2[819],_mm256_xor_si256(c2[14860],_mm256_xor_si256(c2[4693],_mm256_xor_si256(c2[3743],_mm256_xor_si256(c2[6644],_mm256_xor_si256(c2[2288],_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[8609],_mm256_xor_si256(c2[7641],_mm256_xor_si256(c2[1352],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[13949],_mm256_xor_si256(c2[7664],_mm256_xor_si256(c2[12987],_mm256_xor_si256(c2[13471],_mm256_xor_
si256(c2[9599],_mm256_xor_si256(c2[9622],_mm256_xor_si256(c2[10585],_mm256_xor_si256(c2[14947],_mm256_xor_si256(c2[13512],_mm256_xor_si256(c2[9639],_mm256_xor_si256(c2[2383],_mm256_xor_si256(c2[2867],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[13047],_mm256_xor_si256(c2[2405],c2[2889]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[319]=simde_mm256_xor_si256(c2[8713],simde_mm256_xor_si256(c2[12105],simde_mm256_xor_si256(c2[6786],simde_mm256_xor_si256(c2[5814],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[7286],simde_mm256_xor_si256(c2[4384],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[8258],simde_mm256_xor_si256(c2[9248],simde_mm256_xor_si256(c2[6346],simde_mm256_xor_si256(c2[538],simde_mm256_xor_si256(c2[10716],simde_mm256_xor_si256(c2[5880],simde_mm256_xor_si256(c2[13622],simde_mm256_xor_si256(c2[14106],simde_mm256_xor_si256(c2[14610],simde_mm256_xor_si256(c2[12673],simde_mm256_xor_si256(c2[1057],simde_mm256_xor_si256(c2[1541],simde_mm256_xor_si256(c2[13187],simde_mm256_xor_si256(c2[11726],simde_mm256_xor_si256(c2[13184],simde_mm256_xor_si256(c2[8368],simde_mm256_xor_si256(c2[10296],simde_mm256_xor_si256(c2[10787],simde_mm256_xor_si256(c2[11271],simde_mm256_xor_si256(c2[13712],simde_mm256_xor_si256(c2[5969],simde_mm256_xor_si256(c2[8390],simde_mm256_xor_si256(c2[8874],simde_mm256_xor_si256(c2[12761],simde_mm256_xor_si256(c2[12286],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[3570],simde_mm256_xor_si256(c2[15212],simde_mm256_xor_si256(c2[14722],simde_mm256_xor_si256(c2[8911],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[5551],simde_mm256_xor_si256(c2[10390],simde_mm256_xor_si256(c2[10874],simde_mm256_xor_si256(c2[6537],simde_mm256_xor_si256(c2[9446],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[15270],simde_mm256_xor_si256(c2[4620],simde_mm256_xor_si256(c2[2203],simde_mm256_xor_si256(c2[2687],simde_mm256_xor_si256(c2[10459],simde_mm256_xor_si256(c2[12876],simde_mm256_xor_si256(c2[11902],simde_mm256_xor_si256(c2[12386],simde_mm256_xor_si256(c2[12894],simde_mm256_xor_si256(c2[5149],simde_mm256_xor_si256(c2[2253],simde_mm256_xor_si256(c2[2737],simde_mm256_xor_si256(c2[4183],simde_mm256_xor_si256(c2[819],simde_mm256_xor_si256(c2[14860],simde_mm256_xor_si256(c2[4693],simde_mm256
_xor_si256(c2[3743],simde_mm256_xor_si256(c2[6644],simde_mm256_xor_si256(c2[2288],simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[8609],simde_mm256_xor_si256(c2[7641],simde_mm256_xor_si256(c2[1352],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[13949],simde_mm256_xor_si256(c2[7664],simde_mm256_xor_si256(c2[12987],simde_mm256_xor_si256(c2[13471],simde_mm256_xor_si256(c2[9599],simde_mm256_xor_si256(c2[9622],simde_mm256_xor_si256(c2[10585],simde_mm256_xor_si256(c2[14947],simde_mm256_xor_si256(c2[13512],simde_mm256_xor_si256(c2[9639],simde_mm256_xor_si256(c2[2383],simde_mm256_xor_si256(c2[2867],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[13047],simde_mm256_xor_si256(c2[2405],c2[2889]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[330]=_mm256_xor_si256(c2[13072],_mm256_xor_si256(c2[977],_mm256_xor_si256(c2[10650],_mm256_xor_si256(c2[11134],_mm256_xor_si256(c2[10173],_mm256_xor_si256(c2[10657],_mm256_xor_si256(c2[5329],_mm256_xor_si256(c2[5356],_mm256_xor_si256(c2[11161],_mm256_xor_si256(c2[11645],_mm256_xor_si256(c2[8743],_mm256_xor_si256(c2[9227],_mm256_xor_si256(c2[13596],_mm256_xor_si256(c2[10694],_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[4886],_mm256_xor_si256(c2[15075],_mm256_xor_si256(c2[10239],_mm256_xor_si256(c2[2494],_mm256_xor_si256(c2[2978],_mm256_xor_si256(c2[3482],_mm256_xor_si256(c2[1061],_mm256_xor_si256(c2[1545],_mm256_xor_si256(c2[5416],_mm256_xor_si256(c2[5900],_mm256_xor_si256(c2[2048],_mm256_xor_si256(c2[598],_mm256_xor_si256(c2[1572],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[12716],_mm256_xor_si256(c2[14171],_mm256_xor_si256(c2[14655],_mm256_xor_si256(c2[15146],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[2584],_mm256_xor_si256(c2[9844],_mm256_xor_si256(c2[10328],_mm256_xor_si256(c2[12738],_mm256_xor_si256(c2[13222],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[663],_mm256_xor_si256(c2[1147],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[7929],_mm256_xor_si256(c2[4073],_mm256_xor_si256(c2[3594],_mm256_xor_si256(c2[12786],_mm256_xor_si256(c2[13270],_mm256_xor_si256(c2[7483],_mm256_xor_si256(c2[9426],_mm256_xor_si256(c2[9910],_mm256_xor_si256(c2[14749],_mm256_xor_si256(c2[15233],_mm256_xor_si256(c2[7964],_mm256_xor_si256(c2[10896],_mm256_xor_si256(c2[13794],_mm256_xor_si256(c2[5085],_mm256_xor_si256(c2[5569],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[8979],_mm256_xor_si256(c2[6562],_mm256_xor_si256(c2[7046],_mm256_xor_si256(c2[14807],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[1748],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[1258],_mm256_xor_si256(c2[15292],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[9024],_mm256_xor_si256(c2[9508],_mm256_xor_si256(c2[6601],_mm256_xor_si256(c2[7085],_mm256_xor_si256(c2[5178],_mm256_xor_si256(c2[3721],_mm256_xor_si25
6(c2[8568],_mm256_xor_si256(c2[9052],_mm256_xor_si256(c2[8102],_mm256_xor_si256(c2[11003],_mm256_xor_si256(c2[6647],_mm256_xor_si256(c2[7131],_mm256_xor_si256(c2[12968],_mm256_xor_si256(c2[11516],_mm256_xor_si256(c2[12000],_mm256_xor_si256(c2[5700],_mm256_xor_si256(c2[6184],_mm256_xor_si256(c2[2821],_mm256_xor_si256(c2[11528],_mm256_xor_si256(c2[12012],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[2332],_mm256_xor_si256(c2[13970],_mm256_xor_si256(c2[14944],_mm256_xor_si256(c2[3324],_mm256_xor_si256(c2[3808],_mm256_xor_si256(c2[2384],_mm256_xor_si256(c2[13514],_mm256_xor_si256(c2[13998],_mm256_xor_si256(c2[6742],_mm256_xor_si256(c2[7226],_mm256_xor_si256(c2[8691],_mm256_xor_si256(c2[1919],_mm256_xor_si256(c2[6764],c2[7248])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[330]=simde_mm256_xor_si256(c2[13072],simde_mm256_xor_si256(c2[977],simde_mm256_xor_si256(c2[10650],simde_mm256_xor_si256(c2[11134],simde_mm256_xor_si256(c2[10173],simde_mm256_xor_si256(c2[10657],simde_mm256_xor_si256(c2[5329],simde_mm256_xor_si256(c2[5356],simde_mm256_xor_si256(c2[11161],simde_mm256_xor_si256(c2[11645],simde_mm256_xor_si256(c2[8743],simde_mm256_xor_si256(c2[9227],simde_mm256_xor_si256(c2[13596],simde_mm256_xor_si256(c2[10694],simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[4886],simde_mm256_xor_si256(c2[15075],simde_mm256_xor_si256(c2[10239],simde_mm256_xor_si256(c2[2494],simde_mm256_xor_si256(c2[2978],simde_mm256_xor_si256(c2[3482],simde_mm256_xor_si256(c2[1061],simde_mm256_xor_si256(c2[1545],simde_mm256_xor_si256(c2[5416],simde_mm256_xor_si256(c2[5900],simde_mm256_xor_si256(c2[2048],simde_mm256_xor_si256(c2[598],simde_mm256_xor_si256(c2[1572],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[12716],simde_mm256_xor_si256(c2[14171],simde_mm256_xor_si256(c2[14655],simde_mm256_xor_si256(c2[15146],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[2584],simde_mm256_xor_si256(c2[9844],simde_mm256_xor_si256(c2[10328],simde_mm256_xor_si256(c2[12738],simde_mm256_xor_si256(c2[13222],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[663],simde_mm256_xor_si256(c2[1147],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[7929],simde_mm256_xor_si256(c2[4073],simde_mm256_xor_si256(c2[3594],simde_mm256_xor_si256(c2[12786],simde_mm256_xor_si256(c2[13270],simde_mm256_xor_si256(c2[7483],simde_mm256_xor_si256(c2[9426],simde_mm256_xor_si256(c2[9910],simde_mm256_xor_si256(c2[14749],simde_mm256_xor_si256(c2[15233],simde_mm256_xor_si256(c2[7964],simde_mm256_xor_si256(c2[10896],simde_mm256_xor_si256(c2[13794],simde_mm256_xor_si256(c2[5085],simde_mm256_xor_si256(c2[5569],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[8979],simde_mm256_xor_si256(c2[6562],simde_mm256_xor_si256(c2[7046],simde_mm256_xor_si256(c2[14807],simde_mm256_
xor_si256(c2[1264],simde_mm256_xor_si256(c2[1748],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[1258],simde_mm256_xor_si256(c2[15292],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[9024],simde_mm256_xor_si256(c2[9508],simde_mm256_xor_si256(c2[6601],simde_mm256_xor_si256(c2[7085],simde_mm256_xor_si256(c2[5178],simde_mm256_xor_si256(c2[3721],simde_mm256_xor_si256(c2[8568],simde_mm256_xor_si256(c2[9052],simde_mm256_xor_si256(c2[8102],simde_mm256_xor_si256(c2[11003],simde_mm256_xor_si256(c2[6647],simde_mm256_xor_si256(c2[7131],simde_mm256_xor_si256(c2[12968],simde_mm256_xor_si256(c2[11516],simde_mm256_xor_si256(c2[12000],simde_mm256_xor_si256(c2[5700],simde_mm256_xor_si256(c2[6184],simde_mm256_xor_si256(c2[2821],simde_mm256_xor_si256(c2[11528],simde_mm256_xor_si256(c2[12012],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[2332],simde_mm256_xor_si256(c2[13970],simde_mm256_xor_si256(c2[14944],simde_mm256_xor_si256(c2[3324],simde_mm256_xor_si256(c2[3808],simde_mm256_xor_si256(c2[2384],simde_mm256_xor_si256(c2[13514],simde_mm256_xor_si256(c2[13998],simde_mm256_xor_si256(c2[6742],simde_mm256_xor_si256(c2[7226],simde_mm256_xor_si256(c2[8691],simde_mm256_xor_si256(c2[1919],simde_mm256_xor_si256(c2[6764],c2[7248])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[341]=_mm256_xor_si256(c2[14042],_mm256_xor_si256(c2[2425],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[5817],_mm256_xor_si256(c2[12104],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[11616],_mm256_xor_si256(c2[15013],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[6315],_mm256_xor_si256(c2[10196],_mm256_xor_si256(c2[12615],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[10186],_mm256_xor_si256(c2[13583],_mm256_xor_si256(c2[14067],_mm256_xor_si256(c2[2445],_mm256_xor_si256(c2[14566],_mm256_xor_si256(c2[2949],_mm256_xor_si256(c2[11664],_mm256_xor_si256(c2[47],_mm256_xor_si256(c2[5856],_mm256_xor_si256(c2[9726],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[4428],_mm256_xor_si256(c2[11198],_mm256_xor_si256(c2[15079],_mm256_xor_si256(c2[3948],_mm256_xor_si256(c2[7334],_mm256_xor_si256(c2[7818],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[8322],_mm256_xor_si256(c2[2515],_mm256_xor_si256(c2[6385],_mm256_xor_si256(c2[6870],_mm256_xor_si256(c2[10256],_mm256_xor_si256(c2[10740],_mm256_xor_si256(c2[3018],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[1568],_mm256_xor_si256(c2[5438],_mm256_xor_si256(c2[3015],_mm256_xor_si256(c2[6896],_mm256_xor_si256(c2[13686],_mm256_xor_si256(c2[2069],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[4008],_mm256_xor_si256(c2[1102],_mm256_xor_si256(c2[4488],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[7424],_mm256_xor_si256(c2[11287],_mm256_xor_si256(c2[15168],_mm256_xor_si256(c2[14192],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[2575],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[6473],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[5987],_mm256_xor_si256(c2[8888],_mm256_xor_si256(c2[12285],_mm256_xor_si256(c2[12769],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[8913],_mm256_xor_si256(c2[4564],_mm256_xor_si256(c2[8434],_mm256_xor_si256(c2[14240],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[8453],_mm256_xor_si256(c2[12323],_mm256_xor_si256(c2[10869],_mm256_xor_si256(c2[14750],_mm256_xor_si256(c2
[705],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[4586],_mm256_xor_si256(c2[11866],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[14764],_mm256_xor_si256(c2[3147],_mm256_xor_si256(c2[6539],_mm256_xor_si256(c2[9925],_mm256_xor_si256(c2[10409],_mm256_xor_si256(c2[5112],_mm256_xor_si256(c2[8982],_mm256_xor_si256(c2[9949],_mm256_xor_si256(c2[13819],_mm256_xor_si256(c2[8016],_mm256_xor_si256(c2[11402],_mm256_xor_si256(c2[11886],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[2707],_mm256_xor_si256(c2[6588],_mm256_xor_si256(c2[2228],_mm256_xor_si256(c2[5614],_mm256_xor_si256(c2[6098],_mm256_xor_si256(c2[2736],_mm256_xor_si256(c2[6606],_mm256_xor_si256(c2[10478],_mm256_xor_si256(c2[14348],_mm256_xor_si256(c2[8055],_mm256_xor_si256(c2[11441],_mm256_xor_si256(c2[11925],_mm256_xor_si256(c2[6148],_mm256_xor_si256(c2[10018],_mm256_xor_si256(c2[4691],_mm256_xor_si256(c2[8561],_mm256_xor_si256(c2[10011],_mm256_xor_si256(c2[13892],_mm256_xor_si256(c2[9072],_mm256_xor_si256(c2[12942],_mm256_xor_si256(c2[11973],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[8101],_mm256_xor_si256(c2[11487],_mm256_xor_si256(c2[11971],_mm256_xor_si256(c2[13927],_mm256_xor_si256(c2[2310],_mm256_xor_si256(c2[12959],_mm256_xor_si256(c2[1342],_mm256_xor_si256(c2[7154],_mm256_xor_si256(c2[10540],_mm256_xor_si256(c2[11024],_mm256_xor_si256(c2[3791],_mm256_xor_si256(c2[7661],_mm256_xor_si256(c2[12982],_mm256_xor_si256(c2[1365],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[6688],_mm256_xor_si256(c2[7172],_mm256_xor_si256(c2[14940],_mm256_xor_si256(c2[3323],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[4297],_mm256_xor_si256(c2[4778],_mm256_xor_si256(c2[8648],_mm256_xor_si256(c2[3354],_mm256_xor_si256(c2[7224],_mm256_xor_si256(c2[14968],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[8185],_mm256_xor_si256(c2[11582],_mm256_xor_si256(c2[12066],_mm256_xor_si256(c2[9661],_mm256_xor_si256(c2[13531],_mm256_xor_si256(c2[2889],_mm256_xor_si256(c2[6759],_mm256_xor_si256(c2[8207],_mm256_xor_si256(c2[1
1604],c2[12088]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[341]=simde_mm256_xor_si256(c2[14042],simde_mm256_xor_si256(c2[2425],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[5817],simde_mm256_xor_si256(c2[12104],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[11616],simde_mm256_xor_si256(c2[15013],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[6315],simde_mm256_xor_si256(c2[10196],simde_mm256_xor_si256(c2[12615],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[10186],simde_mm256_xor_si256(c2[13583],simde_mm256_xor_si256(c2[14067],simde_mm256_xor_si256(c2[2445],simde_mm256_xor_si256(c2[14566],simde_mm256_xor_si256(c2[2949],simde_mm256_xor_si256(c2[11664],simde_mm256_xor_si256(c2[47],simde_mm256_xor_si256(c2[5856],simde_mm256_xor_si256(c2[9726],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[4428],simde_mm256_xor_si256(c2[11198],simde_mm256_xor_si256(c2[15079],simde_mm256_xor_si256(c2[3948],simde_mm256_xor_si256(c2[7334],simde_mm256_xor_si256(c2[7818],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[8322],simde_mm256_xor_si256(c2[2515],simde_mm256_xor_si256(c2[6385],simde_mm256_xor_si256(c2[6870],simde_mm256_xor_si256(c2[10256],simde_mm256_xor_si256(c2[10740],simde_mm256_xor_si256(c2[3018],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[1568],simde_mm256_xor_si256(c2[5438],simde_mm256_xor_si256(c2[3015],simde_mm256_xor_si256(c2[6896],simde_mm256_xor_si256(c2[13686],simde_mm256_xor_si256(c2[2069],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[4008],simde_mm256_xor_si256(c2[1102],simde_mm256_xor_si256(c2[4488],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[7424],simde_mm256_xor_si256(c2[11287],simde_mm256_xor_si256(c2[15168],simde_mm256_xor_si256(c2[14192],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[2575],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[6473],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[5987],simde_mm256_xor_si256(c2[8888],simde_mm256_xor_si256(
c2[12285],simde_mm256_xor_si256(c2[12769],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[8913],simde_mm256_xor_si256(c2[4564],simde_mm256_xor_si256(c2[8434],simde_mm256_xor_si256(c2[14240],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[8453],simde_mm256_xor_si256(c2[12323],simde_mm256_xor_si256(c2[10869],simde_mm256_xor_si256(c2[14750],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[4586],simde_mm256_xor_si256(c2[11866],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[14764],simde_mm256_xor_si256(c2[3147],simde_mm256_xor_si256(c2[6539],simde_mm256_xor_si256(c2[9925],simde_mm256_xor_si256(c2[10409],simde_mm256_xor_si256(c2[5112],simde_mm256_xor_si256(c2[8982],simde_mm256_xor_si256(c2[9949],simde_mm256_xor_si256(c2[13819],simde_mm256_xor_si256(c2[8016],simde_mm256_xor_si256(c2[11402],simde_mm256_xor_si256(c2[11886],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[2707],simde_mm256_xor_si256(c2[6588],simde_mm256_xor_si256(c2[2228],simde_mm256_xor_si256(c2[5614],simde_mm256_xor_si256(c2[6098],simde_mm256_xor_si256(c2[2736],simde_mm256_xor_si256(c2[6606],simde_mm256_xor_si256(c2[10478],simde_mm256_xor_si256(c2[14348],simde_mm256_xor_si256(c2[8055],simde_mm256_xor_si256(c2[11441],simde_mm256_xor_si256(c2[11925],simde_mm256_xor_si256(c2[6148],simde_mm256_xor_si256(c2[10018],simde_mm256_xor_si256(c2[4691],simde_mm256_xor_si256(c2[8561],simde_mm256_xor_si256(c2[10011],simde_mm256_xor_si256(c2[13892],simde_mm256_xor_si256(c2[9072],simde_mm256_xor_si256(c2[12942],simde_mm256_xor_si256(c2[11973],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[8101],simde_mm256_xor_si256(c2[11487],simde_mm256_xor_si256(c2[11971],simde_mm256_xor_si256(c2[13927],simde_mm256_xor_si256(c2[2310],simde_mm256_xor_si256(c2[12959],simde_mm256_xor_si256(c2[1342],simde_mm256_xor_si256(c2[7154],simde_mm256_xor_si256(c2[10540],simde_mm256_xor_si256(c2[11024],simde_mm256_xor_si256(c2[3791],simde_mm256_xor
_si256(c2[7661],simde_mm256_xor_si256(c2[12982],simde_mm256_xor_si256(c2[1365],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[6688],simde_mm256_xor_si256(c2[7172],simde_mm256_xor_si256(c2[14940],simde_mm256_xor_si256(c2[3323],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[4297],simde_mm256_xor_si256(c2[4778],simde_mm256_xor_si256(c2[8648],simde_mm256_xor_si256(c2[3354],simde_mm256_xor_si256(c2[7224],simde_mm256_xor_si256(c2[14968],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[8185],simde_mm256_xor_si256(c2[11582],simde_mm256_xor_si256(c2[12066],simde_mm256_xor_si256(c2[9661],simde_mm256_xor_si256(c2[13531],simde_mm256_xor_si256(c2[2889],simde_mm256_xor_si256(c2[6759],simde_mm256_xor_si256(c2[8207],simde_mm256_xor_si256(c2[11604],c2[12088]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[352]=_mm256_xor_si256(c2[12584],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[10173],_mm256_xor_si256(c2[10657],_mm256_xor_si256(c2[9685],_mm256_xor_si256(c2[10169],_mm256_xor_si256(c2[15014],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[10673],_mm256_xor_si256(c2[11157],_mm256_xor_si256(c2[8255],_mm256_xor_si256(c2[8739],_mm256_xor_si256(c2[13119],_mm256_xor_si256(c2[10217],_mm256_xor_si256(c2[3925],_mm256_xor_si256(c2[4409],_mm256_xor_si256(c2[14587],_mm256_xor_si256(c2[9751],_mm256_xor_si256(c2[2006],_mm256_xor_si256(c2[2490],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[573],_mm256_xor_si256(c2[1057],_mm256_xor_si256(c2[4928],_mm256_xor_si256(c2[5412],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[1568],_mm256_xor_si256(c2[12239],_mm256_xor_si256(c2[13694],_mm256_xor_si256(c2[14178],_mm256_xor_si256(c2[14658],_mm256_xor_si256(c2[15142],_mm256_xor_si256(c2[2096],_mm256_xor_si256(c2[9356],_mm256_xor_si256(c2[9840],_mm256_xor_si256(c2[12261],_mm256_xor_si256(c2[12745],_mm256_xor_si256(c2[1145],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[670],_mm256_xor_si256(c2[6957],_mm256_xor_si256(c2[7441],_mm256_xor_si256(c2[3596],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[12298],_mm256_xor_si256(c2[12782],_mm256_xor_si256(c2[7006],_mm256_xor_si256(c2[8938],_mm256_xor_si256(c2[9422],_mm256_xor_si256(c2[14261],_mm256_xor_si256(c2[14745],_mm256_xor_si256(c2[10408],_mm256_xor_si256(c2[13317],_mm256_xor_si256(c2[4608],_mm256_xor_si256(c2[5092],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[8502],_mm256_xor_si256(c2[6074],_mm256_xor_si256(c2[6558],_mm256_xor_si256(c2[4628],_mm256_xor_si256(c2[14330],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[286],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[1278],_mm256_xor_si256(c2[8536],_mm256_xor_si256(c2[9020],_mm256_xor_si256(c2[6124],_mm256_xor_si256(c2[6608],_mm256_xor_si256(c2[9030],_mm256_xor_si256(c2[4690],_mm256_xor_si256(c2[3244],_mm256_xor_si256(c2[
8080],_mm256_xor_si256(c2[8564],_mm256_xor_si256(c2[7614],_mm256_xor_si256(c2[10526],_mm256_xor_si256(c2[6170],_mm256_xor_si256(c2[6654],_mm256_xor_si256(c2[12480],_mm256_xor_si256(c2[11028],_mm256_xor_si256(c2[11512],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[5707],_mm256_xor_si256(c2[2333],_mm256_xor_si256(c2[11051],_mm256_xor_si256(c2[11535],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[1855],_mm256_xor_si256(c2[13493],_mm256_xor_si256(c2[14456],_mm256_xor_si256(c2[2847],_mm256_xor_si256(c2[3331],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[13026],_mm256_xor_si256(c2[13510],_mm256_xor_si256(c2[6254],_mm256_xor_si256(c2[6738],_mm256_xor_si256(c2[8214],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[6276],c2[6760])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[352]=simde_mm256_xor_si256(c2[12584],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[10173],simde_mm256_xor_si256(c2[10657],simde_mm256_xor_si256(c2[9685],simde_mm256_xor_si256(c2[10169],simde_mm256_xor_si256(c2[15014],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[10673],simde_mm256_xor_si256(c2[11157],simde_mm256_xor_si256(c2[8255],simde_mm256_xor_si256(c2[8739],simde_mm256_xor_si256(c2[13119],simde_mm256_xor_si256(c2[10217],simde_mm256_xor_si256(c2[3925],simde_mm256_xor_si256(c2[4409],simde_mm256_xor_si256(c2[14587],simde_mm256_xor_si256(c2[9751],simde_mm256_xor_si256(c2[2006],simde_mm256_xor_si256(c2[2490],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[573],simde_mm256_xor_si256(c2[1057],simde_mm256_xor_si256(c2[4928],simde_mm256_xor_si256(c2[5412],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[1568],simde_mm256_xor_si256(c2[12239],simde_mm256_xor_si256(c2[13694],simde_mm256_xor_si256(c2[14178],simde_mm256_xor_si256(c2[14658],simde_mm256_xor_si256(c2[15142],simde_mm256_xor_si256(c2[2096],simde_mm256_xor_si256(c2[9356],simde_mm256_xor_si256(c2[9840],simde_mm256_xor_si256(c2[12261],simde_mm256_xor_si256(c2[12745],simde_mm256_xor_si256(c2[1145],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[670],simde_mm256_xor_si256(c2[6957],simde_mm256_xor_si256(c2[7441],simde_mm256_xor_si256(c2[3596],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[12298],simde_mm256_xor_si256(c2[12782],simde_mm256_xor_si256(c2[7006],simde_mm256_xor_si256(c2[8938],simde_mm256_xor_si256(c2[9422],simde_mm256_xor_si256(c2[14261],simde_mm256_xor_si256(c2[14745],simde_mm256_xor_si256(c2[10408],simde_mm256_xor_si256(c2[13317],simde_mm256_xor_si256(c2[4608],simde_mm256_xor_si256(c2[5092],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[8502],simde_mm256_xor_si256(c2[6074],simde_mm256_xor_si256(c2[6558],simde_mm256_xor_si256(c2[4628],simde_mm256_xor_si256(c2[14330],simde_mm256_xo
r_si256(c2[776],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[286],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[1278],simde_mm256_xor_si256(c2[8536],simde_mm256_xor_si256(c2[9020],simde_mm256_xor_si256(c2[6124],simde_mm256_xor_si256(c2[6608],simde_mm256_xor_si256(c2[9030],simde_mm256_xor_si256(c2[4690],simde_mm256_xor_si256(c2[3244],simde_mm256_xor_si256(c2[8080],simde_mm256_xor_si256(c2[8564],simde_mm256_xor_si256(c2[7614],simde_mm256_xor_si256(c2[10526],simde_mm256_xor_si256(c2[6170],simde_mm256_xor_si256(c2[6654],simde_mm256_xor_si256(c2[12480],simde_mm256_xor_si256(c2[11028],simde_mm256_xor_si256(c2[11512],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[5707],simde_mm256_xor_si256(c2[2333],simde_mm256_xor_si256(c2[11051],simde_mm256_xor_si256(c2[11535],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[1855],simde_mm256_xor_si256(c2[13493],simde_mm256_xor_si256(c2[14456],simde_mm256_xor_si256(c2[2847],simde_mm256_xor_si256(c2[3331],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[13026],simde_mm256_xor_si256(c2[13510],simde_mm256_xor_si256(c2[6254],simde_mm256_xor_si256(c2[6738],simde_mm256_xor_si256(c2[8214],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[6276],c2[6760])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[363]=_mm256_xor_si256(c2[11639],_mm256_xor_si256(c2[7797],_mm256_xor_si256(c2[6538],c2[2885])));
+     d2[363]=simde_mm256_xor_si256(c2[11639],simde_mm256_xor_si256(c2[7797],simde_mm256_xor_si256(c2[6538],c2[2885])));
 
 //row: 34
-     d2[374]=_mm256_xor_si256(c2[13553],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[8077],c2[11023])));
+     d2[374]=simde_mm256_xor_si256(c2[13553],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[8077],c2[11023])));
 
 //row: 35
-     d2[385]=_mm256_xor_si256(c2[8718],_mm256_xor_si256(c2[12110],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[6292],_mm256_xor_si256(c2[991],_mm256_xor_si256(c2[7291],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[1961],_mm256_xor_si256(c2[9242],_mm256_xor_si256(c2[6340],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[10721],_mm256_xor_si256(c2[5874],_mm256_xor_si256(c2[14111],_mm256_xor_si256(c2[14615],_mm256_xor_si256(c2[12678],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[13181],_mm256_xor_si256(c2[11731],_mm256_xor_si256(c2[13178],_mm256_xor_si256(c2[8362],_mm256_xor_si256(c2[10301],_mm256_xor_si256(c2[11265],_mm256_xor_si256(c2[8850],_mm256_xor_si256(c2[13706],_mm256_xor_si256(c2[5963],_mm256_xor_si256(c2[8868],_mm256_xor_si256(c2[12766],_mm256_xor_si256(c2[12280],_mm256_xor_si256(c2[3564],_mm256_xor_si256(c2[15206],_mm256_xor_si256(c2[14727],_mm256_xor_si256(c2[8916],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[5545],_mm256_xor_si256(c2[10868],_mm256_xor_si256(c2[6542],_mm256_xor_si256(c2[9440],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[15275],_mm256_xor_si256(c2[4625],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[10453],_mm256_xor_si256(c2[12870],_mm256_xor_si256(c2[12391],_mm256_xor_si256(c2[12899],_mm256_xor_si256(c2[5154],_mm256_xor_si256(c2[2731],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[14854],_mm256_xor_si256(c2[4687],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[6649],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[8603],_mm256_xor_si256(c2[7635],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[13954],_mm256_xor_si256(c2[7658],_mm256_xor_si256(c2[13465],_mm256_xor_si256(c2[9616],_mm256_xor_si256(c2[10590],_mm256_xor_si256(c2[14941],_mm256_xor_si256(c2[13517],_mm256_xor_si256(c2[9644],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[4337],_mm256_xor_si256(c2[13052],c2[2883])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[385]=simde_mm256_xor_si256(c2[8718],simde_mm256_xor_si256(c2[12110],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[6292],simde_mm256_xor_si256(c2[991],simde_mm256_xor_si256(c2[7291],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[1961],simde_mm256_xor_si256(c2[9242],simde_mm256_xor_si256(c2[6340],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[10721],simde_mm256_xor_si256(c2[5874],simde_mm256_xor_si256(c2[14111],simde_mm256_xor_si256(c2[14615],simde_mm256_xor_si256(c2[12678],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[13181],simde_mm256_xor_si256(c2[11731],simde_mm256_xor_si256(c2[13178],simde_mm256_xor_si256(c2[8362],simde_mm256_xor_si256(c2[10301],simde_mm256_xor_si256(c2[11265],simde_mm256_xor_si256(c2[8850],simde_mm256_xor_si256(c2[13706],simde_mm256_xor_si256(c2[5963],simde_mm256_xor_si256(c2[8868],simde_mm256_xor_si256(c2[12766],simde_mm256_xor_si256(c2[12280],simde_mm256_xor_si256(c2[3564],simde_mm256_xor_si256(c2[15206],simde_mm256_xor_si256(c2[14727],simde_mm256_xor_si256(c2[8916],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[5545],simde_mm256_xor_si256(c2[10868],simde_mm256_xor_si256(c2[6542],simde_mm256_xor_si256(c2[9440],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[15275],simde_mm256_xor_si256(c2[4625],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[10453],simde_mm256_xor_si256(c2[12870],simde_mm256_xor_si256(c2[12391],simde_mm256_xor_si256(c2[12899],simde_mm256_xor_si256(c2[5154],simde_mm256_xor_si256(c2[2731],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[14854],simde_mm256_xor_si256(c2[4687],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[6649],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[8603],simde_mm256_xor_si256(c2[7635],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[13954],simde_mm256_xor_si256(c2[7658],simde_mm256_xor_si256(c2[13465],simde_mm256_xor_si256(c2[9616],simde_mm256_xor_si256(c2[10590],simde_mm256
_xor_si256(c2[14941],simde_mm256_xor_si256(c2[13517],simde_mm256_xor_si256(c2[9644],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[4337],simde_mm256_xor_si256(c2[13052],c2[2883])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[396]=_mm256_xor_si256(c2[3392],_mm256_xor_si256(c2[7568],_mm256_xor_si256(c2[1783],c2[13953])));
+     d2[396]=simde_mm256_xor_si256(c2[3392],simde_mm256_xor_si256(c2[7568],simde_mm256_xor_si256(c2[1783],c2[13953])));
 
 //row: 37
-     d2[407]=_mm256_xor_si256(c2[13068],_mm256_xor_si256(c2[13552],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[11625],_mm256_xor_si256(c2[11137],_mm256_xor_si256(c2[5352],_mm256_xor_si256(c2[5836],_mm256_xor_si256(c2[12125],_mm256_xor_si256(c2[9707],_mm256_xor_si256(c2[15035],_mm256_xor_si256(c2[13603],_mm256_xor_si256(c2[14087],_mm256_xor_si256(c2[11185],_mm256_xor_si256(c2[5377],_mm256_xor_si256(c2[15071],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[10719],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[3962],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[6380],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[2539],_mm256_xor_si256(c2[1078],_mm256_xor_si256(c2[2536],_mm256_xor_si256(c2[12723],_mm256_xor_si256(c2[13207],_mm256_xor_si256(c2[15146],_mm256_xor_si256(c2[623],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[10808],_mm256_xor_si256(c2[13713],_mm256_xor_si256(c2[2113],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[8409],_mm256_xor_si256(c2[4080],_mm256_xor_si256(c2[4564],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[13750],_mm256_xor_si256(c2[7490],_mm256_xor_si256(c2[7974],_mm256_xor_si256(c2[10390],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[10892],_mm256_xor_si256(c2[11376],_mm256_xor_si256(c2[14285],_mm256_xor_si256(c2[6060],_mm256_xor_si256(c2[4138],_mm256_xor_si256(c2[4622],_mm256_xor_si256(c2[9470],_mm256_xor_si256(c2[7526],_mm256_xor_si256(c2[14814],_mm256_xor_si256(c2[15298],_mm256_xor_si256(c2[2228],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[6101],_mm256_xor_si256(c2[2246],_mm256_xor_si256(c2[9988],_mm256_xor_si256(c2[7576],_mm256_xor_si256(c2[5174],_mm256_xor_si256(c2[5658],_mm256_xor_si256(c2[4212],_mm256_xor_si256(c2[9532],_mm256_xor_si256(c2[8098],_mm256_xor_si256(c2[8582],_mm256_xor_si256(c2[11494],_mm256_xor_si256(c2[7622],_mm256_xor_si256(c2[13448],_mm256_xor_si256(c2[12480],_mm256_xor_si256(c2[6675],_mm256_xor_si256(c2[2817],_mm256_xor_si256(c2[3301],_mm256_xor_si256(c2[12503],_mm256_xor_si256(c2[2823],_mm256_xor_si256(c2[13977],_mm256_xor_s
i256(c2[14461],_mm256_xor_si256(c2[15424],_mm256_xor_si256(c2[4299],_mm256_xor_si256(c2[2380],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[14478],_mm256_xor_si256(c2[7706],_mm256_xor_si256(c2[8698],_mm256_xor_si256(c2[9182],_mm256_xor_si256(c2[2399],c2[7728])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[407]=simde_mm256_xor_si256(c2[13068],simde_mm256_xor_si256(c2[13552],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[11625],simde_mm256_xor_si256(c2[11137],simde_mm256_xor_si256(c2[5352],simde_mm256_xor_si256(c2[5836],simde_mm256_xor_si256(c2[12125],simde_mm256_xor_si256(c2[9707],simde_mm256_xor_si256(c2[15035],simde_mm256_xor_si256(c2[13603],simde_mm256_xor_si256(c2[14087],simde_mm256_xor_si256(c2[11185],simde_mm256_xor_si256(c2[5377],simde_mm256_xor_si256(c2[15071],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[10719],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[3962],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[6380],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[2539],simde_mm256_xor_si256(c2[1078],simde_mm256_xor_si256(c2[2536],simde_mm256_xor_si256(c2[12723],simde_mm256_xor_si256(c2[13207],simde_mm256_xor_si256(c2[15146],simde_mm256_xor_si256(c2[623],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[10808],simde_mm256_xor_si256(c2[13713],simde_mm256_xor_si256(c2[2113],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[8409],simde_mm256_xor_si256(c2[4080],simde_mm256_xor_si256(c2[4564],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[13750],simde_mm256_xor_si256(c2[7490],simde_mm256_xor_si256(c2[7974],simde_mm256_xor_si256(c2[10390],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[10892],simde_mm256_xor_si256(c2[11376],simde_mm256_xor_si256(c2[14285],simde_mm256_xor_si256(c2[6060],simde_mm256_xor_si256(c2[4138],simde_mm256_xor_si256(c2[4622],simde_mm256_xor_si256(c2[9470],simde_mm256_xor_si256(c2[7526],simde_mm256_xor_si256(c2[14814],simde_mm256_xor_si256(c2[15298],simde_mm256_xor_si256(c2[2228],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[6101],simde_mm256_xor_si256(c2[2246],simde_mm256_xor_si256(c2[9988],simde_mm256_xor_si256(c2[7576],simde_mm256_xor_si256(c2[5174],simde_mm256_xor_si256(c2[5658],simde_mm256_xor_si256(c2[4212],simde_mm256_xor_si256(c2[9532],simde_mm256_xo
r_si256(c2[8098],simde_mm256_xor_si256(c2[8582],simde_mm256_xor_si256(c2[11494],simde_mm256_xor_si256(c2[7622],simde_mm256_xor_si256(c2[13448],simde_mm256_xor_si256(c2[12480],simde_mm256_xor_si256(c2[6675],simde_mm256_xor_si256(c2[2817],simde_mm256_xor_si256(c2[3301],simde_mm256_xor_si256(c2[12503],simde_mm256_xor_si256(c2[2823],simde_mm256_xor_si256(c2[13977],simde_mm256_xor_si256(c2[14461],simde_mm256_xor_si256(c2[15424],simde_mm256_xor_si256(c2[4299],simde_mm256_xor_si256(c2[2380],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[14478],simde_mm256_xor_si256(c2[7706],simde_mm256_xor_si256(c2[8698],simde_mm256_xor_si256(c2[9182],simde_mm256_xor_si256(c2[2399],c2[7728])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[418]=_mm256_xor_si256(c2[1941],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[8454],c2[10919])));
+     d2[418]=simde_mm256_xor_si256(c2[1941],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[8454],c2[10919])));
 
 //row: 39
-     d2[429]=_mm256_xor_si256(c2[5837],_mm256_xor_si256(c2[3946],_mm256_xor_si256(c2[2575],c2[8170])));
+     d2[429]=simde_mm256_xor_si256(c2[5837],simde_mm256_xor_si256(c2[3946],simde_mm256_xor_si256(c2[2575],c2[8170])));
 
 //row: 40
-     d2[440]=_mm256_xor_si256(c2[7753],_mm256_xor_si256(c2[6475],c2[3766]));
+     d2[440]=simde_mm256_xor_si256(c2[7753],simde_mm256_xor_si256(c2[6475],c2[3766]));
 
 //row: 41
-     d2[451]=_mm256_xor_si256(c2[13093],_mm256_xor_si256(c2[6360],_mm256_xor_si256(c2[12298],c2[8148])));
+     d2[451]=simde_mm256_xor_si256(c2[13093],simde_mm256_xor_si256(c2[6360],simde_mm256_xor_si256(c2[12298],c2[8148])));
 
 //row: 42
-     d2[462]=_mm256_xor_si256(c2[11625],_mm256_xor_si256(c2[15006],_mm256_xor_si256(c2[9203],_mm256_xor_si256(c2[9687],_mm256_xor_si256(c2[8715],_mm256_xor_si256(c2[9199],_mm256_xor_si256(c2[9],_mm256_xor_si256(c2[3898],_mm256_xor_si256(c2[9703],_mm256_xor_si256(c2[10187],_mm256_xor_si256(c2[7285],_mm256_xor_si256(c2[7769],_mm256_xor_si256(c2[12149],_mm256_xor_si256(c2[9247],_mm256_xor_si256(c2[2955],_mm256_xor_si256(c2[3439],_mm256_xor_si256(c2[13628],_mm256_xor_si256(c2[8781],_mm256_xor_si256(c2[1036],_mm256_xor_si256(c2[1520],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[15101],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[3969],_mm256_xor_si256(c2[4453],_mm256_xor_si256(c2[9286],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[14638],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[598],_mm256_xor_si256(c2[11269],_mm256_xor_si256(c2[12724],_mm256_xor_si256(c2[13208],_mm256_xor_si256(c2[13688],_mm256_xor_si256(c2[14172],_mm256_xor_si256(c2[1126],_mm256_xor_si256(c2[8386],_mm256_xor_si256(c2[8870],_mm256_xor_si256(c2[11291],_mm256_xor_si256(c2[11775],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[14703],_mm256_xor_si256(c2[15187],_mm256_xor_si256(c2[5987],_mm256_xor_si256(c2[6471],_mm256_xor_si256(c2[2626],_mm256_xor_si256(c2[2136],_mm256_xor_si256(c2[11339],_mm256_xor_si256(c2[11823],_mm256_xor_si256(c2[6036],_mm256_xor_si256(c2[7968],_mm256_xor_si256(c2[8452],_mm256_xor_si256(c2[13291],_mm256_xor_si256(c2[13775],_mm256_xor_si256(c2[9438],_mm256_xor_si256(c2[12347],_mm256_xor_si256(c2[3638],_mm256_xor_si256(c2[4122],_mm256_xor_si256(c2[2684],_mm256_xor_si256(c2[7532],_mm256_xor_si256(c2[5104],_mm256_xor_si256(c2[5588],_mm256_xor_si256(c2[13360],_mm256_xor_si256(c2[15293],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[14814],_mm256_xor_si256(c2[15298],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[7577],_mm256_xor_si256(c2[8061],_mm256_xor_si256(c2[5154],_mm256_xor_si256(c2[5638],_mm256_xor_si256(c2[3720],_mm256_xor_si256(c2[2274],_mm256_xor_si256(c2[7110],_mm256_xor_si256(c2[75
94],_mm256_xor_si256(c2[6644],_mm256_xor_si256(c2[9556],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[11510],_mm256_xor_si256(c2[10058],_mm256_xor_si256(c2[10542],_mm256_xor_si256(c2[4253],_mm256_xor_si256(c2[4737],_mm256_xor_si256(c2[1374],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[10565],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[12523],_mm256_xor_si256(c2[13486],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[2361],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[12056],_mm256_xor_si256(c2[12540],_mm256_xor_si256(c2[5284],_mm256_xor_si256(c2[5768],_mm256_xor_si256(c2[7244],_mm256_xor_si256(c2[472],_mm256_xor_si256(c2[5306],c2[5790]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[462]=simde_mm256_xor_si256(c2[11625],simde_mm256_xor_si256(c2[15006],simde_mm256_xor_si256(c2[9203],simde_mm256_xor_si256(c2[9687],simde_mm256_xor_si256(c2[8715],simde_mm256_xor_si256(c2[9199],simde_mm256_xor_si256(c2[9],simde_mm256_xor_si256(c2[3898],simde_mm256_xor_si256(c2[9703],simde_mm256_xor_si256(c2[10187],simde_mm256_xor_si256(c2[7285],simde_mm256_xor_si256(c2[7769],simde_mm256_xor_si256(c2[12149],simde_mm256_xor_si256(c2[9247],simde_mm256_xor_si256(c2[2955],simde_mm256_xor_si256(c2[3439],simde_mm256_xor_si256(c2[13628],simde_mm256_xor_si256(c2[8781],simde_mm256_xor_si256(c2[1036],simde_mm256_xor_si256(c2[1520],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[15101],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[3969],simde_mm256_xor_si256(c2[4453],simde_mm256_xor_si256(c2[9286],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[14638],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[598],simde_mm256_xor_si256(c2[11269],simde_mm256_xor_si256(c2[12724],simde_mm256_xor_si256(c2[13208],simde_mm256_xor_si256(c2[13688],simde_mm256_xor_si256(c2[14172],simde_mm256_xor_si256(c2[1126],simde_mm256_xor_si256(c2[8386],simde_mm256_xor_si256(c2[8870],simde_mm256_xor_si256(c2[11291],simde_mm256_xor_si256(c2[11775],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[14703],simde_mm256_xor_si256(c2[15187],simde_mm256_xor_si256(c2[5987],simde_mm256_xor_si256(c2[6471],simde_mm256_xor_si256(c2[2626],simde_mm256_xor_si256(c2[2136],simde_mm256_xor_si256(c2[11339],simde_mm256_xor_si256(c2[11823],simde_mm256_xor_si256(c2[6036],simde_mm256_xor_si256(c2[7968],simde_mm256_xor_si256(c2[8452],simde_mm256_xor_si256(c2[13291],simde_mm256_xor_si256(c2[13775],simde_mm256_xor_si256(c2[9438],simde_mm256_xor_si256(c2[12347],simde_mm256_xor_si256(c2[3638],simde_mm256_xor_si256(c2[4122],simde_mm256_xor_si256(c2[2684],simde_mm256_xor_si256(c2[7532],simde_mm256_xor_si256(c2[5104],simde_mm256_xor_si256(c2[5588],simde_mm256_xor_si256(c2[13360],simde_mm256_xor_si25
6(c2[15293],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[14814],simde_mm256_xor_si256(c2[15298],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[7577],simde_mm256_xor_si256(c2[8061],simde_mm256_xor_si256(c2[5154],simde_mm256_xor_si256(c2[5638],simde_mm256_xor_si256(c2[3720],simde_mm256_xor_si256(c2[2274],simde_mm256_xor_si256(c2[7110],simde_mm256_xor_si256(c2[7594],simde_mm256_xor_si256(c2[6644],simde_mm256_xor_si256(c2[9556],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[11510],simde_mm256_xor_si256(c2[10058],simde_mm256_xor_si256(c2[10542],simde_mm256_xor_si256(c2[4253],simde_mm256_xor_si256(c2[4737],simde_mm256_xor_si256(c2[1374],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[10565],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[12523],simde_mm256_xor_si256(c2[13486],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[2361],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[12056],simde_mm256_xor_si256(c2[12540],simde_mm256_xor_si256(c2[5284],simde_mm256_xor_si256(c2[5768],simde_mm256_xor_si256(c2[7244],simde_mm256_xor_si256(c2[472],simde_mm256_xor_si256(c2[5306],c2[5790]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[473]=_mm256_xor_si256(c2[6779],_mm256_xor_si256(c2[10171],_mm256_xor_si256(c2[4841],_mm256_xor_si256(c2[3880],_mm256_xor_si256(c2[4364],_mm256_xor_si256(c2[14550],_mm256_xor_si256(c2[5352],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[2934],_mm256_xor_si256(c2[8740],_mm256_xor_si256(c2[7314],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[14080],_mm256_xor_si256(c2[8782],_mm256_xor_si256(c2[3946],_mm256_xor_si256(c2[11688],_mm256_xor_si256(c2[12172],_mm256_xor_si256(c2[12676],_mm256_xor_si256(c2[10739],_mm256_xor_si256(c2[14610],_mm256_xor_si256(c2[15094],_mm256_xor_si256(c2[11242],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[11250],_mm256_xor_si256(c2[6434],_mm256_xor_si256(c2[8362],_mm256_xor_si256(c2[8853],_mm256_xor_si256(c2[9337],_mm256_xor_si256(c2[11778],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[6456],_mm256_xor_si256(c2[6940],_mm256_xor_si256(c2[10827],_mm256_xor_si256(c2[10341],_mm256_xor_si256(c2[1152],_mm256_xor_si256(c2[1636],_mm256_xor_si256(c2[13267],_mm256_xor_si256(c2[12788],_mm256_xor_si256(c2[6977],_mm256_xor_si256(c2[1190],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[8456],_mm256_xor_si256(c2[8940],_mm256_xor_si256(c2[4603],_mm256_xor_si256(c2[7512],_mm256_xor_si256(c2[14279],_mm256_xor_si256(c2[14763],_mm256_xor_si256(c2[13336],_mm256_xor_si256(c2[2686],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[8514],_mm256_xor_si256(c2[10942],_mm256_xor_si256(c2[9968],_mm256_xor_si256(c2[10452],_mm256_xor_si256(c2[10960],_mm256_xor_si256(c2[3215],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[14372],_mm256_xor_si256(c2[12915],_mm256_xor_si256(c2[2759],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[4710],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[838],_mm256_xor_si256(c2[1804],_mm256_xor_si256(c2[6675],_mm256_xor_si256(c2[5707],_mm256_xor_si256(c2[14894],_mm256_xor_si256(c2[15378],_mm256_xor_si256(c2[12015],_mm256_xor_si256(c2[5730],_mm256_xor_si256(c2[11053],_mm256_xor_si256(c2[11537],_mm256_xor_si
256(c2[10561],_mm256_xor_si256(c2[7688],_mm256_xor_si256(c2[8651],_mm256_xor_si256(c2[13002],_mm256_xor_si256(c2[11578],_mm256_xor_si256(c2[7705],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[2398],_mm256_xor_si256(c2[11113],_mm256_xor_si256(c2[471],c2[955]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[473]=simde_mm256_xor_si256(c2[6779],simde_mm256_xor_si256(c2[10171],simde_mm256_xor_si256(c2[4841],simde_mm256_xor_si256(c2[3880],simde_mm256_xor_si256(c2[4364],simde_mm256_xor_si256(c2[14550],simde_mm256_xor_si256(c2[5352],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[2934],simde_mm256_xor_si256(c2[8740],simde_mm256_xor_si256(c2[7314],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[14080],simde_mm256_xor_si256(c2[8782],simde_mm256_xor_si256(c2[3946],simde_mm256_xor_si256(c2[11688],simde_mm256_xor_si256(c2[12172],simde_mm256_xor_si256(c2[12676],simde_mm256_xor_si256(c2[10739],simde_mm256_xor_si256(c2[14610],simde_mm256_xor_si256(c2[15094],simde_mm256_xor_si256(c2[11242],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[11250],simde_mm256_xor_si256(c2[6434],simde_mm256_xor_si256(c2[8362],simde_mm256_xor_si256(c2[8853],simde_mm256_xor_si256(c2[9337],simde_mm256_xor_si256(c2[11778],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[6456],simde_mm256_xor_si256(c2[6940],simde_mm256_xor_si256(c2[10827],simde_mm256_xor_si256(c2[10341],simde_mm256_xor_si256(c2[1152],simde_mm256_xor_si256(c2[1636],simde_mm256_xor_si256(c2[13267],simde_mm256_xor_si256(c2[12788],simde_mm256_xor_si256(c2[6977],simde_mm256_xor_si256(c2[1190],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[8456],simde_mm256_xor_si256(c2[8940],simde_mm256_xor_si256(c2[4603],simde_mm256_xor_si256(c2[7512],simde_mm256_xor_si256(c2[14279],simde_mm256_xor_si256(c2[14763],simde_mm256_xor_si256(c2[13336],simde_mm256_xor_si256(c2[2686],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[8514],simde_mm256_xor_si256(c2[10942],simde_mm256_xor_si256(c2[9968],simde_mm256_xor_si256(c2[10452],simde_mm256_xor_si256(c2[10960],simde_mm256_xor_si256(c2[3215],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[14372],simde_mm256_xor_si256(c2[12915],simde_mm256_xor_si256(c2[2759],simde_mm256_xor_si256(c2[1809],simde_mm256_x
or_si256(c2[4710],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[838],simde_mm256_xor_si256(c2[1804],simde_mm256_xor_si256(c2[6675],simde_mm256_xor_si256(c2[5707],simde_mm256_xor_si256(c2[14894],simde_mm256_xor_si256(c2[15378],simde_mm256_xor_si256(c2[12015],simde_mm256_xor_si256(c2[5730],simde_mm256_xor_si256(c2[11053],simde_mm256_xor_si256(c2[11537],simde_mm256_xor_si256(c2[10561],simde_mm256_xor_si256(c2[7688],simde_mm256_xor_si256(c2[8651],simde_mm256_xor_si256(c2[13002],simde_mm256_xor_si256(c2[11578],simde_mm256_xor_si256(c2[7705],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[2398],simde_mm256_xor_si256(c2[11113],simde_mm256_xor_si256(c2[471],c2[955]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[484]=_mm256_xor_si256(c2[10654],_mm256_xor_si256(c2[14046],_mm256_xor_si256(c2[8716],_mm256_xor_si256(c2[8228],_mm256_xor_si256(c2[7262],_mm256_xor_si256(c2[2927],_mm256_xor_si256(c2[9227],_mm256_xor_si256(c2[6798],_mm256_xor_si256(c2[11178],_mm256_xor_si256(c2[8276],_mm256_xor_si256(c2[2468],_mm256_xor_si256(c2[12657],_mm256_xor_si256(c2[7810],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[14614],_mm256_xor_si256(c2[3482],_mm256_xor_si256(c2[15117],_mm256_xor_si256(c2[13667],_mm256_xor_si256(c2[15114],_mm256_xor_si256(c2[10298],_mm256_xor_si256(c2[12237],_mm256_xor_si256(c2[13201],_mm256_xor_si256(c2[155],_mm256_xor_si256(c2[7899],_mm256_xor_si256(c2[10804],_mm256_xor_si256(c2[9841],_mm256_xor_si256(c2[14702],_mm256_xor_si256(c2[14216],_mm256_xor_si256(c2[5500],_mm256_xor_si256(c2[1655],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[10852],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[5065],_mm256_xor_si256(c2[7481],_mm256_xor_si256(c2[12804],_mm256_xor_si256(c2[8478],_mm256_xor_si256(c2[11376],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[1724],_mm256_xor_si256(c2[6561],_mm256_xor_si256(c2[4628],_mm256_xor_si256(c2[12389],_mm256_xor_si256(c2[14806],_mm256_xor_si256(c2[14327],_mm256_xor_si256(c2[14835],_mm256_xor_si256(c2[7090],_mm256_xor_si256(c2[4667],_mm256_xor_si256(c2[2760],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[6623],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[8585],_mm256_xor_si256(c2[4713],_mm256_xor_si256(c2[10539],_mm256_xor_si256(c2[9571],_mm256_xor_si256(c2[3766],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[9594],_mm256_xor_si256(c2[15401],_mm256_xor_si256(c2[11552],_mm256_xor_si256(c2[12526],_mm256_xor_si256(c2[1390],_mm256_xor_si256(c2[15453],_mm256_xor_si256(c2[11580],_mm256_xor_si256(c2[4797],_mm256_xor_si256(c2[6273],_mm256_xor_si256(c2[14988],c2[4819])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[484]=simde_mm256_xor_si256(c2[10654],simde_mm256_xor_si256(c2[14046],simde_mm256_xor_si256(c2[8716],simde_mm256_xor_si256(c2[8228],simde_mm256_xor_si256(c2[7262],simde_mm256_xor_si256(c2[2927],simde_mm256_xor_si256(c2[9227],simde_mm256_xor_si256(c2[6798],simde_mm256_xor_si256(c2[11178],simde_mm256_xor_si256(c2[8276],simde_mm256_xor_si256(c2[2468],simde_mm256_xor_si256(c2[12657],simde_mm256_xor_si256(c2[7810],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[14614],simde_mm256_xor_si256(c2[3482],simde_mm256_xor_si256(c2[15117],simde_mm256_xor_si256(c2[13667],simde_mm256_xor_si256(c2[15114],simde_mm256_xor_si256(c2[10298],simde_mm256_xor_si256(c2[12237],simde_mm256_xor_si256(c2[13201],simde_mm256_xor_si256(c2[155],simde_mm256_xor_si256(c2[7899],simde_mm256_xor_si256(c2[10804],simde_mm256_xor_si256(c2[9841],simde_mm256_xor_si256(c2[14702],simde_mm256_xor_si256(c2[14216],simde_mm256_xor_si256(c2[5500],simde_mm256_xor_si256(c2[1655],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[10852],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[5065],simde_mm256_xor_si256(c2[7481],simde_mm256_xor_si256(c2[12804],simde_mm256_xor_si256(c2[8478],simde_mm256_xor_si256(c2[11376],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[1724],simde_mm256_xor_si256(c2[6561],simde_mm256_xor_si256(c2[4628],simde_mm256_xor_si256(c2[12389],simde_mm256_xor_si256(c2[14806],simde_mm256_xor_si256(c2[14327],simde_mm256_xor_si256(c2[14835],simde_mm256_xor_si256(c2[7090],simde_mm256_xor_si256(c2[4667],simde_mm256_xor_si256(c2[2760],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[6623],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[8585],simde_mm256_xor_si256(c2[4713],simde_mm256_xor_si256(c2[10539],simde_mm256_xor_si256(c2[9571],simde_mm256_xor_si256(c2[3766],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[9594],simde_mm256_xor_si256(c2[15401],simde_mm256_xor_si256(c2[11552],simde_mm256_xor_si256(c2[12526],simde_mm256
_xor_si256(c2[1390],simde_mm256_xor_si256(c2[15453],simde_mm256_xor_si256(c2[11580],simde_mm256_xor_si256(c2[4797],simde_mm256_xor_si256(c2[6273],simde_mm256_xor_si256(c2[14988],c2[4819])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[495]=_mm256_xor_si256(c2[8736],_mm256_xor_si256(c2[1586],c2[5551]));
+     d2[495]=simde_mm256_xor_si256(c2[8736],simde_mm256_xor_si256(c2[1586],c2[5551]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc384_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc384_byte.c
index 02c2db3295a98972695c111fde9d918350e76983..fa906bbefb1fa4e41e9c91c61c780c1004d81e5d 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc384_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc384_byte.c
@@ -11,141 +11,141 @@ static inline void ldpc384_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[10041],_mm256_xor_si256(c2[6338],_mm256_xor_si256(c2[6870],_mm256_xor_si256(c2[10568],_mm256_xor_si256(c2[10056],_mm256_xor_si256(c2[13759],_mm256_xor_si256(c2[12170],_mm256_xor_si256(c2[9553],_mm256_xor_si256(c2[6386],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[9059],_mm256_xor_si256(c2[4826],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[105],_mm256_xor_si256(c2[6442],_mm256_xor_si256(c2[10136],_mm256_xor_si256(c2[11213],_mm256_xor_si256(c2[8572],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[12822],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[3846],_mm256_xor_si256(c2[5986],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[13372],_mm256_xor_si256(c2[6010],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[12865],_mm256_xor_si256(c2[15537],_mm256_xor_si256(c2[9725],_mm256_xor_si256(c2[16585],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[2356],_mm256_xor_si256(c2[7131],_mm256_xor_si256(c2[3969],_mm256_xor_si256(c2[9249],_mm256_xor_si256(c2[9264],_mm256_xor_si256(c2[11914],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[11410],_mm256_xor_si256(c2[13542],_mm256_xor_si256(c2[13010],_mm256_xor_si256(c2[10902],_mm256_xor_si256(c2[12510],_mm256_xor_si256(c2[1947],_mm256_xor_si256(c2[11449],_mm256_xor_si256(c2[5667],_mm256_xor_si256(c2[1451],_mm256_xor_si256(c2[8841],_mm256_xor_si256(c2[9915],_mm256_xor_si256(c2[1996],_mm256_xor_si256(c2[6753],_mm256_xor_si256(c2[9943],_mm256_xor_si256(c2[8887],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[11021],_mm256_xor_si256(c2[6274],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[5770],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[4177],_mm256_xor_si256(c2[14242],_mm256_xor_si256(c2[8955],c2[3155]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[10041],simde_mm256_xor_si256(c2[6338],simde_mm256_xor_si256(c2[6870],simde_mm256_xor_si256(c2[10568],simde_mm256_xor_si256(c2[10056],simde_mm256_xor_si256(c2[13759],simde_mm256_xor_si256(c2[12170],simde_mm256_xor_si256(c2[9553],simde_mm256_xor_si256(c2[6386],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[9059],simde_mm256_xor_si256(c2[4826],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[105],simde_mm256_xor_si256(c2[6442],simde_mm256_xor_si256(c2[10136],simde_mm256_xor_si256(c2[11213],simde_mm256_xor_si256(c2[8572],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[12822],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[3846],simde_mm256_xor_si256(c2[5986],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[13372],simde_mm256_xor_si256(c2[6010],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[12865],simde_mm256_xor_si256(c2[15537],simde_mm256_xor_si256(c2[9725],simde_mm256_xor_si256(c2[16585],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[2356],simde_mm256_xor_si256(c2[7131],simde_mm256_xor_si256(c2[3969],simde_mm256_xor_si256(c2[9249],simde_mm256_xor_si256(c2[9264],simde_mm256_xor_si256(c2[11914],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[11410],simde_mm256_xor_si256(c2[13542],simde_mm256_xor_si256(c2[13010],simde_mm256_xor_si256(c2[10902],simde_mm256_xor_si256(c2[12510],simde_mm256_xor_si256(c2[1947],simde_mm256_xor_si256(c2[11449],simde_mm256_xor_si256(c2[5667],simde_mm256_xor_si256(c2[1451],simde_mm256_xor_si256(c2[8841],simde_mm256_xor_si256(c2[9915],simde_mm256_xor_si256(c2[1996],simde_mm256_xor_si256(c2[6753],simde_mm256_xor_si256(c2[9943],simde_mm256_xor_si256(c2[8887],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[11021],simde_mm256_xor_si256(c2[6274],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[5770],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c
2[4177],simde_mm256_xor_si256(c2[14242],simde_mm256_xor_si256(c2[8955],c2[3155]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 1
-     d2[12]=_mm256_xor_si256(c2[10041],_mm256_xor_si256(c2[10569],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[7398],_mm256_xor_si256(c2[11096],_mm256_xor_si256(c2[10056],_mm256_xor_si256(c2[10584],_mm256_xor_si256(c2[14287],_mm256_xor_si256(c2[12698],_mm256_xor_si256(c2[9553],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[4810],_mm256_xor_si256(c2[9059],_mm256_xor_si256(c2[9587],_mm256_xor_si256(c2[5354],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[6970],_mm256_xor_si256(c2[10664],_mm256_xor_si256(c2[11213],_mm256_xor_si256(c2[11741],_mm256_xor_si256(c2[9100],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[12822],_mm256_xor_si256(c2[13350],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[6514],_mm256_xor_si256(c2[6512],_mm256_xor_si256(c2[13900],_mm256_xor_si256(c2[6538],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[13393],_mm256_xor_si256(c2[15537],_mm256_xor_si256(c2[16065],_mm256_xor_si256(c2[10253],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[7131],_mm256_xor_si256(c2[7659],_mm256_xor_si256(c2[4497],_mm256_xor_si256(c2[9777],_mm256_xor_si256(c2[9264],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[12442],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[3491],_mm256_xor_si256(c2[5070],_mm256_xor_si256(c2[11938],_mm256_xor_si256(c2[14070],_mm256_xor_si256(c2[13538],_mm256_xor_si256(c2[11430],_mm256_xor_si256(c2[12510],_mm256_xor_si256(c2[13038],_mm256_xor_si256(c2[2475],_mm256_xor_si256(c2[11977],_mm256_xor_si256(c2[5667],_mm256_xor_si256(c2[6195],_mm256_xor_si256(c2[1979],_mm256_xor_si256(c2[9369],_mm256_xor_si256(c2[10443],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[7281],_mm256_xor_si256(c2[9943],_mm256_xor_si256(c2[10471],_mm256_xor_si256(c2[9415],_mm256_xor_si256(c2[8888],_mm256_xor_si256(c2[11021],_mm256_xor_si256(c2[11549],_mm256_xor_si256(c2[6802],_mm256_xor_si2
56(c2[7854],_mm256_xor_si256(c2[5770],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[14242],_mm256_xor_si256(c2[14770],_mm256_xor_si256(c2[9483],c2[3683])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[10041],simde_mm256_xor_si256(c2[10569],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[7398],simde_mm256_xor_si256(c2[11096],simde_mm256_xor_si256(c2[10056],simde_mm256_xor_si256(c2[10584],simde_mm256_xor_si256(c2[14287],simde_mm256_xor_si256(c2[12698],simde_mm256_xor_si256(c2[9553],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[4810],simde_mm256_xor_si256(c2[9059],simde_mm256_xor_si256(c2[9587],simde_mm256_xor_si256(c2[5354],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[6970],simde_mm256_xor_si256(c2[10664],simde_mm256_xor_si256(c2[11213],simde_mm256_xor_si256(c2[11741],simde_mm256_xor_si256(c2[9100],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[12822],simde_mm256_xor_si256(c2[13350],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[6514],simde_mm256_xor_si256(c2[6512],simde_mm256_xor_si256(c2[13900],simde_mm256_xor_si256(c2[6538],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[13393],simde_mm256_xor_si256(c2[15537],simde_mm256_xor_si256(c2[16065],simde_mm256_xor_si256(c2[10253],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[7131],simde_mm256_xor_si256(c2[7659],simde_mm256_xor_si256(c2[4497],simde_mm256_xor_si256(c2[9777],simde_mm256_xor_si256(c2[9264],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[12442],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[3491],simde_mm256_xor_si256(c2[5070],simde_mm256_xor_si256(c2[11938],simde_mm256_xor_si256(c2[14070],simde_mm256_xor_si256(c2[13538],simde_mm256_xor_si256(c2[11430],simde_mm256_xor_si256(c2[12510],simde_mm256_xor_si256(c2[13038],simde_mm256_xor_si256(c2[2475],simde_mm256_xor_si256(c2[11977],simde_mm256_xor_si256(c2[5667],simde_mm256_xor_si256(c2[6195],simde_mm256_xor
_si256(c2[1979],simde_mm256_xor_si256(c2[9369],simde_mm256_xor_si256(c2[10443],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[7281],simde_mm256_xor_si256(c2[9943],simde_mm256_xor_si256(c2[10471],simde_mm256_xor_si256(c2[9415],simde_mm256_xor_si256(c2[8888],simde_mm256_xor_si256(c2[11021],simde_mm256_xor_si256(c2[11549],simde_mm256_xor_si256(c2[6802],simde_mm256_xor_si256(c2[7854],simde_mm256_xor_si256(c2[5770],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[14242],simde_mm256_xor_si256(c2[14770],simde_mm256_xor_si256(c2[9483],c2[3683])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[24]=_mm256_xor_si256(c2[10569],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[6870],_mm256_xor_si256(c2[7398],_mm256_xor_si256(c2[10568],_mm256_xor_si256(c2[11096],_mm256_xor_si256(c2[10584],_mm256_xor_si256(c2[13759],_mm256_xor_si256(c2[14287],_mm256_xor_si256(c2[12170],_mm256_xor_si256(c2[12698],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[4810],_mm256_xor_si256(c2[9587],_mm256_xor_si256(c2[5354],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[6442],_mm256_xor_si256(c2[6970],_mm256_xor_si256(c2[10136],_mm256_xor_si256(c2[10664],_mm256_xor_si256(c2[11741],_mm256_xor_si256(c2[9100],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[13350],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[3846],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[6514],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[6512],_mm256_xor_si256(c2[13372],_mm256_xor_si256(c2[13900],_mm256_xor_si256(c2[6538],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[12865],_mm256_xor_si256(c2[13393],_mm256_xor_si256(c2[16065],_mm256_xor_si256(c2[10253],_mm256_xor_si256(c2[16585],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[2356],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[7659],_mm256_xor_si256(c2[4497],_mm256_xor_si256(c2[9249],_mm256_xor_si256(c2[9777],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[12442],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[3491],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[5070],_mm256_xor_si256(c2[11410],_mm256_xor_si256(c2[11938],_mm256_xor_si256(c2[14070],_mm256_xor_si256(c2[13010],_mm256_xor_si256(c2[13538],_mm256_xor_si256(c2[10902],_mm256_xor_si256(c2[11430],_mm256_xor_si256(c2[13038],_mm256_xor_si256(c2[2475],_mm256_xor_si256(c2[11449],_mm256_xor_si256(c2[11977],_mm256_xor_si256(c2[6195],_mm256_xor_si256(c
2[1979],_mm256_xor_si256(c2[8841],_mm256_xor_si256(c2[9369],_mm256_xor_si256(c2[10443],_mm256_xor_si256(c2[1996],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[6753],_mm256_xor_si256(c2[7281],_mm256_xor_si256(c2[10471],_mm256_xor_si256(c2[8887],_mm256_xor_si256(c2[9415],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[8888],_mm256_xor_si256(c2[11549],_mm256_xor_si256(c2[6802],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[7854],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[4177],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[14770],_mm256_xor_si256(c2[9483],_mm256_xor_si256(c2[3155],c2[3683]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[10569],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[6870],simde_mm256_xor_si256(c2[7398],simde_mm256_xor_si256(c2[10568],simde_mm256_xor_si256(c2[11096],simde_mm256_xor_si256(c2[10584],simde_mm256_xor_si256(c2[13759],simde_mm256_xor_si256(c2[14287],simde_mm256_xor_si256(c2[12170],simde_mm256_xor_si256(c2[12698],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[4810],simde_mm256_xor_si256(c2[9587],simde_mm256_xor_si256(c2[5354],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[6442],simde_mm256_xor_si256(c2[6970],simde_mm256_xor_si256(c2[10136],simde_mm256_xor_si256(c2[10664],simde_mm256_xor_si256(c2[11741],simde_mm256_xor_si256(c2[9100],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[13350],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[3846],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[6514],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[6512],simde_mm256_xor_si256(c2[13372],simde_mm256_xor_si256(c2[13900],simde_mm256_xor_si256(c2[6538],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[12865],simde_mm256_xor_si256(c2[13393],simde_mm256_xor_si256(c2[16065],simde_mm256_xor_si256(c2[10253],simde_mm256_xor_si256(c2[16585],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[2356],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[7659],simde_mm256_xor_si256(c2[4497],simde_mm256_xor_si256(c2[9249],simde_mm256_xor_si256(c2[9777],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[12442],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[3491],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[5070],simde_mm256_xor_si256(c2[
11410],simde_mm256_xor_si256(c2[11938],simde_mm256_xor_si256(c2[14070],simde_mm256_xor_si256(c2[13010],simde_mm256_xor_si256(c2[13538],simde_mm256_xor_si256(c2[10902],simde_mm256_xor_si256(c2[11430],simde_mm256_xor_si256(c2[13038],simde_mm256_xor_si256(c2[2475],simde_mm256_xor_si256(c2[11449],simde_mm256_xor_si256(c2[11977],simde_mm256_xor_si256(c2[6195],simde_mm256_xor_si256(c2[1979],simde_mm256_xor_si256(c2[8841],simde_mm256_xor_si256(c2[9369],simde_mm256_xor_si256(c2[10443],simde_mm256_xor_si256(c2[1996],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[6753],simde_mm256_xor_si256(c2[7281],simde_mm256_xor_si256(c2[10471],simde_mm256_xor_si256(c2[8887],simde_mm256_xor_si256(c2[9415],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[8888],simde_mm256_xor_si256(c2[11549],simde_mm256_xor_si256(c2[6802],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[7854],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[4177],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[14770],simde_mm256_xor_si256(c2[9483],simde_mm256_xor_si256(c2[3155],c2[3683]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[36]=_mm256_xor_si256(c2[10569],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[7398],_mm256_xor_si256(c2[10568],_mm256_xor_si256(c2[11096],_mm256_xor_si256(c2[10584],_mm256_xor_si256(c2[14287],_mm256_xor_si256(c2[12170],_mm256_xor_si256(c2[12698],_mm256_xor_si256(c2[10081],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[4810],_mm256_xor_si256(c2[9587],_mm256_xor_si256(c2[5354],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[6970],_mm256_xor_si256(c2[10136],_mm256_xor_si256(c2[10664],_mm256_xor_si256(c2[11741],_mm256_xor_si256(c2[9100],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[13350],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[3846],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[6514],_mm256_xor_si256(c2[6512],_mm256_xor_si256(c2[13372],_mm256_xor_si256(c2[13900],_mm256_xor_si256(c2[6538],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[12865],_mm256_xor_si256(c2[13393],_mm256_xor_si256(c2[16065],_mm256_xor_si256(c2[10253],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[2356],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[7659],_mm256_xor_si256(c2[4497],_mm256_xor_si256(c2[9249],_mm256_xor_si256(c2[9777],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[12442],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[3491],_mm256_xor_si256(c2[5070],_mm256_xor_si256(c2[11410],_mm256_xor_si256(c2[11938],_mm256_xor_si256(c2[14070],_mm256_xor_si256(c2[13538],_mm256_xor_si256(c2[10902],_mm256_xor_si256(c2[11430],_mm256_xor_si256(c2[13038],_mm256_xor_si256(c2[2475],_mm256_xor_si256(c2[11977],_mm256_xor_si256(c2[6195],_mm256_xor_si256(c2[1979],_mm256_xor_si256(c2[8841],_mm256_xor_si256(c2[9369],_mm256_xor_si256(c2[10443],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[6753],_mm256_xor_si256(c2[7281],_mm256_xor_si256(c2[10471],_mm256_xor_si256(c2[9415],_mm256_xor_si256(c2[8360],_mm256_xor_si256(c2[8888],_mm256_xor_si256(c2[11549],_mm256_xor_si256(c2[6802],_mm256_xor_si25
6(c2[7854],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[7872],_mm256_xor_si256(c2[4177],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[14770],_mm256_xor_si256(c2[9483],_mm256_xor_si256(c2[3155],c2[3683])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[10569],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[7398],simde_mm256_xor_si256(c2[10568],simde_mm256_xor_si256(c2[11096],simde_mm256_xor_si256(c2[10584],simde_mm256_xor_si256(c2[14287],simde_mm256_xor_si256(c2[12170],simde_mm256_xor_si256(c2[12698],simde_mm256_xor_si256(c2[10081],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[4810],simde_mm256_xor_si256(c2[9587],simde_mm256_xor_si256(c2[5354],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[6970],simde_mm256_xor_si256(c2[10136],simde_mm256_xor_si256(c2[10664],simde_mm256_xor_si256(c2[11741],simde_mm256_xor_si256(c2[9100],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[13350],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[3846],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[6514],simde_mm256_xor_si256(c2[6512],simde_mm256_xor_si256(c2[13372],simde_mm256_xor_si256(c2[13900],simde_mm256_xor_si256(c2[6538],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[12865],simde_mm256_xor_si256(c2[13393],simde_mm256_xor_si256(c2[16065],simde_mm256_xor_si256(c2[10253],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[2356],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[7659],simde_mm256_xor_si256(c2[4497],simde_mm256_xor_si256(c2[9249],simde_mm256_xor_si256(c2[9777],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[12442],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[3491],simde_mm256_xor_si256(c2[5070],simde_mm256_xor_si256(c2[11410],simde_mm256_xor_si256(c2[11938],simde_mm256_xor_si256(c2[14070],simde_mm256_xor_si256(c2[13538],simde_mm256_xor_si256(c2[10902],simde_mm256_xor_si256(c2[11430],simde_mm256_xor_si256(c2[13038],simde_mm256_xor_si256(c2[2475],simde_mm256_xor_si256(c2[11977],simde_mm256_xor_si256(c2[6195],simde_mm256_xor_si256(c2[1979],simde_mm256_xor
_si256(c2[8841],simde_mm256_xor_si256(c2[9369],simde_mm256_xor_si256(c2[10443],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[6753],simde_mm256_xor_si256(c2[7281],simde_mm256_xor_si256(c2[10471],simde_mm256_xor_si256(c2[9415],simde_mm256_xor_si256(c2[8360],simde_mm256_xor_si256(c2[8888],simde_mm256_xor_si256(c2[11549],simde_mm256_xor_si256(c2[6802],simde_mm256_xor_si256(c2[7854],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[7872],simde_mm256_xor_si256(c2[4177],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[14770],simde_mm256_xor_si256(c2[9483],simde_mm256_xor_si256(c2[3155],c2[3683])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[48]=_mm256_xor_si256(c2[6346],c2[11117]);
+     d2[48]=simde_mm256_xor_si256(c2[6346],c2[11117]);
 
 //row: 5
-     d2[60]=_mm256_xor_si256(c2[8450],_mm256_xor_si256(c2[4759],_mm256_xor_si256(c2[5291],_mm256_xor_si256(c2[8977],_mm256_xor_si256(c2[1590],_mm256_xor_si256(c2[8477],_mm256_xor_si256(c2[12168],_mm256_xor_si256(c2[10591],_mm256_xor_si256(c2[7416],_mm256_xor_si256(c2[7974],_mm256_xor_si256(c2[4807],_mm256_xor_si256(c2[2691],_mm256_xor_si256(c2[7468],_mm256_xor_si256(c2[3247],_mm256_xor_si256(c2[15388],_mm256_xor_si256(c2[10107],_mm256_xor_si256(c2[15409],_mm256_xor_si256(c2[4851],_mm256_xor_si256(c2[8545],_mm256_xor_si256(c2[9634],_mm256_xor_si256(c2[6993],_mm256_xor_si256(c2[15432],_mm256_xor_si256(c2[11243],_mm256_xor_si256(c2[15993],_mm256_xor_si256(c2[2267],_mm256_xor_si256(c2[4395],_mm256_xor_si256(c2[4393],_mm256_xor_si256(c2[11793],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[15513],_mm256_xor_si256(c2[11286],_mm256_xor_si256(c2[13946],_mm256_xor_si256(c2[8146],_mm256_xor_si256(c2[15006],_mm256_xor_si256(c2[15553],_mm256_xor_si256(c2[16088],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[5552],_mm256_xor_si256(c2[2378],_mm256_xor_si256(c2[7658],_mm256_xor_si256(c2[7685],_mm256_xor_si256(c2[10323],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[3461],_mm256_xor_si256(c2[1372],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[9819],_mm256_xor_si256(c2[11963],_mm256_xor_si256(c2[11431],_mm256_xor_si256(c2[9323],_mm256_xor_si256(c2[10931],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[9870],_mm256_xor_si256(c2[4088],_mm256_xor_si256(c2[16755],_mm256_xor_si256(c2[7250],_mm256_xor_si256(c2[9367],_mm256_xor_si256(c2[8336],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[5162],_mm256_xor_si256(c2[8352],_mm256_xor_si256(c2[7296],_mm256_xor_si256(c2[6769],_mm256_xor_si256(c2[9442],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[4179],_mm256_xor_si256(c2[5765],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[12651],_mm256_xor_si256(c2[7376],_mm256_xor_si256(c2[1564],c2[10537]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[8450],simde_mm256_xor_si256(c2[4759],simde_mm256_xor_si256(c2[5291],simde_mm256_xor_si256(c2[8977],simde_mm256_xor_si256(c2[1590],simde_mm256_xor_si256(c2[8477],simde_mm256_xor_si256(c2[12168],simde_mm256_xor_si256(c2[10591],simde_mm256_xor_si256(c2[7416],simde_mm256_xor_si256(c2[7974],simde_mm256_xor_si256(c2[4807],simde_mm256_xor_si256(c2[2691],simde_mm256_xor_si256(c2[7468],simde_mm256_xor_si256(c2[3247],simde_mm256_xor_si256(c2[15388],simde_mm256_xor_si256(c2[10107],simde_mm256_xor_si256(c2[15409],simde_mm256_xor_si256(c2[4851],simde_mm256_xor_si256(c2[8545],simde_mm256_xor_si256(c2[9634],simde_mm256_xor_si256(c2[6993],simde_mm256_xor_si256(c2[15432],simde_mm256_xor_si256(c2[11243],simde_mm256_xor_si256(c2[15993],simde_mm256_xor_si256(c2[2267],simde_mm256_xor_si256(c2[4395],simde_mm256_xor_si256(c2[4393],simde_mm256_xor_si256(c2[11793],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[15513],simde_mm256_xor_si256(c2[11286],simde_mm256_xor_si256(c2[13946],simde_mm256_xor_si256(c2[8146],simde_mm256_xor_si256(c2[15006],simde_mm256_xor_si256(c2[15553],simde_mm256_xor_si256(c2[16088],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[5552],simde_mm256_xor_si256(c2[2378],simde_mm256_xor_si256(c2[7658],simde_mm256_xor_si256(c2[7685],simde_mm256_xor_si256(c2[10323],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[3461],simde_mm256_xor_si256(c2[1372],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[9819],simde_mm256_xor_si256(c2[11963],simde_mm256_xor_si256(c2[11431],simde_mm256_xor_si256(c2[9323],simde_mm256_xor_si256(c2[10931],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[9870],simde_mm256_xor_si256(c2[4088],simde_mm256_xor_si256(c2[16755],simde_mm256_xor_si256(c2[7250],simde_mm256_xor_si256(c2[9367],simde_mm256_xor_si256(c2[8336],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[5162],simde_mm256_xor_si256(c2[8352],simde_mm256_xor_si256(c2[7296],simde_mm256_xor_si256(c2[6769],simde_mm256_xor_s
i256(c2[9442],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[4179],simde_mm256_xor_si256(c2[5765],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[12651],simde_mm256_xor_si256(c2[7376],simde_mm256_xor_si256(c2[1564],c2[10537]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[72]=_mm256_xor_si256(c2[11624],_mm256_xor_si256(c2[680],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[16642],_mm256_xor_si256(c2[15098],_mm256_xor_si256(c2[15727],_mm256_xor_si256(c2[9936],c2[1015])))))));
+     d2[72]=simde_mm256_xor_si256(c2[11624],simde_mm256_xor_si256(c2[680],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[16642],simde_mm256_xor_si256(c2[15098],simde_mm256_xor_si256(c2[15727],simde_mm256_xor_si256(c2[9936],c2[1015])))))));
 
 //row: 7
-     d2[84]=_mm256_xor_si256(c2[4752],_mm256_xor_si256(c2[15865],_mm256_xor_si256(c2[14889],_mm256_xor_si256(c2[7042],_mm256_xor_si256(c2[1257],c2[9843])))));
+     d2[84]=simde_mm256_xor_si256(c2[4752],simde_mm256_xor_si256(c2[15865],simde_mm256_xor_si256(c2[14889],simde_mm256_xor_si256(c2[7042],simde_mm256_xor_si256(c2[1257],c2[9843])))));
 
 //row: 8
-     d2[96]=_mm256_xor_si256(c2[11627],_mm256_xor_si256(c2[15842],_mm256_xor_si256(c2[7924],_mm256_xor_si256(c2[12151],_mm256_xor_si256(c2[8456],_mm256_xor_si256(c2[12155],_mm256_xor_si256(c2[12683],_mm256_xor_si256(c2[12154],_mm256_xor_si256(c2[15841],_mm256_xor_si256(c2[16369],_mm256_xor_si256(c2[10041],_mm256_xor_si256(c2[11642],_mm256_xor_si256(c2[15869],_mm256_xor_si256(c2[15345],_mm256_xor_si256(c2[2137],_mm256_xor_si256(c2[2665],_mm256_xor_si256(c2[13756],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[10061],_mm256_xor_si256(c2[11139],_mm256_xor_si256(c2[15366],_mm256_xor_si256(c2[7972],_mm256_xor_si256(c2[12199],_mm256_xor_si256(c2[5856],_mm256_xor_si256(c2[9555],_mm256_xor_si256(c2[10083],_mm256_xor_si256(c2[10633],_mm256_xor_si256(c2[14860],_mm256_xor_si256(c2[6412],_mm256_xor_si256(c2[10639],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[5357],_mm256_xor_si256(c2[5885],_mm256_xor_si256(c2[2717],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[8016],_mm256_xor_si256(c2[11715],_mm256_xor_si256(c2[12243],_mm256_xor_si256(c2[11722],_mm256_xor_si256(c2[15409],_mm256_xor_si256(c2[15937],_mm256_xor_si256(c2[12799],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[10158],_mm256_xor_si256(c2[14385],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[5401],_mm256_xor_si256(c2[5929],_mm256_xor_si256(c2[14408],_mm256_xor_si256(c2[1728],_mm256_xor_si256(c2[2263],_mm256_xor_si256(c2[5962],_mm256_xor_si256(c2[6490],_mm256_xor_si256(c2[5432],_mm256_xor_si256(c2[9131],_mm256_xor_si256(c2[9659],_mm256_xor_si256(c2[7560],_mm256_xor_si256(c2[11787],_mm256_xor_si256(c2[7570],_mm256_xor_si256(c2[11257],_mm256_xor_si256(c2[11785],_mm256_xor_si256(c2[14958],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[2290],_mm256_xor_si256(c2[7584],_mm256_xor_si256(c2[11811],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[5482],_mm256_xor_si256(c2[6010],_mm256_xor_si256(c2[14451],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[216],_mm2
56_xor_si256(c2[4443],_mm256_xor_si256(c2[11311],_mm256_xor_si256(c2[15538],_mm256_xor_si256(c2[1276],_mm256_xor_si256(c2[4975],_mm256_xor_si256(c2[5503],_mm256_xor_si256(c2[1835],_mm256_xor_si256(c2[6050],_mm256_xor_si256(c2[2358],_mm256_xor_si256(c2[6057],_mm256_xor_si256(c2[6585],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[7641],_mm256_xor_si256(c2[8169],_mm256_xor_si256(c2[8717],_mm256_xor_si256(c2[12944],_mm256_xor_si256(c2[5555],_mm256_xor_si256(c2[9770],_mm256_xor_si256(c2[10835],_mm256_xor_si256(c2[14522],_mm256_xor_si256(c2[15050],_mm256_xor_si256(c2[10850],_mm256_xor_si256(c2[15077],_mm256_xor_si256(c2[13488],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[5577],_mm256_xor_si256(c2[9264],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[9792],_mm256_xor_si256(c2[4537],_mm256_xor_si256(c2[8764],_mm256_xor_si256(c2[6128],_mm256_xor_si256(c2[9827],_mm256_xor_si256(c2[10355],_mm256_xor_si256(c2[12984],_mm256_xor_si256(c2[16683],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[15128],_mm256_xor_si256(c2[2448],_mm256_xor_si256(c2[14596],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[1928],_mm256_xor_si256(c2[12488],_mm256_xor_si256(c2[16187],_mm256_xor_si256(c2[16715],_mm256_xor_si256(c2[14096],_mm256_xor_si256(c2[1416],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[7760],_mm256_xor_si256(c2[13035],_mm256_xor_si256(c2[16734],_mm256_xor_si256(c2[367],_mm256_xor_si256(c2[7253],_mm256_xor_si256(c2[11480],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[7252],_mm256_xor_si256(c2[10427],_mm256_xor_si256(c2[14114],_mm256_xor_si256(c2[14642],_mm256_xor_si256(c2[4081],_mm256_xor_si256(c2[11501],_mm256_xor_si256(c2[15728],_mm256_xor_si256(c2[3582],_mm256_xor_si256(c2[7281],_mm256_xor_si256(c2[7809],_mm256_xor_si256(c2[8339],_mm256_xor_si256(c2[12026],_mm256_xor_si256(c2[12554],_mm256_xor_si256(c2[11529],_mm256_xor_si256(c2[15744],_mm256_xor_si256(c2[10473],_mm256_xor_si256(c2[14160],_mm256_xor_si256(c2[14688],_mm256_xor_si256(c2[9946],_mm256_xor_si256(c2[13633],_mm256_xor_si256(c2[141
61],_mm256_xor_si256(c2[12607],_mm256_xor_si256(c2[16834],_mm256_xor_si256(c2[7848],_mm256_xor_si256(c2[12075],_mm256_xor_si256(c2[8912],_mm256_xor_si256(c2[12611],_mm256_xor_si256(c2[13139],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[11571],_mm256_xor_si256(c2[8930],_mm256_xor_si256(c2[12629],_mm256_xor_si256(c2[13157],_mm256_xor_si256(c2[5763],_mm256_xor_si256(c2[9462],_mm256_xor_si256(c2[9990],_mm256_xor_si256(c2[15816],_mm256_xor_si256(c2[3148],_mm256_xor_si256(c2[10541],_mm256_xor_si256(c2[14768],_mm256_xor_si256(c2[4729],_mm256_xor_si256(c2[8428],_mm256_xor_si256(c2[8956],c2[8963]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[11627],simde_mm256_xor_si256(c2[15842],simde_mm256_xor_si256(c2[7924],simde_mm256_xor_si256(c2[12151],simde_mm256_xor_si256(c2[8456],simde_mm256_xor_si256(c2[12155],simde_mm256_xor_si256(c2[12683],simde_mm256_xor_si256(c2[12154],simde_mm256_xor_si256(c2[15841],simde_mm256_xor_si256(c2[16369],simde_mm256_xor_si256(c2[10041],simde_mm256_xor_si256(c2[11642],simde_mm256_xor_si256(c2[15869],simde_mm256_xor_si256(c2[15345],simde_mm256_xor_si256(c2[2137],simde_mm256_xor_si256(c2[2665],simde_mm256_xor_si256(c2[13756],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[10061],simde_mm256_xor_si256(c2[11139],simde_mm256_xor_si256(c2[15366],simde_mm256_xor_si256(c2[7972],simde_mm256_xor_si256(c2[12199],simde_mm256_xor_si256(c2[5856],simde_mm256_xor_si256(c2[9555],simde_mm256_xor_si256(c2[10083],simde_mm256_xor_si256(c2[10633],simde_mm256_xor_si256(c2[14860],simde_mm256_xor_si256(c2[6412],simde_mm256_xor_si256(c2[10639],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[5357],simde_mm256_xor_si256(c2[5885],simde_mm256_xor_si256(c2[2717],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[8016],simde_mm256_xor_si256(c2[11715],simde_mm256_xor_si256(c2[12243],simde_mm256_xor_si256(c2[11722],simde_mm256_xor_si256(c2[15409],simde_mm256_xor_si256(c2[15937],simde_mm256_xor_si256(c2[12799],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[10158],simde_mm256_xor_si256(c2[14385],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[5401],simde_mm256_xor_si256(c2[5929],simde_mm256_xor_si256(c2[14408],simde_mm256_xor_si256(c2[1728],simde_mm256_xor_si256(c2[2263],simde_mm256_xor_si256(c2[5962],simde_mm256_xor_si256(c2[6490],simde_mm256_xor_si256(c2[5432],simde_mm256_xor_si256(c2[9131],simde_mm256_xor_si256(c2[9659],simde_mm256_xor_si256(c2[7560],simde_mm256_xor_si256(c2[11787],simde_mm256_xor_si256(c2[7570],simde_mm256_xor_si256(c2[11257],simde_mm256_xor_si256(c2[11785],sim
de_mm256_xor_si256(c2[14958],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[2290],simde_mm256_xor_si256(c2[7584],simde_mm256_xor_si256(c2[11811],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[5482],simde_mm256_xor_si256(c2[6010],simde_mm256_xor_si256(c2[14451],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[4443],simde_mm256_xor_si256(c2[11311],simde_mm256_xor_si256(c2[15538],simde_mm256_xor_si256(c2[1276],simde_mm256_xor_si256(c2[4975],simde_mm256_xor_si256(c2[5503],simde_mm256_xor_si256(c2[1835],simde_mm256_xor_si256(c2[6050],simde_mm256_xor_si256(c2[2358],simde_mm256_xor_si256(c2[6057],simde_mm256_xor_si256(c2[6585],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[7641],simde_mm256_xor_si256(c2[8169],simde_mm256_xor_si256(c2[8717],simde_mm256_xor_si256(c2[12944],simde_mm256_xor_si256(c2[5555],simde_mm256_xor_si256(c2[9770],simde_mm256_xor_si256(c2[10835],simde_mm256_xor_si256(c2[14522],simde_mm256_xor_si256(c2[15050],simde_mm256_xor_si256(c2[10850],simde_mm256_xor_si256(c2[15077],simde_mm256_xor_si256(c2[13488],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[5577],simde_mm256_xor_si256(c2[9264],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[9792],simde_mm256_xor_si256(c2[4537],simde_mm256_xor_si256(c2[8764],simde_mm256_xor_si256(c2[6128],simde_mm256_xor_si256(c2[9827],simde_mm256_xor_si256(c2[10355],simde_mm256_xor_si256(c2[12984],simde_mm256_xor_si256(c2[16683],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[15128],simde_mm256_xor_si256(c2[2448],simde_mm256_xor_si256(c2[14596],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[1928],simde_mm256_xor_si256(c2[12488],simde_mm256_xor_si256(c2[16187],simde_mm256_xor_si256(c2[16715],simde_mm256_xor_si256(c2[14096],simde_mm256_xor_si256(c2[1416],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[7760],simde_mm256_xor_si256(c2[13035],simde_mm256_xor_si256(c2[16734],simde_mm256_xor_si256(c2[367],
simde_mm256_xor_si256(c2[7253],simde_mm256_xor_si256(c2[11480],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[7252],simde_mm256_xor_si256(c2[10427],simde_mm256_xor_si256(c2[14114],simde_mm256_xor_si256(c2[14642],simde_mm256_xor_si256(c2[4081],simde_mm256_xor_si256(c2[11501],simde_mm256_xor_si256(c2[15728],simde_mm256_xor_si256(c2[3582],simde_mm256_xor_si256(c2[7281],simde_mm256_xor_si256(c2[7809],simde_mm256_xor_si256(c2[8339],simde_mm256_xor_si256(c2[12026],simde_mm256_xor_si256(c2[12554],simde_mm256_xor_si256(c2[11529],simde_mm256_xor_si256(c2[15744],simde_mm256_xor_si256(c2[10473],simde_mm256_xor_si256(c2[14160],simde_mm256_xor_si256(c2[14688],simde_mm256_xor_si256(c2[9946],simde_mm256_xor_si256(c2[13633],simde_mm256_xor_si256(c2[14161],simde_mm256_xor_si256(c2[12607],simde_mm256_xor_si256(c2[16834],simde_mm256_xor_si256(c2[7848],simde_mm256_xor_si256(c2[12075],simde_mm256_xor_si256(c2[8912],simde_mm256_xor_si256(c2[12611],simde_mm256_xor_si256(c2[13139],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[11571],simde_mm256_xor_si256(c2[8930],simde_mm256_xor_si256(c2[12629],simde_mm256_xor_si256(c2[13157],simde_mm256_xor_si256(c2[5763],simde_mm256_xor_si256(c2[9462],simde_mm256_xor_si256(c2[9990],simde_mm256_xor_si256(c2[15816],simde_mm256_xor_si256(c2[3148],simde_mm256_xor_si256(c2[10541],simde_mm256_xor_si256(c2[14768],simde_mm256_xor_si256(c2[4729],simde_mm256_xor_si256(c2[8428],simde_mm256_xor_si256(c2[8956],c2[8963]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[108]=_mm256_xor_si256(c2[7403],_mm256_xor_si256(c2[4255],_mm256_xor_si256(c2[778],_mm256_xor_si256(c2[2908],_mm256_xor_si256(c2[13513],_mm256_xor_si256(c2[8337],_mm256_xor_si256(c2[16801],c2[9986])))))));
+     d2[108]=simde_mm256_xor_si256(c2[7403],simde_mm256_xor_si256(c2[4255],simde_mm256_xor_si256(c2[778],simde_mm256_xor_si256(c2[2908],simde_mm256_xor_si256(c2[13513],simde_mm256_xor_si256(c2[8337],simde_mm256_xor_si256(c2[16801],c2[9986])))))));
 
 //row: 10
-     d2[120]=_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[10090],_mm256_xor_si256(c2[9608],_mm256_xor_si256(c2[8091],_mm256_xor_si256(c2[16571],c2[1403])))));
+     d2[120]=simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[10090],simde_mm256_xor_si256(c2[9608],simde_mm256_xor_si256(c2[8091],simde_mm256_xor_si256(c2[16571],c2[1403])))));
 
 //row: 11
-     d2[132]=_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[3169],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[13728],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[14260],_mm256_xor_si256(c2[538],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[4224],_mm256_xor_si256(c2[8449],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[3196],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[4254],_mm256_xor_si256(c2[7427],_mm256_xor_si256(c2[2665],_mm256_xor_si256(c2[5838],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[13776],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[11672],_mm256_xor_si256(c2[14833],_mm256_xor_si256(c2[16449],_mm256_xor_si256(c2[2187],_mm256_xor_si256(c2[2715],_mm256_xor_si256(c2[12216],_mm256_xor_si256(c2[15389],_mm256_xor_si256(c2[7474],_mm256_xor_si256(c2[10635],_mm256_xor_si256(c2[7495],_mm256_xor_si256(c2[10656],_mm256_xor_si256(c2[13832],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[4353],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[15962],_mm256_xor_si256(c2[2240],_mm256_xor_si256(c2[7518],_mm256_xor_si256(c2[10691],_mm256_xor_si256(c2[3317],_mm256_xor_si256(c2[5962],_mm256_xor_si256(c2[6490],_mm256_xor_si256(c2[8067],_mm256_xor_si256(c2[11240],_mm256_xor_si256(c2[11236],_mm256_xor_si256(c2[14409],_mm256_xor_si256(c2[13376],_mm256_xor_si256(c2[16537],_mm256_xor_si256(c2[13374],_mm256_xor_si256(c2[16547],_mm256_xor_si256(c2[3867],_mm256_xor_si256(c2[7040],_mm256_xor_si256(c2[13400],_mm256_xor_si256(c2[16561],_mm256_xor_si256(c2[7587],_mm256_xor_si256(c2[10760],_mm256_xor_si256(c2[3360],_mm256_xor_si256(c2[6533],_mm256_xor_si256(c2[6032],_mm256_xor_si256(c2[8665],_mm256_xor_si256(c2[9193],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[3393],_mm256_xor_si256(c2[7080],_mm256_xor_si256(c2[10253],_mm256_xor_si256(c2[7639],_mm256_xor_si256(c2[10272],_mm256_xor_si256(c2[10800],_mm256_xor_si256(c2[8162],_mm256_xor_si256(c2[1133
5],_mm256_xor_si256(c2[9746],_mm256_xor_si256(c2[12919],_mm256_xor_si256(c2[14521],_mm256_xor_si256(c2[271],_mm256_xor_si256(c2[799],_mm256_xor_si256(c2[11359],_mm256_xor_si256(c2[14520],_mm256_xor_si256(c2[16639],_mm256_xor_si256(c2[2905],_mm256_xor_si256(c2[16666],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[2932],_mm256_xor_si256(c2[2409],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[11381],_mm256_xor_si256(c2[14554],_mm256_xor_si256(c2[4512],_mm256_xor_si256(c2[10353],_mm256_xor_si256(c2[12986],_mm256_xor_si256(c2[13514],_mm256_xor_si256(c2[11932],_mm256_xor_si256(c2[15105],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[7210],_mm256_xor_si256(c2[3505],_mm256_xor_si256(c2[6678],_mm256_xor_si256(c2[1397],_mm256_xor_si256(c2[4570],_mm256_xor_si256(c2[3005],_mm256_xor_si256(c2[5650],_mm256_xor_si256(c2[6178],_mm256_xor_si256(c2[9337],_mm256_xor_si256(c2[12510],_mm256_xor_si256(c2[1944],_mm256_xor_si256(c2[5117],_mm256_xor_si256(c2[13057],_mm256_xor_si256(c2[15702],_mm256_xor_si256(c2[16230],_mm256_xor_si256(c2[8841],_mm256_xor_si256(c2[12002],_mm256_xor_si256(c2[16231],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[8305],_mm256_xor_si256(c2[410],_mm256_xor_si256(c2[3583],_mm256_xor_si256(c2[9386],_mm256_xor_si256(c2[12559],_mm256_xor_si256(c2[14143],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[3611],_mm256_xor_si256(c2[16277],_mm256_xor_si256(c2[2555],_mm256_xor_si256(c2[15750],_mm256_xor_si256(c2[2016],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[4689],_mm256_xor_si256(c2[13664],_mm256_xor_si256(c2[16825],_mm256_xor_si256(c2[14716],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[13160],_mm256_xor_si256(c2[15793],_mm256_xor_si256(c2[16321],_mm256_xor_si256(c2[14746],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[11579],_mm256_xor_si256(c2[14740],_mm256_xor_si256(c2[4737],_mm256_xor_si256(c2[7370],_mm256_xor_si256(c2[7898],_mm256_xor_si256(c2[1
6345],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[10545],_mm256_xor_si256(c2[13706],c2[15293])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[132]=simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[3169],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[13728],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[14260],simde_mm256_xor_si256(c2[538],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[4224],simde_mm256_xor_si256(c2[8449],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[3196],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[4254],simde_mm256_xor_si256(c2[7427],simde_mm256_xor_si256(c2[2665],simde_mm256_xor_si256(c2[5838],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[13776],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[11672],simde_mm256_xor_si256(c2[14833],simde_mm256_xor_si256(c2[16449],simde_mm256_xor_si256(c2[2187],simde_mm256_xor_si256(c2[2715],simde_mm256_xor_si256(c2[12216],simde_mm256_xor_si256(c2[15389],simde_mm256_xor_si256(c2[7474],simde_mm256_xor_si256(c2[10635],simde_mm256_xor_si256(c2[7495],simde_mm256_xor_si256(c2[10656],simde_mm256_xor_si256(c2[13832],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[4353],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[15962],simde_mm256_xor_si256(c2[2240],simde_mm256_xor_si256(c2[7518],simde_mm256_xor_si256(c2[10691],simde_mm256_xor_si256(c2[3317],simde_mm256_xor_si256(c2[5962],simde_mm256_xor_si256(c2[6490],simde_mm256_xor_si256(c2[8067],simde_mm256_xor_si256(c2[11240],simde_mm256_xor_si256(c2[11236],simde_mm256_xor_si256(c2[14409],simde_mm256_xor_si256(c2[13376],simde_mm256_xor_si256(c2[16537],simde_mm256_xor_si256(c2[13374],simde_mm256_xor_si256(c2[16547],simde_mm256_xor_si256(c2[3867],simde_mm256_xor_si256(c2[7040],simde_mm256_xor_si256(c2[13400],simde_mm256_xor_si256(c2[16561],simde_mm256_xor_si256(c2[7587],simde_mm256_xor_si256(c2[10760],simde_mm256_xor_si256(c2[3360],simde_mm256_xor_si256(c
2[6533],simde_mm256_xor_si256(c2[6032],simde_mm256_xor_si256(c2[8665],simde_mm256_xor_si256(c2[9193],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[3393],simde_mm256_xor_si256(c2[7080],simde_mm256_xor_si256(c2[10253],simde_mm256_xor_si256(c2[7639],simde_mm256_xor_si256(c2[10272],simde_mm256_xor_si256(c2[10800],simde_mm256_xor_si256(c2[8162],simde_mm256_xor_si256(c2[11335],simde_mm256_xor_si256(c2[9746],simde_mm256_xor_si256(c2[12919],simde_mm256_xor_si256(c2[14521],simde_mm256_xor_si256(c2[271],simde_mm256_xor_si256(c2[799],simde_mm256_xor_si256(c2[11359],simde_mm256_xor_si256(c2[14520],simde_mm256_xor_si256(c2[16639],simde_mm256_xor_si256(c2[2905],simde_mm256_xor_si256(c2[16666],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[2932],simde_mm256_xor_si256(c2[2409],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[11381],simde_mm256_xor_si256(c2[14554],simde_mm256_xor_si256(c2[4512],simde_mm256_xor_si256(c2[10353],simde_mm256_xor_si256(c2[12986],simde_mm256_xor_si256(c2[13514],simde_mm256_xor_si256(c2[11932],simde_mm256_xor_si256(c2[15105],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[7210],simde_mm256_xor_si256(c2[3505],simde_mm256_xor_si256(c2[6678],simde_mm256_xor_si256(c2[1397],simde_mm256_xor_si256(c2[4570],simde_mm256_xor_si256(c2[3005],simde_mm256_xor_si256(c2[5650],simde_mm256_xor_si256(c2[6178],simde_mm256_xor_si256(c2[9337],simde_mm256_xor_si256(c2[12510],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[5117],simde_mm256_xor_si256(c2[13057],simde_mm256_xor_si256(c2[15702],simde_mm256_xor_si256(c2[16230],simde_mm256_xor_si256(c2[8841],simde_mm256_xor_si256(c2[12002],simde_mm256_xor_si256(c2[16231],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[8305],simde_mm256_xor_si256(c2[410],simde_mm256_xor_si256(c2[3583],simde_mm256_xor_si256(c2[9386],simde_mm256_xor_si256(c2[12559],simde_mm256_xor_si256(c2[14143],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si2
56(c2[438],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[3611],simde_mm256_xor_si256(c2[16277],simde_mm256_xor_si256(c2[2555],simde_mm256_xor_si256(c2[15750],simde_mm256_xor_si256(c2[2016],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[13664],simde_mm256_xor_si256(c2[16825],simde_mm256_xor_si256(c2[14716],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[13160],simde_mm256_xor_si256(c2[15793],simde_mm256_xor_si256(c2[16321],simde_mm256_xor_si256(c2[14746],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[11579],simde_mm256_xor_si256(c2[14740],simde_mm256_xor_si256(c2[4737],simde_mm256_xor_si256(c2[7370],simde_mm256_xor_si256(c2[7898],simde_mm256_xor_si256(c2[16345],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[10545],simde_mm256_xor_si256(c2[13706],c2[15293])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[144]=_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[13757],_mm256_xor_si256(c2[7637],_mm256_xor_si256(c2[4495],_mm256_xor_si256(c2[9817],c2[5714])))));
+     d2[144]=simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[13757],simde_mm256_xor_si256(c2[7637],simde_mm256_xor_si256(c2[4495],simde_mm256_xor_si256(c2[9817],c2[5714])))));
 
 //row: 13
-     d2[156]=_mm256_xor_si256(c2[2649],_mm256_xor_si256(c2[3177],_mm256_xor_si256(c2[16369],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[3704],_mm256_xor_si256(c2[13209],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[3192],_mm256_xor_si256(c2[6895],_mm256_xor_si256(c2[5306],_mm256_xor_si256(c2[2161],_mm256_xor_si256(c2[2689],_mm256_xor_si256(c2[16417],_mm256_xor_si256(c2[14313],_mm256_xor_si256(c2[1667],_mm256_xor_si256(c2[2195],_mm256_xor_si256(c2[14857],_mm256_xor_si256(c2[10115],_mm256_xor_si256(c2[9053],_mm256_xor_si256(c2[10136],_mm256_xor_si256(c2[16473],_mm256_xor_si256(c2[3272],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[4349],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[10159],_mm256_xor_si256(c2[5430],_mm256_xor_si256(c2[5958],_mm256_xor_si256(c2[10708],_mm256_xor_si256(c2[13877],_mm256_xor_si256(c2[16017],_mm256_xor_si256(c2[16015],_mm256_xor_si256(c2[6508],_mm256_xor_si256(c2[5456],_mm256_xor_si256(c2[16041],_mm256_xor_si256(c2[10228],_mm256_xor_si256(c2[6001],_mm256_xor_si256(c2[8145],_mm256_xor_si256(c2[8673],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[9721],_mm256_xor_si256(c2[9752],_mm256_xor_si256(c2[10280],_mm256_xor_si256(c2[10803],_mm256_xor_si256(c2[12387],_mm256_xor_si256(c2[16634],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[14000],_mm256_xor_si256(c2[2385],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[5050],_mm256_xor_si256(c2[14022],_mm256_xor_si256(c2[12466],_mm256_xor_si256(c2[12994],_mm256_xor_si256(c2[14573],_mm256_xor_si256(c2[4546],_mm256_xor_si256(c2[6678],_mm256_xor_si256(c2[6146],_mm256_xor_si256(c2[4038],_mm256_xor_si256(c2[5118],_mm256_xor_si256(c2[5646],_mm256_xor_si256(c2[11978],_mm256_xor_si256(c2[4585],_mm256_xor_si256(c2[15170],_mm256_xor_si256(c2[15698],_mm256_xor_si256(c2[11482],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[3051],_mm256_xor_si256(c2[12027],_mm256_xor_si256(c2[16784],_mm256_xor_si256(c2[2551],_mm256_xor_si256(c2[3079],_mm256_xor_si256(c2[2023],_mm256_xor_si256(c2[1496],_mm256_xor
_si256(c2[3629],_mm256_xor_si256(c2[4157],_mm256_xor_si256(c2[16305],_mm256_xor_si256(c2[462],_mm256_xor_si256(c2[15273],_mm256_xor_si256(c2[15801],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[14208],_mm256_xor_si256(c2[10515],_mm256_xor_si256(c2[6850],_mm256_xor_si256(c2[7378],_mm256_xor_si256(c2[2091],c2[13186])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[156]=simde_mm256_xor_si256(c2[2649],simde_mm256_xor_si256(c2[3177],simde_mm256_xor_si256(c2[16369],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[3704],simde_mm256_xor_si256(c2[13209],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[3192],simde_mm256_xor_si256(c2[6895],simde_mm256_xor_si256(c2[5306],simde_mm256_xor_si256(c2[2161],simde_mm256_xor_si256(c2[2689],simde_mm256_xor_si256(c2[16417],simde_mm256_xor_si256(c2[14313],simde_mm256_xor_si256(c2[1667],simde_mm256_xor_si256(c2[2195],simde_mm256_xor_si256(c2[14857],simde_mm256_xor_si256(c2[10115],simde_mm256_xor_si256(c2[9053],simde_mm256_xor_si256(c2[10136],simde_mm256_xor_si256(c2[16473],simde_mm256_xor_si256(c2[3272],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[4349],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[10159],simde_mm256_xor_si256(c2[5430],simde_mm256_xor_si256(c2[5958],simde_mm256_xor_si256(c2[10708],simde_mm256_xor_si256(c2[13877],simde_mm256_xor_si256(c2[16017],simde_mm256_xor_si256(c2[16015],simde_mm256_xor_si256(c2[6508],simde_mm256_xor_si256(c2[5456],simde_mm256_xor_si256(c2[16041],simde_mm256_xor_si256(c2[10228],simde_mm256_xor_si256(c2[6001],simde_mm256_xor_si256(c2[8145],simde_mm256_xor_si256(c2[8673],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[9721],simde_mm256_xor_si256(c2[9752],simde_mm256_xor_si256(c2[10280],simde_mm256_xor_si256(c2[10803],simde_mm256_xor_si256(c2[12387],simde_mm256_xor_si256(c2[16634],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[14000],simde_mm256_xor_si256(c2[2385],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[5050],simde_mm256_xor_si256(c2[14022],simde_mm256_xor_si256(c2[12466],simde_mm256_xor_si256(c2[12994],simde_mm256_xor_si256(c2[14573],simde_mm256_xor_si256(c2[4546],simde_mm256_xor_si256(c2[6678],simde_mm256_xor_si256(c2[6146],simde_mm256_xor_si256(c2[4038],simde_mm256_xor_si256(c2[5118],simde_mm256_xor_si256(c2[5646],simde_mm256_xor_si256(c2[11978],simde_mm256_
xor_si256(c2[4585],simde_mm256_xor_si256(c2[15170],simde_mm256_xor_si256(c2[15698],simde_mm256_xor_si256(c2[11482],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[3051],simde_mm256_xor_si256(c2[12027],simde_mm256_xor_si256(c2[16784],simde_mm256_xor_si256(c2[2551],simde_mm256_xor_si256(c2[3079],simde_mm256_xor_si256(c2[2023],simde_mm256_xor_si256(c2[1496],simde_mm256_xor_si256(c2[3629],simde_mm256_xor_si256(c2[4157],simde_mm256_xor_si256(c2[16305],simde_mm256_xor_si256(c2[462],simde_mm256_xor_si256(c2[15273],simde_mm256_xor_si256(c2[15801],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[14208],simde_mm256_xor_si256(c2[10515],simde_mm256_xor_si256(c2[6850],simde_mm256_xor_si256(c2[7378],simde_mm256_xor_si256(c2[2091],c2[13186])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[168]=_mm256_xor_si256(c2[7396],_mm256_xor_si256(c2[12967],_mm256_xor_si256(c2[5116],_mm256_xor_si256(c2[13586],_mm256_xor_si256(c2[14674],c2[6840])))));
+     d2[168]=simde_mm256_xor_si256(c2[7396],simde_mm256_xor_si256(c2[12967],simde_mm256_xor_si256(c2[5116],simde_mm256_xor_si256(c2[13586],simde_mm256_xor_si256(c2[14674],c2[6840])))));
 
 //row: 15
-     d2[180]=_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[13738],_mm256_xor_si256(c2[14258],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[1061],_mm256_xor_si256(c2[8983],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[4252],_mm256_xor_si256(c2[2147],_mm256_xor_si256(c2[2675],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[13786],_mm256_xor_si256(c2[11670],_mm256_xor_si256(c2[16447],_mm256_xor_si256(c2[12226],_mm256_xor_si256(c2[6944],_mm256_xor_si256(c2[7472],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[13830],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[1706],_mm256_xor_si256(c2[15960],_mm256_xor_si256(c2[7516],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[8065],_mm256_xor_si256(c2[10706],_mm256_xor_si256(c2[11234],_mm256_xor_si256(c2[13374],_mm256_xor_si256(c2[13372],_mm256_xor_si256(c2[3337],_mm256_xor_si256(c2[3865],_mm256_xor_si256(c2[13398],_mm256_xor_si256(c2[7585],_mm256_xor_si256(c2[2842],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[6030],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[7090],_mm256_xor_si256(c2[7637],_mm256_xor_si256(c2[8160],_mm256_xor_si256(c2[9216],_mm256_xor_si256(c2[9744],_mm256_xor_si256(c2[9750],_mm256_xor_si256(c2[14531],_mm256_xor_si256(c2[11357],_mm256_xor_si256(c2[16109],_mm256_xor_si256(c2[16637],_mm256_xor_si256(c2[16664],_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[10851],_mm256_xor_si256(c2[11379],_mm256_xor_si256(c2[10351],_mm256_xor_si256(c2[11930],_mm256_xor_si256(c2[1375],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[16161],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[3515],_mm256_xor_si256(c2[867],_mm256_xor_si256(c2[1395],_mm256_xor_si256(c2[3003],_mm256_xor_si256(c2[9347],_mm256_xor_si256(c2[1954],_mm256_xor_si256(c2[13067],_mm256_xor_si256(c2[8839],_mm256_xor_si256(c2[15701],_mm256_xor_si256(c2[16229],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[9384],_mm256_xor_si256(c2[13613],_mm256_xor_si256(c2[14141],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[16275],_mm256_xor_si256(c
2[15220],_mm256_xor_si256(c2[15748],_mm256_xor_si256(c2[12577],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[13662],_mm256_xor_si256(c2[14714],_mm256_xor_si256(c2[13158],_mm256_xor_si256(c2[14744],_mm256_xor_si256(c2[11049],_mm256_xor_si256(c2[11577],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[16355],_mm256_xor_si256(c2[10015],c2[10543]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[180]=simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[13738],simde_mm256_xor_si256(c2[14258],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[1061],simde_mm256_xor_si256(c2[8983],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[4252],simde_mm256_xor_si256(c2[2147],simde_mm256_xor_si256(c2[2675],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[13786],simde_mm256_xor_si256(c2[11670],simde_mm256_xor_si256(c2[16447],simde_mm256_xor_si256(c2[12226],simde_mm256_xor_si256(c2[6944],simde_mm256_xor_si256(c2[7472],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[13830],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[1706],simde_mm256_xor_si256(c2[15960],simde_mm256_xor_si256(c2[7516],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[8065],simde_mm256_xor_si256(c2[10706],simde_mm256_xor_si256(c2[11234],simde_mm256_xor_si256(c2[13374],simde_mm256_xor_si256(c2[13372],simde_mm256_xor_si256(c2[3337],simde_mm256_xor_si256(c2[3865],simde_mm256_xor_si256(c2[13398],simde_mm256_xor_si256(c2[7585],simde_mm256_xor_si256(c2[2842],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[6030],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[7090],simde_mm256_xor_si256(c2[7637],simde_mm256_xor_si256(c2[8160],simde_mm256_xor_si256(c2[9216],simde_mm256_xor_si256(c2[9744],simde_mm256_xor_si256(c2[9750],simde_mm256_xor_si256(c2[14531],simde_mm256_xor_si256(c2[11357],simde_mm256_xor_si256(c2[16109],simde_mm256_xor_si256(c2[16637],simde_mm256_xor_si256(c2[16664],simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[10851],simde_mm256_xor_si256(c2[11379],simde_mm256_xor_si256(c2[10351],simde_mm256_xor_si256(c2[11930],simde_mm256_xor_si256(c2[1375],simde_mm256_xor_si256(c2[1903],simde_mm256_xor_si256(c2[16161],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[3515],simde_mm256_xor_si256(c2[867],simde_mm256_xor_si256(c2[1395],simde_mm256_xor_si256(c2[3003],simde_mm256_xor_si2
56(c2[9347],simde_mm256_xor_si256(c2[1954],simde_mm256_xor_si256(c2[13067],simde_mm256_xor_si256(c2[8839],simde_mm256_xor_si256(c2[15701],simde_mm256_xor_si256(c2[16229],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[9384],simde_mm256_xor_si256(c2[13613],simde_mm256_xor_si256(c2[14141],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[16275],simde_mm256_xor_si256(c2[15220],simde_mm256_xor_si256(c2[15748],simde_mm256_xor_si256(c2[12577],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[13662],simde_mm256_xor_si256(c2[14714],simde_mm256_xor_si256(c2[13158],simde_mm256_xor_si256(c2[14744],simde_mm256_xor_si256(c2[11049],simde_mm256_xor_si256(c2[11577],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[16355],simde_mm256_xor_si256(c2[10015],c2[10543]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[192]=_mm256_xor_si256(c2[6347],_mm256_xor_si256(c2[2644],_mm256_xor_si256(c2[3176],_mm256_xor_si256(c2[6874],_mm256_xor_si256(c2[6362],_mm256_xor_si256(c2[10065],_mm256_xor_si256(c2[8476],_mm256_xor_si256(c2[6888],_mm256_xor_si256(c2[5859],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[5353],_mm256_xor_si256(c2[1132],_mm256_xor_si256(c2[13273],_mm256_xor_si256(c2[9586],_mm256_xor_si256(c2[13306],_mm256_xor_si256(c2[2736],_mm256_xor_si256(c2[6442],_mm256_xor_si256(c2[7519],_mm256_xor_si256(c2[4878],_mm256_xor_si256(c2[13329],_mm256_xor_si256(c2[9128],_mm256_xor_si256(c2[13878],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[2280],_mm256_xor_si256(c2[2290],_mm256_xor_si256(c2[9678],_mm256_xor_si256(c2[2304],_mm256_xor_si256(c2[13398],_mm256_xor_si256(c2[9171],_mm256_xor_si256(c2[11843],_mm256_xor_si256(c2[6031],_mm256_xor_si256(c2[12891],_mm256_xor_si256(c2[13450],_mm256_xor_si256(c2[13973],_mm256_xor_si256(c2[15557],_mm256_xor_si256(c2[3437],_mm256_xor_si256(c2[275],_mm256_xor_si256(c2[5555],_mm256_xor_si256(c2[13465],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[8208],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[16152],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[7704],_mm256_xor_si256(c2[9848],_mm256_xor_si256(c2[9316],_mm256_xor_si256(c2[7208],_mm256_xor_si256(c2[8816],_mm256_xor_si256(c2[15148],_mm256_xor_si256(c2[7755],_mm256_xor_si256(c2[1973],_mm256_xor_si256(c2[14640],_mm256_xor_si256(c2[5147],_mm256_xor_si256(c2[6221],_mm256_xor_si256(c2[15197],_mm256_xor_si256(c2[3059],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[5193],_mm256_xor_si256(c2[4666],_mm256_xor_si256(c2[7327],_mm256_xor_si256(c2[2568],_mm256_xor_si256(c2[3632],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[3650],_mm256_xor_si256(c2[483],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[10536],_mm256_xor_si256(c2[5261],c2[16344]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[192]=simde_mm256_xor_si256(c2[6347],simde_mm256_xor_si256(c2[2644],simde_mm256_xor_si256(c2[3176],simde_mm256_xor_si256(c2[6874],simde_mm256_xor_si256(c2[6362],simde_mm256_xor_si256(c2[10065],simde_mm256_xor_si256(c2[8476],simde_mm256_xor_si256(c2[6888],simde_mm256_xor_si256(c2[5859],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[5353],simde_mm256_xor_si256(c2[1132],simde_mm256_xor_si256(c2[13273],simde_mm256_xor_si256(c2[9586],simde_mm256_xor_si256(c2[13306],simde_mm256_xor_si256(c2[2736],simde_mm256_xor_si256(c2[6442],simde_mm256_xor_si256(c2[7519],simde_mm256_xor_si256(c2[4878],simde_mm256_xor_si256(c2[13329],simde_mm256_xor_si256(c2[9128],simde_mm256_xor_si256(c2[13878],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[2280],simde_mm256_xor_si256(c2[2290],simde_mm256_xor_si256(c2[9678],simde_mm256_xor_si256(c2[2304],simde_mm256_xor_si256(c2[13398],simde_mm256_xor_si256(c2[9171],simde_mm256_xor_si256(c2[11843],simde_mm256_xor_si256(c2[6031],simde_mm256_xor_si256(c2[12891],simde_mm256_xor_si256(c2[13450],simde_mm256_xor_si256(c2[13973],simde_mm256_xor_si256(c2[15557],simde_mm256_xor_si256(c2[3437],simde_mm256_xor_si256(c2[275],simde_mm256_xor_si256(c2[5555],simde_mm256_xor_si256(c2[13465],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[8208],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[16152],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[7704],simde_mm256_xor_si256(c2[9848],simde_mm256_xor_si256(c2[9316],simde_mm256_xor_si256(c2[7208],simde_mm256_xor_si256(c2[8816],simde_mm256_xor_si256(c2[15148],simde_mm256_xor_si256(c2[7755],simde_mm256_xor_si256(c2[1973],simde_mm256_xor_si256(c2[14640],simde_mm256_xor_si256(c2[5147],simde_mm256_xor_si256(c2[6221],simde_mm256_xor_si256(c2[15197],simde_mm256_xor_si256(c2[3059],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[5193],simde_mm256_xor_si256(c2[4666],simde_mm256_xor_si256(c2[7327],simde_mm256_xor_si256(c2[2568],simde_mm256_xor_si256(
c2[3632],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[3650],simde_mm256_xor_si256(c2[483],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[10536],simde_mm256_xor_si256(c2[5261],c2[16344]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[204]=_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[8265],_mm256_xor_si256(c2[9362],_mm256_xor_si256(c2[3587],c2[12659]))));
+     d2[204]=simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[8265],simde_mm256_xor_si256(c2[9362],simde_mm256_xor_si256(c2[3587],c2[12659]))));
 
 //row: 18
-     d2[216]=_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[12992],_mm256_xor_si256(c2[2548],c2[2568]))));
+     d2[216]=simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[12992],simde_mm256_xor_si256(c2[2548],c2[2568]))));
 
 //row: 19
-     d2[228]=_mm256_xor_si256(c2[8980],_mm256_xor_si256(c2[11118],_mm256_xor_si256(c2[12850],_mm256_xor_si256(c2[9703],c2[2886]))));
+     d2[228]=simde_mm256_xor_si256(c2[8980],simde_mm256_xor_si256(c2[11118],simde_mm256_xor_si256(c2[12850],simde_mm256_xor_si256(c2[9703],c2[2886]))));
 
 //row: 20
-     d2[240]=_mm256_xor_si256(c2[7403],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[4232],_mm256_xor_si256(c2[7930],_mm256_xor_si256(c2[14261],_mm256_xor_si256(c2[7418],_mm256_xor_si256(c2[11121],_mm256_xor_si256(c2[9532],_mm256_xor_si256(c2[6915],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[1632],_mm256_xor_si256(c2[6409],_mm256_xor_si256(c2[2188],_mm256_xor_si256(c2[14329],_mm256_xor_si256(c2[7470],_mm256_xor_si256(c2[14362],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[7498],_mm256_xor_si256(c2[8575],_mm256_xor_si256(c2[5934],_mm256_xor_si256(c2[14385],_mm256_xor_si256(c2[10184],_mm256_xor_si256(c2[14934],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[3346],_mm256_xor_si256(c2[10734],_mm256_xor_si256(c2[3360],_mm256_xor_si256(c2[14454],_mm256_xor_si256(c2[10227],_mm256_xor_si256(c2[12899],_mm256_xor_si256(c2[7087],_mm256_xor_si256(c2[13947],_mm256_xor_si256(c2[4448],_mm256_xor_si256(c2[14506],_mm256_xor_si256(c2[15029],_mm256_xor_si256(c2[16613],_mm256_xor_si256(c2[4493],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[6611],_mm256_xor_si256(c2[11362],_mm256_xor_si256(c2[6626],_mm256_xor_si256(c2[9264],_mm256_xor_si256(c2[1353],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[8760],_mm256_xor_si256(c2[10904],_mm256_xor_si256(c2[10372],_mm256_xor_si256(c2[8264],_mm256_xor_si256(c2[9872],_mm256_xor_si256(c2[16204],_mm256_xor_si256(c2[8811],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[15696],_mm256_xor_si256(c2[6203],_mm256_xor_si256(c2[7277],_mm256_xor_si256(c2[16253],_mm256_xor_si256(c2[4115],_mm256_xor_si256(c2[7305],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[5722],_mm256_xor_si256(c2[8383],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[4706],_mm256_xor_si256(c2[1539],_mm256_xor_si256(c2[11592],_mm256_xor_si256(c2[6317],c2[505]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[240]=simde_mm256_xor_si256(c2[7403],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[4232],simde_mm256_xor_si256(c2[7930],simde_mm256_xor_si256(c2[14261],simde_mm256_xor_si256(c2[7418],simde_mm256_xor_si256(c2[11121],simde_mm256_xor_si256(c2[9532],simde_mm256_xor_si256(c2[6915],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[1632],simde_mm256_xor_si256(c2[6409],simde_mm256_xor_si256(c2[2188],simde_mm256_xor_si256(c2[14329],simde_mm256_xor_si256(c2[7470],simde_mm256_xor_si256(c2[14362],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[7498],simde_mm256_xor_si256(c2[8575],simde_mm256_xor_si256(c2[5934],simde_mm256_xor_si256(c2[14385],simde_mm256_xor_si256(c2[10184],simde_mm256_xor_si256(c2[14934],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[3346],simde_mm256_xor_si256(c2[10734],simde_mm256_xor_si256(c2[3360],simde_mm256_xor_si256(c2[14454],simde_mm256_xor_si256(c2[10227],simde_mm256_xor_si256(c2[12899],simde_mm256_xor_si256(c2[7087],simde_mm256_xor_si256(c2[13947],simde_mm256_xor_si256(c2[4448],simde_mm256_xor_si256(c2[14506],simde_mm256_xor_si256(c2[15029],simde_mm256_xor_si256(c2[16613],simde_mm256_xor_si256(c2[4493],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[6611],simde_mm256_xor_si256(c2[11362],simde_mm256_xor_si256(c2[6626],simde_mm256_xor_si256(c2[9264],simde_mm256_xor_si256(c2[1353],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[8760],simde_mm256_xor_si256(c2[10904],simde_mm256_xor_si256(c2[10372],simde_mm256_xor_si256(c2[8264],simde_mm256_xor_si256(c2[9872],simde_mm256_xor_si256(c2[16204],simde_mm256_xor_si256(c2[8811],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[15696],simde_mm256_xor_si256(c2[6203],simde_mm256_xor_si256(c2[7277],simde_mm256_xor_si256(c2[16253],simde_mm256_xor_si256(c2[4115],simde_mm256_xor_si256(c2[7305],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[5722],simde_mm256_xor_si256(c2[8383],simde_mm256_x
or_si256(c2[3624],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[4706],simde_mm256_xor_si256(c2[1539],simde_mm256_xor_si256(c2[11592],simde_mm256_xor_si256(c2[6317],c2[505]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[252]=_mm256_xor_si256(c2[6894],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[4618],_mm256_xor_si256(c2[11574],c2[1035]))));
+     d2[252]=simde_mm256_xor_si256(c2[6894],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[4618],simde_mm256_xor_si256(c2[11574],c2[1035]))));
 
 //row: 22
-     d2[264]=_mm256_xor_si256(c2[15840],_mm256_xor_si256(c2[6096],_mm256_xor_si256(c2[5071],c2[12024])));
+     d2[264]=simde_mm256_xor_si256(c2[15840],simde_mm256_xor_si256(c2[6096],simde_mm256_xor_si256(c2[5071],c2[12024])));
 
 //row: 23
-     d2[276]=_mm256_xor_si256(c2[12696],_mm256_xor_si256(c2[13250],_mm256_xor_si256(c2[15553],c2[14688])));
+     d2[276]=simde_mm256_xor_si256(c2[12696],simde_mm256_xor_si256(c2[13250],simde_mm256_xor_si256(c2[15553],c2[14688])));
 
 //row: 24
-     d2[288]=_mm256_xor_si256(c2[15316],_mm256_xor_si256(c2[11625],_mm256_xor_si256(c2[12145],_mm256_xor_si256(c2[15843],_mm256_xor_si256(c2[5289],_mm256_xor_si256(c2[15343],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[562],_mm256_xor_si256(c2[14840],_mm256_xor_si256(c2[11673],_mm256_xor_si256(c2[9557],_mm256_xor_si256(c2[14334],_mm256_xor_si256(c2[10113],_mm256_xor_si256(c2[5359],_mm256_xor_si256(c2[15916],_mm256_xor_si256(c2[5380],_mm256_xor_si256(c2[11717],_mm256_xor_si256(c2[15411],_mm256_xor_si256(c2[5911],_mm256_xor_si256(c2[16488],_mm256_xor_si256(c2[13859],_mm256_xor_si256(c2[5403],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[5952],_mm256_xor_si256(c2[9121],_mm256_xor_si256(c2[11261],_mm256_xor_si256(c2[11259],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[11285],_mm256_xor_si256(c2[5472],_mm256_xor_si256(c2[1257],_mm256_xor_si256(c2[3917],_mm256_xor_si256(c2[15000],_mm256_xor_si256(c2[4977],_mm256_xor_si256(c2[5524],_mm256_xor_si256(c2[6059],_mm256_xor_si256(c2[7643],_mm256_xor_si256(c2[12418],_mm256_xor_si256(c2[9244],_mm256_xor_si256(c2[14524],_mm256_xor_si256(c2[10306],_mm256_xor_si256(c2[14551],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[9266],_mm256_xor_si256(c2[8238],_mm256_xor_si256(c2[9817],_mm256_xor_si256(c2[16685],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[16177],_mm256_xor_si256(c2[890],_mm256_xor_si256(c2[7234],_mm256_xor_si256(c2[16736],_mm256_xor_si256(c2[10954],_mm256_xor_si256(c2[6726],_mm256_xor_si256(c2[14116],_mm256_xor_si256(c2[15202],_mm256_xor_si256(c2[7283],_mm256_xor_si256(c2[12028],_mm256_xor_si256(c2[15218],_mm256_xor_si256(c2[14162],_mm256_xor_si256(c2[13635],_mm256_xor_si256(c2[16296],_mm256_xor_si256(c2[11549],_mm256_xor_si256(c2[12601],_mm256_xor_si256(c2[11045],_mm256_xor_si256(c2[12631],_mm256_xor_si256(c2[9464],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[14242],c2[8430]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[288]=simde_mm256_xor_si256(c2[15316],simde_mm256_xor_si256(c2[11625],simde_mm256_xor_si256(c2[12145],simde_mm256_xor_si256(c2[15843],simde_mm256_xor_si256(c2[5289],simde_mm256_xor_si256(c2[15343],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[562],simde_mm256_xor_si256(c2[14840],simde_mm256_xor_si256(c2[11673],simde_mm256_xor_si256(c2[9557],simde_mm256_xor_si256(c2[14334],simde_mm256_xor_si256(c2[10113],simde_mm256_xor_si256(c2[5359],simde_mm256_xor_si256(c2[15916],simde_mm256_xor_si256(c2[5380],simde_mm256_xor_si256(c2[11717],simde_mm256_xor_si256(c2[15411],simde_mm256_xor_si256(c2[5911],simde_mm256_xor_si256(c2[16488],simde_mm256_xor_si256(c2[13859],simde_mm256_xor_si256(c2[5403],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[5952],simde_mm256_xor_si256(c2[9121],simde_mm256_xor_si256(c2[11261],simde_mm256_xor_si256(c2[11259],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[11285],simde_mm256_xor_si256(c2[5472],simde_mm256_xor_si256(c2[1257],simde_mm256_xor_si256(c2[3917],simde_mm256_xor_si256(c2[15000],simde_mm256_xor_si256(c2[4977],simde_mm256_xor_si256(c2[5524],simde_mm256_xor_si256(c2[6059],simde_mm256_xor_si256(c2[7643],simde_mm256_xor_si256(c2[12418],simde_mm256_xor_si256(c2[9244],simde_mm256_xor_si256(c2[14524],simde_mm256_xor_si256(c2[10306],simde_mm256_xor_si256(c2[14551],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[9266],simde_mm256_xor_si256(c2[8238],simde_mm256_xor_si256(c2[9817],simde_mm256_xor_si256(c2[16685],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[16177],simde_mm256_xor_si256(c2[890],simde_mm256_xor_si256(c2[7234],simde_mm256_xor_si256(c2[16736],simde_mm256_xor_si256(c2[10954],simde_mm256_xor_si256(c2[6726],simde_mm256_xor_si256(c2[14116],simde_mm256_xor_si256(c2[15202],simde_mm256_xor_si256(c2[7283],simde_mm256_xor_si256(c2[12028],simde_mm256_xor_si256(c2[15218],simde_mm256_xor_si256(c2[14162],simde_mm256_xor_si256(c2[13635],simde_mm256_xor_si256(c2[16296],sim
de_mm256_xor_si256(c2[11549],simde_mm256_xor_si256(c2[12601],simde_mm256_xor_si256(c2[11045],simde_mm256_xor_si256(c2[12631],simde_mm256_xor_si256(c2[9464],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[14242],c2[8430]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 25
-     d2[300]=_mm256_xor_si256(c2[4250],_mm256_xor_si256(c2[9120],_mm256_xor_si256(c2[16547],c2[13017])));
+     d2[300]=simde_mm256_xor_si256(c2[4250],simde_mm256_xor_si256(c2[9120],simde_mm256_xor_si256(c2[16547],c2[13017])));
 
 //row: 26
-     d2[312]=_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[9026],_mm256_xor_si256(c2[6434],c2[4588])));
+     d2[312]=simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[9026],simde_mm256_xor_si256(c2[6434],c2[4588])));
 
 //row: 27
-     d2[324]=_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[1206],c2[2835]));
+     d2[324]=simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[1206],c2[2835]));
 
 //row: 28
-     d2[336]=_mm256_xor_si256(c2[15846],_mm256_xor_si256(c2[10128],_mm256_xor_si256(c2[11023],c2[10016])));
+     d2[336]=simde_mm256_xor_si256(c2[15846],simde_mm256_xor_si256(c2[10128],simde_mm256_xor_si256(c2[11023],c2[10016])));
 
 //row: 29
-     d2[348]=_mm256_xor_si256(c2[1056],_mm256_xor_si256(c2[14260],_mm256_xor_si256(c2[14792],_mm256_xor_si256(c2[1067],_mm256_xor_si256(c2[1595],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[4786],_mm256_xor_si256(c2[2669],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[14815],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[14308],_mm256_xor_si256(c2[12192],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[12748],_mm256_xor_si256(c2[7466],_mm256_xor_si256(c2[7994],_mm256_xor_si256(c2[8027],_mm256_xor_si256(c2[14352],_mm256_xor_si256(c2[635],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[2240],_mm256_xor_si256(c2[16494],_mm256_xor_si256(c2[8050],_mm256_xor_si256(c2[3849],_mm256_xor_si256(c2[8599],_mm256_xor_si256(c2[11240],_mm256_xor_si256(c2[11768],_mm256_xor_si256(c2[13896],_mm256_xor_si256(c2[13906],_mm256_xor_si256(c2[3871],_mm256_xor_si256(c2[4399],_mm256_xor_si256(c2[13920],_mm256_xor_si256(c2[8119],_mm256_xor_si256(c2[3364],_mm256_xor_si256(c2[3892],_mm256_xor_si256(c2[6552],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[7612],_mm256_xor_si256(c2[8171],_mm256_xor_si256(c2[8694],_mm256_xor_si256(c2[9750],_mm256_xor_si256(c2[10278],_mm256_xor_si256(c2[15053],_mm256_xor_si256(c2[11891],_mm256_xor_si256(c2[16643],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[2929],_mm256_xor_si256(c2[11385],_mm256_xor_si256(c2[11913],_mm256_xor_si256(c2[10873],_mm256_xor_si256(c2[12464],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[2425],_mm256_xor_si256(c2[4569],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[1401],_mm256_xor_si256(c2[1929],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[3537],_mm256_xor_si256(c2[9869],_mm256_xor_si256(c2[2476],_mm256_xor_si256(c2[13589],_mm256_xor_si256(c2[9361],_mm256_xor_si256(c2[16235],_mm256_xor_si256(c2[16763],_mm256_xor_si256(c2[942],_mm256_xor_si256(c2[9918],_mm256_xor_si256(c2[14147],_mm256_xor_si256(c2[14675],_mm256_xor_si256(c2[970],_mm256_xor_si256(c2[16809],_mm256_xor_si256(c2[15754],_mm256_xor_si256(c2[16282],_mm256_xor_si256
(c2[10468],_mm256_xor_si256(c2[2048],_mm256_xor_si256(c2[14184],_mm256_xor_si256(c2[15248],_mm256_xor_si256(c2[13680],_mm256_xor_si256(c2[15266],_mm256_xor_si256(c2[11571],_mm256_xor_si256(c2[12099],_mm256_xor_si256(c2[5257],_mm256_xor_si256(c2[16877],_mm256_xor_si256(c2[10537],c2[11065]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[348]=simde_mm256_xor_si256(c2[1056],simde_mm256_xor_si256(c2[14260],simde_mm256_xor_si256(c2[14792],simde_mm256_xor_si256(c2[1067],simde_mm256_xor_si256(c2[1595],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[4786],simde_mm256_xor_si256(c2[2669],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[14815],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[14308],simde_mm256_xor_si256(c2[12192],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[12748],simde_mm256_xor_si256(c2[7466],simde_mm256_xor_si256(c2[7994],simde_mm256_xor_si256(c2[8027],simde_mm256_xor_si256(c2[14352],simde_mm256_xor_si256(c2[635],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[2240],simde_mm256_xor_si256(c2[16494],simde_mm256_xor_si256(c2[8050],simde_mm256_xor_si256(c2[3849],simde_mm256_xor_si256(c2[8599],simde_mm256_xor_si256(c2[11240],simde_mm256_xor_si256(c2[11768],simde_mm256_xor_si256(c2[13896],simde_mm256_xor_si256(c2[13906],simde_mm256_xor_si256(c2[3871],simde_mm256_xor_si256(c2[4399],simde_mm256_xor_si256(c2[13920],simde_mm256_xor_si256(c2[8119],simde_mm256_xor_si256(c2[3364],simde_mm256_xor_si256(c2[3892],simde_mm256_xor_si256(c2[6552],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[7612],simde_mm256_xor_si256(c2[8171],simde_mm256_xor_si256(c2[8694],simde_mm256_xor_si256(c2[9750],simde_mm256_xor_si256(c2[10278],simde_mm256_xor_si256(c2[15053],simde_mm256_xor_si256(c2[11891],simde_mm256_xor_si256(c2[16643],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[2929],simde_mm256_xor_si256(c2[11385],simde_mm256_xor_si256(c2[11913],simde_mm256_xor_si256(c2[10873],simde_mm256_xor_si256(c2[12464],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[2425],simde_mm256_xor_si256(c2[4569],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[1401],simde_mm256_xor_si256(c2[1929],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[3537],simde_mm256_xor_si256(c2[9869],simde_mm256_xor_si256(c2[2476],simde_mm256_xor_si2
56(c2[13589],simde_mm256_xor_si256(c2[9361],simde_mm256_xor_si256(c2[16235],simde_mm256_xor_si256(c2[16763],simde_mm256_xor_si256(c2[942],simde_mm256_xor_si256(c2[9918],simde_mm256_xor_si256(c2[14147],simde_mm256_xor_si256(c2[14675],simde_mm256_xor_si256(c2[970],simde_mm256_xor_si256(c2[16809],simde_mm256_xor_si256(c2[15754],simde_mm256_xor_si256(c2[16282],simde_mm256_xor_si256(c2[10468],simde_mm256_xor_si256(c2[2048],simde_mm256_xor_si256(c2[14184],simde_mm256_xor_si256(c2[15248],simde_mm256_xor_si256(c2[13680],simde_mm256_xor_si256(c2[15266],simde_mm256_xor_si256(c2[11571],simde_mm256_xor_si256(c2[12099],simde_mm256_xor_si256(c2[5257],simde_mm256_xor_si256(c2[16877],simde_mm256_xor_si256(c2[10537],c2[11065]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 30
-     d2[360]=_mm256_xor_si256(c2[7392],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[3705],_mm256_xor_si256(c2[4233],_mm256_xor_si256(c2[7403],_mm256_xor_si256(c2[7931],_mm256_xor_si256(c2[16372],_mm256_xor_si256(c2[7419],_mm256_xor_si256(c2[10594],_mm256_xor_si256(c2[11122],_mm256_xor_si256(c2[9005],_mm256_xor_si256(c2[9533],_mm256_xor_si256(c2[6916],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[1105],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[6410],_mm256_xor_si256(c2[2189],_mm256_xor_si256(c2[13802],_mm256_xor_si256(c2[14330],_mm256_xor_si256(c2[14363],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[6971],_mm256_xor_si256(c2[7499],_mm256_xor_si256(c2[8576],_mm256_xor_si256(c2[5935],_mm256_xor_si256(c2[13858],_mm256_xor_si256(c2[14386],_mm256_xor_si256(c2[10185],_mm256_xor_si256(c2[14407],_mm256_xor_si256(c2[14935],_mm256_xor_si256(c2[681],_mm256_xor_si256(c2[1209],_mm256_xor_si256(c2[3337],_mm256_xor_si256(c2[2819],_mm256_xor_si256(c2[3347],_mm256_xor_si256(c2[10207],_mm256_xor_si256(c2[10735],_mm256_xor_si256(c2[3361],_mm256_xor_si256(c2[13927],_mm256_xor_si256(c2[14455],_mm256_xor_si256(c2[9700],_mm256_xor_si256(c2[10228],_mm256_xor_si256(c2[12888],_mm256_xor_si256(c2[7088],_mm256_xor_si256(c2[13420],_mm256_xor_si256(c2[13948],_mm256_xor_si256(c2[14507],_mm256_xor_si256(c2[14502],_mm256_xor_si256(c2[15030],_mm256_xor_si256(c2[16086],_mm256_xor_si256(c2[16614],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[4494],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[6072],_mm256_xor_si256(c2[6600],_mm256_xor_si256(c2[6627],_mm256_xor_si256(c2[9265],_mm256_xor_si256(c2[826],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[8233],_mm256_xor_si256(c2[8761],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[10905],_mm256_xor_si256(c2[9845],_mm256_xor_si256(c2[10373],_mm256_xor_si256(c2[7737],_mm256_xor_si256(c2[8265],_mm256_xor_si256(c2[9873],_mm256_xor_si256(c2[16205],_mm256_xor_s
i256(c2[8284],_mm256_xor_si256(c2[8812],_mm256_xor_si256(c2[3030],_mm256_xor_si256(c2[15697],_mm256_xor_si256(c2[5664],_mm256_xor_si256(c2[6192],_mm256_xor_si256(c2[7278],_mm256_xor_si256(c2[15726],_mm256_xor_si256(c2[16254],_mm256_xor_si256(c2[3576],_mm256_xor_si256(c2[4104],_mm256_xor_si256(c2[7306],_mm256_xor_si256(c2[5722],_mm256_xor_si256(c2[6250],_mm256_xor_si256(c2[5195],_mm256_xor_si256(c2[5723],_mm256_xor_si256(c2[8384],_mm256_xor_si256(c2[3625],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[4689],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[4179],_mm256_xor_si256(c2[4707],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[1540],_mm256_xor_si256(c2[11593],_mm256_xor_si256(c2[6318],_mm256_xor_si256(c2[16873],c2[506])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[360]=simde_mm256_xor_si256(c2[7392],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[3705],simde_mm256_xor_si256(c2[4233],simde_mm256_xor_si256(c2[7403],simde_mm256_xor_si256(c2[7931],simde_mm256_xor_si256(c2[16372],simde_mm256_xor_si256(c2[7419],simde_mm256_xor_si256(c2[10594],simde_mm256_xor_si256(c2[11122],simde_mm256_xor_si256(c2[9005],simde_mm256_xor_si256(c2[9533],simde_mm256_xor_si256(c2[6916],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[1105],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[6410],simde_mm256_xor_si256(c2[2189],simde_mm256_xor_si256(c2[13802],simde_mm256_xor_si256(c2[14330],simde_mm256_xor_si256(c2[14363],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[6971],simde_mm256_xor_si256(c2[7499],simde_mm256_xor_si256(c2[8576],simde_mm256_xor_si256(c2[5935],simde_mm256_xor_si256(c2[13858],simde_mm256_xor_si256(c2[14386],simde_mm256_xor_si256(c2[10185],simde_mm256_xor_si256(c2[14407],simde_mm256_xor_si256(c2[14935],simde_mm256_xor_si256(c2[681],simde_mm256_xor_si256(c2[1209],simde_mm256_xor_si256(c2[3337],simde_mm256_xor_si256(c2[2819],simde_mm256_xor_si256(c2[3347],simde_mm256_xor_si256(c2[10207],simde_mm256_xor_si256(c2[10735],simde_mm256_xor_si256(c2[3361],simde_mm256_xor_si256(c2[13927],simde_mm256_xor_si256(c2[14455],simde_mm256_xor_si256(c2[9700],simde_mm256_xor_si256(c2[10228],simde_mm256_xor_si256(c2[12888],simde_mm256_xor_si256(c2[7088],simde_mm256_xor_si256(c2[13420],simde_mm256_xor_si256(c2[13948],simde_mm256_xor_si256(c2[14507],simde_mm256_xor_si256(c2[14502],simde_mm256_xor_si256(c2[15030],simde_mm256_xor_si256(c2[16086],simde_mm256_xor_si256(c2[16614],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[4494],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[6072],simde_mm256_xor_si256(c2[6600],simde_mm256_xor_si256(c2[6627],simde_mm256_xor_si256(c2[9265],simde_mm256_xor_si256(c2[826],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[314],simde_mm256_
xor_si256(c2[1377],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[8233],simde_mm256_xor_si256(c2[8761],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[10905],simde_mm256_xor_si256(c2[9845],simde_mm256_xor_si256(c2[10373],simde_mm256_xor_si256(c2[7737],simde_mm256_xor_si256(c2[8265],simde_mm256_xor_si256(c2[9873],simde_mm256_xor_si256(c2[16205],simde_mm256_xor_si256(c2[8284],simde_mm256_xor_si256(c2[8812],simde_mm256_xor_si256(c2[3030],simde_mm256_xor_si256(c2[15697],simde_mm256_xor_si256(c2[5664],simde_mm256_xor_si256(c2[6192],simde_mm256_xor_si256(c2[7278],simde_mm256_xor_si256(c2[15726],simde_mm256_xor_si256(c2[16254],simde_mm256_xor_si256(c2[3576],simde_mm256_xor_si256(c2[4104],simde_mm256_xor_si256(c2[7306],simde_mm256_xor_si256(c2[5722],simde_mm256_xor_si256(c2[6250],simde_mm256_xor_si256(c2[5195],simde_mm256_xor_si256(c2[5723],simde_mm256_xor_si256(c2[8384],simde_mm256_xor_si256(c2[3625],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[4179],simde_mm256_xor_si256(c2[4707],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[1540],simde_mm256_xor_si256(c2[11593],simde_mm256_xor_si256(c2[6318],simde_mm256_xor_si256(c2[16873],c2[506])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 31
-     d2[372]=_mm256_xor_si256(c2[11093],_mm256_xor_si256(c2[10565],_mm256_xor_si256(c2[7402],_mm256_xor_si256(c2[6874],_mm256_xor_si256(c2[7922],_mm256_xor_si256(c2[7394],_mm256_xor_si256(c2[11620],_mm256_xor_si256(c2[10564],_mm256_xor_si256(c2[11092],_mm256_xor_si256(c2[11120],_mm256_xor_si256(c2[10592],_mm256_xor_si256(c2[14811],_mm256_xor_si256(c2[14283],_mm256_xor_si256(c2[13234],_mm256_xor_si256(c2[12178],_mm256_xor_si256(c2[12706],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[10617],_mm256_xor_si256(c2[10089],_mm256_xor_si256(c2[7450],_mm256_xor_si256(c2[6922],_mm256_xor_si256(c2[5334],_mm256_xor_si256(c2[4806],_mm256_xor_si256(c2[10111],_mm256_xor_si256(c2[9583],_mm256_xor_si256(c2[5890],_mm256_xor_si256(c2[5362],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1157],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[7494],_mm256_xor_si256(c2[6966],_mm256_xor_si256(c2[11188],_mm256_xor_si256(c2[10132],_mm256_xor_si256(c2[10660],_mm256_xor_si256(c2[12265],_mm256_xor_si256(c2[11737],_mm256_xor_si256(c2[9624],_mm256_xor_si256(c2[9096],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[13874],_mm256_xor_si256(c2[13346],_mm256_xor_si256(c2[1729],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[4898],_mm256_xor_si256(c2[3842],_mm256_xor_si256(c2[4370],_mm256_xor_si256(c2[7038],_mm256_xor_si256(c2[6510],_mm256_xor_si256(c2[7036],_mm256_xor_si256(c2[6508],_mm256_xor_si256(c2[14424],_mm256_xor_si256(c2[13368],_mm256_xor_si256(c2[13896],_mm256_xor_si256(c2[12318],_mm256_xor_si256(c2[7062],_mm256_xor_si256(c2[6534],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[13929],_mm256_xor_si256(c2[12873],_mm256_xor_si256(c2[13401],_mm256_xor_si256(c2[16589],_mm256_xor_si256(c2[16061],_mm256_xor_si256(c2[10777],_mm256_xor_si256(c2[10249],_mm256_xor_si256(c2[754],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[1296],_mm256_xor_s
i256(c2[3408],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[8195],_mm256_xor_si256(c2[7667],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[4493],_mm256_xor_si256(c2[10301],_mm256_xor_si256(c2[9245],_mm256_xor_si256(c2[9773],_mm256_xor_si256(c2[10328],_mm256_xor_si256(c2[9800],_mm256_xor_si256(c2[12966],_mm256_xor_si256(c2[12438],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[3987],_mm256_xor_si256(c2[4515],_mm256_xor_si256(c2[4015],_mm256_xor_si256(c2[3487],_mm256_xor_si256(c2[5594],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[12462],_mm256_xor_si256(c2[11406],_mm256_xor_si256(c2[11934],_mm256_xor_si256(c2[14594],_mm256_xor_si256(c2[14066],_mm256_xor_si256(c2[14074],_mm256_xor_si256(c2[13546],_mm256_xor_si256(c2[11954],_mm256_xor_si256(c2[10898],_mm256_xor_si256(c2[11426],_mm256_xor_si256(c2[13562],_mm256_xor_si256(c2[13034],_mm256_xor_si256(c2[3011],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[12513],_mm256_xor_si256(c2[11985],_mm256_xor_si256(c2[6731],_mm256_xor_si256(c2[6203],_mm256_xor_si256(c2[2503],_mm256_xor_si256(c2[1975],_mm256_xor_si256(c2[9893],_mm256_xor_si256(c2[8837],_mm256_xor_si256(c2[9365],_mm256_xor_si256(c2[10979],_mm256_xor_si256(c2[10451],_mm256_xor_si256(c2[3048],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[7805],_mm256_xor_si256(c2[6749],_mm256_xor_si256(c2[7277],_mm256_xor_si256(c2[10995],_mm256_xor_si256(c2[10467],_mm256_xor_si256(c2[9939],_mm256_xor_si256(c2[9411],_mm256_xor_si256(c2[9412],_mm256_xor_si256(c2[8356],_mm256_xor_si256(c2[8884],_mm256_xor_si256(c2[12073],_mm256_xor_si256(c2[11545],_mm256_xor_si256(c2[7326],_mm256_xor_si256(c2[6798],_mm256_xor_si256(c2[8378],_mm256_xor_si256(c2[7850],_mm256_xor_si256(c2[6822],_mm256_xor_si256(c2[6294],_mm256_xor_si256(c2[8408],_mm256_xor_si256(c2[7880],_mm256_xor_si256(c2[5241],_mm256_xor_si256(c2[4185],_mm256_xor_si256(c2[4713],_mm256_xor_si256(c2[15294],_mm256_xor_si256(c2[14766],_mm256_xor_si256(c2[10019],_mm256_xor_si256(c2[9491],_mm256_xor_si256(c2[4207],_mm256_xo
r_si256(c2[3151],c2[3679]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[372]=simde_mm256_xor_si256(c2[11093],simde_mm256_xor_si256(c2[10565],simde_mm256_xor_si256(c2[7402],simde_mm256_xor_si256(c2[6874],simde_mm256_xor_si256(c2[7922],simde_mm256_xor_si256(c2[7394],simde_mm256_xor_si256(c2[11620],simde_mm256_xor_si256(c2[10564],simde_mm256_xor_si256(c2[11092],simde_mm256_xor_si256(c2[11120],simde_mm256_xor_si256(c2[10592],simde_mm256_xor_si256(c2[14811],simde_mm256_xor_si256(c2[14283],simde_mm256_xor_si256(c2[13234],simde_mm256_xor_si256(c2[12178],simde_mm256_xor_si256(c2[12706],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[10617],simde_mm256_xor_si256(c2[10089],simde_mm256_xor_si256(c2[7450],simde_mm256_xor_si256(c2[6922],simde_mm256_xor_si256(c2[5334],simde_mm256_xor_si256(c2[4806],simde_mm256_xor_si256(c2[10111],simde_mm256_xor_si256(c2[9583],simde_mm256_xor_si256(c2[5890],simde_mm256_xor_si256(c2[5362],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1157],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[7494],simde_mm256_xor_si256(c2[6966],simde_mm256_xor_si256(c2[11188],simde_mm256_xor_si256(c2[10132],simde_mm256_xor_si256(c2[10660],simde_mm256_xor_si256(c2[12265],simde_mm256_xor_si256(c2[11737],simde_mm256_xor_si256(c2[9624],simde_mm256_xor_si256(c2[9096],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[13874],simde_mm256_xor_si256(c2[13346],simde_mm256_xor_si256(c2[1729],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[4898],simde_mm256_xor_si256(c2[3842],simde_mm256_xor_si256(c2[4370],simde_mm256_xor_si256(c2[7038],simde_mm256_xor_si256(c2[6510],simde_mm256_xor_si256(c2[7036],simde_mm256_xor_si256(c2[6508],simde_mm256_xor_si256(c2[14424],simde_mm256_xor_si256(c2[13368],simde_mm256_xor_si256(c2[13896],simde_mm256_xor_si256(c2[12318],simde_mm256_xor_si256(c2[7062],simde_mm256_xor_si256(c2[6534],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[13929],simde_mm256_
xor_si256(c2[12873],simde_mm256_xor_si256(c2[13401],simde_mm256_xor_si256(c2[16589],simde_mm256_xor_si256(c2[16061],simde_mm256_xor_si256(c2[10777],simde_mm256_xor_si256(c2[10249],simde_mm256_xor_si256(c2[754],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[8195],simde_mm256_xor_si256(c2[7667],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[4493],simde_mm256_xor_si256(c2[10301],simde_mm256_xor_si256(c2[9245],simde_mm256_xor_si256(c2[9773],simde_mm256_xor_si256(c2[10328],simde_mm256_xor_si256(c2[9800],simde_mm256_xor_si256(c2[12966],simde_mm256_xor_si256(c2[12438],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[3987],simde_mm256_xor_si256(c2[4515],simde_mm256_xor_si256(c2[4015],simde_mm256_xor_si256(c2[3487],simde_mm256_xor_si256(c2[5594],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[12462],simde_mm256_xor_si256(c2[11406],simde_mm256_xor_si256(c2[11934],simde_mm256_xor_si256(c2[14594],simde_mm256_xor_si256(c2[14066],simde_mm256_xor_si256(c2[14074],simde_mm256_xor_si256(c2[13546],simde_mm256_xor_si256(c2[11954],simde_mm256_xor_si256(c2[10898],simde_mm256_xor_si256(c2[11426],simde_mm256_xor_si256(c2[13562],simde_mm256_xor_si256(c2[13034],simde_mm256_xor_si256(c2[3011],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[12513],simde_mm256_xor_si256(c2[11985],simde_mm256_xor_si256(c2[6731],simde_mm256_xor_si256(c2[6203],simde_mm256_xor_si256(c2[2503],simde_mm256_xor_si256(c2[1975],simde_mm256_xor_si256(c2[9893],simde_mm256_xor_si256(c2[8837],simde_mm256_xor_si256(c2[9365],simde_mm256_xor_si256(c2[10979],simde_mm256_xor_si256(c2[10451],simde_mm256_xor_si256(c2[3048],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[7805],simde_mm256_xor_si256(c2[6749],simde_mm256_xor_si256(c2[7277],simde_mm256_xor_si256(c2[10995],simd
e_mm256_xor_si256(c2[10467],simde_mm256_xor_si256(c2[9939],simde_mm256_xor_si256(c2[9411],simde_mm256_xor_si256(c2[9412],simde_mm256_xor_si256(c2[8356],simde_mm256_xor_si256(c2[8884],simde_mm256_xor_si256(c2[12073],simde_mm256_xor_si256(c2[11545],simde_mm256_xor_si256(c2[7326],simde_mm256_xor_si256(c2[6798],simde_mm256_xor_si256(c2[8378],simde_mm256_xor_si256(c2[7850],simde_mm256_xor_si256(c2[6822],simde_mm256_xor_si256(c2[6294],simde_mm256_xor_si256(c2[8408],simde_mm256_xor_si256(c2[7880],simde_mm256_xor_si256(c2[5241],simde_mm256_xor_si256(c2[4185],simde_mm256_xor_si256(c2[4713],simde_mm256_xor_si256(c2[15294],simde_mm256_xor_si256(c2[14766],simde_mm256_xor_si256(c2[10019],simde_mm256_xor_si256(c2[9491],simde_mm256_xor_si256(c2[4207],simde_mm256_xor_si256(c2[3151],c2[3679]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[384]=_mm256_xor_si256(c2[10038],_mm256_xor_si256(c2[6347],_mm256_xor_si256(c2[6339],_mm256_xor_si256(c2[6867],_mm256_xor_si256(c2[10037],_mm256_xor_si256(c2[10565],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[10065],_mm256_xor_si256(c2[13228],_mm256_xor_si256(c2[13756],_mm256_xor_si256(c2[11651],_mm256_xor_si256(c2[12179],_mm256_xor_si256(c2[9562],_mm256_xor_si256(c2[6395],_mm256_xor_si256(c2[3751],_mm256_xor_si256(c2[4279],_mm256_xor_si256(c2[9056],_mm256_xor_si256(c2[4835],_mm256_xor_si256(c2[16448],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[5911],_mm256_xor_si256(c2[6439],_mm256_xor_si256(c2[9605],_mm256_xor_si256(c2[10133],_mm256_xor_si256(c2[11210],_mm256_xor_si256(c2[8569],_mm256_xor_si256(c2[16492],_mm256_xor_si256(c2[125],_mm256_xor_si256(c2[12819],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[3843],_mm256_xor_si256(c2[5983],_mm256_xor_si256(c2[5453],_mm256_xor_si256(c2[5981],_mm256_xor_si256(c2[12841],_mm256_xor_si256(c2[13369],_mm256_xor_si256(c2[6007],_mm256_xor_si256(c2[16561],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[12346],_mm256_xor_si256(c2[12874],_mm256_xor_si256(c2[15534],_mm256_xor_si256(c2[9722],_mm256_xor_si256(c2[16066],_mm256_xor_si256(c2[16594],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[7128],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[8718],_mm256_xor_si256(c2[9246],_mm256_xor_si256(c2[9273],_mm256_xor_si256(c2[11911],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[3988],_mm256_xor_si256(c2[5046],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[4011],_mm256_xor_si256(c2[4539],_mm256_xor_si256(c2[10879],_mm256_xor_si256(c2[11407],_mm256_xor_si256(c2[13539],_mm256_xor_si256(c2[12491],_mm256_xor_si256(c2[13019],_mm256_xor_si256(c2[10371],_mm256_xor_si256(c2[10899],_mm256_xor_si256(c2[8261],_mm256_xor_si256(c2[12507],_mm256_xor_si256(c2[1944],_mm256_xor_si256
(c2[10930],_mm256_xor_si256(c2[11458],_mm256_xor_si256(c2[5664],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[8310],_mm256_xor_si256(c2[8838],_mm256_xor_si256(c2[9912],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[6222],_mm256_xor_si256(c2[6750],_mm256_xor_si256(c2[9940],_mm256_xor_si256(c2[8356],_mm256_xor_si256(c2[8884],_mm256_xor_si256(c2[7829],_mm256_xor_si256(c2[8357],_mm256_xor_si256(c2[11018],_mm256_xor_si256(c2[6271],_mm256_xor_si256(c2[6795],_mm256_xor_si256(c2[7323],_mm256_xor_si256(c2[5767],_mm256_xor_si256(c2[6825],_mm256_xor_si256(c2[7353],_mm256_xor_si256(c2[3658],_mm256_xor_si256(c2[4186],_mm256_xor_si256(c2[14239],_mm256_xor_si256(c2[8952],_mm256_xor_si256(c2[2624],c2[3152])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[384]=simde_mm256_xor_si256(c2[10038],simde_mm256_xor_si256(c2[6347],simde_mm256_xor_si256(c2[6339],simde_mm256_xor_si256(c2[6867],simde_mm256_xor_si256(c2[10037],simde_mm256_xor_si256(c2[10565],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[10065],simde_mm256_xor_si256(c2[13228],simde_mm256_xor_si256(c2[13756],simde_mm256_xor_si256(c2[11651],simde_mm256_xor_si256(c2[12179],simde_mm256_xor_si256(c2[9562],simde_mm256_xor_si256(c2[6395],simde_mm256_xor_si256(c2[3751],simde_mm256_xor_si256(c2[4279],simde_mm256_xor_si256(c2[9056],simde_mm256_xor_si256(c2[4835],simde_mm256_xor_si256(c2[16448],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[5911],simde_mm256_xor_si256(c2[6439],simde_mm256_xor_si256(c2[9605],simde_mm256_xor_si256(c2[10133],simde_mm256_xor_si256(c2[11210],simde_mm256_xor_si256(c2[8569],simde_mm256_xor_si256(c2[16492],simde_mm256_xor_si256(c2[125],simde_mm256_xor_si256(c2[12819],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[3843],simde_mm256_xor_si256(c2[5983],simde_mm256_xor_si256(c2[5453],simde_mm256_xor_si256(c2[5981],simde_mm256_xor_si256(c2[12841],simde_mm256_xor_si256(c2[13369],simde_mm256_xor_si256(c2[6007],simde_mm256_xor_si256(c2[16561],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[12346],simde_mm256_xor_si256(c2[12874],simde_mm256_xor_si256(c2[15534],simde_mm256_xor_si256(c2[9722],simde_mm256_xor_si256(c2[16066],simde_mm256_xor_si256(c2[16594],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[7128],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[8718],simde_mm256_xor_si256(c2[9246],simde_mm256_xor_si256(c2[9273],simde_mm256_xor_si256(c2[11911],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[3988],simde_mm256_xor_si256(c2[5046],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256
(c2[4011],simde_mm256_xor_si256(c2[4539],simde_mm256_xor_si256(c2[10879],simde_mm256_xor_si256(c2[11407],simde_mm256_xor_si256(c2[13539],simde_mm256_xor_si256(c2[12491],simde_mm256_xor_si256(c2[13019],simde_mm256_xor_si256(c2[10371],simde_mm256_xor_si256(c2[10899],simde_mm256_xor_si256(c2[8261],simde_mm256_xor_si256(c2[12507],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[10930],simde_mm256_xor_si256(c2[11458],simde_mm256_xor_si256(c2[5664],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[8310],simde_mm256_xor_si256(c2[8838],simde_mm256_xor_si256(c2[9912],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[6222],simde_mm256_xor_si256(c2[6750],simde_mm256_xor_si256(c2[9940],simde_mm256_xor_si256(c2[8356],simde_mm256_xor_si256(c2[8884],simde_mm256_xor_si256(c2[7829],simde_mm256_xor_si256(c2[8357],simde_mm256_xor_si256(c2[11018],simde_mm256_xor_si256(c2[6271],simde_mm256_xor_si256(c2[6795],simde_mm256_xor_si256(c2[7323],simde_mm256_xor_si256(c2[5767],simde_mm256_xor_si256(c2[6825],simde_mm256_xor_si256(c2[7353],simde_mm256_xor_si256(c2[3658],simde_mm256_xor_si256(c2[4186],simde_mm256_xor_si256(c2[14239],simde_mm256_xor_si256(c2[8952],simde_mm256_xor_si256(c2[2624],c2[3152])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[396]=_mm256_xor_si256(c2[1618],_mm256_xor_si256(c2[4272],_mm256_xor_si256(c2[5027],c2[5259])));
+     d2[396]=simde_mm256_xor_si256(c2[1618],simde_mm256_xor_si256(c2[4272],simde_mm256_xor_si256(c2[5027],c2[5259])));
 
 //row: 34
-     d2[408]=_mm256_xor_si256(c2[3175],_mm256_xor_si256(c2[10732],_mm256_xor_si256(c2[5646],c2[13089])));
+     d2[408]=simde_mm256_xor_si256(c2[3175],simde_mm256_xor_si256(c2[10732],simde_mm256_xor_si256(c2[5646],c2[13089])));
 
 //row: 35
-     d2[420]=_mm256_xor_si256(c2[15317],_mm256_xor_si256(c2[11626],_mm256_xor_si256(c2[12146],_mm256_xor_si256(c2[15844],_mm256_xor_si256(c2[15344],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[14841],_mm256_xor_si256(c2[11674],_mm256_xor_si256(c2[9558],_mm256_xor_si256(c2[14335],_mm256_xor_si256(c2[10114],_mm256_xor_si256(c2[5360],_mm256_xor_si256(c2[5381],_mm256_xor_si256(c2[11718],_mm256_xor_si256(c2[15412],_mm256_xor_si256(c2[16489],_mm256_xor_si256(c2[13848],_mm256_xor_si256(c2[5404],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[5953],_mm256_xor_si256(c2[9122],_mm256_xor_si256(c2[8074],_mm256_xor_si256(c2[11262],_mm256_xor_si256(c2[11260],_mm256_xor_si256(c2[1753],_mm256_xor_si256(c2[11286],_mm256_xor_si256(c2[5473],_mm256_xor_si256(c2[1258],_mm256_xor_si256(c2[3918],_mm256_xor_si256(c2[15001],_mm256_xor_si256(c2[4978],_mm256_xor_si256(c2[5525],_mm256_xor_si256(c2[6048],_mm256_xor_si256(c2[7632],_mm256_xor_si256(c2[12419],_mm256_xor_si256(c2[9245],_mm256_xor_si256(c2[14525],_mm256_xor_si256(c2[14552],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[9267],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[8239],_mm256_xor_si256(c2[9818],_mm256_xor_si256(c2[16686],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[16178],_mm256_xor_si256(c2[891],_mm256_xor_si256(c2[7235],_mm256_xor_si256(c2[16737],_mm256_xor_si256(c2[10955],_mm256_xor_si256(c2[6727],_mm256_xor_si256(c2[14117],_mm256_xor_si256(c2[15203],_mm256_xor_si256(c2[7272],_mm256_xor_si256(c2[12029],_mm256_xor_si256(c2[15219],_mm256_xor_si256(c2[14163],_mm256_xor_si256(c2[13636],_mm256_xor_si256(c2[16297],_mm256_xor_si256(c2[11550],_mm256_xor_si256(c2[12602],_mm256_xor_si256(c2[11046],_mm256_xor_si256(c2[12632],_mm256_xor_si256(c2[9465],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[14243],c2[8431])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[420]=simde_mm256_xor_si256(c2[15317],simde_mm256_xor_si256(c2[11626],simde_mm256_xor_si256(c2[12146],simde_mm256_xor_si256(c2[15844],simde_mm256_xor_si256(c2[15344],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[14841],simde_mm256_xor_si256(c2[11674],simde_mm256_xor_si256(c2[9558],simde_mm256_xor_si256(c2[14335],simde_mm256_xor_si256(c2[10114],simde_mm256_xor_si256(c2[5360],simde_mm256_xor_si256(c2[5381],simde_mm256_xor_si256(c2[11718],simde_mm256_xor_si256(c2[15412],simde_mm256_xor_si256(c2[16489],simde_mm256_xor_si256(c2[13848],simde_mm256_xor_si256(c2[5404],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[5953],simde_mm256_xor_si256(c2[9122],simde_mm256_xor_si256(c2[8074],simde_mm256_xor_si256(c2[11262],simde_mm256_xor_si256(c2[11260],simde_mm256_xor_si256(c2[1753],simde_mm256_xor_si256(c2[11286],simde_mm256_xor_si256(c2[5473],simde_mm256_xor_si256(c2[1258],simde_mm256_xor_si256(c2[3918],simde_mm256_xor_si256(c2[15001],simde_mm256_xor_si256(c2[4978],simde_mm256_xor_si256(c2[5525],simde_mm256_xor_si256(c2[6048],simde_mm256_xor_si256(c2[7632],simde_mm256_xor_si256(c2[12419],simde_mm256_xor_si256(c2[9245],simde_mm256_xor_si256(c2[14525],simde_mm256_xor_si256(c2[14552],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[9267],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[8239],simde_mm256_xor_si256(c2[9818],simde_mm256_xor_si256(c2[16686],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[16178],simde_mm256_xor_si256(c2[891],simde_mm256_xor_si256(c2[7235],simde_mm256_xor_si256(c2[16737],simde_mm256_xor_si256(c2[10955],simde_mm256_xor_si256(c2[6727],simde_mm256_xor_si256(c2[14117],simde_mm256_xor_si256(c2[15203],simde_mm256_xor_si256(c2[7272],simde_mm256_xor_si256(c2[12029],simde_mm256_xor_si256(c2[15219],simde_mm256_xor_si256(c2[14163],simde_mm256_xor_si256(c2[13636],simde_mm256_xor_si256(c2[16297],simde_mm256_xor_si256(c2[11550],simde_
mm256_xor_si256(c2[12602],simde_mm256_xor_si256(c2[11046],simde_mm256_xor_si256(c2[12632],simde_mm256_xor_si256(c2[9465],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[14243],c2[8431])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[432]=_mm256_xor_si256(c2[9510],_mm256_xor_si256(c2[13545],_mm256_xor_si256(c2[5121],c2[11520])));
+     d2[432]=simde_mm256_xor_si256(c2[9510],simde_mm256_xor_si256(c2[13545],simde_mm256_xor_si256(c2[5121],c2[11520])));
 
 //row: 37
-     d2[444]=_mm256_xor_si256(c2[3169],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[538],_mm256_xor_si256(c2[4224],_mm256_xor_si256(c2[3196],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[7427],_mm256_xor_si256(c2[5838],_mm256_xor_si256(c2[6896],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[14833],_mm256_xor_si256(c2[2187],_mm256_xor_si256(c2[2715],_mm256_xor_si256(c2[15389],_mm256_xor_si256(c2[10635],_mm256_xor_si256(c2[10656],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[4353],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[2240],_mm256_xor_si256(c2[10691],_mm256_xor_si256(c2[5962],_mm256_xor_si256(c2[6490],_mm256_xor_si256(c2[11240],_mm256_xor_si256(c2[14409],_mm256_xor_si256(c2[16537],_mm256_xor_si256(c2[16547],_mm256_xor_si256(c2[7040],_mm256_xor_si256(c2[16561],_mm256_xor_si256(c2[10760],_mm256_xor_si256(c2[6533],_mm256_xor_si256(c2[8665],_mm256_xor_si256(c2[9193],_mm256_xor_si256(c2[3393],_mm256_xor_si256(c2[10253],_mm256_xor_si256(c2[10272],_mm256_xor_si256(c2[10800],_mm256_xor_si256(c2[11335],_mm256_xor_si256(c2[12919],_mm256_xor_si256(c2[271],_mm256_xor_si256(c2[799],_mm256_xor_si256(c2[14520],_mm256_xor_si256(c2[2905],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[2932],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[14554],_mm256_xor_si256(c2[12986],_mm256_xor_si256(c2[13514],_mm256_xor_si256(c2[15105],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[9818],_mm256_xor_si256(c2[7210],_mm256_xor_si256(c2[6678],_mm256_xor_si256(c2[4570],_mm256_xor_si256(c2[5650],_mm256_xor_si256(c2[6178],_mm256_xor_si256(c2[12510],_mm256_xor_si256(c2[5117],_mm256_xor_si256(c2[15702],_mm256_xor_si256(c2[16230],_mm256_xor_si256(c2[12002],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[3583],_mm256_xor_si256(c2[12559],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[3611],_mm256_xor_si256(c2[2555],_mm256_xor_si256(c2[2016],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[4
689],_mm256_xor_si256(c2[16825],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[15793],_mm256_xor_si256(c2[16321],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[14740],_mm256_xor_si256(c2[7370],_mm256_xor_si256(c2[7898],_mm256_xor_si256(c2[2623],c2[13706])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[444]=simde_mm256_xor_si256(c2[3169],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[538],simde_mm256_xor_si256(c2[4224],simde_mm256_xor_si256(c2[3196],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[7427],simde_mm256_xor_si256(c2[5838],simde_mm256_xor_si256(c2[6896],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[14833],simde_mm256_xor_si256(c2[2187],simde_mm256_xor_si256(c2[2715],simde_mm256_xor_si256(c2[15389],simde_mm256_xor_si256(c2[10635],simde_mm256_xor_si256(c2[10656],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[4353],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[2240],simde_mm256_xor_si256(c2[10691],simde_mm256_xor_si256(c2[5962],simde_mm256_xor_si256(c2[6490],simde_mm256_xor_si256(c2[11240],simde_mm256_xor_si256(c2[14409],simde_mm256_xor_si256(c2[16537],simde_mm256_xor_si256(c2[16547],simde_mm256_xor_si256(c2[7040],simde_mm256_xor_si256(c2[16561],simde_mm256_xor_si256(c2[10760],simde_mm256_xor_si256(c2[6533],simde_mm256_xor_si256(c2[8665],simde_mm256_xor_si256(c2[9193],simde_mm256_xor_si256(c2[3393],simde_mm256_xor_si256(c2[10253],simde_mm256_xor_si256(c2[10272],simde_mm256_xor_si256(c2[10800],simde_mm256_xor_si256(c2[11335],simde_mm256_xor_si256(c2[12919],simde_mm256_xor_si256(c2[271],simde_mm256_xor_si256(c2[799],simde_mm256_xor_si256(c2[14520],simde_mm256_xor_si256(c2[2905],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[2932],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[14554],simde_mm256_xor_si256(c2[12986],simde_mm256_xor_si256(c2[13514],simde_mm256_xor_si256(c2[15105],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[9818],simde_mm256_xor_si256(c2[7210],simde_mm256_xor_si256(c2[6678],simde_mm256_xor_si256(c2[4570],simde_mm256_xor_si256(c2[5650],simde_mm256_xor_si256(c2[6178],simde_mm256_xor_si256(c2[12510],simde_mm256_xor_si256(c2[5117],simde_mm256_xor_si256
(c2[15702],simde_mm256_xor_si256(c2[16230],simde_mm256_xor_si256(c2[12002],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[3583],simde_mm256_xor_si256(c2[12559],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[3611],simde_mm256_xor_si256(c2[2555],simde_mm256_xor_si256(c2[2016],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[16825],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[15793],simde_mm256_xor_si256(c2[16321],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[14740],simde_mm256_xor_si256(c2[7370],simde_mm256_xor_si256(c2[7898],simde_mm256_xor_si256(c2[2623],c2[13706])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[456]=_mm256_xor_si256(c2[13205],_mm256_xor_si256(c2[9197],_mm256_xor_si256(c2[777],c2[11910])));
+     d2[456]=simde_mm256_xor_si256(c2[13205],simde_mm256_xor_si256(c2[9197],simde_mm256_xor_si256(c2[777],c2[11910])));
 
 //row: 39
-     d2[468]=_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[15386],_mm256_xor_si256(c2[13906],c2[5217])));
+     d2[468]=simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[15386],simde_mm256_xor_si256(c2[13906],c2[5217])));
 
 //row: 40
-     d2[480]=_mm256_xor_si256(c2[7925],_mm256_xor_si256(c2[2833],c2[13089]));
+     d2[480]=simde_mm256_xor_si256(c2[7925],simde_mm256_xor_si256(c2[2833],c2[13089]));
 
 //row: 41
-     d2[492]=_mm256_xor_si256(c2[10585],_mm256_xor_si256(c2[13809],_mm256_xor_si256(c2[6028],c2[441])));
+     d2[492]=simde_mm256_xor_si256(c2[10585],simde_mm256_xor_si256(c2[13809],simde_mm256_xor_si256(c2[6028],c2[441])));
 
 //row: 42
-     d2[504]=_mm256_xor_si256(c2[7396],_mm256_xor_si256(c2[3705],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[7395],_mm256_xor_si256(c2[7923],_mm256_xor_si256(c2[8979],_mm256_xor_si256(c2[7423],_mm256_xor_si256(c2[10586],_mm256_xor_si256(c2[11114],_mm256_xor_si256(c2[9009],_mm256_xor_si256(c2[9537],_mm256_xor_si256(c2[6920],_mm256_xor_si256(c2[3753],_mm256_xor_si256(c2[1109],_mm256_xor_si256(c2[1637],_mm256_xor_si256(c2[6414],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[13806],_mm256_xor_si256(c2[14334],_mm256_xor_si256(c2[14355],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3797],_mm256_xor_si256(c2[6963],_mm256_xor_si256(c2[7491],_mm256_xor_si256(c2[7488],_mm256_xor_si256(c2[8568],_mm256_xor_si256(c2[5939],_mm256_xor_si256(c2[13850],_mm256_xor_si256(c2[14378],_mm256_xor_si256(c2[10177],_mm256_xor_si256(c2[14411],_mm256_xor_si256(c2[14939],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[3341],_mm256_xor_si256(c2[2811],_mm256_xor_si256(c2[3339],_mm256_xor_si256(c2[10211],_mm256_xor_si256(c2[10739],_mm256_xor_si256(c2[3365],_mm256_xor_si256(c2[13931],_mm256_xor_si256(c2[14459],_mm256_xor_si256(c2[9704],_mm256_xor_si256(c2[10232],_mm256_xor_si256(c2[12892],_mm256_xor_si256(c2[7080],_mm256_xor_si256(c2[13424],_mm256_xor_si256(c2[13952],_mm256_xor_si256(c2[14499],_mm256_xor_si256(c2[14506],_mm256_xor_si256(c2[15034],_mm256_xor_si256(c2[16090],_mm256_xor_si256(c2[16618],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[1324],_mm256_xor_si256(c2[6076],_mm256_xor_si256(c2[6604],_mm256_xor_si256(c2[6631],_mm256_xor_si256(c2[9269],_mm256_xor_si256(c2[818],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[1369],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[8237],_mm256_xor_si256(c2[8765],_mm256_xor_si256(c2[10897],_mm256_xor_si256(c2[9849],_mm256_xor_si256(c2[10377],_mm256_xor_si256(c2[7729],_mm256_xor_si256(c2[8257],_mm256_xor_si256(c2[9865],_mm256_xor_si256(c2[16209],_mm256_xor_si256(c2[8288],_mm256_xor_si
256(c2[8816],_mm256_xor_si256(c2[3034],_mm256_xor_si256(c2[15701],_mm256_xor_si256(c2[5668],_mm256_xor_si256(c2[6196],_mm256_xor_si256(c2[7282],_mm256_xor_si256(c2[15730],_mm256_xor_si256(c2[16258],_mm256_xor_si256(c2[3580],_mm256_xor_si256(c2[4108],_mm256_xor_si256(c2[7298],_mm256_xor_si256(c2[5714],_mm256_xor_si256(c2[6242],_mm256_xor_si256(c2[5187],_mm256_xor_si256(c2[5715],_mm256_xor_si256(c2[8376],_mm256_xor_si256(c2[3629],_mm256_xor_si256(c2[4153],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[4183],_mm256_xor_si256(c2[4711],_mm256_xor_si256(c2[1016],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[11597],_mm256_xor_si256(c2[6322],_mm256_xor_si256(c2[16877],c2[510]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[504]=simde_mm256_xor_si256(c2[7396],simde_mm256_xor_si256(c2[3705],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[7395],simde_mm256_xor_si256(c2[7923],simde_mm256_xor_si256(c2[8979],simde_mm256_xor_si256(c2[7423],simde_mm256_xor_si256(c2[10586],simde_mm256_xor_si256(c2[11114],simde_mm256_xor_si256(c2[9009],simde_mm256_xor_si256(c2[9537],simde_mm256_xor_si256(c2[6920],simde_mm256_xor_si256(c2[3753],simde_mm256_xor_si256(c2[1109],simde_mm256_xor_si256(c2[1637],simde_mm256_xor_si256(c2[6414],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[13806],simde_mm256_xor_si256(c2[14334],simde_mm256_xor_si256(c2[14355],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[3797],simde_mm256_xor_si256(c2[6963],simde_mm256_xor_si256(c2[7491],simde_mm256_xor_si256(c2[7488],simde_mm256_xor_si256(c2[8568],simde_mm256_xor_si256(c2[5939],simde_mm256_xor_si256(c2[13850],simde_mm256_xor_si256(c2[14378],simde_mm256_xor_si256(c2[10177],simde_mm256_xor_si256(c2[14411],simde_mm256_xor_si256(c2[14939],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[3341],simde_mm256_xor_si256(c2[2811],simde_mm256_xor_si256(c2[3339],simde_mm256_xor_si256(c2[10211],simde_mm256_xor_si256(c2[10739],simde_mm256_xor_si256(c2[3365],simde_mm256_xor_si256(c2[13931],simde_mm256_xor_si256(c2[14459],simde_mm256_xor_si256(c2[9704],simde_mm256_xor_si256(c2[10232],simde_mm256_xor_si256(c2[12892],simde_mm256_xor_si256(c2[7080],simde_mm256_xor_si256(c2[13424],simde_mm256_xor_si256(c2[13952],simde_mm256_xor_si256(c2[14499],simde_mm256_xor_si256(c2[14506],simde_mm256_xor_si256(c2[15034],simde_mm256_xor_si256(c2[16090],simde_mm256_xor_si256(c2[16618],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[1324],simde_mm256_xor_si256(c2[6076],simde_mm256_xor_si256(c2[6604],simde_mm256_xor_si256(c2[6631],simde_mm256_xor_si256(c2[9269],simde_mm256_xor_si256(c2[818],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[318],simde_mm256_x
or_si256(c2[1369],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[8237],simde_mm256_xor_si256(c2[8765],simde_mm256_xor_si256(c2[10897],simde_mm256_xor_si256(c2[9849],simde_mm256_xor_si256(c2[10377],simde_mm256_xor_si256(c2[7729],simde_mm256_xor_si256(c2[8257],simde_mm256_xor_si256(c2[9865],simde_mm256_xor_si256(c2[16209],simde_mm256_xor_si256(c2[8288],simde_mm256_xor_si256(c2[8816],simde_mm256_xor_si256(c2[3034],simde_mm256_xor_si256(c2[15701],simde_mm256_xor_si256(c2[5668],simde_mm256_xor_si256(c2[6196],simde_mm256_xor_si256(c2[7282],simde_mm256_xor_si256(c2[15730],simde_mm256_xor_si256(c2[16258],simde_mm256_xor_si256(c2[3580],simde_mm256_xor_si256(c2[4108],simde_mm256_xor_si256(c2[7298],simde_mm256_xor_si256(c2[5714],simde_mm256_xor_si256(c2[6242],simde_mm256_xor_si256(c2[5187],simde_mm256_xor_si256(c2[5715],simde_mm256_xor_si256(c2[8376],simde_mm256_xor_si256(c2[3629],simde_mm256_xor_si256(c2[4153],simde_mm256_xor_si256(c2[4681],simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[4183],simde_mm256_xor_si256(c2[4711],simde_mm256_xor_si256(c2[1016],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[11597],simde_mm256_xor_si256(c2[6322],simde_mm256_xor_si256(c2[16877],c2[510]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 43
-     d2[516]=_mm256_xor_si256(c2[14786],_mm256_xor_si256(c2[11095],_mm256_xor_si256(c2[11627],_mm256_xor_si256(c2[14785],_mm256_xor_si256(c2[15313],_mm256_xor_si256(c2[14813],_mm256_xor_si256(c2[1609],_mm256_xor_si256(c2[16399],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[9003],_mm256_xor_si256(c2[14310],_mm256_xor_si256(c2[11143],_mm256_xor_si256(c2[9027],_mm256_xor_si256(c2[13804],_mm256_xor_si256(c2[9583],_mm256_xor_si256(c2[4301],_mm256_xor_si256(c2[4829],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[11187],_mm256_xor_si256(c2[14353],_mm256_xor_si256(c2[14881],_mm256_xor_si256(c2[15970],_mm256_xor_si256(c2[13329],_mm256_xor_si256(c2[4873],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[5434],_mm256_xor_si256(c2[8075],_mm256_xor_si256(c2[8603],_mm256_xor_si256(c2[10731],_mm256_xor_si256(c2[10729],_mm256_xor_si256(c2[706],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[10755],_mm256_xor_si256(c2[4954],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[727],_mm256_xor_si256(c2[3387],_mm256_xor_si256(c2[14482],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[4994],_mm256_xor_si256(c2[5529],_mm256_xor_si256(c2[6585],_mm256_xor_si256(c2[7113],_mm256_xor_si256(c2[11888],_mm256_xor_si256(c2[8714],_mm256_xor_si256(c2[13466],_mm256_xor_si256(c2[13994],_mm256_xor_si256(c2[14021],_mm256_xor_si256(c2[16659],_mm256_xor_si256(c2[8208],_mm256_xor_si256(c2[8736],_mm256_xor_si256(c2[7708],_mm256_xor_si256(c2[9299],_mm256_xor_si256(c2[15627],_mm256_xor_si256(c2[16155],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[15131],_mm256_xor_si256(c2[15659],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[6704],_mm256_xor_si256(c2[16206],_mm256_xor_si256(c2[10424],_mm256_xor_si256(c2[6196],_mm256_xor_si256(c2[13058],_mm256_xor_si256(c2[13586],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[14672],_mm256_xor_si256(c2[6753],_mm256_xor_si256(c2[10970],_mm256_xor_si256(c2[11498],_mm256_xor_si256(c2[14688],_mm256_xor_si256(c2[13632],_mm256_xor_si256(c2[12577],_mm256_xor_si256(c2[13105],_mm25
6_xor_si256(c2[9939],_mm256_xor_si256(c2[15778],_mm256_xor_si256(c2[11019],_mm256_xor_si256(c2[12083],_mm256_xor_si256(c2[10515],_mm256_xor_si256(c2[12101],_mm256_xor_si256(c2[8406],_mm256_xor_si256(c2[8934],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[13712],_mm256_xor_si256(c2[7372],c2[7900]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[516]=simde_mm256_xor_si256(c2[14786],simde_mm256_xor_si256(c2[11095],simde_mm256_xor_si256(c2[11627],simde_mm256_xor_si256(c2[14785],simde_mm256_xor_si256(c2[15313],simde_mm256_xor_si256(c2[14813],simde_mm256_xor_si256(c2[1609],simde_mm256_xor_si256(c2[16399],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[9003],simde_mm256_xor_si256(c2[14310],simde_mm256_xor_si256(c2[11143],simde_mm256_xor_si256(c2[9027],simde_mm256_xor_si256(c2[13804],simde_mm256_xor_si256(c2[9583],simde_mm256_xor_si256(c2[4301],simde_mm256_xor_si256(c2[4829],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[11187],simde_mm256_xor_si256(c2[14353],simde_mm256_xor_si256(c2[14881],simde_mm256_xor_si256(c2[15970],simde_mm256_xor_si256(c2[13329],simde_mm256_xor_si256(c2[4873],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[5434],simde_mm256_xor_si256(c2[8075],simde_mm256_xor_si256(c2[8603],simde_mm256_xor_si256(c2[10731],simde_mm256_xor_si256(c2[10729],simde_mm256_xor_si256(c2[706],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[10755],simde_mm256_xor_si256(c2[4954],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[727],simde_mm256_xor_si256(c2[3387],simde_mm256_xor_si256(c2[14482],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[4994],simde_mm256_xor_si256(c2[5529],simde_mm256_xor_si256(c2[6585],simde_mm256_xor_si256(c2[7113],simde_mm256_xor_si256(c2[11888],simde_mm256_xor_si256(c2[8714],simde_mm256_xor_si256(c2[13466],simde_mm256_xor_si256(c2[13994],simde_mm256_xor_si256(c2[14021],simde_mm256_xor_si256(c2[16659],simde_mm256_xor_si256(c2[8208],simde_mm256_xor_si256(c2[8736],simde_mm256_xor_si256(c2[7708],simde_mm256_xor_si256(c2[9299],simde_mm256_xor_si256(c2[15627],simde_mm256_xor_si256(c2[16155],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[15131],simde_mm256_xor_si256(c2[15659],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[6704],simde_mm256_xor_si256(c2[16206],simde_mm256_xor_si256(c2[10424],simde_mm256
_xor_si256(c2[6196],simde_mm256_xor_si256(c2[13058],simde_mm256_xor_si256(c2[13586],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[14672],simde_mm256_xor_si256(c2[6753],simde_mm256_xor_si256(c2[10970],simde_mm256_xor_si256(c2[11498],simde_mm256_xor_si256(c2[14688],simde_mm256_xor_si256(c2[13632],simde_mm256_xor_si256(c2[12577],simde_mm256_xor_si256(c2[13105],simde_mm256_xor_si256(c2[9939],simde_mm256_xor_si256(c2[15778],simde_mm256_xor_si256(c2[11019],simde_mm256_xor_si256(c2[12083],simde_mm256_xor_si256(c2[10515],simde_mm256_xor_si256(c2[12101],simde_mm256_xor_si256(c2[8406],simde_mm256_xor_si256(c2[8934],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[13712],simde_mm256_xor_si256(c2[7372],c2[7900]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 44
-     d2[528]=_mm256_xor_si256(c2[2646],_mm256_xor_si256(c2[15850],_mm256_xor_si256(c2[16370],_mm256_xor_si256(c2[3173],_mm256_xor_si256(c2[8450],_mm256_xor_si256(c2[2673],_mm256_xor_si256(c2[6364],_mm256_xor_si256(c2[4787],_mm256_xor_si256(c2[2170],_mm256_xor_si256(c2[15898],_mm256_xor_si256(c2[13782],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[14338],_mm256_xor_si256(c2[9584],_mm256_xor_si256(c2[9605],_mm256_xor_si256(c2[15942],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[1177],_mm256_xor_si256(c2[9628],_mm256_xor_si256(c2[5427],_mm256_xor_si256(c2[10177],_mm256_xor_si256(c2[13346],_mm256_xor_si256(c2[15486],_mm256_xor_si256(c2[15484],_mm256_xor_si256(c2[5977],_mm256_xor_si256(c2[7562],_mm256_xor_si256(c2[15510],_mm256_xor_si256(c2[9697],_mm256_xor_si256(c2[5482],_mm256_xor_si256(c2[8142],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[9202],_mm256_xor_si256(c2[1805],_mm256_xor_si256(c2[9749],_mm256_xor_si256(c2[10272],_mm256_xor_si256(c2[11856],_mm256_xor_si256(c2[16643],_mm256_xor_si256(c2[13469],_mm256_xor_si256(c2[1854],_mm256_xor_si256(c2[1881],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[13491],_mm256_xor_si256(c2[12463],_mm256_xor_si256(c2[14042],_mm256_xor_si256(c2[4015],_mm256_xor_si256(c2[6147],_mm256_xor_si256(c2[5627],_mm256_xor_si256(c2[3507],_mm256_xor_si256(c2[5115],_mm256_xor_si256(c2[11459],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[15179],_mm256_xor_si256(c2[10951],_mm256_xor_si256(c2[1446],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[11496],_mm256_xor_si256(c2[16253],_mm256_xor_si256(c2[2548],_mm256_xor_si256(c2[1492],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[3626],_mm256_xor_si256(c2[15774],_mm256_xor_si256(c2[16826],_mm256_xor_si256(c2[15270],_mm256_xor_si256(c2[16856],_mm256_xor_si256(c2[13689],_mm256_xor_si256(c2[6847],_mm256_xor_si256(c2[1560],c2[12655])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[528]=simde_mm256_xor_si256(c2[2646],simde_mm256_xor_si256(c2[15850],simde_mm256_xor_si256(c2[16370],simde_mm256_xor_si256(c2[3173],simde_mm256_xor_si256(c2[8450],simde_mm256_xor_si256(c2[2673],simde_mm256_xor_si256(c2[6364],simde_mm256_xor_si256(c2[4787],simde_mm256_xor_si256(c2[2170],simde_mm256_xor_si256(c2[15898],simde_mm256_xor_si256(c2[13782],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[14338],simde_mm256_xor_si256(c2[9584],simde_mm256_xor_si256(c2[9605],simde_mm256_xor_si256(c2[15942],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[1177],simde_mm256_xor_si256(c2[9628],simde_mm256_xor_si256(c2[5427],simde_mm256_xor_si256(c2[10177],simde_mm256_xor_si256(c2[13346],simde_mm256_xor_si256(c2[15486],simde_mm256_xor_si256(c2[15484],simde_mm256_xor_si256(c2[5977],simde_mm256_xor_si256(c2[7562],simde_mm256_xor_si256(c2[15510],simde_mm256_xor_si256(c2[9697],simde_mm256_xor_si256(c2[5482],simde_mm256_xor_si256(c2[8142],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[9202],simde_mm256_xor_si256(c2[1805],simde_mm256_xor_si256(c2[9749],simde_mm256_xor_si256(c2[10272],simde_mm256_xor_si256(c2[11856],simde_mm256_xor_si256(c2[16643],simde_mm256_xor_si256(c2[13469],simde_mm256_xor_si256(c2[1854],simde_mm256_xor_si256(c2[1881],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[13491],simde_mm256_xor_si256(c2[12463],simde_mm256_xor_si256(c2[14042],simde_mm256_xor_si256(c2[4015],simde_mm256_xor_si256(c2[6147],simde_mm256_xor_si256(c2[5627],simde_mm256_xor_si256(c2[3507],simde_mm256_xor_si256(c2[5115],simde_mm256_xor_si256(c2[11459],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[15179],simde_mm256_xor_si256(c2[10951],simde_mm256_xor_si256(c2[1446],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[11496],simde_mm256_xor_si256(c2[16253],simde_mm256_xor_si256(c2[2548],simde_mm256_xor_si256(c2[1492],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[3626],simde_mm256_xor_si256(c2[15774],simde_mm25
6_xor_si256(c2[16826],simde_mm256_xor_si256(c2[15270],simde_mm256_xor_si256(c2[16856],simde_mm256_xor_si256(c2[13689],simde_mm256_xor_si256(c2[6847],simde_mm256_xor_si256(c2[1560],c2[12655])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 45
-     d2[540]=_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[11236],c2[8160]));
+     d2[540]=simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[11236],c2[8160]));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc128_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc128_byte.c
index 45ad07040b69762c8deff583ad293520f4a35379..ad1bc53f1f4f8907e951027af9905209e0909f7b 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc128_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc128_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc128_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[898],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[1737],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[1267],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[898],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[1737],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[1267],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))));
 
 //row: 1
-     d2[4]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[898],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[1737],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[1267],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1114],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))))))))));
+     d2[4]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[898],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[1737],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[1267],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1114],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[8]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[898],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[2296],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[1737],_mm256_xor_si256(c2[67],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[1267],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1114],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[2315],_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))))))))))))))))));
+     d2[8]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[898],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[2296],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[1737],simde_mm256_xor_si256(c2[67],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[1267],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1114],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[2315],simde_mm256_xor_si256(c2[2235],c2[2555]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[12]=_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[898],_mm256_xor_si256(c2[2097],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[683],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[1817],_mm256_xor_si256(c2[1737],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[1267],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[2235],_mm256_xor_si256(c2[72],c2[2555]))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[898],simde_mm256_xor_si256(c2[2097],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[683],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[1817],simde_mm256_xor_si256(c2[1737],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[1267],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[2235],simde_mm256_xor_si256(c2[72],c2[2555]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[16]=_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1043],_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[2249],_mm256_xor_si256(c2[2169],_mm256_xor_si256(c2[1929],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[1536],_mm256_xor_si256(c2[1456],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[107],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[987],_mm256_xor_si256(c2[1467],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[2115],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[1161],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[1888],_mm256_xor_si256(c2[219],_mm256_xor_si256(c2[2299],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[1986],_mm256_xor_si256(c2[1672],_mm256_xor_si256(c2[1592],_mm256_xor_si256(c2[234],c2[554]))))))))))))))))))))))))))))))))));
+     d2[16]=simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1043],simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[2249],simde_mm256_xor_si256(c2[2169],simde_mm256_xor_si256(c2[1929],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[1536],simde_mm256_xor_si256(c2[1456],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[107],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[987],simde_mm256_xor_si256(c2[1467],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[2115],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[1161],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[1888],simde_mm256_xor_si256(c2[219],simde_mm256_xor_si256(c2[2299],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[1986],simde_mm256_xor_si256(c2[1672],simde_mm256_xor_si256(c2[1592],simde_mm256_xor_si256(c2[234],c2[554]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[20]=_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[2170],_mm256_xor_si256(c2[1930],_mm256_xor_si256(c2[1131],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[275],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[283],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[690],_mm256_xor_si256(c2[1889],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[2296],_mm256_xor_si256(c2[2536],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1987],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[1593],_mm256_xor_si256(c2[235],c2[555]))))))))))))))))))))))))))))))))))));
+     d2[20]=simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[2170],simde_mm256_xor_si256(c2[1930],simde_mm256_xor_si256(c2[1131],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[275],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[283],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[690],simde_mm256_xor_si256(c2[1889],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[2296],simde_mm256_xor_si256(c2[2536],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1987],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[1593],simde_mm256_xor_si256(c2[235],c2[555]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[24]=_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[1528],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[819],_mm256_xor_si256(c2[739],_mm256_xor_si256(c2[1858],_mm256_xor_si256(c2[1945],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[440],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[2210],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[2531],_mm256_xor_si256(c2[1171],_mm256_xor_si256(c2[2057],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1097],_mm256_xor_si256(c2[2387],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[1265],_mm256_xor_si256(c2[955],_mm256_xor_si256(c2[875],_mm256_xor_si256(c2[2072],_mm256_xor_si256(c2[2392],c2[2312]))))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[1528],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[819],simde_mm256_xor_si256(c2[739],simde_mm256_xor_si256(c2[1858],simde_mm256_xor_si256(c2[1945],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[440],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[2210],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[2531],simde_mm256_xor_si256(c2[1171],simde_mm256_xor_si256(c2[2057],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1097],simde_mm256_xor_si256(c2[2387],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[1265],simde_mm256_xor_si256(c2[955],simde_mm256_xor_si256(c2[875],simde_mm256_xor_si256(c2[2072],simde_mm256_xor_si256(c2[2392],c2[2312]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[28]=_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[1931],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[651],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[411],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[2171],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[1137],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1057],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[1145],_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[2515],_mm256_xor_si256(c2[1315],_mm256_xor_si256(c2[1793],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[1472],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[843],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[2529],_mm256_xor_si256(c2[1329],_mm256_xor_si256(c2[371],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[2456],_mm256_xor_si256(c2[1256],_mm256_xor_si256(c2[857],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[1016],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[307],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[1274],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[2475],_mm256_xor_si256(c2[1275],_mm256_xor_si256(c2[1675],_mm256_xor_si256(c2[232],c2[1595]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[28]=simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[1931],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[651],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[411],simde_mm256_xor_si256(c2[2251],simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[2171],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[1137],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1057],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[1145],simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[2515],simde_mm256_xor_si256(c2[1315],simde_mm256_xor_si256(c2[1793],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[1472],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[843],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[2529],simde_mm256_xor_si256(c2[1329],simde_mm256_xor_si256(c2[371],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[2456],simde_mm256_xor_si256(c2[1256],simde_mm256_xor_si256(c2[857],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[1016],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[307],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[127
4],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[2475],simde_mm256_xor_si256(c2[1275],simde_mm256_xor_si256(c2[1675],simde_mm256_xor_si256(c2[232],c2[1595]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[32]=_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[2170],_mm256_xor_si256(c2[2090],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[1051],_mm256_xor_si256(c2[2410],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[2496],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[2507],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[1384],_mm256_xor_si256(c2[275],_mm256_xor_si256(c2[195],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[283],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[209],_mm256_xor_si256(c2[690],_mm256_xor_si256(c2[610],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[466],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[1907],_mm256_xor_si256(c2[1593],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[235],_mm256_xor_si256(c2[155],c2[475]))))))))))))))))))))))))))))))))))))))))));
+     d2[32]=simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[2170],simde_mm256_xor_si256(c2[2090],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[1051],simde_mm256_xor_si256(c2[2410],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[2496],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[2507],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[1384],simde_mm256_xor_si256(c2[275],simde_mm256_xor_si256(c2[195],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[283],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[209],simde_mm256_xor_si256(c2[690],simde_mm256_xor_si256(c2[610],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[466],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[1907],simde_mm256_xor_si256(c2[1593],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[235],simde_mm256_xor_si256(c2[155],c2[475]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[36]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[562],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[1369],_mm256_xor_si256(c2[891],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[651],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[2411],_mm256_xor_si256(c2[890],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[1779],_mm256_xor_si256(c2[1297],_mm256_xor_si256(c2[1384],_mm256_xor_si256(c2[1786],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[187],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[667],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[2033],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[1315],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[1632],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[2442],_mm256_xor_si256(c2[1649],_mm256_xor_si256(c2[2051],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[2448],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[610],_mm256_xor_si256(c2[1978],_mm256_xor_si256(c2[1496],_mm256_xor_si256(c2[1499],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[2304],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1025],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[394],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[2313],c2[1835])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[562],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[1369],simde_mm256_xor_si256(c2[891],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[651],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[2411],simde_mm256_xor_si256(c2[890],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[1779],simde_mm256_xor_si256(c2[1297],simde_mm256_xor_si256(c2[1384],simde_mm256_xor_si256(c2[1786],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[187],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[667],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[2033],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[1315],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[1632],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[2442],simde_mm256_xor_si256(c2[1649],simde_mm256_xor_si256(c2[2051],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[2448],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[610],simde_mm256_xor_si256(c2[1978],simde_mm256_xor_si256(c2[1496],simde_mm256_xor_si256(c2[1499],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[2304],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1025],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[394],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[2313],c2[1835])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[40]=_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[2009],_mm256_xor_si256(c2[48],c2[1739])));
+     d2[40]=simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[2009],simde_mm256_xor_si256(c2[48],c2[1739])));
 
 //row: 11
-     d2[44]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[969],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[1379],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[2346],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[915],_mm256_xor_si256(c2[1794],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[1651],_mm256_xor_si256(c2[2048],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[1099],_mm256_xor_si256(c2[1019],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[786],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[1593],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1913],c2[1514])))))))))))))))))))))))))))))))))))));
+     d2[44]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[969],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[1379],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[2346],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[915],simde_mm256_xor_si256(c2[1794],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[1651],simde_mm256_xor_si256(c2[2048],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[1099],simde_mm256_xor_si256(c2[1019],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[786],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[1593],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1913],c2[1514])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[48]=_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[2491],_mm256_xor_si256(c2[1688],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[2098],_mm256_xor_si256(c2[2018],_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[1545],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[1227],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[1723],_mm256_xor_si256(c2[930],_mm256_xor_si256(c2[850],_mm256_xor_si256(c2[1251],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[298],_mm256_xor_si256(c2[1107],_mm256_xor_si256(c2[2387],_mm256_xor_si256(c2[2544],_mm256_xor_si256(c2[2234],_mm256_xor_si256(c2[2154],_mm256_xor_si256(c2[792],c2[1112]))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[2491],simde_mm256_xor_si256(c2[1688],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[2098],simde_mm256_xor_si256(c2[2018],simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[1545],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[1227],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[1723],simde_mm256_xor_si256(c2[930],simde_mm256_xor_si256(c2[850],simde_mm256_xor_si256(c2[1251],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[298],simde_mm256_xor_si256(c2[1107],simde_mm256_xor_si256(c2[2387],simde_mm256_xor_si256(c2[2544],simde_mm256_xor_si256(c2[2234],simde_mm256_xor_si256(c2[2154],simde_mm256_xor_si256(c2[792],c2[1112]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[52]=_mm256_xor_si256(c2[483],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[1450],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[411],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[1856],_mm256_xor_si256(c2[1867],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[2275],_mm256_xor_si256(c2[2195],_mm256_xor_si256(c2[2122],_mm256_xor_si256(c2[522],_mm256_xor_si256(c2[442],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[2529],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[2059],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[1576],_mm256_xor_si256(c2[2385],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[1347],_mm256_xor_si256(c2[1267],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[873],_mm256_xor_si256(c2[2074],_mm256_xor_si256(c2[2474],c2[2394])))))))))))))))))))))))))))))))))))));
+     d2[52]=simde_mm256_xor_si256(c2[483],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[1450],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[411],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[1856],simde_mm256_xor_si256(c2[1867],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[2275],simde_mm256_xor_si256(c2[2195],simde_mm256_xor_si256(c2[2122],simde_mm256_xor_si256(c2[522],simde_mm256_xor_si256(c2[442],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[2529],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[2059],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[1576],simde_mm256_xor_si256(c2[2385],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[1347],simde_mm256_xor_si256(c2[1267],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[873],simde_mm256_xor_si256(c2[2074],simde_mm256_xor_si256(c2[2474],c2[2394])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[56]=_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[1921],_mm256_xor_si256(c2[1931],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[2010],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[1211],_mm256_xor_si256(c2[1531],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[1145],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[2515],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[1793],_mm256_xor_si256(c2[2192],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[843],_mm256_xor_si256(c2[1242],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[2529],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[371],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[1969],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[2456],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[2456],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[2376],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[626],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[1906],_mm256_xor_si256(c2[2147],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[2067],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[1274],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[2475],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[715],_mm256_xor_si256(c2[232],c2[635])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[56]=simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[1921],simde_mm256_xor_si256(c2[1931],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[2010],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[1211],simde_mm256_xor_si256(c2[1531],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[1145],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[2515],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[1793],simde_mm256_xor_si256(c2[2192],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[843],simde_mm256_xor_si256(c2[1242],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[2529],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[371],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[1969],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[2456],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[2456],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[2376],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[626],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[1906],simde_mm256_xor_si256(c2[2147],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[2067],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[1274],simde_mm256_xor_si256(c2[16
73],simde_mm256_xor_si256(c2[2475],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[715],simde_mm256_xor_si256(c2[232],c2[635])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[60]=_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[1682],_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[730],_mm256_xor_si256(c2[9],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[2099],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[659],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1227],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[1147],_mm256_xor_si256(c2[1626],_mm256_xor_si256(c2[2107],_mm256_xor_si256(c2[2106],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[195],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[2281],_mm256_xor_si256(c2[1488],_mm256_xor_si256(c2[931],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[2531],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[858],_mm256_xor_si256(c2[1339],_mm256_xor_si256(c2[379],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[1184],_mm256_xor_si256(c2[1665],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[2235],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[873],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[1193],c2[1674]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[1682],simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[730],simde_mm256_xor_si256(c2[9],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[2099],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[659],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1227],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[1147],simde_mm256_xor_si256(c2[1626],simde_mm256_xor_si256(c2[2107],simde_mm256_xor_si256(c2[2106],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[195],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[2281],simde_mm256_xor_si256(c2[1488],simde_mm256_xor_si256(c2[931],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[2531],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[858],simde_mm256_xor_si256(c2[1339],simde_mm256_xor_si256(c2[379],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[1184],simde_mm256_xor_si256(c2[1665],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[2235],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[873],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[1193],c2[1674]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[64]=_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[2083],_mm256_xor_si256(c2[2000],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[2331],_mm256_xor_si256(c2[571],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[2011],_mm256_xor_si256(c2[251],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[2011],_mm256_xor_si256(c2[890],_mm256_xor_si256(c2[1618],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[1538],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[897],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[105],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[1944],_mm256_xor_si256(c2[1065],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[1545],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[1155],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[1650],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[1096],_mm256_xor_si256(c2[2377],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[1506],_mm256_xor_si256(c2[627],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[1907],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[1674],_mm256_xor_si256(c2[2473],_mm256_xor_si256(c2[1195],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[1115],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[1435],c2[1192]))))))))))))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))));
+     d2[64]=simde_mm256_xor_si256(c2[1360],simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[2083],simde_mm256_xor_si256(c2[2000],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[2331],simde_mm256_xor_si256(c2[571],simde_mm256_xor_si256(c2[2251],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[2011],simde_mm256_xor_si256(c2[251],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[2011],simde_mm256_xor_si256(c2[890],simde_mm256_xor_si256(c2[1618],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[1538],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[897],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[105],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[1065],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[1545],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[1155],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[1650],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[1096],simde_mm256_xor_si256(c2[2377],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[1506],simde_mm256_xor_si256(c2[627],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[1907],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[20
64],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[1674],simde_mm256_xor_si256(c2[2473],simde_mm256_xor_si256(c2[1195],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[1115],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[1435],c2[1192])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[68]=_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[2083],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[483],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[571],_mm256_xor_si256(c2[1689],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[1609],_mm256_xor_si256(c2[251],_mm256_xor_si256(c2[1369],_mm256_xor_si256(c2[2011],_mm256_xor_si256(c2[570],_mm256_xor_si256(c2[2411],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[897],_mm256_xor_si256(c2[2019],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[2106],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[2026],_mm256_xor_si256(c2[507],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[907],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[1155],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[2354],_mm256_xor_si256(c2[2361],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[2281],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[2371],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[209],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[2298],_mm256_xor_si256(c2[1096],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[1739],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[2544],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[1265],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[1112],_mm256_xor_si256(c2[2473],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[2313],_mm256_xor_si256(c2[1115],_mm256_xor_si256(c2[2233],_mm256_xor_si256(c2[1435],c2[2553]))))))))))))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))));
+     d2[68]=simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[2083],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[483],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[571],simde_mm256_xor_si256(c2[1689],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[1609],simde_mm256_xor_si256(c2[251],simde_mm256_xor_si256(c2[1369],simde_mm256_xor_si256(c2[2011],simde_mm256_xor_si256(c2[570],simde_mm256_xor_si256(c2[2411],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[897],simde_mm256_xor_si256(c2[2019],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[2106],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[2026],simde_mm256_xor_si256(c2[507],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[907],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[1155],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[2354],simde_mm256_xor_si256(c2[2361],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[2281],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[2371],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[209],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[2298],simde_mm256_xor_si256(c2[1096],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[1739],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[2544],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[1265
],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[1112],simde_mm256_xor_si256(c2[2473],simde_mm256_xor_si256(c2[1032],simde_mm256_xor_si256(c2[2313],simde_mm256_xor_si256(c2[1115],simde_mm256_xor_si256(c2[2233],simde_mm256_xor_si256(c2[1435],c2[2553])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[72]=_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[210],c2[1497]));
+     d2[72]=simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[210],c2[1497]));
 
 //row: 19
-     d2[76]=_mm256_xor_si256(c2[82],_mm256_xor_si256(c2[2482],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[1842],_mm256_xor_si256(c2[1049],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1608],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[1466],_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[347],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[1794],_mm256_xor_si256(c2[1721],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[472],_mm256_xor_si256(c2[1673],c2[1993]))))))))))))))))))))))))))));
+     d2[76]=simde_mm256_xor_si256(c2[82],simde_mm256_xor_si256(c2[2482],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[1842],simde_mm256_xor_si256(c2[1049],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1608],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[1466],simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[347],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[1794],simde_mm256_xor_si256(c2[1721],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[472],simde_mm256_xor_si256(c2[1673],c2[1993]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[80]=_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[1043],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[329],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[739],_mm256_xor_si256(c2[659],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[1785],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[1314],_mm256_xor_si256(c2[2113],_mm256_xor_si256(c2[755],_mm256_xor_si256(c2[2040],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[1091],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[1498],_mm256_xor_si256(c2[2307],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[1185],_mm256_xor_si256(c2[875],_mm256_xor_si256(c2[795],_mm256_xor_si256(c2[1992],c2[2312]))))))))))))))))))))))))))))))))));
+     d2[80]=simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[1043],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[329],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[739],simde_mm256_xor_si256(c2[659],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[1785],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[1314],simde_mm256_xor_si256(c2[2113],simde_mm256_xor_si256(c2[755],simde_mm256_xor_si256(c2[2040],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[1091],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[1498],simde_mm256_xor_si256(c2[2307],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[1185],simde_mm256_xor_si256(c2[875],simde_mm256_xor_si256(c2[795],simde_mm256_xor_si256(c2[1992],c2[2312]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[84]=_mm256_xor_si256(c2[2082],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[250],_mm256_xor_si256(c2[2090],_mm256_xor_si256(c2[2010],_mm256_xor_si256(c2[2336],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[907],_mm256_xor_si256(c2[1867],_mm256_xor_si256(c2[2347],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[1315],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[2121],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[209],_mm256_xor_si256(c2[1099],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[1425],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[307],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[2472],_mm256_xor_si256(c2[1114],_mm256_xor_si256(c2[1514],c2[1434]))))))))))))))))))))))))))))))))))));
+     d2[84]=simde_mm256_xor_si256(c2[2082],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[250],simde_mm256_xor_si256(c2[2090],simde_mm256_xor_si256(c2[2010],simde_mm256_xor_si256(c2[2336],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[907],simde_mm256_xor_si256(c2[1867],simde_mm256_xor_si256(c2[2347],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[1315],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[2121],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[209],simde_mm256_xor_si256(c2[1099],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[1425],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[307],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[2472],simde_mm256_xor_si256(c2[1114],simde_mm256_xor_si256(c2[1514],c2[1434]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[88]=_mm256_xor_si256(c2[2410],c2[2497]);
+     d2[88]=simde_mm256_xor_si256(c2[2410],c2[2497]);
 
 //row: 23
-     d2[92]=_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[907],c2[1163]));
+     d2[92]=simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[907],c2[1163]));
 
 //row: 24
-     d2[96]=_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[896],c2[712]));
+     d2[96]=simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[896],c2[712]));
 
 //row: 25
-     d2[100]=_mm256_xor_si256(c2[323],c2[2280]);
+     d2[100]=simde_mm256_xor_si256(c2[323],c2[2280]);
 
 //row: 26
-     d2[104]=_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[562],_mm256_xor_si256(c2[482],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[1688],_mm256_xor_si256(c2[1608],_mm256_xor_si256(c2[1131],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[891],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[569],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[979],_mm256_xor_si256(c2[899],_mm256_xor_si256(c2[418],_mm256_xor_si256(c2[1617],_mm256_xor_si256(c2[2018],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[2336],_mm256_xor_si256(c2[2105],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2272],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[1554],_mm256_xor_si256(c2[1073],_mm256_xor_si256(c2[1952],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[2280],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[123],_mm256_xor_si256(c2[2370],_mm256_xor_si256(c2[2290],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[2210],_mm256_xor_si256(c2[930],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[850],_mm256_xor_si256(c2[2297],_mm256_xor_si256(c2[2217],_mm256_xor_si256(c2[1736],_mm256_xor_si256(c2[1337],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[1257],_mm256_xor_si256(c2[1256],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[2547],_mm256_xor_si256(c2[2066],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[787],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[1425],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[1115],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[2232],_mm256_xor_si256(c2[1755],_mm256_xor_si256(c2[2155],_mm256_xor_si256(c2[2552],c2[2075])))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[104]=simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[562],simde_mm256_xor_si256(c2[482],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[1688],simde_mm256_xor_si256(c2[1608],simde_mm256_xor_si256(c2[1131],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[891],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[569],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[979],simde_mm256_xor_si256(c2[899],simde_mm256_xor_si256(c2[418],simde_mm256_xor_si256(c2[1617],simde_mm256_xor_si256(c2[2018],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[2336],simde_mm256_xor_si256(c2[2105],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2272],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[1554],simde_mm256_xor_si256(c2[1073],simde_mm256_xor_si256(c2[1952],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[2280],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[123],simde_mm256_xor_si256(c2[2370],simde_mm256_xor_si256(c2[2290],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[2210],simde_mm256_xor_si256(c2[930],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[850],simde_mm256_xor_si256(c2[2297],simde_mm256_xor_si256(c2[2217],simde_mm256_xor_si256(c2[1736],simde_mm256_xor_si256(c2[1337],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[1257],simde_mm256_xor_si256(c2[1256],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[2547],simde_mm256_xor_si256(c2[2066],simde_mm256_xor_si256(c2[1264],s
imde_mm256_xor_si256(c2[787],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[1425],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[1115],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[2232],simde_mm256_xor_si256(c2[1755],simde_mm256_xor_si256(c2[2155],simde_mm256_xor_si256(c2[2552],c2[2075])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[108]=_mm256_xor_si256(c2[640],c2[1888]);
+     d2[108]=simde_mm256_xor_si256(c2[640],c2[1888]);
 
 //row: 28
-     d2[112]=_mm256_xor_si256(c2[171],_mm256_xor_si256(c2[419],c2[600]));
+     d2[112]=simde_mm256_xor_si256(c2[171],simde_mm256_xor_si256(c2[419],c2[600]));
 
 //row: 29
-     d2[116]=_mm256_xor_si256(c2[1440],c2[2272]);
+     d2[116]=simde_mm256_xor_si256(c2[1440],c2[2272]);
 
 //row: 30
-     d2[120]=_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[776],c2[1674])));
+     d2[120]=simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[776],c2[1674])));
 
 //row: 31
-     d2[124]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[1451],_mm256_xor_si256(c2[728],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[811],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[2177],_mm256_xor_si256(c2[2097],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[763],_mm256_xor_si256(c2[683],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[2296],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[1817],_mm256_xor_si256(c2[67],_mm256_xor_si256(c2[1347],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1114],_mm256_xor_si256(c2[2315],_mm256_xor_si256(c2[152],c2[72])))))))))))))))))))))))))))))))))));
+     d2[124]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[1451],simde_mm256_xor_si256(c2[728],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[811],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[2177],simde_mm256_xor_si256(c2[2097],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[763],simde_mm256_xor_si256(c2[683],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[2296],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[1817],simde_mm256_xor_si256(c2[67],simde_mm256_xor_si256(c2[1347],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1114],simde_mm256_xor_si256(c2[2315],simde_mm256_xor_si256(c2[152],c2[72])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[128]=_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[1042],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1763],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[2168],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[1049],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[1379],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[2505],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[273],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[1001],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[1744],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[1595],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[153],c2[473]))))))))))))))))))))))))))))))))))))))))));
+     d2[128]=simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[1042],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1763],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[2168],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[1049],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[1379],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[2505],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[273],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[1001],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[1744],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[1595],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[153],c2[473]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[132]=_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[2161],_mm256_xor_si256(c2[2490],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[1451],_mm256_xor_si256(c2[1777],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[1784],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[1482],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[2209],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[377],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[2146],_mm256_xor_si256(c2[2307],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[555],c2[875]))))))))))))))))))))))))))));
+     d2[132]=simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[1360],simde_mm256_xor_si256(c2[2161],simde_mm256_xor_si256(c2[2490],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[1451],simde_mm256_xor_si256(c2[1777],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[1784],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[1482],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[2209],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[377],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[2146],simde_mm256_xor_si256(c2[2307],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[555],c2[875]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[136]=_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[1520],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[730],_mm256_xor_si256(c2[1928],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[1688],_mm256_xor_si256(c2[969],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[1227],_mm256_xor_si256(c2[1147],_mm256_xor_si256(c2[2345],_mm256_xor_si256(c2[2187],_mm256_xor_si256(c2[2107],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[1474],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[1874],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[1482],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[2281],_mm256_xor_si256(c2[920],_mm256_xor_si256(c2[1488],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[51],_mm256_xor_si256(c2[1889],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[1651],_mm256_xor_si256(c2[1419],_mm256_xor_si256(c2[1339],_mm256_xor_si256(c2[2537],_mm256_xor_si256(c2[2138],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[2058],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[1665],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[1355],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[2552],_mm256_xor_si256(c2[393],_mm256_xor_si256(c2[1674],c2[313])))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))));
+     d2[136]=simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[1520],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[730],simde_mm256_xor_si256(c2[1928],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[1688],simde_mm256_xor_si256(c2[969],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[1227],simde_mm256_xor_si256(c2[1147],simde_mm256_xor_si256(c2[2345],simde_mm256_xor_si256(c2[2187],simde_mm256_xor_si256(c2[2107],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[1474],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[1874],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[1482],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[2281],simde_mm256_xor_si256(c2[920],simde_mm256_xor_si256(c2[1488],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[51],simde_mm256_xor_si256(c2[1889],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[1651],simde_mm256_xor_si256(c2[1419],simde_mm256_xor_si256(c2[1339],simde_mm256_xor_si256(c2[2537],simde_mm256_xor_si256(c2[2138],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[2058],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[1665],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[1584],s
imde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[1355],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[2552],simde_mm256_xor_si256(c2[393],simde_mm256_xor_si256(c2[1674],c2[313]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[140]=_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[562],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[1609],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[1289],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[2009],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[1939],_mm256_xor_si256(c2[2026],_mm256_xor_si256(c2[1946],_mm256_xor_si256(c2[347],_mm256_xor_si256(c2[827],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[2274],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[521],_mm256_xor_si256(c2[681],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[2211],_mm256_xor_si256(c2[49],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[2138],_mm256_xor_si256(c2[1659],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[1185],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[952],_mm256_xor_si256(c2[2153],c2[2473]))))))))))))))))))))))))))))))))));
+     d2[140]=simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[562],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[1609],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[1289],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[2009],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[1939],simde_mm256_xor_si256(c2[2026],simde_mm256_xor_si256(c2[1946],simde_mm256_xor_si256(c2[347],simde_mm256_xor_si256(c2[827],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[2274],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[521],simde_mm256_xor_si256(c2[681],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[2211],simde_mm256_xor_si256(c2[49],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[2138],simde_mm256_xor_si256(c2[1659],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[1185],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[1032],simde_mm256_xor_si256(c2[952],simde_mm256_xor_si256(c2[2153],c2[2473]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[144]=_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[497],c2[2136]));
+     d2[144]=simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[497],c2[2136]));
 
 //row: 37
-     d2[148]=_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[2480],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[2321],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[970],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[171],_mm256_xor_si256(c2[2408],_mm256_xor_si256(c2[497],_mm256_xor_si256(c2[179],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[1616],_mm256_xor_si256(c2[1298],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[1305],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[1874],_mm256_xor_si256(c2[1552],_mm256_xor_si256(c2[1152],_mm256_xor_si256(c2[834],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[1955],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[1882],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[2443],_mm256_xor_si256(c2[1888],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[611],_mm256_xor_si256(c2[1819],_mm256_xor_si256(c2[1497],_mm256_xor_si256(c2[1098],_mm256_xor_si256(c2[1336],_mm256_xor_si256(c2[1018],_mm256_xor_si256(c2[2145],_mm256_xor_si256(c2[1827],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[785],_mm256_xor_si256(c2[1027],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[1834],_mm256_xor_si256(c2[1512],_mm256_xor_si256(c2[1912],_mm256_xor_si256(c2[2154],c2[1832])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[148]=simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[2480],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[2321],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[970],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[171],simde_mm256_xor_si256(c2[2408],simde_mm256_xor_si256(c2[497],simde_mm256_xor_si256(c2[179],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[1616],simde_mm256_xor_si256(c2[1298],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[1305],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[1874],simde_mm256_xor_si256(c2[1552],simde_mm256_xor_si256(c2[1152],simde_mm256_xor_si256(c2[834],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[1955],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[1882],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[2443],simde_mm256_xor_si256(c2[1888],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[611],simde_mm256_xor_si256(c2[1819],simde_mm256_xor_si256(c2[1497],simde_mm256_xor_si256(c2[1098],simde_mm256_xor_si256(c2[1336],simde_mm256_xor_si256(c2[1018],simde_mm256_xor_si256(c2[2145],simde_mm256_xor_si256(c2[1827],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[785],simde_mm256_xor_si256(c2[1027],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[1834],simde_mm256_xor_si256(c2[1512],simde_mm256_xor_si256(c2[1912],simde_mm256_xor_si256(c2[2154],c2[1832])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[152]=_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1763],_mm256_xor_si256(c2[2168],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[1049],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[1379],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[2505],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[1744],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[1595],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[153],c2[473]))))))))))))))))))))))))))))))))));
+     d2[152]=simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1763],simde_mm256_xor_si256(c2[2168],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[1049],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[1379],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[2505],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[1744],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[1595],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[153],c2[473]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[156]=_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[1768],_mm256_xor_si256(c2[1528],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[1059],_mm256_xor_si256(c2[2178],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[2185],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[586],_mm256_xor_si256(c2[1066],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[2440],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[2530],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[1491],_mm256_xor_si256(c2[2457],_mm256_xor_si256(c2[2377],_mm256_xor_si256(c2[1898],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1275],_mm256_xor_si256(c2[1195],_mm256_xor_si256(c2[2472],_mm256_xor_si256(c2[2392],c2[153]))))))))))))))))))))))))))))))))))))))))));
+     d2[156]=simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[1768],simde_mm256_xor_si256(c2[1528],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[1059],simde_mm256_xor_si256(c2[2178],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[2185],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[586],simde_mm256_xor_si256(c2[1066],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[2440],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[2530],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[1491],simde_mm256_xor_si256(c2[2457],simde_mm256_xor_si256(c2[2377],simde_mm256_xor_si256(c2[1898],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1275],simde_mm256_xor_si256(c2[1195],simde_mm256_xor_si256(c2[2472],simde_mm256_xor_si256(c2[2392],c2[153]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[160]=_mm256_xor_si256(c2[1522],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[2249],_mm256_xor_si256(c2[731],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[1450],_mm256_xor_si256(c2[2491],_mm256_xor_si256(c2[1776],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[347],_mm256_xor_si256(c2[1384],_mm256_xor_si256(c2[1307],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[1787],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[594],_mm256_xor_si256(c2[1635],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[675],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[1643],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[2522],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1649],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[2208],_mm256_xor_si256(c2[690],_mm256_xor_si256(c2[539],_mm256_xor_si256(c2[1576],_mm256_xor_si256(c2[1177],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[1097],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[1906],_mm256_xor_si256(c2[2145],_mm256_xor_si256(c2[627],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[2306],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[1912],_mm256_xor_si256(c2[394],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[1595],_mm256_xor_si256(c2[1995],_mm256_xor_si256(c2[874],c2[1915]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[1522],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[2249],simde_mm256_xor_si256(c2[731],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[1450],simde_mm256_xor_si256(c2[2491],simde_mm256_xor_si256(c2[1776],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[347],simde_mm256_xor_si256(c2[1384],simde_mm256_xor_si256(c2[1307],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[1787],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[594],simde_mm256_xor_si256(c2[1635],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[675],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[1643],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[2522],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1649],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[2208],simde_mm256_xor_si256(c2[690],simde_mm256_xor_si256(c2[539],simde_mm256_xor_si256(c2[1576],simde_mm256_xor_si256(c2[1177],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[1097],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[1906],simde_mm256_xor_si256(c2[2145],simde_mm256_xor_si256(c2[627],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[2306],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[1912],simde_mm256_xor_si256(c2[394],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[1595],simde_mm256_xor_si256(c2[1995],simde_mm256_xor_si256(c2[874],c2[1915]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[164]=_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[2240],_mm256_xor_si256(c2[482],_mm256_xor_si256(c2[891],_mm256_xor_si256(c2[811],_mm256_xor_si256(c2[571],_mm256_xor_si256(c2[2331],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[105],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[1552],_mm256_xor_si256(c2[1483],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[443],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[1489],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[1416],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[1435],c2[1755]))))))))))))))))))))))))))))))))));
+     d2[164]=simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[2240],simde_mm256_xor_si256(c2[482],simde_mm256_xor_si256(c2[891],simde_mm256_xor_si256(c2[811],simde_mm256_xor_si256(c2[571],simde_mm256_xor_si256(c2[2331],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[105],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[1552],simde_mm256_xor_si256(c2[1483],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[443],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[1489],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[1416],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[1435],c2[1755]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc160_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc160_byte.c
index 88a453a74d6928cab5ea9c85b41966a67107129b..b2f90e62d09849ae7e499042ae958cfc38344ab8 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc160_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc160_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc160_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2081],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))));
 
 //row: 1
-     d2[5]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2081],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))))))))));
+     d2[5]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[10]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[2833],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2462],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2081],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))))))))))))))))));
+     d2[10]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[2833],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2462],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[2291],c2[1391]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[15]=_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[611],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[1520],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[1854],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[2571],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2181],_mm256_xor_si256(c2[2081],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[1491],c2[1391]))))))))))))))))))))))))))))))))));
+     d2[15]=simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[611],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[1520],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[1854],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[2571],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2181],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[1491],c2[1391]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[20]=_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[1600],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2914],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[2612],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[814],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[2934],_mm256_xor_si256(c2[2834],_mm256_xor_si256(c2[2433],_mm256_xor_si256(c2[3132],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[2854],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[2964],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[2062],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[2874],_mm256_xor_si256(c2[2171],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[883],_mm256_xor_si256(c2[1781],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[2894],_mm256_xor_si256(c2[1991],c2[1091]))))))))))))))))))))))))))))))))));
+     d2[20]=simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[1600],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2914],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[2612],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[814],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[2934],simde_mm256_xor_si256(c2[2834],simde_mm256_xor_si256(c2[2433],simde_mm256_xor_si256(c2[3132],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[2854],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[2964],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[2062],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[2874],simde_mm256_xor_si256(c2[2171],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[883],simde_mm256_xor_si256(c2[1781],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[2894],simde_mm256_xor_si256(c2[1991],c2[1091]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[25]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[1902],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[2914],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[1422],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[3131],_mm256_xor_si256(c2[2730],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[1751],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[3161],_mm256_xor_si256(c2[2364],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[2473],_mm256_xor_si256(c2[1374],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[2083],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[2293],c2[1393]))))))))))))))))))))))))))))))))))));
+     d2[25]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[1902],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[2914],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[1422],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[3131],simde_mm256_xor_si256(c2[2730],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[1751],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[3161],simde_mm256_xor_si256(c2[2364],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[2473],simde_mm256_xor_si256(c2[1374],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[2083],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[2293],c2[1393]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[30]=_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[2803],_mm256_xor_si256(c2[700],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2014],_mm256_xor_si256(c2[1914],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[2510],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[1934],_mm256_xor_si256(c2[1533],_mm256_xor_si256(c2[2232],_mm256_xor_si256(c2[1944],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[2542],_mm256_xor_si256(c2[1954],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[2853],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[1162],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[1974],_mm256_xor_si256(c2[1271],_mm256_xor_si256(c2[373],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[3182],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[2094],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[1091],_mm256_xor_si256(c2[191],c2[1391]))))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[2803],simde_mm256_xor_si256(c2[700],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2014],simde_mm256_xor_si256(c2[1914],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[2510],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[1934],simde_mm256_xor_si256(c2[1533],simde_mm256_xor_si256(c2[2232],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[2542],simde_mm256_xor_si256(c2[1954],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[2853],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[1162],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[1974],simde_mm256_xor_si256(c2[1271],simde_mm256_xor_si256(c2[373],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[3182],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[2094],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[1091],simde_mm256_xor_si256(c2[191],c2[1391]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[35]=_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[3102],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[13],_mm256_xor_si256(c2[3112],_mm256_xor_si256(c2[1312],_mm256_xor_si256(c2[2910],_mm256_xor_si256(c2[1110],_mm256_xor_si256(c2[514],_mm256_xor_si256(c2[2013],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[23],_mm256_xor_si256(c2[3122],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[1423],_mm256_xor_si256(c2[2922],_mm256_xor_si256(c2[2822],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[3132],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[2731],_mm256_xor_si256(c2[931],_mm256_xor_si256(c2[231],_mm256_xor_si256(c2[1630],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[1342],_mm256_xor_si256(c2[843],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[2040],_mm256_xor_si256(c2[1940],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[1352],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[1351],_mm256_xor_si256(c2[63],_mm256_xor_si256(c2[3162],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[3172],_mm256_xor_si256(c2[1372],_mm256_xor_si256(c2[2474],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[2074],_mm256_xor_si256(c2[3182],_mm256_xor_si256(c2[1382],_mm256_xor_si256(c2[1181],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[2084],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[93],_mm256_xor_si256(c2[3192],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[2294],_mm256_xor_si256(c2[494],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[2893],c2[2793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[35]=simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[3102],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[1903],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[13],simde_mm256_xor_si256(c2[3112],simde_mm256_xor_si256(c2[1312],simde_mm256_xor_si256(c2[2910],simde_mm256_xor_si256(c2[1110],simde_mm256_xor_si256(c2[514],simde_mm256_xor_si256(c2[2013],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[23],simde_mm256_xor_si256(c2[3122],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[1423],simde_mm256_xor_si256(c2[2922],simde_mm256_xor_si256(c2[2822],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[3132],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[2731],simde_mm256_xor_si256(c2[931],simde_mm256_xor_si256(c2[231],simde_mm256_xor_si256(c2[1630],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[1342],simde_mm256_xor_si256(c2[843],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[2040],simde_mm256_xor_si256(c2[1940],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[1352],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[1351],simde_mm256_xor_si256(c2[63],simde_mm256_xor_si256(c2[3162],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[3172],simde_mm256_xor_si256(c2[1372],simde_mm256_xor_si256(c2[2474],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[2074],simde_mm256_xor_si256(c2[3182],simde_mm256_xor_si256(c2[1382],simde_mm256_xor_si256(c2[1181],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[2084],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[93],simde_mm256_xor_si256(c2[3192],s
imde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[2294],simde_mm256_xor_si256(c2[494],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[2893],c2[2793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[40]=_mm256_xor_si256(c2[2302],_mm256_xor_si256(c2[2202],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[1003],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[2212],_mm256_xor_si256(c2[2010],_mm256_xor_si256(c2[2813],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[2222],_mm256_xor_si256(c2[523],_mm256_xor_si256(c2[2332],_mm256_xor_si256(c2[2232],_mm256_xor_si256(c2[1931],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[2530],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[2252],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[2262],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[1460],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[2372],_mm256_xor_si256(c2[2272],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[1184],_mm256_xor_si256(c2[2392],_mm256_xor_si256(c2[2292],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[1394],c2[494]))))))))))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[2302],simde_mm256_xor_si256(c2[2202],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[1003],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[2212],simde_mm256_xor_si256(c2[2010],simde_mm256_xor_si256(c2[2813],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[2222],simde_mm256_xor_si256(c2[523],simde_mm256_xor_si256(c2[2332],simde_mm256_xor_si256(c2[2232],simde_mm256_xor_si256(c2[1931],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[2530],simde_mm256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[2252],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[2262],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[1460],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[2372],simde_mm256_xor_si256(c2[2272],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[1184],simde_mm256_xor_si256(c2[2392],simde_mm256_xor_si256(c2[2292],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[1394],c2[494]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[45]=_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[2700],_mm256_xor_si256(c2[2901],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[803],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[2012],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[2412],_mm256_xor_si256(c2[2613],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[2122],_mm256_xor_si256(c2[2022],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[323],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[2032],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[1631],_mm256_xor_si256(c2[2134],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[2942],_mm256_xor_si256(c2[2444],_mm256_xor_si256(c2[2640],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[2052],_mm256_xor_si256(c2[451],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[2162],_mm256_xor_si256(c2[2062],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[3061],_mm256_xor_si256(c2[63],_mm256_xor_si256(c2[1871],_mm256_xor_si256(c2[2072],_mm256_xor_si256(c2[1173],_mm256_xor_si256(c2[1374],_mm256_xor_si256(c2[1881],_mm256_xor_si256(c2[2082],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[783],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[1891],_mm256_xor_si256(c2[2192],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[993],_mm256_xor_si256(c2[1194],_mm256_xor_si256(c2[93],c2[294])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[45]=simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[2102],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[2700],simde_mm256_xor_si256(c2[2901],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[803],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[2012],simde_mm256_xor_si256(c2[1614],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[2412],simde_mm256_xor_si256(c2[2613],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[2122],simde_mm256_xor_si256(c2[2022],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[323],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[2032],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[1631],simde_mm256_xor_si256(c2[2134],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[2942],simde_mm256_xor_si256(c2[2444],simde_mm256_xor_si256(c2[2640],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[2052],simde_mm256_xor_si256(c2[451],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[2162],simde_mm256_xor_si256(c2[2062],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[3061],simde_mm256_xor_si256(c2[63],simde_mm256_xor_si256(c2[1871],simde_mm256_xor_si256(c2[2072],simde_mm256_xor_si256(c2[1173],simde_mm256_xor_si256(c2[1374],simde_mm256_xor_si256(c2[1881],simde_mm256_xor_si256(c2[2082],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[783],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[1891],simde_mm256_xor_si256(c2[2192],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[993],simde_mm256_xor_si256(c2[1194],simde_mm256_xor_si256(c2[93],c2[294])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[50]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1710],_mm256_xor_si256(c2[2864],c2[2070])));
+     d2[50]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1710],simde_mm256_xor_si256(c2[2864],c2[2070])));
 
 //row: 11
-     d2[55]=_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[2100],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[3112],_mm256_xor_si256(c2[811],_mm256_xor_si256(c2[711],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[1720],_mm256_xor_si256(c2[1620],_mm256_xor_si256(c2[130],_mm256_xor_si256(c2[2933],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[843],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[2054],_mm256_xor_si256(c2[1954],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[1460],_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[2771],_mm256_xor_si256(c2[2671],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[1383],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[2281],_mm256_xor_si256(c2[190],_mm256_xor_si256(c2[2491],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[1591],c2[490])))))))))))))))))))))))))))))))))))));
+     d2[55]=simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[2100],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[3112],simde_mm256_xor_si256(c2[811],simde_mm256_xor_si256(c2[711],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[1720],simde_mm256_xor_si256(c2[1620],simde_mm256_xor_si256(c2[130],simde_mm256_xor_si256(c2[2933],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[843],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[2054],simde_mm256_xor_si256(c2[1954],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[1460],simde_mm256_xor_si256(c2[1360],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[2771],simde_mm256_xor_si256(c2[2671],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[1383],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[2281],simde_mm256_xor_si256(c2[190],simde_mm256_xor_si256(c2[2491],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[1591],c2[490])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[60]=_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1511],_mm256_xor_si256(c2[1314],_mm256_xor_si256(c2[2112],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[3021],_mm256_xor_si256(c2[1631],_mm256_xor_si256(c2[1531],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[1834],_mm256_xor_si256(c2[1733],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[2441],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[1551],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[1661],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[2761],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[873],_mm256_xor_si256(c2[1581],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[483],_mm256_xor_si256(c2[1691],_mm256_xor_si256(c2[1591],_mm256_xor_si256(c2[693],c2[2992]))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1511],simde_mm256_xor_si256(c2[1314],simde_mm256_xor_si256(c2[2112],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[3021],simde_mm256_xor_si256(c2[1631],simde_mm256_xor_si256(c2[1531],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[1834],simde_mm256_xor_si256(c2[1733],simde_mm256_xor_si256(c2[1541],simde_mm256_xor_si256(c2[2441],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[1551],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[1661],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[2761],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[873],simde_mm256_xor_si256(c2[1581],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[483],simde_mm256_xor_si256(c2[1691],simde_mm256_xor_si256(c2[1591],simde_mm256_xor_si256(c2[693],c2[2992]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[65]=_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2510],_mm256_xor_si256(c2[2313],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[821],_mm256_xor_si256(c2[2530],_mm256_xor_si256(c2[2134],_mm256_xor_si256(c2[2833],_mm256_xor_si256(c2[2540],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[3143],_mm256_xor_si256(c2[2550],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[1150],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[1763],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[2570],_mm256_xor_si256(c2[1972],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[584],_mm256_xor_si256(c2[1582],_mm256_xor_si256(c2[1482],_mm256_xor_si256(c2[683],_mm256_xor_si256(c2[2590],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[892],c2[792])))))))))))))))))))))))))))))))))))));
+     d2[65]=simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2510],simde_mm256_xor_si256(c2[2313],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[821],simde_mm256_xor_si256(c2[2530],simde_mm256_xor_si256(c2[2134],simde_mm256_xor_si256(c2[2833],simde_mm256_xor_si256(c2[2540],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[3143],simde_mm256_xor_si256(c2[2550],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[1150],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[1763],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[2570],simde_mm256_xor_si256(c2[1972],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[584],simde_mm256_xor_si256(c2[1582],simde_mm256_xor_si256(c2[1482],simde_mm256_xor_si256(c2[683],simde_mm256_xor_si256(c2[2590],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[892],c2[792])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[70]=_mm256_xor_si256(c2[2501],_mm256_xor_si256(c2[2401],_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1401],_mm256_xor_si256(c2[2511],_mm256_xor_si256(c2[2411],_mm256_xor_si256(c2[2610],_mm256_xor_si256(c2[2214],_mm256_xor_si256(c2[2413],_mm256_xor_si256(c2[3012],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[2421],_mm256_xor_si256(c2[2620],_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[2531],_mm256_xor_si256(c2[2431],_mm256_xor_si256(c2[2630],_mm256_xor_si256(c2[2030],_mm256_xor_si256(c2[2234],_mm256_xor_si256(c2[2734],_mm256_xor_si256(c2[2933],_mm256_xor_si256(c2[2441],_mm256_xor_si256(c2[2640],_mm256_xor_si256(c2[142],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[2650],_mm256_xor_si256(c2[1051],_mm256_xor_si256(c2[1350],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[2561],_mm256_xor_si256(c2[2461],_mm256_xor_si256(c2[2660],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[1863],_mm256_xor_si256(c2[462],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[2670],_mm256_xor_si256(c2[1773],_mm256_xor_si256(c2[2072],_mm256_xor_si256(c2[1972],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[1383],_mm256_xor_si256(c2[1682],_mm256_xor_si256(c2[1582],_mm256_xor_si256(c2[2591],_mm256_xor_si256(c2[2491],_mm256_xor_si256(c2[2690],_mm256_xor_si256(c2[1593],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[693],_mm256_xor_si256(c2[992],c2[892])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[70]=simde_mm256_xor_si256(c2[2501],simde_mm256_xor_si256(c2[2401],simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1401],simde_mm256_xor_si256(c2[2511],simde_mm256_xor_si256(c2[2411],simde_mm256_xor_si256(c2[2610],simde_mm256_xor_si256(c2[2214],simde_mm256_xor_si256(c2[2413],simde_mm256_xor_si256(c2[3012],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[2421],simde_mm256_xor_si256(c2[2620],simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[2531],simde_mm256_xor_si256(c2[2431],simde_mm256_xor_si256(c2[2630],simde_mm256_xor_si256(c2[2030],simde_mm256_xor_si256(c2[2234],simde_mm256_xor_si256(c2[2734],simde_mm256_xor_si256(c2[2933],simde_mm256_xor_si256(c2[2441],simde_mm256_xor_si256(c2[2640],simde_mm256_xor_si256(c2[142],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[2650],simde_mm256_xor_si256(c2[1051],simde_mm256_xor_si256(c2[1350],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[2561],simde_mm256_xor_si256(c2[2461],simde_mm256_xor_si256(c2[2660],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[1863],simde_mm256_xor_si256(c2[462],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[2670],simde_mm256_xor_si256(c2[1773],simde_mm256_xor_si256(c2[2072],simde_mm256_xor_si256(c2[1972],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[1383],simde_mm256_xor_si256(c2[1682],simde_mm256_xor_si256(c2[1582],simde_mm256_xor_si256(c2[2591],simde_mm256_xor_si256(c2[2491],simde_mm256_xor_si256(
c2[2690],simde_mm256_xor_si256(c2[1593],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[693],simde_mm256_xor_si256(c2[992],c2[892])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[75]=_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[303],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[1701],_mm256_xor_si256(c2[1102],_mm256_xor_si256(c2[2802],_mm256_xor_si256(c2[2203],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[213],_mm256_xor_si256(c2[610],_mm256_xor_si256(c2[11],_mm256_xor_si256(c2[1413],_mm256_xor_si256(c2[814],_mm256_xor_si256(c2[822],_mm256_xor_si256(c2[323],_mm256_xor_si256(c2[223],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[1723],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[333],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[431],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[531],_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[243],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[1143],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[2651],_mm256_xor_si256(c2[2052],_mm256_xor_si256(c2[862],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[2660],_mm256_xor_si256(c2[2062],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[273],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[882],_mm256_xor_si256(c2[283],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[2384],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[393],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[3193],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[2293],c2[1694]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[75]=simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[303],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[1701],simde_mm256_xor_si256(c2[1102],simde_mm256_xor_si256(c2[2802],simde_mm256_xor_si256(c2[2203],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[213],simde_mm256_xor_si256(c2[610],simde_mm256_xor_si256(c2[11],simde_mm256_xor_si256(c2[1413],simde_mm256_xor_si256(c2[814],simde_mm256_xor_si256(c2[822],simde_mm256_xor_si256(c2[323],simde_mm256_xor_si256(c2[223],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[1723],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[333],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[431],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[531],simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[243],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[1143],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[2651],simde_mm256_xor_si256(c2[2052],simde_mm256_xor_si256(c2[862],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[2660],simde_mm256_xor_si256(c2[2062],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[273],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[882],simde_mm256_xor_si256(c2[283],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[2983],simde_mm256_xor_si256(c2[2384],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[393],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[3193],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[2293],c2[1694]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[80]=_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[2302],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[2303],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[1413],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[1414],_mm256_xor_si256(c2[1211],_mm256_xor_si256(c2[1212],_mm256_xor_si256(c2[2014],_mm256_xor_si256(c2[2010],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[1423],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[2923],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[1533],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[1133],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[1554],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[1564],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[662],_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[1473],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[1474],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[1483],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[2681],_mm256_xor_si256(c2[2682],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[1593],_mm256_xor_si256(c2[1493],_mm256_xor_si256(c2[1594],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[590],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[591],_mm256_xor_si256(c2[2894],_mm256_xor_si256(c2[2890],c2[1592])))))))))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))));
+     d2[80]=simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[2302],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[2303],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[1413],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[1414],simde_mm256_xor_si256(c2[1211],simde_mm256_xor_si256(c2[1212],simde_mm256_xor_si256(c2[2014],simde_mm256_xor_si256(c2[2010],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[1423],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[2923],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[1533],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[1032],simde_mm256_xor_si256(c2[1133],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[1554],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[1564],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[662],simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[1473],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[1474],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[1483],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[2681],simde_mm256_xor_si256(c2[2682],simde_mm256_xor_
si256(c2[380],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[1593],simde_mm256_xor_si256(c2[1493],simde_mm256_xor_si256(c2[1594],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[590],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[591],simde_mm256_xor_si256(c2[2894],simde_mm256_xor_si256(c2[2890],c2[1592])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[85]=_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[2604],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[501],_mm256_xor_si256(c2[1313],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[1710],_mm256_xor_si256(c2[1011],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[1814],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[1223],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[1720],_mm256_xor_si256(c2[2723],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[1233],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[1531],_mm256_xor_si256(c2[2033],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[1740],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[2640],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[1750],_mm256_xor_si256(c2[3052],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[1263],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[461],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[963],_mm256_xor_si256(c2[2463],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[1273],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[570],_mm256_xor_si256(c2[1072],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[1780],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[682],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[1293],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[1790],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[2694],c2[3191])))))))))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))));
+     d2[85]=simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[2102],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[2604],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[501],simde_mm256_xor_si256(c2[1313],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[1710],simde_mm256_xor_si256(c2[1011],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[1814],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[1223],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[1720],simde_mm256_xor_si256(c2[2723],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[1233],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[1531],simde_mm256_xor_si256(c2[2033],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[1740],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[2640],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[1750],simde_mm256_xor_si256(c2[3052],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[1263],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[461],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[963],simde_mm256_xor_si256(c2[2463],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[1273],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[570],simde_mm256_xor_si256(c2[1072],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[1780],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_
si256(c2[2983],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[682],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[1293],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[1790],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[2694],c2[3191])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[90]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1860],c2[2272]));
+     d2[90]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1860],c2[2272]));
 
 //row: 19
-     d2[95]=_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2514],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[3110],_mm256_xor_si256(c2[3014],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[2133],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[2544],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[2554],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[2574],_mm256_xor_si256(c2[1871],_mm256_xor_si256(c2[2584],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[1691],c2[791]))))))))))))))))))))))))))));
+     d2[95]=simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2514],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[3110],simde_mm256_xor_si256(c2[3014],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[2133],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[2544],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[2554],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[2574],simde_mm256_xor_si256(c2[1871],simde_mm256_xor_si256(c2[2584],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[1691],c2[791]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[100]=_mm256_xor_si256(c2[803],_mm256_xor_si256(c2[703],_mm256_xor_si256(c2[1602],_mm256_xor_si256(c2[2703],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[713],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[1314],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[823],_mm256_xor_si256(c2[723],_mm256_xor_si256(c2[2223],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[1031],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[1643],_mm256_xor_si256(c2[1341],_mm256_xor_si256(c2[2044],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[2552],_mm256_xor_si256(c2[863],_mm256_xor_si256(c2[763],_mm256_xor_si256(c2[3160],_mm256_xor_si256(c2[1963],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[783],_mm256_xor_si256(c2[1981],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[893],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[3094],c2[2194]))))))))))))))))))))))))))))))))));
+     d2[100]=simde_mm256_xor_si256(c2[803],simde_mm256_xor_si256(c2[703],simde_mm256_xor_si256(c2[1602],simde_mm256_xor_si256(c2[2703],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[713],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[1314],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[823],simde_mm256_xor_si256(c2[723],simde_mm256_xor_si256(c2[2223],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[1031],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[1643],simde_mm256_xor_si256(c2[1341],simde_mm256_xor_si256(c2[2044],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[2552],simde_mm256_xor_si256(c2[863],simde_mm256_xor_si256(c2[763],simde_mm256_xor_si256(c2[3160],simde_mm256_xor_si256(c2[1963],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[783],simde_mm256_xor_si256(c2[1981],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[893],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[3094],c2[2194]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[105]=_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[2001],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[11],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[31],_mm256_xor_si256(c2[2834],_mm256_xor_si256(c2[334],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[941],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[51],_mm256_xor_si256(c2[1950],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2463],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[71],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[2572],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[2182],_mm256_xor_si256(c2[1780],_mm256_xor_si256(c2[91],_mm256_xor_si256(c2[2392],_mm256_xor_si256(c2[1592],c2[1492]))))))))))))))))))))))))))))))))))));
+     d2[105]=simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[2001],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[11],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[31],simde_mm256_xor_si256(c2[2834],simde_mm256_xor_si256(c2[334],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[941],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[51],simde_mm256_xor_si256(c2[1950],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2463],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[71],simde_mm256_xor_si256(c2[2672],simde_mm256_xor_si256(c2[2572],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[2182],simde_mm256_xor_si256(c2[1780],simde_mm256_xor_si256(c2[91],simde_mm256_xor_si256(c2[2392],simde_mm256_xor_si256(c2[1592],c2[1492]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[110]=_mm256_xor_si256(c2[10],c2[420]);
+     d2[110]=simde_mm256_xor_si256(c2[10],c2[420]);
 
 //row: 23
-     d2[115]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1132],c2[3054]));
+     d2[115]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1132],c2[3054]));
 
 //row: 24
-     d2[120]=_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[522],c2[2392]));
+     d2[120]=simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[522],c2[2392]));
 
 //row: 25
-     d2[125]=_mm256_xor_si256(c2[0],c2[152]);
+     d2[125]=simde_mm256_xor_si256(c2[0],c2[152]);
 
 //row: 26
-     d2[130]=_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[2204],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[1310],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[610],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[413],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[1311],_mm256_xor_si256(c2[1211],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[2220],_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[934],_mm256_xor_si256(c2[834],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[1533],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[1340],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[1540],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[1343],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[1350],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[2554],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[2460],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[1370],_mm256_xor_si256(c2[1270],_mm256_xor_si256(c2[670],_mm256_xor_si256(c2[572],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[1380],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[680],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[1883],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[2881],_mm256_xor_si256(c2[2781],_mm256_xor_si256(c2[1390],_mm256_xor_si256(c2[1290],_mm256_xor_si256(c2[690],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[2991],_mm256_xor_si256(c2[2691],_mm256_xor_si256(c2[2191],c2[2091])))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[130]=simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[2204],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[1310],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[610],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[413],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[1311],simde_mm256_xor_si256(c2[1211],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[2220],simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[934],simde_mm256_xor_si256(c2[834],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[1533],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[1340],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[1540],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[1343],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[1350],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[2554],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[1360],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[2460],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[1370],simde_mm256_xor_si256(c2[1270],simde_mm256_xor_si256(c2[670],simde_mm256_xor_si256(c2[572],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[1380],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[680],simde_mm256_xor_si256(c2[24
83],simde_mm256_xor_si256(c2[1883],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[2881],simde_mm256_xor_si256(c2[2781],simde_mm256_xor_si256(c2[1390],simde_mm256_xor_si256(c2[1290],simde_mm256_xor_si256(c2[690],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[2991],simde_mm256_xor_si256(c2[2691],simde_mm256_xor_si256(c2[2191],c2[2091])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[135]=_mm256_xor_si256(c2[0],c2[61]);
+     d2[135]=simde_mm256_xor_si256(c2[0],c2[61]);
 
 //row: 28
-     d2[140]=_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[3023],c2[1453]));
+     d2[140]=simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[3023],c2[1453]));
 
 //row: 29
-     d2[145]=_mm256_xor_si256(c2[0],c2[2644]);
+     d2[145]=simde_mm256_xor_si256(c2[0],c2[2644]);
 
 //row: 30
-     d2[150]=_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[351],_mm256_xor_si256(c2[1971],c2[694])));
+     d2[150]=simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[351],simde_mm256_xor_si256(c2[1971],c2[694])));
 
 //row: 31
-     d2[155]=_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[2611],_mm256_xor_si256(c2[2511],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[1930],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[2233],_mm256_xor_si256(c2[1940],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[2543],_mm256_xor_si256(c2[1950],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[3160],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[1372],_mm256_xor_si256(c2[1272],_mm256_xor_si256(c2[1980],_mm256_xor_si256(c2[3183],_mm256_xor_si256(c2[982],_mm256_xor_si256(c2[882],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[292],c2[192])))))))))))))))))))))))))))))))))));
+     d2[155]=simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[2611],simde_mm256_xor_si256(c2[2511],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[321],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[1930],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[2233],simde_mm256_xor_si256(c2[1940],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[2543],simde_mm256_xor_si256(c2[1950],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[3160],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[1372],simde_mm256_xor_si256(c2[1272],simde_mm256_xor_si256(c2[1980],simde_mm256_xor_si256(c2[3183],simde_mm256_xor_si256(c2[982],simde_mm256_xor_si256(c2[882],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[292],c2[192])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[160]=_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[2503],_mm256_xor_si256(c2[303],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2613],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[2633],_mm256_xor_si256(c2[2533],_mm256_xor_si256(c2[2232],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[2543],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[2653],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[1153],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[2563],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[2673],_mm256_xor_si256(c2[2573],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[2683],_mm256_xor_si256(c2[2583],_mm256_xor_si256(c2[582],_mm256_xor_si256(c2[1480],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[2593],_mm256_xor_si256(c2[1790],_mm256_xor_si256(c2[1690],c2[790]))))))))))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[2503],simde_mm256_xor_si256(c2[303],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2613],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[2633],simde_mm256_xor_si256(c2[2533],simde_mm256_xor_si256(c2[2232],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[2543],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[2653],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[1153],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[2563],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[2673],simde_mm256_xor_si256(c2[2573],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[2683],simde_mm256_xor_si256(c2[2583],simde_mm256_xor_si256(c2[582],simde_mm256_xor_si256(c2[1480],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[2593],simde_mm256_xor_si256(c2[1790],simde_mm256_xor_si256(c2[1690],c2[790]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[165]=_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[834],_mm256_xor_si256(c2[1533],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[2460],_mm256_xor_si256(c2[1270],_mm256_xor_si256(c2[572],_mm256_xor_si256(c2[2472],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[1290],_mm256_xor_si256(c2[392],c2[2691]))))))))))))))))))))))))))));
+     d2[165]=simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[834],simde_mm256_xor_si256(c2[1533],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[2460],simde_mm256_xor_si256(c2[1270],simde_mm256_xor_si256(c2[572],simde_mm256_xor_si256(c2[2472],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[1290],simde_mm256_xor_si256(c2[392],c2[2691]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[170]=_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[1312],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[1110],_mm256_xor_si256(c2[2411],_mm256_xor_si256(c2[2013],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[2922],_mm256_xor_si256(c2[2822],_mm256_xor_si256(c2[1930],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[931],_mm256_xor_si256(c2[2133],_mm256_xor_si256(c2[1630],_mm256_xor_si256(c2[1940],_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[1342],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[2443],_mm256_xor_si256(c2[2040],_mm256_xor_si256(c2[1940],_mm256_xor_si256(c2[1950],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[1352],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[3060],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[1372],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[1980],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[1382],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[494],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[2893],c2[2793]))))))))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[170]=simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[1312],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[1110],simde_mm256_xor_si256(c2[2411],simde_mm256_xor_si256(c2[2013],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[2922],simde_mm256_xor_si256(c2[2822],simde_mm256_xor_si256(c2[1930],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[931],simde_mm256_xor_si256(c2[2133],simde_mm256_xor_si256(c2[1630],simde_mm256_xor_si256(c2[1940],simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[1342],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[2443],simde_mm256_xor_si256(c2[2040],simde_mm256_xor_si256(c2[1940],simde_mm256_xor_si256(c2[1950],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[1352],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[3060],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[1372],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[1980],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[1382],simde_mm256_xor_si256(c2[3083],simde_mm256_xor
_si256(c2[2580],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[494],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[2893],c2[2793]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[175]=_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[610],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[1111],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[520],_mm256_xor_si256(c2[2020],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1143],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[2354],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[2962],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[570],_mm256_xor_si256(c2[3071],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[2681],_mm256_xor_si256(c2[690],_mm256_xor_si256(c2[590],_mm256_xor_si256(c2[2891],c2[1991]))))))))))))))))))))))))))))))))));
+     d2[175]=simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[610],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[1111],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[520],simde_mm256_xor_si256(c2[2020],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1143],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[2354],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[2962],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[570],simde_mm256_xor_si256(c2[3071],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[2681],simde_mm256_xor_si256(c2[690],simde_mm256_xor_si256(c2[590],simde_mm256_xor_si256(c2[2891],c2[1991]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[180]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3121],c2[1872]));
+     d2[180]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3121],c2[1872]));
 
 //row: 37
-     d2[185]=_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[3114],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[1312],_mm256_xor_si256(c2[511],_mm256_xor_si256(c2[2210],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1420],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[1133],_mm256_xor_si256(c2[233],_mm256_xor_si256(c2[1832],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[2444],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[2142],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[1554],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[1564],_mm256_xor_si256(c2[2362],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[871],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2782],_mm256_xor_si256(c2[2081],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[1594],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[3090],c2[2990])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[185]=simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[3114],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[1312],simde_mm256_xor_si256(c2[511],simde_mm256_xor_si256(c2[2210],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1420],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[1133],simde_mm256_xor_si256(c2[233],simde_mm256_xor_si256(c2[1832],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[2444],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[2142],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[1554],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[1564],simde_mm256_xor_si256(c2[2362],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[871],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2782],simde_mm256_xor_si256(c2[2081],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[1594],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[3090],c2[2990])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[190]=_mm256_xor_si256(c2[1900],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[2411],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[1930],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[2133],_mm256_xor_si256(c2[1840],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[2443],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[2952],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[1860],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[3060],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[992],c2[92]))))))))))))))))))))))))))))))))));
+     d2[190]=simde_mm256_xor_si256(c2[1900],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[2411],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[1930],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[2133],simde_mm256_xor_si256(c2[1840],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[2443],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[2952],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[1860],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[3060],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[992],c2[92]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[195]=_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[1510],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[2424],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[934],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[1044],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[1844],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[1054],_mm256_xor_si256(c2[954],_mm256_xor_si256(c2[2753],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[964],_mm256_xor_si256(c2[262],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[2164],_mm256_xor_si256(c2[1074],_mm256_xor_si256(c2[974],_mm256_xor_si256(c2[271],_mm256_xor_si256(c2[2470],_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[2182],_mm256_xor_si256(c2[3080],_mm256_xor_si256(c2[1094],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[191],_mm256_xor_si256(c2[91],c2[2390]))))))))))))))))))))))))))))))))))))))))));
+     d2[195]=simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[1903],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[1510],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[2424],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[934],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[1044],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[1844],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[1054],simde_mm256_xor_si256(c2[954],simde_mm256_xor_si256(c2[2753],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[964],simde_mm256_xor_si256(c2[262],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[2164],simde_mm256_xor_si256(c2[1074],simde_mm256_xor_si256(c2[974],simde_mm256_xor_si256(c2[271],simde_mm256_xor_si256(c2[2470],simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[2182],simde_mm256_xor_si256(c2[3080],simde_mm256_xor_si256(c2[1094],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[191],simde_mm256_xor_si256(c2[91],c2[2390]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[200]=_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[2303],_mm256_xor_si256(c2[2301],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[311],_mm256_xor_si256(c2[1414],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[1212],_mm256_xor_si256(c2[912],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[2010],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[1424],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[331],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[634],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[1241],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[2142],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[351],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[2150],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[2763],_mm256_xor_si256(c2[662],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[371],_mm256_xor_si256(c2[1474],_mm256_xor_si256(c2[2872],_mm256_xor_si256(c2[871],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[2682],_mm256_xor_si256(c2[2482],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[391],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[591],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[2990],c2[2890]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[200]=simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[2303],simde_mm256_xor_si256(c2[2301],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[311],simde_mm256_xor_si256(c2[1414],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[1212],simde_mm256_xor_si256(c2[912],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[2010],simde_mm256_xor_si256(c2[321],simde_mm256_xor_si256(c2[1424],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[331],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[634],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[1241],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[2142],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[351],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[2150],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[2763],simde_mm256_xor_si256(c2[662],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[371],simde_mm256_xor_si256(c2[1474],simde_mm256_xor_si256(c2[2872],simde_mm256_xor_si256(c2[871],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[2682],simde_mm256_xor_si256(c2[2482],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[391],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[591],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[2990],c2[2890]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[205]=_mm256_xor_si256(c2[2301],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[1002],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[2211],_mm256_xor_si256(c2[2014],_mm256_xor_si256(c2[2812],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[2321],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[522],_mm256_xor_si256(c2[2331],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[2241],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[851],_mm256_xor_si256(c2[250],_mm256_xor_si256(c2[2361],_mm256_xor_si256(c2[2261],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[262],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[2281],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[1393],c2[493]))))))))))))))))))))))))))))))))));
+     d2[205]=simde_mm256_xor_si256(c2[2301],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[1002],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[2211],simde_mm256_xor_si256(c2[2014],simde_mm256_xor_si256(c2[2812],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[2321],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[522],simde_mm256_xor_si256(c2[2331],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[2241],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[2251],simde_mm256_xor_si256(c2[851],simde_mm256_xor_si256(c2[250],simde_mm256_xor_si256(c2[2361],simde_mm256_xor_si256(c2[2261],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[262],simde_mm256_xor_si256(c2[2271],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[2281],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[1393],c2[493]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc192_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc192_byte.c
index 9cb2c47727b4982a1a59922facc8dff4b31844da..bc90a35a860479a277960fc9ec1bd3e06959220a 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc192_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc192_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc192_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[15],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[794],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[2607],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[15],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[794],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[2607],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))));
 
 //row: 1
-     d2[6]=_mm256_xor_si256(c2[1685],_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[15],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[794],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[2607],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))))))))));
+     d2[6]=simde_mm256_xor_si256(c2[1685],simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[15],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[794],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[2607],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[12]=_mm256_xor_si256(c2[1685],_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[15],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[1969],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[794],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[3329],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[2607],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[1685],simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[15],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[1969],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[794],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[3329],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[2607],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[348],c2[3113]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[18]=_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[15],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[1022],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[794],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[3195],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[2727],_mm256_xor_si256(c2[2607],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[3456],_mm256_xor_si256(c2[3336],_mm256_xor_si256(c2[1433],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[3233],c2[3113]))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[15],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[1022],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[794],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[3195],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[2727],simde_mm256_xor_si256(c2[2607],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[3456],simde_mm256_xor_si256(c2[3336],simde_mm256_xor_si256(c2[1433],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[3233],c2[3113]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[24]=_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[2761],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[3615],_mm256_xor_si256(c2[3495],_mm256_xor_si256(c2[1696],_mm256_xor_si256(c2[497],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[3758],_mm256_xor_si256(c2[3638],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1252],_mm256_xor_si256(c2[1612],_mm256_xor_si256(c2[2943],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[555],_mm256_xor_si256(c2[435],_mm256_xor_si256(c2[3313],_mm256_xor_si256(c2[2836],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[2248],_mm256_xor_si256(c2[2616],_mm256_xor_si256(c2[1780],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[1188],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[3828],c2[2748]))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[2761],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[3615],simde_mm256_xor_si256(c2[3495],simde_mm256_xor_si256(c2[1696],simde_mm256_xor_si256(c2[497],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[3758],simde_mm256_xor_si256(c2[3638],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1252],simde_mm256_xor_si256(c2[1612],simde_mm256_xor_si256(c2[2943],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[555],simde_mm256_xor_si256(c2[435],simde_mm256_xor_si256(c2[3313],simde_mm256_xor_si256(c2[2836],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[2248],simde_mm256_xor_si256(c2[2616],simde_mm256_xor_si256(c2[1780],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[1188],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[3828],c2[2748]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[30]=_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[3480],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[2056],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[137],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[2544],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[2199],_mm256_xor_si256(c2[2079],_mm256_xor_si256(c2[2318],_mm256_xor_si256(c2[521],_mm256_xor_si256(c2[3770],_mm256_xor_si256(c2[3532],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[1384],_mm256_xor_si256(c2[2823],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[2835],_mm256_xor_si256(c2[2715],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[1277],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[2006],_mm256_xor_si256(c2[1057],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[3468],_mm256_xor_si256(c2[3348],_mm256_xor_si256(c2[2269],c2[1189]))))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[3480],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[2056],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[137],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[2544],simde_mm256_xor_si256(c2[3624],simde_mm256_xor_si256(c2[2199],simde_mm256_xor_si256(c2[2079],simde_mm256_xor_si256(c2[2318],simde_mm256_xor_si256(c2[521],simde_mm256_xor_si256(c2[3770],simde_mm256_xor_si256(c2[3532],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[1384],simde_mm256_xor_si256(c2[2823],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[2835],simde_mm256_xor_si256(c2[2715],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[1277],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[2006],simde_mm256_xor_si256(c2[1057],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[3468],simde_mm256_xor_si256(c2[3348],simde_mm256_xor_si256(c2[2269],c2[1189]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[36]=_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[3014],_mm256_xor_si256(c2[2894],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[3735],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[3508],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[3157],_mm256_xor_si256(c2[3037],_mm256_xor_si256(c2[3276],_mm256_xor_si256(c2[1479],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[651],_mm256_xor_si256(c2[1011],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[3781],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[3673],_mm256_xor_si256(c2[2712],_mm256_xor_si256(c2[2235],_mm256_xor_si256(c2[2249],_mm256_xor_si256(c2[1647],_mm256_xor_si256(c2[567],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[1179],_mm256_xor_si256(c2[2376],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[3233],_mm256_xor_si256(c2[2153],c2[2149]))))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[3014],simde_mm256_xor_si256(c2[2894],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[3735],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[3508],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[3157],simde_mm256_xor_si256(c2[3037],simde_mm256_xor_si256(c2[3276],simde_mm256_xor_si256(c2[1479],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[651],simde_mm256_xor_si256(c2[1011],simde_mm256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[3781],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[3673],simde_mm256_xor_si256(c2[2712],simde_mm256_xor_si256(c2[2235],simde_mm256_xor_si256(c2[2249],simde_mm256_xor_si256(c2[1647],simde_mm256_xor_si256(c2[567],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[1179],simde_mm256_xor_si256(c2[2376],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[3233],simde_mm256_xor_si256(c2[2153],c2[2149]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[42]=_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[3363],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[1813],_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[2654],_mm256_xor_si256(c2[1934],_mm256_xor_si256(c2[1466],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[2427],_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[3627],_mm256_xor_si256(c2[3507],_mm256_xor_si256(c2[1001],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[1956],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[3156],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[2572],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[2328],_mm256_xor_si256(c2[3409],_mm256_xor_si256(c2[2688],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[3769],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[1625],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[2700],_mm256_xor_si256(c2[3185],_mm256_xor_si256(c2[1637],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[2592],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[1637],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[1274],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[3324],_mm256_xor_si256(c2[686],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[2856],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[1421],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[2270],_mm256_xor_si256(c2[2150],_mm256_xor_si256(c2[3231],_mm256_xor_si256(c2[1071],_mm256_xor_si256(c2[2152],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[1192],c2[1072]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[42]=simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[3363],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[1813],simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[2654],simde_mm256_xor_si256(c2[1934],simde_mm256_xor_si256(c2[1466],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[2427],simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[3627],simde_mm256_xor_si256(c2[3507],simde_mm256_xor_si256(c2[1001],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[1956],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[3156],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[2572],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[2328],simde_mm256_xor_si256(c2[3409],simde_mm256_xor_si256(c2[2688],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[3769],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[1625],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[2700],simde_mm256_xor_si256(c2[3185],simde_mm256_xor_si256(c2[1637],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[2592],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[1637],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[1274],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[3324],simde_mm256_xor_si256(c2[686],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[2856],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[1421],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[2270],simde_mm256_xor_si256(c2[21
50],simde_mm256_xor_si256(c2[3231],simde_mm256_xor_si256(c2[1071],simde_mm256_xor_si256(c2[2152],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[1192],c2[1072]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[48]=_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[2285],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[1925],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[2893],_mm256_xor_si256(c2[1094],_mm256_xor_si256(c2[3734],_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[3627],_mm256_xor_si256(c2[3507],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[3156],_mm256_xor_si256(c2[3036],_mm256_xor_si256(c2[3401],_mm256_xor_si256(c2[3281],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[2461],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[3672],_mm256_xor_si256(c2[2837],_mm256_xor_si256(c2[2717],_mm256_xor_si256(c2[2234],_mm256_xor_si256(c2[2368],_mm256_xor_si256(c2[2248],_mm256_xor_si256(c2[1646],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[2020],_mm256_xor_si256(c2[1178],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[472],_mm256_xor_si256(c2[3352],_mm256_xor_si256(c2[3232],c2[2152]))))))))))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[2285],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[1925],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[2893],simde_mm256_xor_si256(c2[1094],simde_mm256_xor_si256(c2[3734],simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[3627],simde_mm256_xor_si256(c2[3507],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[3156],simde_mm256_xor_si256(c2[3036],simde_mm256_xor_si256(c2[3401],simde_mm256_xor_si256(c2[3281],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[2461],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[3672],simde_mm256_xor_si256(c2[2837],simde_mm256_xor_si256(c2[2717],simde_mm256_xor_si256(c2[2234],simde_mm256_xor_si256(c2[2368],simde_mm256_xor_si256(c2[2248],simde_mm256_xor_si256(c2[1646],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[2020],simde_mm256_xor_si256(c2[1178],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[472],simde_mm256_xor_si256(c2[3352],simde_mm256_xor_si256(c2[3232],c2[2152]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[54]=_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[2280],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[2040],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[3014],_mm256_xor_si256(c2[2294],_mm256_xor_si256(c2[2174],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[375],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[3015],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[2908],_mm256_xor_si256(c2[2788],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[3157],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[2317],_mm256_xor_si256(c2[3396],_mm256_xor_si256(c2[2556],_mm256_xor_si256(c2[1599],_mm256_xor_si256(c2[759],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[169],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[3770],_mm256_xor_si256(c2[1131],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[2462],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[3061],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[3073],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[1992],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[927],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[1299],_mm256_xor_si256(c2[459],_mm256_xor_si256(c2[2496],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[3592],_mm256_xor_si256(c2[3353],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2273],c2[1433])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[54]=simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[2280],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[2040],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[3014],simde_mm256_xor_si256(c2[2294],simde_mm256_xor_si256(c2[2174],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[375],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[3015],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[2908],simde_mm256_xor_si256(c2[2788],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[3157],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[2317],simde_mm256_xor_si256(c2[3396],simde_mm256_xor_si256(c2[2556],simde_mm256_xor_si256(c2[1599],simde_mm256_xor_si256(c2[759],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[169],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[3770],simde_mm256_xor_si256(c2[1131],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[2462],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[3061],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[3073],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[1992],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[927],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[1299],simde_mm256_xor_si256(c2[459],simde_mm256_xor_si256(c2[2496],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[3592],simde_mm256_xor_si256(c2[3353],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2273],c2[1433])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[60]=_mm256_xor_si256(c2[3241],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[2712],c2[2485])));
+     d2[60]=simde_mm256_xor_si256(c2[3241],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[2712],c2[2485])));
 
 //row: 11
-     d2[66]=_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[2405],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[3133],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[255],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[1108],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[3276],_mm256_xor_si256(c2[3521],_mm256_xor_si256(c2[1718],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[890],_mm256_xor_si256(c2[1370],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[2581],_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[2474],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[2006],_mm256_xor_si256(c2[1886],_mm256_xor_si256(c2[3446],_mm256_xor_si256(c2[2260],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[2621],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[2392],c2[1793])))))))))))))))))))))))))))))))))))));
+     d2[66]=simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[2405],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[3133],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[255],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[1108],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[3276],simde_mm256_xor_si256(c2[3521],simde_mm256_xor_si256(c2[1718],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[890],simde_mm256_xor_si256(c2[1370],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[2581],simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[2474],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[2006],simde_mm256_xor_si256(c2[1886],simde_mm256_xor_si256(c2[3446],simde_mm256_xor_si256(c2[2260],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[2621],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[2392],c2[1793])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[72]=_mm256_xor_si256(c2[3005],_mm256_xor_si256(c2[2885],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[1455],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[3375],_mm256_xor_si256(c2[2176],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[2069],_mm256_xor_si256(c2[1949],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1717],_mm256_xor_si256(c2[3759],_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[3169],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[783],_mm256_xor_si256(c2[2222],_mm256_xor_si256(c2[2234],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[1153],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[3459],_mm256_xor_si256(c2[817],_mm256_xor_si256(c2[2873],_mm256_xor_si256(c2[2753],_mm256_xor_si256(c2[1668],c2[588]))))))))))))))))))))))))))))))))));
+     d2[72]=simde_mm256_xor_si256(c2[3005],simde_mm256_xor_si256(c2[2885],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[1455],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[3375],simde_mm256_xor_si256(c2[2176],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[2069],simde_mm256_xor_si256(c2[1949],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1717],simde_mm256_xor_si256(c2[3759],simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[3169],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[783],simde_mm256_xor_si256(c2[2222],simde_mm256_xor_si256(c2[2234],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[1153],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[3459],simde_mm256_xor_si256(c2[817],simde_mm256_xor_si256(c2[2873],simde_mm256_xor_si256(c2[2753],simde_mm256_xor_si256(c2[1668],c2[588]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[78]=_mm256_xor_si256(c2[2284],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3605],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[1695],_mm256_xor_si256(c2[1575],_mm256_xor_si256(c2[3495],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[2548],_mm256_xor_si256(c2[2428],_mm256_xor_si256(c2[877],_mm256_xor_si256(c2[1116],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[2568],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[2810],_mm256_xor_si256(c2[2690],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[1741],_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[552],_mm256_xor_si256(c2[195],_mm256_xor_si256(c2[75],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[3446],_mm256_xor_si256(c2[3326],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[2858],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[2152],_mm256_xor_si256(c2[1073],_mm256_xor_si256(c2[113],c2[3832])))))))))))))))))))))))))))))))))))));
+     d2[78]=simde_mm256_xor_si256(c2[2284],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3605],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[1695],simde_mm256_xor_si256(c2[1575],simde_mm256_xor_si256(c2[3495],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[2548],simde_mm256_xor_si256(c2[2428],simde_mm256_xor_si256(c2[877],simde_mm256_xor_si256(c2[1116],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[2568],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[2810],simde_mm256_xor_si256(c2[2690],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[1741],simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[552],simde_mm256_xor_si256(c2[195],simde_mm256_xor_si256(c2[75],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[3446],simde_mm256_xor_si256(c2[3326],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[2858],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[2152],simde_mm256_xor_si256(c2[1073],simde_mm256_xor_si256(c2[113],c2[3832])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[84]=_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[3485],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[3245],_mm256_xor_si256(c2[2655],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[2292],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3149],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[2188],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[2917],_mm256_xor_si256(c2[756],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[2208],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[3661],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[3314],_mm256_xor_si256(c2[1153],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[1876],_mm256_xor_si256(c2[3674],_mm256_xor_si256(c2[3554],_mm256_xor_si256(c2[2715],_mm256_xor_si256(c2[1884],_mm256_xor_si256(c2[3568],_mm256_xor_si256(c2[1288],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[3340],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[2868],_mm256_xor_si256(c2[713],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[3592],c2[3472])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[84]=simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[3485],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[3245],simde_mm256_xor_si256(c2[2655],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[2292],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[3149],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[2188],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[2917],simde_mm256_xor_si256(c2[756],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[2208],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[3661],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[3314],simde_mm256_xor_si256(c2[1153],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[1876],simde_mm256_xor_si256(c2[3674],simde_mm256_xor_si256(c2[3554],simde_mm256_xor_si256(c2[2715],simde_mm256_xor_si256(c2[1884],simde_mm256_xor_si256(c2[3568],simde_mm256_xor_si256(c2[1288],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[3340],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si
256(c2[1792],simde_mm256_xor_si256(c2[2868],simde_mm256_xor_si256(c2[713],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[3592],c2[3472])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[90]=_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[3721],_mm256_xor_si256(c2[3601],_mm256_xor_si256(c2[3002],_mm256_xor_si256(c2[1323],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[482],_mm256_xor_si256(c2[3736],_mm256_xor_si256(c2[2177],_mm256_xor_si256(c2[2057],_mm256_xor_si256(c2[1937],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[2892],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[2785],_mm256_xor_si256(c2[2665],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[2200],_mm256_xor_si256(c2[279],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[2321],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[1493],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[1505],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[2956],_mm256_xor_si256(c2[2836],_mm256_xor_si256(c2[3554],_mm256_xor_si256(c2[1875],_mm256_xor_si256(c2[3077],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[1406],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[2857],_mm256_xor_si256(c2[1178],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[3218],_mm256_xor_si256(c2[1539],_mm256_xor_si256(c2[1309],_mm256_xor_si256(c2[3589],_mm256_xor_si256(c2[3469],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[2390],_mm256_xor_si256(c2[2989],c2[1310]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[90]=simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[3721],simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[3002],simde_mm256_xor_si256(c2[1323],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[482],simde_mm256_xor_si256(c2[3736],simde_mm256_xor_si256(c2[2177],simde_mm256_xor_si256(c2[2057],simde_mm256_xor_si256(c2[1937],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[2892],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[2785],simde_mm256_xor_si256(c2[2665],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[2200],simde_mm256_xor_si256(c2[279],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[2321],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[1493],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[1505],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[2956],simde_mm256_xor_si256(c2[2836],simde_mm256_xor_si256(c2[3554],simde_mm256_xor_si256(c2[1875],simde_mm256_xor_si256(c2[3077],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[1406],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[2857],simde_mm256_xor_si256(c2[1178],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[3218],simde_mm256_xor_si256(c2[1539],simde_mm256_xor_si256(c2[1309],simde_mm256_xor_si256(c2[3589],simde_mm256_xor_si256(c2[3469],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[2390],simde_mm256_xor_si256(c2[2989],c2[1310]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[96]=_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[1681],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[125],_mm256_xor_si256(c2[3242],_mm256_xor_si256(c2[3122],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[2882],_mm256_xor_si256(c2[973],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[137],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[2893],_mm256_xor_si256(c2[2057],_mm256_xor_si256(c2[1694],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[1587],_mm256_xor_si256(c2[1467],_mm256_xor_si256(c2[745],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[2547],_mm256_xor_si256(c2[1705],_mm256_xor_si256(c2[1116],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[1241],_mm256_xor_si256(c2[519],_mm256_xor_si256(c2[399],_mm256_xor_si256(c2[3277],_mm256_xor_si256(c2[2441],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[1971],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[2449],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[2809],_mm256_xor_si256(c2[1973],_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[3304],_mm256_xor_si256(c2[1740],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[1632],_mm256_xor_si256(c2[916],_mm256_xor_si256(c2[796],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[3674],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[3325],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[3445],_mm256_xor_si256(c2[2609],_mm256_xor_si256(c2[3819],_mm256_xor_si256(c2[3097],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[3338],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[1549],_mm256_xor_si256(c2[1429],_mm256_xor_si256(c2[1192],_mm256_xor_si256(c2[470],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[3109],c2[2153]))))))))))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[1681],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[125],simde_mm256_xor_si256(c2[3242],simde_mm256_xor_si256(c2[3122],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[2882],simde_mm256_xor_si256(c2[973],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[137],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[2893],simde_mm256_xor_si256(c2[2057],simde_mm256_xor_si256(c2[1694],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[1587],simde_mm256_xor_si256(c2[1467],simde_mm256_xor_si256(c2[745],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[2547],simde_mm256_xor_si256(c2[1705],simde_mm256_xor_si256(c2[1116],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[1241],simde_mm256_xor_si256(c2[519],simde_mm256_xor_si256(c2[399],simde_mm256_xor_si256(c2[3277],simde_mm256_xor_si256(c2[2441],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[1971],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[2449],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[2809],simde_mm256_xor_si256(c2[1973],simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[3304],simde_mm256_xor_si256(c2[1740],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[1632],simde_mm256_xor_si256(c2[916],simde_mm256_xor_si256(c2[796],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[3674],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[3325],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[3445],simde_mm256_xor_si256(c2[2609],simde_mm256_xor_si256(c2[3819],simde_mm256_xor_si256(c2[3097],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2
[341],simde_mm256_xor_si256(c2[3338],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[2271],simde_mm256_xor_si256(c2[1549],simde_mm256_xor_si256(c2[1429],simde_mm256_xor_si256(c2[1192],simde_mm256_xor_si256(c2[470],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[3109],c2[2153])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[102]=_mm256_xor_si256(c2[2765],_mm256_xor_si256(c2[2645],_mm256_xor_si256(c2[2405],_mm256_xor_si256(c2[2285],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[855],_mm256_xor_si256(c2[735],_mm256_xor_si256(c2[3135],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[1576],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[1469],_mm256_xor_si256(c2[1349],_mm256_xor_si256(c2[2789],_mm256_xor_si256(c2[2429],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[1238],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[878],_mm256_xor_si256(c2[1477],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[1117],_mm256_xor_si256(c2[3519],_mm256_xor_si256(c2[3159],_mm256_xor_si256(c2[2929],_mm256_xor_si256(c2[2689],_mm256_xor_si256(c2[2569],_mm256_xor_si256(c2[2691],_mm256_xor_si256(c2[2331],_mm256_xor_si256(c2[3051],_mm256_xor_si256(c2[2691],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[303],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[1982],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[1874],_mm256_xor_si256(c2[1634],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[3687],_mm256_xor_si256(c2[3327],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[2859],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[217],_mm256_xor_si256(c2[2633],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[2153],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[1188],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[348],c2[3833])))))))))))))))))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))));
+     d2[102]=simde_mm256_xor_si256(c2[2765],simde_mm256_xor_si256(c2[2645],simde_mm256_xor_si256(c2[2405],simde_mm256_xor_si256(c2[2285],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[855],simde_mm256_xor_si256(c2[735],simde_mm256_xor_si256(c2[3135],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[1576],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[1469],simde_mm256_xor_si256(c2[1349],simde_mm256_xor_si256(c2[2789],simde_mm256_xor_si256(c2[2429],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[1238],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[878],simde_mm256_xor_si256(c2[1477],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[1117],simde_mm256_xor_si256(c2[3519],simde_mm256_xor_si256(c2[3159],simde_mm256_xor_si256(c2[2929],simde_mm256_xor_si256(c2[2689],simde_mm256_xor_si256(c2[2569],simde_mm256_xor_si256(c2[2691],simde_mm256_xor_si256(c2[2331],simde_mm256_xor_si256(c2[3051],simde_mm256_xor_si256(c2[2691],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[303],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[1982],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[1874],simde_mm256_xor_si256(c2[1634],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[3687],simde_mm256_xor_si256(c2[3327],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[2
859],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[217],simde_mm256_xor_si256(c2[2633],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[2153],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si256(c2[1188],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[348],c2[3833])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[108]=_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[792],c2[1765]));
+     d2[108]=simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[792],c2[1765]));
 
 //row: 19
-     d2[114]=_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[2645],_mm256_xor_si256(c2[2405],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[3373],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[375],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[1228],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[3761],_mm256_xor_si256(c2[1958],_mm256_xor_si256(c2[1368],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[2821],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[2714],_mm256_xor_si256(c2[2728],_mm256_xor_si256(c2[2126],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[952],_mm256_xor_si256(c2[3712],c2[2632]))))))))))))))))))))))))))));
+     d2[114]=simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[2645],simde_mm256_xor_si256(c2[2405],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[3373],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[375],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[1228],simde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[3761],simde_mm256_xor_si256(c2[1958],simde_mm256_xor_si256(c2[1368],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[2821],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[2714],simde_mm256_xor_si256(c2[2728],simde_mm256_xor_si256(c2[2126],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[952],simde_mm256_xor_si256(c2[3712],c2[2632]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[120]=_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[2285],_mm256_xor_si256(c2[2045],_mm256_xor_si256(c2[3133],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[15],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[3627],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[3276],_mm256_xor_si256(c2[3156],_mm256_xor_si256(c2[3401],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[3529],_mm256_xor_si256(c2[2461],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[3792],_mm256_xor_si256(c2[2837],_mm256_xor_si256(c2[2354],_mm256_xor_si256(c2[2368],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[1298],_mm256_xor_si256(c2[2501],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[3352],c2[2272]))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[2285],simde_mm256_xor_si256(c2[2045],simde_mm256_xor_si256(c2[3133],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[15],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[3627],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[3276],simde_mm256_xor_si256(c2[3156],simde_mm256_xor_si256(c2[3401],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[3529],simde_mm256_xor_si256(c2[2461],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[3792],simde_mm256_xor_si256(c2[2837],simde_mm256_xor_si256(c2[2354],simde_mm256_xor_si256(c2[2368],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[1298],simde_mm256_xor_si256(c2[2501],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[3352],c2[2272]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[126]=_mm256_xor_si256(c2[1921],_mm256_xor_si256(c2[3482],_mm256_xor_si256(c2[3242],_mm256_xor_si256(c2[3484],_mm256_xor_si256(c2[377],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[1212],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[2185],_mm256_xor_si256(c2[2065],_mm256_xor_si256(c2[520],_mm256_xor_si256(c2[759],_mm256_xor_si256(c2[2801],_mm256_xor_si256(c2[2211],_mm256_xor_si256(c2[1973],_mm256_xor_si256(c2[2453],_mm256_xor_si256(c2[2333],_mm256_xor_si256(c2[3664],_mm256_xor_si256(c2[1384],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[1156],_mm256_xor_si256(c2[195],_mm256_xor_si256(c2[3677],_mm256_xor_si256(c2[3557],_mm256_xor_si256(c2[3565],_mm256_xor_si256(c2[3089],_mm256_xor_si256(c2[2969],_mm256_xor_si256(c2[3337],_mm256_xor_si256(c2[2501],_mm256_xor_si256(c2[3818],_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[1901],_mm256_xor_si256(c2[1789],_mm256_xor_si256(c2[710],_mm256_xor_si256(c2[3589],c2[3469]))))))))))))))))))))))))))))))))))));
+     d2[126]=simde_mm256_xor_si256(c2[1921],simde_mm256_xor_si256(c2[3482],simde_mm256_xor_si256(c2[3242],simde_mm256_xor_si256(c2[3484],simde_mm256_xor_si256(c2[377],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[1212],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[2185],simde_mm256_xor_si256(c2[2065],simde_mm256_xor_si256(c2[520],simde_mm256_xor_si256(c2[759],simde_mm256_xor_si256(c2[2801],simde_mm256_xor_si256(c2[2211],simde_mm256_xor_si256(c2[1973],simde_mm256_xor_si256(c2[2453],simde_mm256_xor_si256(c2[2333],simde_mm256_xor_si256(c2[3664],simde_mm256_xor_si256(c2[1384],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[1156],simde_mm256_xor_si256(c2[195],simde_mm256_xor_si256(c2[3677],simde_mm256_xor_si256(c2[3557],simde_mm256_xor_si256(c2[3565],simde_mm256_xor_si256(c2[3089],simde_mm256_xor_si256(c2[2969],simde_mm256_xor_si256(c2[3337],simde_mm256_xor_si256(c2[2501],simde_mm256_xor_si256(c2[3818],simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[1901],simde_mm256_xor_si256(c2[1789],simde_mm256_xor_si256(c2[710],simde_mm256_xor_si256(c2[3589],c2[3469]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[132]=_mm256_xor_si256(c2[2412],c2[2425]);
+     d2[132]=simde_mm256_xor_si256(c2[2412],c2[2425]);
 
 //row: 23
-     d2[138]=_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[2678],c2[3782]));
+     d2[138]=simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[2678],c2[3782]));
 
 //row: 24
-     d2[144]=_mm256_xor_si256(c2[2657],_mm256_xor_si256(c2[3028],c2[110]));
+     d2[144]=simde_mm256_xor_si256(c2[2657],simde_mm256_xor_si256(c2[3028],c2[110]));
 
 //row: 25
-     d2[150]=_mm256_xor_si256(c2[1561],c2[2580]);
+     d2[150]=simde_mm256_xor_si256(c2[1561],c2[2580]);
 
 //row: 26
-     d2[156]=_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[2162],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[2896],_mm256_xor_si256(c2[2776],_mm256_xor_si256(c2[2537],_mm256_xor_si256(c2[977],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[3492],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[3384],_mm256_xor_si256(c2[3145],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[3039],_mm256_xor_si256(c2[2919],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[3278],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[2919],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[1116],_mm256_xor_si256(c2[891],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[893],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[2224],_mm256_xor_si256(c2[1985],_mm256_xor_si256(c2[3663],_mm256_xor_si256(c2[3544],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[3675],_mm256_xor_si256(c2[3555],_mm256_xor_si256(c2[3316],_mm256_xor_si256(c2[2714],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[1992],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[2125],_mm256_xor_si256(c2[1886],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[1061],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[2019],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[349],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[3229],_mm256_xor_si256(c2[3109],_mm256_xor_si256(c2[2870],_mm256_xor_si256(c2[2029],_mm256_xor_si256(c2[1910],c2[17
90])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[156]=simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[2162],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[2896],simde_mm256_xor_si256(c2[2776],simde_mm256_xor_si256(c2[2537],simde_mm256_xor_si256(c2[977],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[3492],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[3384],simde_mm256_xor_si256(c2[3145],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[3039],simde_mm256_xor_si256(c2[2919],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[3278],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[2919],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[1116],simde_mm256_xor_si256(c2[891],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[893],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[2224],simde_mm256_xor_si256(c2[1985],simde_mm256_xor_si256(c2[3663],simde_mm256_xor_si256(c2[3544],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[3675],simde_mm256_xor_si256(c2[3555],simde_mm256_xor_si256(c2[3316],simde_mm256_xor_si256(c2[2714],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[1992],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[2125],simde_mm256_xor_si256(c2[1886],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si2
56(c2[1061],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[2019],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[349],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[3229],simde_mm256_xor_si256(c2[3109],simde_mm256_xor_si256(c2[2870],simde_mm256_xor_si256(c2[2029],simde_mm256_xor_si256(c2[1910],c2[1790])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[162]=_mm256_xor_si256(c2[843],c2[2233]);
+     d2[162]=simde_mm256_xor_si256(c2[843],c2[2233]);
 
 //row: 28
-     d2[168]=_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[1827],c2[1025]));
+     d2[168]=simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[1827],c2[1025]));
 
 //row: 29
-     d2[174]=_mm256_xor_si256(c2[1683],c2[2088]);
+     d2[174]=simde_mm256_xor_si256(c2[1683],c2[2088]);
 
 //row: 30
-     d2[180]=_mm256_xor_si256(c2[2907],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[2485],c2[2989])));
+     d2[180]=simde_mm256_xor_si256(c2[2907],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[2485],c2[2989])));
 
 //row: 31
-     d2[186]=_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[3254],_mm256_xor_si256(c2[2175],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[3028],_mm256_xor_si256(c2[2908],_mm256_xor_si256(c2[1357],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[3638],_mm256_xor_si256(c2[3048],_mm256_xor_si256(c2[2810],_mm256_xor_si256(c2[3290],_mm256_xor_si256(c2[3170],_mm256_xor_si256(c2[662],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[2101],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[675],_mm256_xor_si256(c2[555],_mm256_xor_si256(c2[569],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[3806],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[3338],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[593],c2[473])))))))))))))))))))))))))))))))))));
+     d2[186]=simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[3254],simde_mm256_xor_si256(c2[2175],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[3028],simde_mm256_xor_si256(c2[2908],simde_mm256_xor_si256(c2[1357],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[3638],simde_mm256_xor_si256(c2[3048],simde_mm256_xor_si256(c2[2810],simde_mm256_xor_si256(c2[3290],simde_mm256_xor_si256(c2[3170],simde_mm256_xor_si256(c2[662],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[2101],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1032],simde_mm256_xor_si256(c2[675],simde_mm256_xor_si256(c2[555],simde_mm256_xor_si256(c2[569],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[3806],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[3338],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[593],c2[473])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[192]=_mm256_xor_si256(c2[3725],_mm256_xor_si256(c2[3605],_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[1081],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2175],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[2896],_mm256_xor_si256(c2[2789],_mm256_xor_si256(c2[2669],_mm256_xor_si256(c2[3749],_mm256_xor_si256(c2[2318],_mm256_xor_si256(c2[2198],_mm256_xor_si256(c2[2557],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[3651],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[1623],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[2942],_mm256_xor_si256(c2[1020],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[2834],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[1396],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[3593],_mm256_xor_si256(c2[3473],_mm256_xor_si256(c2[2508],_mm256_xor_si256(c2[2388],c2[1308]))))))))))))))))))))))))))))))))))))))))));
+     d2[192]=simde_mm256_xor_si256(c2[3725],simde_mm256_xor_si256(c2[3605],simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[1081],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2175],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[2896],simde_mm256_xor_si256(c2[2789],simde_mm256_xor_si256(c2[2669],simde_mm256_xor_si256(c2[3749],simde_mm256_xor_si256(c2[2318],simde_mm256_xor_si256(c2[2198],simde_mm256_xor_si256(c2[2557],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[3651],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[1623],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[2942],simde_mm256_xor_si256(c2[1020],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[2834],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[1396],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[3593],simde_mm256_xor_si256(c2[3473],simde_mm256_xor_si256(c2[2508],simde_mm256_xor_si256(c2[2388],c2[1308]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[198]=_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[2644],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[1227],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[3521],_mm256_xor_si256(c2[3760],_mm256_xor_si256(c2[1957],_mm256_xor_si256(c2[1373],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[1489],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[3196],_mm256_xor_si256(c2[2713],_mm256_xor_si256(c2[2727],_mm256_xor_si256(c2[2125],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[2499],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[2860],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[3711],c2[2631]))))))))))))))))))))))))))));
+     d2[198]=simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[2644],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[1227],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[3521],simde_mm256_xor_si256(c2[3760],simde_mm256_xor_si256(c2[1957],simde_mm256_xor_si256(c2[1373],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[1489],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[3196],simde_mm256_xor_si256(c2[2713],simde_mm256_xor_si256(c2[2727],simde_mm256_xor_si256(c2[2125],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[2499],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[2860],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[3711],c2[2631]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[204]=_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[3604],_mm256_xor_si256(c2[3005],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[1565],_mm256_xor_si256(c2[2174],_mm256_xor_si256(c2[2054],_mm256_xor_si256(c2[1455],_mm256_xor_si256(c2[255],_mm256_xor_si256(c2[3495],_mm256_xor_si256(c2[2895],_mm256_xor_si256(c2[2416],_mm256_xor_si256(c2[2296],_mm256_xor_si256(c2[2788],_mm256_xor_si256(c2[2668],_mm256_xor_si256(c2[2069],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3149],_mm256_xor_si256(c2[2317],_mm256_xor_si256(c2[2197],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[2556],_mm256_xor_si256(c2[2436],_mm256_xor_si256(c2[1837],_mm256_xor_si256(c2[639],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[169],_mm256_xor_si256(c2[49],_mm256_xor_si256(c2[3289],_mm256_xor_si256(c2[3650],_mm256_xor_si256(c2[3051],_mm256_xor_si256(c2[171],_mm256_xor_si256(c2[3531],_mm256_xor_si256(c2[3411],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[903],_mm256_xor_si256(c2[2941],_mm256_xor_si256(c2[2462],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[2833],_mm256_xor_si256(c2[2234],_mm256_xor_si256(c2[1992],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[1273],_mm256_xor_si256(c2[1395],_mm256_xor_si256(c2[916],_mm256_xor_si256(c2[796],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[1409],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[807],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[1181],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[3579],_mm256_xor_si256(c2[1536],_mm256_xor_si256(c2[1057],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[3592],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[2873],_mm256_xor_si256(c2[2513],_mm256_xor_si256(c2[2393],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[1313],_mm256_xor_si256(c2[828],c2[708]))))))))))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[204]=simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[3604],simde_mm256_xor_si256(c2[3005],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[1565],simde_mm256_xor_si256(c2[2174],simde_mm256_xor_si256(c2[2054],simde_mm256_xor_si256(c2[1455],simde_mm256_xor_si256(c2[255],simde_mm256_xor_si256(c2[3495],simde_mm256_xor_si256(c2[2895],simde_mm256_xor_si256(c2[2416],simde_mm256_xor_si256(c2[2296],simde_mm256_xor_si256(c2[2788],simde_mm256_xor_si256(c2[2668],simde_mm256_xor_si256(c2[2069],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[3149],simde_mm256_xor_si256(c2[2317],simde_mm256_xor_si256(c2[2197],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[2556],simde_mm256_xor_si256(c2[2436],simde_mm256_xor_si256(c2[1837],simde_mm256_xor_si256(c2[639],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[169],simde_mm256_xor_si256(c2[49],simde_mm256_xor_si256(c2[3289],simde_mm256_xor_si256(c2[3650],simde_mm256_xor_si256(c2[3051],simde_mm256_xor_si256(c2[171],simde_mm256_xor_si256(c2[3531],simde_mm256_xor_si256(c2[3411],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[903],simde_mm256_xor_si256(c2[2941],simde_mm256_xor_si256(c2[2462],simde_mm256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[2833],simde_mm256_xor_si256(c2[2234],simde_mm256_xor_si256(c2[1992],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[1273],simde_mm256_xor_si256(c2[1395],simde_mm256_xor_si256(c2[916],simde_mm256_xor_si256(c2[796],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[1409],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[807],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[1181],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256
(c2[3579],simde_mm256_xor_si256(c2[1536],simde_mm256_xor_si256(c2[1057],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[3592],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[2873],simde_mm256_xor_si256(c2[2513],simde_mm256_xor_si256(c2[2393],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[1313],simde_mm256_xor_si256(c2[828],c2[708]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[210]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3725],_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[2295],_mm256_xor_si256(c2[2175],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[3016],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[2789],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[2438],_mm256_xor_si256(c2[2318],_mm256_xor_si256(c2[2557],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[3771],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[1623],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[2945],_mm256_xor_si256(c2[3074],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[460],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[3593],_mm256_xor_si256(c2[2508],c2[1428]))))))))))))))))))))))))))))))))));
+     d2[210]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3725],simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[2295],simde_mm256_xor_si256(c2[2175],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[3016],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[2789],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[2438],simde_mm256_xor_si256(c2[2318],simde_mm256_xor_si256(c2[2557],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[3771],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[1623],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[2945],simde_mm256_xor_si256(c2[3074],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[460],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[3593],simde_mm256_xor_si256(c2[2508],c2[1428]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[216]=_mm256_xor_si256(c2[3000],_mm256_xor_si256(c2[2788],c2[1289]));
+     d2[216]=simde_mm256_xor_si256(c2[3000],simde_mm256_xor_si256(c2[2788],c2[1289]));
 
 //row: 37
-     d2[222]=_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[3721],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[3481],_mm256_xor_si256(c2[2761],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[3735],_mm256_xor_si256(c2[2656],_mm256_xor_si256(c2[1936],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[857],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[2304],_mm256_xor_si256(c2[1704],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[759],_mm256_xor_si256(c2[39],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[2212],_mm256_xor_si256(c2[1492],_mm256_xor_si256(c2[2572],_mm256_xor_si256(c2[1972],_mm256_xor_si256(c2[1852],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[3183],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[903],_mm256_xor_si256(c2[783],_mm256_xor_si256(c2[1395],_mm256_xor_si256(c2[675],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[3553],_mm256_xor_si256(c2[3796],_mm256_xor_si256(c2[3196],_mm256_xor_si256(c2[3076],_mm256_xor_si256(c2[3804],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[3208],_mm256_xor_si256(c2[2608],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[3576],_mm256_xor_si256(c2[2856],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[2020],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[3337],_mm256_xor_si256(c2[3217],_mm256_xor_si256(c2[2028],_mm256_xor_si256(c2[1308],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[3708],_mm256_xor_si256(c2[3108],c2[2988])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[222]=simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[3721],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[3481],simde_mm256_xor_si256(c2[2761],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[3735],simde_mm256_xor_si256(c2[2656],simde_mm256_xor_si256(c2[1936],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[857],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[2304],simde_mm256_xor_si256(c2[1704],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[759],simde_mm256_xor_si256(c2[39],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[2212],simde_mm256_xor_si256(c2[1492],simde_mm256_xor_si256(c2[2572],simde_mm256_xor_si256(c2[1972],simde_mm256_xor_si256(c2[1852],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[3183],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[903],simde_mm256_xor_si256(c2[783],simde_mm256_xor_si256(c2[1395],simde_mm256_xor_si256(c2[675],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[3553],simde_mm256_xor_si256(c2[3796],simde_mm256_xor_si256(c2[3196],simde_mm256_xor_si256(c2[3076],simde_mm256_xor_si256(c2[3804],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[3208],simde_mm256_xor_si256(c2[2608],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[3576],simde_mm256_xor_si256(c2[2856],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[2020],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[3337],simde_mm256_xor_si256(c2[3217],simde_mm256_xor_si256(c2[2028],simde_mm256_xor_si256(c2[1308],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[3708],simde_mm256_xor_si256(c2[3108],c2[2988])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[228]=_mm256_xor_si256(c2[1445],_mm256_xor_si256(c2[1325],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[2640],_mm256_xor_si256(c2[3734],_mm256_xor_si256(c2[3614],_mm256_xor_si256(c2[1815],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[509],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[1469],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[3757],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[2199],_mm256_xor_si256(c2[1609],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[662],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[3432],_mm256_xor_si256(c2[2955],_mm256_xor_si256(c2[2969],_mm256_xor_si256(c2[2367],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[1899],_mm256_xor_si256(c2[3096],_mm256_xor_si256(c2[1313],_mm256_xor_si256(c2[1193],_mm256_xor_si256(c2[108],c2[2873]))))))))))))))))))))))))))))))))));
+     d2[228]=simde_mm256_xor_si256(c2[1445],simde_mm256_xor_si256(c2[1325],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[2640],simde_mm256_xor_si256(c2[3734],simde_mm256_xor_si256(c2[3614],simde_mm256_xor_si256(c2[1815],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[509],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[1469],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[3757],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[2199],simde_mm256_xor_si256(c2[1609],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[662],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[3432],simde_mm256_xor_si256(c2[2955],simde_mm256_xor_si256(c2[2969],simde_mm256_xor_si256(c2[2367],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[1899],simde_mm256_xor_si256(c2[3096],simde_mm256_xor_si256(c2[1313],simde_mm256_xor_si256(c2[1193],simde_mm256_xor_si256(c2[108],c2[2873]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[234]=_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[3482],_mm256_xor_si256(c2[3136],_mm256_xor_si256(c2[3016],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[3744],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[3279],_mm256_xor_si256(c2[3159],_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[3398],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[1131],_mm256_xor_si256(c2[1011],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[1133],_mm256_xor_si256(c2[2584],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[3795],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[2834],_mm256_xor_si256(c2[2357],_mm256_xor_si256(c2[2485],_mm256_xor_si256(c2[2365],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[2137],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[709],_mm256_xor_si256(c2[589],_mm256_xor_si256(c2[3469],_mm256_xor_si256(c2[3349],c2[2269]))))))))))))))))))))))))))))))))))))))))));
+     d2[234]=simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[3482],simde_mm256_xor_si256(c2[3136],simde_mm256_xor_si256(c2[3016],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[3744],simde_mm256_xor_si256(c2[3624],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[3279],simde_mm256_xor_si256(c2[3159],simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[3398],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[1131],simde_mm256_xor_si256(c2[1011],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[1133],simde_mm256_xor_si256(c2[2584],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[3795],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[2834],simde_mm256_xor_si256(c2[2357],simde_mm256_xor_si256(c2[2485],simde_mm256_xor_si256(c2[2365],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[2137],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[709],simde_mm256_xor_si256(c2[589],simde_mm256_xor_si256(c2[3469],simde_mm256_xor_si256(c2[3349],c2[2269]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[240]=_mm256_xor_si256(c2[2882],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[3485],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[3245],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[2173],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[1946],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[3026],_mm256_xor_si256(c2[2188],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[867],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[1720],_mm256_xor_si256(c2[756],_mm256_xor_si256(c2[3756],_mm256_xor_si256(c2[2798],_mm256_xor_si256(c2[3172],_mm256_xor_si256(c2[2208],_mm256_xor_si256(c2[2928],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[3288],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[780],_mm256_xor_si256(c2[3661],_mm256_xor_si256(c2[2225],_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[1153],_mm256_xor_si256(c2[1156],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[3674],_mm256_xor_si256(c2[3554],_mm256_xor_si256(c2[687],_mm256_xor_si256(c2[3568],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[459],_mm256_xor_si256(c2[3340],_mm256_xor_si256(c2[3456],_mm256_xor_si256(c2[2498],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[2750],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[1671],_mm256_xor_si256(c2[713],_mm256_xor_si256(c2[591],_mm256_xor_si256(c2[3592],c2[3472]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[240]=simde_mm256_xor_si256(c2[2882],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[3485],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[3245],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[2173],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[1946],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[3026],simde_mm256_xor_si256(c2[2188],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[867],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[1720],simde_mm256_xor_si256(c2[756],simde_mm256_xor_si256(c2[3756],simde_mm256_xor_si256(c2[2798],simde_mm256_xor_si256(c2[3172],simde_mm256_xor_si256(c2[2208],simde_mm256_xor_si256(c2[2928],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[3288],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[780],simde_mm256_xor_si256(c2[3661],simde_mm256_xor_si256(c2[2225],simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[1153],simde_mm256_xor_si256(c2[1156],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[3674],simde_mm256_xor_si256(c2[3554],simde_mm256_xor_si256(c2[687],simde_mm256_xor_si256(c2[3568],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[459],simde_mm256_xor_si256(c2[3340],simde_mm256_xor_si256(c2[3456],simde_mm256_xor_si256(c2[2498],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[2750],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[1671],simde_mm256_xor_si256(c2[713],simde_mm256_xor_si256(c2[591],simde_mm256_xor_si256(c2[3592],c2[3472]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[246]=_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[1081],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[3496],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[1577],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[2296],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[1225],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[3519],_mm256_xor_si256(c2[3758],_mm256_xor_si256(c2[1961],_mm256_xor_si256(c2[1371],_mm256_xor_si256(c2[1133],_mm256_xor_si256(c2[1493],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[424],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2717],_mm256_xor_si256(c2[2725],_mm256_xor_si256(c2[2129],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[1661],_mm256_xor_si256(c2[2858],_mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[3709],c2[2629]))))))))))))))))))))))))))))))))));
+     d2[246]=simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[1081],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[3496],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[1577],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[2296],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[1225],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[3519],simde_mm256_xor_si256(c2[3758],simde_mm256_xor_si256(c2[1961],simde_mm256_xor_si256(c2[1371],simde_mm256_xor_si256(c2[1133],simde_mm256_xor_si256(c2[1493],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[424],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2717],simde_mm256_xor_si256(c2[2725],simde_mm256_xor_si256(c2[2129],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[1661],simde_mm256_xor_si256(c2[2858],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[3709],c2[2629]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc224_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc224_byte.c
index cd665548f82ba7f8fbe8aedcee1c7bf6647228c7..8969c624decaf257f8bdbb2dbc4c0e30cb43b078 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc224_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc224_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc224_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[2941],_mm256_xor_si256(c2[3364],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[3519],_mm256_xor_si256(c2[3248],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[2987],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[2003],_mm256_xor_si256(c2[2719],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[3994],_mm256_xor_si256(c2[3993],_mm256_xor_si256(c2[4426],_mm256_xor_si256(c2[2747],_mm256_xor_si256(c2[4007],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[2061],_mm256_xor_si256(c2[4172],_mm256_xor_si256(c2[3894],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[4466],c2[830]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[2941],simde_mm256_xor_si256(c2[3364],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[438],simde_mm256_xor_si256(c2[3519],simde_mm256_xor_si256(c2[3248],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[2987],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[2003],simde_mm256_xor_si256(c2[2719],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[3994],simde_mm256_xor_si256(c2[3993],simde_mm256_xor_si256(c2[4426],simde_mm256_xor_si256(c2[2747],simde_mm256_xor_si256(c2[4007],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[2061],simde_mm256_xor_si256(c2[4172],simde_mm256_xor_si256(c2[3894],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[4466],c2[830]))))))))))))))))))))))))));
 
 //row: 1
-     d2[7]=_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[1262],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[3659],_mm256_xor_si256(c2[3248],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[2987],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[2859],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[4134],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[4426],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[4147],_mm256_xor_si256(c2[1364],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[127],c2[970]))))))))))))))))))))))))))))))));
+     d2[7]=simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[1262],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[3659],simde_mm256_xor_si256(c2[3248],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[2987],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[2859],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[4134],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[4426],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[4147],simde_mm256_xor_si256(c2[1364],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[127],c2[970]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[14]=_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[1262],_mm256_xor_si256(c2[2941],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[1977],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[3659],_mm256_xor_si256(c2[3248],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[2987],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[2719],_mm256_xor_si256(c2[2859],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[3994],_mm256_xor_si256(c2[4134],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[4426],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[2747],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[4147],_mm256_xor_si256(c2[1224],_mm256_xor_si256(c2[1364],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[4172],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[4466],_mm256_xor_si256(c2[127],c2[970]))))))))))))))))))))))))))))))))))))))));
+     d2[14]=simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[1262],simde_mm256_xor_si256(c2[2941],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[1977],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[3659],simde_mm256_xor_si256(c2[3248],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[2987],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[2719],simde_mm256_xor_si256(c2[2859],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[3994],simde_mm256_xor_si256(c2[4134],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[4426],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[2747],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[4147],simde_mm256_xor_si256(c2[1224],simde_mm256_xor_si256(c2[1364],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[4172],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[4466],simde_mm256_xor_si256(c2[127],c2[970]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[21]=_mm256_xor_si256(c2[1262],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[3519],_mm256_xor_si256(c2[3659],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[2859],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[4134],_mm256_xor_si256(c2[3993],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[4007],_mm256_xor_si256(c2[4147],_mm256_xor_si256(c2[1364],_mm256_xor_si256(c2[2061],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[830],c2[970]))))))))))))))))))))))))))))))))));
+     d2[21]=simde_mm256_xor_si256(c2[1262],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[3519],simde_mm256_xor_si256(c2[3659],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[2859],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[4134],simde_mm256_xor_si256(c2[3993],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[4007],simde_mm256_xor_si256(c2[4147],simde_mm256_xor_si256(c2[1364],simde_mm256_xor_si256(c2[2061],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[830],c2[970]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[28]=_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[1965],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[1126],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[1274],_mm256_xor_si256(c2[4355],_mm256_xor_si256(c2[2254],_mm256_xor_si256(c2[3951],_mm256_xor_si256(c2[4091],_mm256_xor_si256(c2[1711],_mm256_xor_si256(c2[3683],_mm256_xor_si256(c2[3823],_mm256_xor_si256(c2[323],_mm256_xor_si256(c2[2846],_mm256_xor_si256(c2[3562],_mm256_xor_si256(c2[758],_mm256_xor_si256(c2[1460],_mm256_xor_si256(c2[351],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[2060],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[393],_mm256_xor_si256(c2[1949],_mm256_xor_si256(c2[2089],_mm256_xor_si256(c2[830],c2[1666]))))))))))))))))))))))))))))))))));
+     d2[28]=simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[1965],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[1126],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[1274],simde_mm256_xor_si256(c2[4355],simde_mm256_xor_si256(c2[2254],simde_mm256_xor_si256(c2[3951],simde_mm256_xor_si256(c2[4091],simde_mm256_xor_si256(c2[1711],simde_mm256_xor_si256(c2[3683],simde_mm256_xor_si256(c2[3823],simde_mm256_xor_si256(c2[323],simde_mm256_xor_si256(c2[2846],simde_mm256_xor_si256(c2[3562],simde_mm256_xor_si256(c2[758],simde_mm256_xor_si256(c2[1460],simde_mm256_xor_si256(c2[351],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[2060],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[393],simde_mm256_xor_si256(c2[1949],simde_mm256_xor_si256(c2[2089],simde_mm256_xor_si256(c2[830],c2[1666]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[35]=_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[1401],_mm256_xor_si256(c2[3220],_mm256_xor_si256(c2[3643],_mm256_xor_si256(c2[3505],_mm256_xor_si256(c2[2116],_mm256_xor_si256(c2[2256],_mm256_xor_si256(c2[717],_mm256_xor_si256(c2[3798],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[3394],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[3126],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[4245],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[2998],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[4273],_mm256_xor_si256(c2[4272],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[3026],_mm256_xor_si256(c2[4286],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[2340],_mm256_xor_si256(c2[3884],_mm256_xor_si256(c2[4458],_mm256_xor_si256(c2[4173],_mm256_xor_si256(c2[4315],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[1532],_mm256_xor_si256(c2[266],c2[1109]))))))))))))))))))))))))))))))))))));
+     d2[35]=simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[1401],simde_mm256_xor_si256(c2[3220],simde_mm256_xor_si256(c2[3643],simde_mm256_xor_si256(c2[3505],simde_mm256_xor_si256(c2[2116],simde_mm256_xor_si256(c2[2256],simde_mm256_xor_si256(c2[717],simde_mm256_xor_si256(c2[3798],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[3394],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[3126],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[4245],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[2998],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[4273],simde_mm256_xor_si256(c2[4272],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[3026],simde_mm256_xor_si256(c2[4286],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[2340],simde_mm256_xor_si256(c2[3884],simde_mm256_xor_si256(c2[4458],simde_mm256_xor_si256(c2[4173],simde_mm256_xor_si256(c2[4315],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[1532],simde_mm256_xor_si256(c2[266],c2[1109]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[42]=_mm256_xor_si256(c2[3501],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[1404],_mm256_xor_si256(c2[3783],_mm256_xor_si256(c2[4356],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[1559],_mm256_xor_si256(c2[1148],_mm256_xor_si256(c2[1288],_mm256_xor_si256(c2[3394],_mm256_xor_si256(c2[887],_mm256_xor_si256(c2[1027],_mm256_xor_si256(c2[2006],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[759],_mm256_xor_si256(c2[2441],_mm256_xor_si256(c2[3136],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[2033],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[2466],_mm256_xor_si256(c2[787],_mm256_xor_si256(c2[2047],_mm256_xor_si256(c2[3743],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[4438],_mm256_xor_si256(c2[2212],_mm256_xor_si256(c2[1934],_mm256_xor_si256(c2[2076],_mm256_xor_si256(c2[3632],_mm256_xor_si256(c2[3772],_mm256_xor_si256(c2[2506],_mm256_xor_si256(c2[3349],c2[4332]))))))))))))))))))))))))))))))))))));
+     d2[42]=simde_mm256_xor_si256(c2[3501],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[1404],simde_mm256_xor_si256(c2[3783],simde_mm256_xor_si256(c2[4356],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[1559],simde_mm256_xor_si256(c2[1148],simde_mm256_xor_si256(c2[1288],simde_mm256_xor_si256(c2[3394],simde_mm256_xor_si256(c2[887],simde_mm256_xor_si256(c2[1027],simde_mm256_xor_si256(c2[2006],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[759],simde_mm256_xor_si256(c2[2441],simde_mm256_xor_si256(c2[3136],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[2033],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[2466],simde_mm256_xor_si256(c2[787],simde_mm256_xor_si256(c2[2047],simde_mm256_xor_si256(c2[3743],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[4438],simde_mm256_xor_si256(c2[2212],simde_mm256_xor_si256(c2[1934],simde_mm256_xor_si256(c2[2076],simde_mm256_xor_si256(c2[3632],simde_mm256_xor_si256(c2[3772],simde_mm256_xor_si256(c2[2506],simde_mm256_xor_si256(c2[3349],c2[4332]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[49]=_mm256_xor_si256(c2[4206],_mm256_xor_si256(c2[4346],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1540],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[1963],_mm256_xor_si256(c2[575],_mm256_xor_si256(c2[715],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[3655],_mm256_xor_si256(c2[3516],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1978],_mm256_xor_si256(c2[2118],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[1993],_mm256_xor_si256(c2[1854],_mm256_xor_si256(c2[4092],_mm256_xor_si256(c2[3813],_mm256_xor_si256(c2[3953],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1725],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[2565],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[3139],_mm256_xor_si256(c2[3000],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[3562],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[2732],_mm256_xor_si256(c2[2593],_mm256_xor_si256(c2[2731],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[2592],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[1485],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[2745],_mm256_xor_si256(c2[2466],_mm256_xor_si256(c2[2606],_mm256_xor_si256(c2[4441],_mm256_xor_si256(c2[4302],_mm256_xor_si256(c2[799],_mm256_xor_si256(c2[520],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[2917],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[2493],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[2495],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[4330],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[4331],_mm256_xor_si256(c2[3211],_mm256_xor_si256(c2[3072],_mm256_xor_si256(c2[4047],_mm256_xor_si256(c2[3768],c2[3908]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[49]=simde_mm256_xor_si256(c2[4206],simde_mm256_xor_si256(c2[4346],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1540],simde_mm256_xor_si256(c2[2102],simde_mm256_xor_si256(c2[1963],simde_mm256_xor_si256(c2[575],simde_mm256_xor_si256(c2[715],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[3655],simde_mm256_xor_si256(c2[3516],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1978],simde_mm256_xor_si256(c2[2118],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[1993],simde_mm256_xor_si256(c2[1854],simde_mm256_xor_si256(c2[4092],simde_mm256_xor_si256(c2[3813],simde_mm256_xor_si256(c2[3953],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1725],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[2565],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[3139],simde_mm256_xor_si256(c2[3000],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[3562],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[2732],simde_mm256_xor_si256(c2[2593],simde_mm256_xor_si256(c2[2731],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[2592],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[1485],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[2745],simde_mm256_xor_si256(c2[2466],simde_mm256_xor_si256(c2[2606],simde_mm256_xor_si256(c2[4441],simde_mm256_xor_si256(c2[4302],simde_mm256_xor_si256(c2[799],simde_mm256_xor_si256(c2[520],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[2917],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[2493],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[2495],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[4330],simde_mm256_
xor_si256(c2[4470],simde_mm256_xor_si256(c2[4331],simde_mm256_xor_si256(c2[3211],simde_mm256_xor_si256(c2[3072],simde_mm256_xor_si256(c2[4047],simde_mm256_xor_si256(c2[3768],c2[3908]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[56]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1966],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[855],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[3935],_mm256_xor_si256(c2[2537],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[2133],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[4372],_mm256_xor_si256(c2[1865],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[2984],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[1597],_mm256_xor_si256(c2[1737],_mm256_xor_si256(c2[3419],_mm256_xor_si256(c2[4121],_mm256_xor_si256(c2[2872],_mm256_xor_si256(c2[3012],_mm256_xor_si256(c2[3011],_mm256_xor_si256(c2[3304],_mm256_xor_si256(c2[3444],_mm256_xor_si256(c2[1625],_mm256_xor_si256(c2[1765],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[1079],_mm256_xor_si256(c2[3057],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[131],_mm256_xor_si256(c2[271],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[3491],c2[4327]))))))))))))))))))))))))))))))))))))))))));
+     d2[56]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1966],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[855],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[3935],simde_mm256_xor_si256(c2[2537],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[2133],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[4372],simde_mm256_xor_si256(c2[1865],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[2984],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[1597],simde_mm256_xor_si256(c2[1737],simde_mm256_xor_si256(c2[3419],simde_mm256_xor_si256(c2[4121],simde_mm256_xor_si256(c2[2872],simde_mm256_xor_si256(c2[3012],simde_mm256_xor_si256(c2[3011],simde_mm256_xor_si256(c2[3304],simde_mm256_xor_si256(c2[3444],simde_mm256_xor_si256(c2[1625],simde_mm256_xor_si256(c2[1765],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[1079],simde_mm256_xor_si256(c2[3057],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[131],simde_mm256_xor_si256(c2[271],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3491],c2[4327]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[63]=_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[3796],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[2398],_mm256_xor_si256(c2[1835],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[2134],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[4233],_mm256_xor_si256(c2[3670],_mm256_xor_si256(c2[1866],_mm256_xor_si256(c2[1163],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[2845],_mm256_xor_si256(c2[2282],_mm256_xor_si256(c2[882],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[1042],_mm256_xor_si256(c2[3280],_mm256_xor_si256(c2[2717],_mm256_xor_si256(c2[3982],_mm256_xor_si256(c2[3419],_mm256_xor_si256(c2[2873],_mm256_xor_si256(c2[2310],_mm256_xor_si256(c2[2872],_mm256_xor_si256(c2[2316],_mm256_xor_si256(c2[3305],_mm256_xor_si256(c2[2609],_mm256_xor_si256(c2[2749],_mm256_xor_si256(c2[1626],_mm256_xor_si256(c2[1070],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[4019],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[3058],_mm256_xor_si256(c2[2495],_mm256_xor_si256(c2[2773],_mm256_xor_si256(c2[2217],_mm256_xor_si256(c2[2915],_mm256_xor_si256(c2[2352],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[3908],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[3352],_mm256_xor_si256(c2[2789],_mm256_xor_si256(c2[4188],c2[3632])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[63]=simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[3796],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[2398],simde_mm256_xor_si256(c2[1835],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[2134],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[4233],simde_mm256_xor_si256(c2[3670],simde_mm256_xor_si256(c2[1866],simde_mm256_xor_si256(c2[1163],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[2845],simde_mm256_xor_si256(c2[2282],simde_mm256_xor_si256(c2[882],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[1042],simde_mm256_xor_si256(c2[3280],simde_mm256_xor_si256(c2[2717],simde_mm256_xor_si256(c2[3982],simde_mm256_xor_si256(c2[3419],simde_mm256_xor_si256(c2[2873],simde_mm256_xor_si256(c2[2310],simde_mm256_xor_si256(c2[2872],simde_mm256_xor_si256(c2[2316],simde_mm256_xor_si256(c2[3305],simde_mm256_xor_si256(c2[2609],simde_mm256_xor_si256(c2[2749],simde_mm256_xor_si256(c2[1626],simde_mm256_xor_si256(c2[1070],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[4019],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[3058],simde_mm256_xor_si256(c2[2495],simde_mm256_xor_si256(c2[2773],simde_mm256_xor_si256(c2[2217],simde_mm256_xor_si256(c2[2915],simde_mm256_xor_si256(c2[2352],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[3908],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[3352],simde_mm256_xor_si256(c2[2789],simde_mm256_xor_si256(c2[4188],c2[3632])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[70]=_mm256_xor_si256(c2[1965],_mm256_xor_si256(c2[3098],_mm256_xor_si256(c2[1204],c2[3459])));
+     d2[70]=simde_mm256_xor_si256(c2[1965],simde_mm256_xor_si256(c2[3098],simde_mm256_xor_si256(c2[1204],c2[3459])));
 
 //row: 11
-     d2[77]=_mm256_xor_si256(c2[1685],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[2540],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[3935],_mm256_xor_si256(c2[4075],_mm256_xor_si256(c2[3811],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[2566],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[478],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[71],_mm256_xor_si256(c2[4416],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[3310],_mm256_xor_si256(c2[4430],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[1780],_mm256_xor_si256(c2[2484],_mm256_xor_si256(c2[2624],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[4457],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[1386],c2[2089])))))))))))))))))))))))))))))))))))));
+     d2[77]=simde_mm256_xor_si256(c2[1685],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[2540],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[3935],simde_mm256_xor_si256(c2[4075],simde_mm256_xor_si256(c2[3811],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[2566],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[478],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[71],simde_mm256_xor_si256(c2[4416],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[3310],simde_mm256_xor_si256(c2[4430],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[1780],simde_mm256_xor_si256(c2[2484],simde_mm256_xor_si256(c2[2624],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[4457],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[1386],c2[2089])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[84]=_mm256_xor_si256(c2[2945],_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[2394],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[1975],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[3548],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[1878],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[1471],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[3180],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[3069],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[1950],c2[2786]))))))))))))))))))))))))))))))))));
+     d2[84]=simde_mm256_xor_si256(c2[2945],simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[2394],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[1975],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[3548],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[1878],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[1471],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[3180],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[3069],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[1950],c2[2786]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[91]=_mm256_xor_si256(c2[2946],_mm256_xor_si256(c2[286],_mm256_xor_si256(c2[702],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[2255],_mm256_xor_si256(c2[717],_mm256_xor_si256(c2[857],_mm256_xor_si256(c2[2397],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[2552],_mm256_xor_si256(c2[2692],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[3827],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[1739],_mm256_xor_si256(c2[2301],_mm256_xor_si256(c2[2441],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[1191],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[1345],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[3738],_mm256_xor_si256(c2[3878],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[1374],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[3070],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[2507],c2[2647])))))))))))))))))))))))))))))))))))));
+     d2[91]=simde_mm256_xor_si256(c2[2946],simde_mm256_xor_si256(c2[286],simde_mm256_xor_si256(c2[702],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[2255],simde_mm256_xor_si256(c2[717],simde_mm256_xor_si256(c2[857],simde_mm256_xor_si256(c2[2397],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[2552],simde_mm256_xor_si256(c2[2692],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[3827],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[1739],simde_mm256_xor_si256(c2[2301],simde_mm256_xor_si256(c2[2441],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[1191],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[1345],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[3738],simde_mm256_xor_si256(c2[3878],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[1374],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[3070],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[2507],c2[2647])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[98]=_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[2383],_mm256_xor_si256(c2[4065],_mm256_xor_si256(c2[4202],_mm256_xor_si256(c2[1405],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[3098],_mm256_xor_si256(c2[3238],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[1699],_mm256_xor_si256(c2[3374],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1976],_mm256_xor_si256(c2[575],_mm256_xor_si256(c2[4369],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[2129],_mm256_xor_si256(c2[3671],_mm256_xor_si256(c2[3811],_mm256_xor_si256(c2[4108],_mm256_xor_si256(c2[4248],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[2423],_mm256_xor_si256(c2[3264],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[3980],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[2858],_mm256_xor_si256(c2[1878],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[3560],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[2310],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[2890],_mm256_xor_si256(c2[4008],_mm256_xor_si256(c2[1204],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[2324],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[4426],_mm256_xor_si256(c2[2478],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[3322],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[518],_mm256_xor_si256(c2[954],_mm256_xor_si256(c2[2636],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[2358],_mm256_xor_si256(c2[818],_mm256_xor_si256(c2[2353],_mm256_xor_si256(c2[2493],_mm256_xor_si256(c2[2367],_mm256_xor_si256(c2[2507],_mm256_xor_si256(c2[4189],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[2930],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[3626],c2[3766])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[98]=simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[2383],simde_mm256_xor_si256(c2[4065],simde_mm256_xor_si256(c2[4202],simde_mm256_xor_si256(c2[1405],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[3098],simde_mm256_xor_si256(c2[3238],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[1699],simde_mm256_xor_si256(c2[3374],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1976],simde_mm256_xor_si256(c2[575],simde_mm256_xor_si256(c2[4369],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[2129],simde_mm256_xor_si256(c2[3671],simde_mm256_xor_si256(c2[3811],simde_mm256_xor_si256(c2[4108],simde_mm256_xor_si256(c2[4248],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[2423],simde_mm256_xor_si256(c2[3264],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[3980],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[2858],simde_mm256_xor_si256(c2[1878],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[3560],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[2310],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[2890],simde_mm256_xor_si256(c2[4008],simde_mm256_xor_si256(c2[1204],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[2324],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[4426],simde_mm256_xor_si256(c2[2478],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[3322],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[518],simde_mm256_xor_si256(c2[954],simde_mm256_xor_si256(c2[2636],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[2358],simde_mm256_xor_si256(c2[818],simde_mm256_xor_si256(c2[2353],simde_mm256_xor_si256(c2[2493],simde_mm256_xor_si256(c2[2367],simde_mm256_xor_si256(c2[2507],simde_mm256_xor_si2
56(c2[4189],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[2930],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[3626],c2[3766])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[105]=_mm256_xor_si256(c2[2241],_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[3225],_mm256_xor_si256(c2[4060],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[2803],_mm256_xor_si256(c2[3096],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[4080],_mm256_xor_si256(c2[1557],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[159],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[4374],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[2971],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[1583],_mm256_xor_si256(c2[3122],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[3838],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[1041],_mm256_xor_si256(c2[2018],_mm256_xor_si256(c2[1736],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[634],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[1610],_mm256_xor_si256(c2[1066],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[3866],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[647],_mm256_xor_si256(c2[1624],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[3180],_mm256_xor_si256(c2[4164],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[1796],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[1653],_mm256_xor_si256(c2[2372],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[2090],_mm256_xor_si256(c2[1949],c2[2926]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[105]=simde_mm256_xor_si256(c2[2241],simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[3225],simde_mm256_xor_si256(c2[4060],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[2803],simde_mm256_xor_si256(c2[3096],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[4080],simde_mm256_xor_si256(c2[1557],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[159],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[4374],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[2971],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[1583],simde_mm256_xor_si256(c2[3122],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[3838],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[1041],simde_mm256_xor_si256(c2[2018],simde_mm256_xor_si256(c2[1736],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[634],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[1610],simde_mm256_xor_si256(c2[1066],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[3866],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[647],simde_mm256_xor_si256(c2[1624],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[3180],simde_mm256_xor_si256(c2[4164],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[1796],simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[1653],simde_mm256_xor_si256(c2[2372],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[2090],simde_mm256_xor_si256(c2[1949],c2[2926]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[112]=_mm256_xor_si256(c2[1966],_mm256_xor_si256(c2[2106],_mm256_xor_si256(c2[1265],_mm256_xor_si256(c2[1405],_mm256_xor_si256(c2[3925],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[3224],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[2120],_mm256_xor_si256(c2[2260],_mm256_xor_si256(c2[1415],_mm256_xor_si256(c2[714],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[3795],_mm256_xor_si256(c2[1556],_mm256_xor_si256(c2[4092],_mm256_xor_si256(c2[4232],_mm256_xor_si256(c2[3391],_mm256_xor_si256(c2[3531],_mm256_xor_si256(c2[1852],_mm256_xor_si256(c2[1151],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[3964],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[3263],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[4242],_mm256_xor_si256(c2[2987],_mm256_xor_si256(c2[2286],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[2862],_mm256_xor_si256(c2[3002],_mm256_xor_si256(c2[899],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[4130],_mm256_xor_si256(c2[4270],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[4276],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[2890],_mm256_xor_si256(c2[3030],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[4290],_mm256_xor_si256(c2[2201],_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[1500],_mm256_xor_si256(c2[3038],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[4315],_mm256_xor_si256(c2[4455],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[4177],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[4312],_mm256_xor_si256(c2[2090],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[1389],_mm256_xor_si256(c2[1529],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[130],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[1807],_mm256_xor_si256(c2[1106],c2[4330])))))))))))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))));
+     d2[112]=simde_mm256_xor_si256(c2[1966],simde_mm256_xor_si256(c2[2106],simde_mm256_xor_si256(c2[1265],simde_mm256_xor_si256(c2[1405],simde_mm256_xor_si256(c2[3925],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[3224],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[2120],simde_mm256_xor_si256(c2[2260],simde_mm256_xor_si256(c2[1415],simde_mm256_xor_si256(c2[714],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[3795],simde_mm256_xor_si256(c2[1556],simde_mm256_xor_si256(c2[4092],simde_mm256_xor_si256(c2[4232],simde_mm256_xor_si256(c2[3391],simde_mm256_xor_si256(c2[3531],simde_mm256_xor_si256(c2[1852],simde_mm256_xor_si256(c2[1151],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[3964],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[3263],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[4242],simde_mm256_xor_si256(c2[2987],simde_mm256_xor_si256(c2[2286],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[2862],simde_mm256_xor_si256(c2[3002],simde_mm256_xor_si256(c2[899],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[4130],simde_mm256_xor_si256(c2[4270],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[4276],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[2890],simde_mm256_xor_si256(c2[3030],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[4290],simde_mm256_xor_si256(c2[2201],simde_mm256_xor_si256(c2[1360],simde_mm256_xor_si256(c2[1500],simde_mm256_xor_si256(c2[3038],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[4315],simde_mm256_xor_si256(c2[4455],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[4177],simde_mm256_xor_si2
56(c2[534],simde_mm256_xor_si256(c2[4312],simde_mm256_xor_si256(c2[2090],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[1389],simde_mm256_xor_si256(c2[1529],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[130],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[1807],simde_mm256_xor_si256(c2[1106],c2[4330])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[119]=_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[1402],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[3361],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[3936],_mm256_xor_si256(c2[4076],_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[2397],_mm256_xor_si256(c2[2537],_mm256_xor_si256(c2[858],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[2255],_mm256_xor_si256(c2[728],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[3528],_mm256_xor_si256(c2[3668],_mm256_xor_si256(c2[2974],_mm256_xor_si256(c2[1288],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[607],_mm256_xor_si256(c2[3267],_mm256_xor_si256(c2[3407],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[4246],_mm256_xor_si256(c2[4386],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[2423],_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[2999],_mm256_xor_si256(c2[3139],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[2716],_mm256_xor_si256(c2[1037],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[4274],_mm256_xor_si256(c2[4414],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[4413],_mm256_xor_si256(c2[634],_mm256_xor_si256(c2[1906],_mm256_xor_si256(c2[2046],_mm256_xor_si256(c2[227],_mm256_xor_si256(c2[367],_mm256_xor_si256(c2[367],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[3167],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[4427],_mm256_xor_si256(c2[3323],_mm256_xor_si256(c2[1504],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[4456],_mm256_xor_si256(c2[3212],_mm256_xor_si256(c2[3352],_mm256_xor_si256(c2[1526],_mm256_xor_si256(c2[1666],_mm256_xor_si256(c2[2086],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[407],_mm256_xor_si256(c2[2929],c2[1250]))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))));
+     d2[119]=simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[1402],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[3361],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[3936],simde_mm256_xor_si256(c2[4076],simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[2397],simde_mm256_xor_si256(c2[2537],simde_mm256_xor_si256(c2[858],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[2255],simde_mm256_xor_si256(c2[728],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[3528],simde_mm256_xor_si256(c2[3668],simde_mm256_xor_si256(c2[2974],simde_mm256_xor_si256(c2[1288],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[607],simde_mm256_xor_si256(c2[3267],simde_mm256_xor_si256(c2[3407],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[4246],simde_mm256_xor_si256(c2[4386],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[2423],simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256(c2[2999],simde_mm256_xor_si256(c2[3139],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[2716],simde_mm256_xor_si256(c2[1037],simde_mm256_xor_si256(c2[1614],simde_mm256_xor_si256(c2[4274],simde_mm256_xor_si256(c2[4414],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[4413],simde_mm256_xor_si256(c2[634],simde_mm256_xor_si256(c2[1906],simde_mm256_xor_si256(c2[2046],simde_mm256_xor_si256(c2[227],simde_mm256_xor_si256(c2[367],simde_mm256_xor_si256(c2[367],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[3167],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[4427],simde_mm256_xor_si256(c2[3323],simde_mm256_xor_si256(c2[1504],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_
si256(c2[4314],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[4456],simde_mm256_xor_si256(c2[3212],simde_mm256_xor_si256(c2[3352],simde_mm256_xor_si256(c2[1526],simde_mm256_xor_si256(c2[1666],simde_mm256_xor_si256(c2[2086],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[407],simde_mm256_xor_si256(c2[2929],c2[1250])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[126]=_mm256_xor_si256(c2[562],_mm256_xor_si256(c2[2324],c2[4022]));
+     d2[126]=simde_mm256_xor_si256(c2[562],simde_mm256_xor_si256(c2[2324],c2[4022]));
 
 //row: 19
-     d2[133]=_mm256_xor_si256(c2[4204],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1960],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[2115],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[3950],_mm256_xor_si256(c2[1583],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[2997],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[2590],_mm256_xor_si256(c2[2596],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[1350],_mm256_xor_si256(c2[2610],_mm256_xor_si256(c2[4299],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[4328],_mm256_xor_si256(c2[3069],c2[3912]))))))))))))))))))))))))))));
+     d2[133]=simde_mm256_xor_si256(c2[4204],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1960],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[2115],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[3950],simde_mm256_xor_si256(c2[1583],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[2997],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[2590],simde_mm256_xor_si256(c2[2596],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[1350],simde_mm256_xor_si256(c2[2610],simde_mm256_xor_si256(c2[4299],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[4328],simde_mm256_xor_si256(c2[3069],c2[3912]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[140]=_mm256_xor_si256(c2[2946],_mm256_xor_si256(c2[3086],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[3934],_mm256_xor_si256(c2[2395],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[3967],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[2581],_mm256_xor_si256(c2[2856],_mm256_xor_si256(c2[1472],_mm256_xor_si256(c2[1471],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[1485],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[4018],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[1372],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[3070],_mm256_xor_si256(c2[3210],_mm256_xor_si256(c2[1951],c2[2787]))))))))))))))))))))))))))))))))));
+     d2[140]=simde_mm256_xor_si256(c2[2946],simde_mm256_xor_si256(c2[3086],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[3934],simde_mm256_xor_si256(c2[2395],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[3967],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[2581],simde_mm256_xor_si256(c2[2856],simde_mm256_xor_si256(c2[1472],simde_mm256_xor_si256(c2[1471],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[1485],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[4018],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[1372],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[3070],simde_mm256_xor_si256(c2[3210],simde_mm256_xor_si256(c2[1951],c2[2787]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[147]=_mm256_xor_si256(c2[3644],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[2242],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[1415],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[3250],_mm256_xor_si256(c2[3390],_mm256_xor_si256(c2[1023],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[2999],_mm256_xor_si256(c2[3139],_mm256_xor_si256(c2[2030],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[2036],_mm256_xor_si256(c2[2469],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[3739],_mm256_xor_si256(c2[4443],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[1937],_mm256_xor_si256(c2[1932],_mm256_xor_si256(c2[2072],_mm256_xor_si256(c2[1653],_mm256_xor_si256(c2[3768],_mm256_xor_si256(c2[2509],_mm256_xor_si256(c2[3212],c2[3352]))))))))))))))))))))))))))))))))))));
+     d2[147]=simde_mm256_xor_si256(c2[3644],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[2242],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[1415],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[3250],simde_mm256_xor_si256(c2[3390],simde_mm256_xor_si256(c2[1023],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[2999],simde_mm256_xor_si256(c2[3139],simde_mm256_xor_si256(c2[2030],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[2036],simde_mm256_xor_si256(c2[2469],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[3739],simde_mm256_xor_si256(c2[4443],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[1937],simde_mm256_xor_si256(c2[1932],simde_mm256_xor_si256(c2[2072],simde_mm256_xor_si256(c2[1653],simde_mm256_xor_si256(c2[3768],simde_mm256_xor_si256(c2[2509],simde_mm256_xor_si256(c2[3212],c2[3352]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[154]=_mm256_xor_si256(c2[2395],c2[168]);
+     d2[154]=simde_mm256_xor_si256(c2[2395],c2[168]);
 
 //row: 23
-     d2[161]=_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[3123],c2[914]));
+     d2[161]=simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[3123],c2[914]));
 
 //row: 24
-     d2[168]=_mm256_xor_si256(c2[3518],_mm256_xor_si256(c2[3390],c2[4467]));
+     d2[168]=simde_mm256_xor_si256(c2[3518],simde_mm256_xor_si256(c2[3390],c2[4467]));
 
 //row: 25
-     d2[175]=_mm256_xor_si256(c2[2666],c2[4272]);
+     d2[175]=simde_mm256_xor_si256(c2[2666],c2[4272]);
 
 //row: 26
-     d2[182]=_mm256_xor_si256(c2[702],_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[2102],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[2661],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[4344],_mm256_xor_si256(c2[1557],_mm256_xor_si256(c2[1697],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[3239],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[2828],_mm256_xor_si256(c2[2968],_mm256_xor_si256(c2[4228],_mm256_xor_si256(c2[588],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[1848],_mm256_xor_si256(c2[3670],_mm256_xor_si256(c2[2567],_mm256_xor_si256(c2[2707],_mm256_xor_si256(c2[3967],_mm256_xor_si256(c2[3546],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[1723],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[2299],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[4121],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[1597],_mm256_xor_si256(c2[3574],_mm256_xor_si256(c2[3714],_mm256_xor_si256(c2[495],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[494],_mm256_xor_si256(c2[4006],_mm256_xor_si256(c2[4146],_mm256_xor_si256(c2[927],_mm256_xor_si256(c2[2327],_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[3727],_mm256_xor_si256(c2[3727],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[2204],_mm256_xor_si256(c2[1781],_mm256_xor_si256(c2[2901],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[938],_mm256_xor_si256(c2[3752],_mm256_xor_si256(c2[3892],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[3614],_mm256_xor_si256(c2[395],_mm256_xor_si256(c2[3756],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[537],_mm256_xor_si256(c2[826],_mm256_xor_si256(c2[966],_mm256_xor_si256(c2[2226],_mm256_xor_si256(c2[4046],_mm256_xor_si256(c2[4186],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[1670],c2[1810]))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[182]=simde_mm256_xor_si256(c2[702],simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[2102],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[2661],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[4344],simde_mm256_xor_si256(c2[1557],simde_mm256_xor_si256(c2[1697],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[3239],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[2828],simde_mm256_xor_si256(c2[2968],simde_mm256_xor_si256(c2[4228],simde_mm256_xor_si256(c2[588],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[1848],simde_mm256_xor_si256(c2[3670],simde_mm256_xor_si256(c2[2567],simde_mm256_xor_si256(c2[2707],simde_mm256_xor_si256(c2[3967],simde_mm256_xor_si256(c2[3546],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[1723],simde_mm256_xor_si256(c2[2983],simde_mm256_xor_si256(c2[2299],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[4121],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[1597],simde_mm256_xor_si256(c2[3574],simde_mm256_xor_si256(c2[3714],simde_mm256_xor_si256(c2[495],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[494],simde_mm256_xor_si256(c2[4006],simde_mm256_xor_si256(c2[4146],simde_mm256_xor_si256(c2[927],simde_mm256_xor_si256(c2[2327],simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[3727],simde_mm256_xor_si256(c2[3727],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[2204],simde_mm256_xor_si256(c2[1781],simde_mm256_xor_si256(c2[2901],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[938],simde_mm256_xor_si256(c2[3752],simde_mm256_xor_si256(c2[3892],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256
(c2[3614],simde_mm256_xor_si256(c2[395],simde_mm256_xor_si256(c2[3756],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[537],simde_mm256_xor_si256(c2[826],simde_mm256_xor_si256(c2[966],simde_mm256_xor_si256(c2[2226],simde_mm256_xor_si256(c2[4046],simde_mm256_xor_si256(c2[4186],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[1670],c2[1810])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[189]=_mm256_xor_si256(c2[3780],c2[3167]);
+     d2[189]=simde_mm256_xor_si256(c2[3780],c2[3167]);
 
 //row: 28
-     d2[196]=_mm256_xor_si256(c2[3380],_mm256_xor_si256(c2[2834],c2[216]));
+     d2[196]=simde_mm256_xor_si256(c2[3380],simde_mm256_xor_si256(c2[2834],c2[216]));
 
 //row: 29
-     d2[203]=_mm256_xor_si256(c2[1683],c2[4117]);
+     d2[203]=simde_mm256_xor_si256(c2[1683],c2[4117]);
 
 //row: 30
-     d2[210]=_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[1751],_mm256_xor_si256(c2[3603],c2[2371])));
+     d2[210]=simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[1751],simde_mm256_xor_si256(c2[3603],c2[2371])));
 
 //row: 31
-     d2[217]=_mm256_xor_si256(c2[4340],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[2103],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[3656],_mm256_xor_si256(c2[2118],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[3953],_mm256_xor_si256(c2[4093],_mm256_xor_si256(c2[1726],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[742],_mm256_xor_si256(c2[1458],_mm256_xor_si256(c2[3140],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[3842],_mm256_xor_si256(c2[2733],_mm256_xor_si256(c2[2592],_mm256_xor_si256(c2[2732],_mm256_xor_si256(c2[3165],_mm256_xor_si256(c2[1486],_mm256_xor_si256(c2[2606],_mm256_xor_si256(c2[2746],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[2918],_mm256_xor_si256(c2[2633],_mm256_xor_si256(c2[2635],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[4471],_mm256_xor_si256(c2[3212],_mm256_xor_si256(c2[3908],c2[4048])))))))))))))))))))))))))))))))))));
+     d2[217]=simde_mm256_xor_si256(c2[4340],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[2103],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[2118],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[3953],simde_mm256_xor_si256(c2[4093],simde_mm256_xor_si256(c2[1726],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[742],simde_mm256_xor_si256(c2[1458],simde_mm256_xor_si256(c2[3140],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[3842],simde_mm256_xor_si256(c2[2733],simde_mm256_xor_si256(c2[2592],simde_mm256_xor_si256(c2[2732],simde_mm256_xor_si256(c2[3165],simde_mm256_xor_si256(c2[1486],simde_mm256_xor_si256(c2[2606],simde_mm256_xor_si256(c2[2746],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[2918],simde_mm256_xor_si256(c2[2633],simde_mm256_xor_si256(c2[2635],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[4471],simde_mm256_xor_si256(c2[3212],simde_mm256_xor_si256(c2[3908],c2[4048])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[224]=_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[3225],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[4080],_mm256_xor_si256(c2[2534],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[2971],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[1583],_mm256_xor_si256(c2[4106],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[2018],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[1471],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1610],_mm256_xor_si256(c2[3010],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[1624],_mm256_xor_si256(c2[3180],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[4164],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[1796],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[1653],_mm256_xor_si256(c2[3209],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[1950],_mm256_xor_si256(c2[2090],c2[2926]))))))))))))))))))))))))))))))))))))))))));
+     d2[224]=simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[3225],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[4080],simde_mm256_xor_si256(c2[2534],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[2971],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[1583],simde_mm256_xor_si256(c2[4106],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[2018],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[1471],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1610],simde_mm256_xor_si256(c2[3010],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[1624],simde_mm256_xor_si256(c2[3180],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[4164],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[1796],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[1653],simde_mm256_xor_si256(c2[3209],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[1950],simde_mm256_xor_si256(c2[2090],c2[2926]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[231]=_mm256_xor_si256(c2[2945],_mm256_xor_si256(c2[285],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[2254],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[2691],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[3826],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[2440],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3884],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[1238],_mm256_xor_si256(c2[1373],_mm256_xor_si256(c2[3069],_mm256_xor_si256(c2[1810],c2[2646]))))))))))))))))))))))))))));
+     d2[231]=simde_mm256_xor_si256(c2[2945],simde_mm256_xor_si256(c2[285],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[2254],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[2691],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[3826],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[2440],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3884],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[1238],simde_mm256_xor_si256(c2[1373],simde_mm256_xor_si256(c2[3069],simde_mm256_xor_si256(c2[1810],c2[2646]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[238]=_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[4340],_mm256_xor_si256(c2[3783],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[4206],_mm256_xor_si256(c2[4060],_mm256_xor_si256(c2[3236],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[2819],_mm256_xor_si256(c2[1837],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[439],_mm256_xor_si256(c2[4214],_mm256_xor_si256(c2[4354],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[4090],_mm256_xor_si256(c2[2274],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[1710],_mm256_xor_si256(c2[4246],_mm256_xor_si256(c2[4386],_mm256_xor_si256(c2[3822],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[3402],_mm256_xor_si256(c2[2845],_mm256_xor_si256(c2[3978],_mm256_xor_si256(c2[4118],_mm256_xor_si256(c2[3561],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[2016],_mm256_xor_si256(c2[1319],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[1206],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[4006],_mm256_xor_si256(c2[4146],_mm256_xor_si256(c2[3589],_mm256_xor_si256(c2[927],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[2059],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[2763],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[952],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[814],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[956],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[2652],_mm256_xor_si256(c2[2088],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[2229],_mm256_xor_si256(c2[1532],c2[1672])))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))))))))))));
+     d2[238]=simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[4340],simde_mm256_xor_si256(c2[3783],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[4206],simde_mm256_xor_si256(c2[4060],simde_mm256_xor_si256(c2[3236],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[2819],simde_mm256_xor_si256(c2[1837],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[439],simde_mm256_xor_si256(c2[4214],simde_mm256_xor_si256(c2[4354],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[4090],simde_mm256_xor_si256(c2[2274],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[1710],simde_mm256_xor_si256(c2[4246],simde_mm256_xor_si256(c2[4386],simde_mm256_xor_si256(c2[3822],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[3402],simde_mm256_xor_si256(c2[2845],simde_mm256_xor_si256(c2[3978],simde_mm256_xor_si256(c2[4118],simde_mm256_xor_si256(c2[3561],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[2016],simde_mm256_xor_si256(c2[1319],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[1206],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[4006],simde_mm256_xor_si256(c2[4146],simde_mm256_xor_si256(c2[3589],simde_mm256_xor_si256(c2[927],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[2059],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[2763],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[952],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[814],simde_mm256_xor_si256(c2
[257],simde_mm256_xor_si256(c2[956],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[2652],simde_mm256_xor_si256(c2[2088],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[2229],simde_mm256_xor_si256(c2[1532],c2[1672]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[245]=_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[1823],_mm256_xor_si256(c2[2246],_mm256_xor_si256(c2[719],_mm256_xor_si256(c2[859],_mm256_xor_si256(c2[3799],_mm256_xor_si256(c2[2394],_mm256_xor_si256(c2[3796],_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[3276],_mm256_xor_si256(c2[3978],_mm256_xor_si256(c2[2876],_mm256_xor_si256(c2[2875],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[3308],_mm256_xor_si256(c2[1629],_mm256_xor_si256(c2[2889],_mm256_xor_si256(c2[99],_mm256_xor_si256(c2[943],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[2776],_mm256_xor_si256(c2[2918],_mm256_xor_si256(c2[4467],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[3348],c2[4191]))))))))))))))))))))))))))))))))));
+     d2[245]=simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[1823],simde_mm256_xor_si256(c2[2246],simde_mm256_xor_si256(c2[719],simde_mm256_xor_si256(c2[859],simde_mm256_xor_si256(c2[3799],simde_mm256_xor_si256(c2[2394],simde_mm256_xor_si256(c2[3796],simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[3276],simde_mm256_xor_si256(c2[3978],simde_mm256_xor_si256(c2[2876],simde_mm256_xor_si256(c2[2875],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[3168],simde_mm256_xor_si256(c2[3308],simde_mm256_xor_si256(c2[1629],simde_mm256_xor_si256(c2[2889],simde_mm256_xor_si256(c2[99],simde_mm256_xor_si256(c2[943],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[2776],simde_mm256_xor_si256(c2[2918],simde_mm256_xor_si256(c2[4467],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[3348],c2[4191]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[252]=_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[2133],c2[2760]));
+     d2[252]=simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[2133],c2[2760]));
 
 //row: 37
-     d2[259]=_mm256_xor_si256(c2[2243],_mm256_xor_si256(c2[2942],_mm256_xor_si256(c2[4062],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[3098],_mm256_xor_si256(c2[3797],_mm256_xor_si256(c2[1559],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[4369],_mm256_xor_si256(c2[589],_mm256_xor_si256(c2[1989],_mm256_xor_si256(c2[2548],_mm256_xor_si256(c2[2688],_mm256_xor_si256(c2[4108],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1307],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[3823],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[1036],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[2297],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[635],_mm256_xor_si256(c2[1194],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1767],_mm256_xor_si256(c2[3868],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[649],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[3182],_mm256_xor_si256(c2[3741],_mm256_xor_si256(c2[3881],_mm256_xor_si256(c2[814],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[678],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[2367],_mm256_xor_si256(c2[3066],_mm256_xor_si256(c2[1108],_mm256_xor_si256(c2[1807],_mm256_xor_si256(c2[1951],_mm256_xor_si256(c2[2510],c2[2650])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[259]=simde_mm256_xor_si256(c2[2243],simde_mm256_xor_si256(c2[2942],simde_mm256_xor_si256(c2[4062],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[3098],simde_mm256_xor_si256(c2[3797],simde_mm256_xor_si256(c2[1559],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[4369],simde_mm256_xor_si256(c2[589],simde_mm256_xor_si256(c2[1989],simde_mm256_xor_si256(c2[2548],simde_mm256_xor_si256(c2[2688],simde_mm256_xor_si256(c2[4108],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1307],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[3823],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[1036],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[2297],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[635],simde_mm256_xor_si256(c2[1194],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1767],simde_mm256_xor_si256(c2[3868],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[649],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[3182],simde_mm256_xor_si256(c2[3741],simde_mm256_xor_si256(c2[3881],simde_mm256_xor_si256(c2[814],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[678],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[2367],simde_mm256_xor_si256(c2[3066],simde_mm256_xor_si256(c2[1108],simde_mm256_xor_si256(c2[1807],simde_mm256_xor_si256(c2[1951],simde_mm256_xor_si256(c2[2510],c2[2650])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[266]=_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[3938],_mm256_xor_si256(c2[4078],_mm256_xor_si256(c2[2539],_mm256_xor_si256(c2[1134],_mm256_xor_si256(c2[715],_mm256_xor_si256(c2[730],_mm256_xor_si256(c2[870],_mm256_xor_si256(c2[2969],_mm256_xor_si256(c2[462],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[1588],_mm256_xor_si256(c2[4104],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[2016],_mm256_xor_si256(c2[2718],_mm256_xor_si256(c2[1616],_mm256_xor_si256(c2[1615],_mm256_xor_si256(c2[213],_mm256_xor_si256(c2[1908],_mm256_xor_si256(c2[2048],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[1629],_mm256_xor_si256(c2[3318],_mm256_xor_si256(c2[4162],_mm256_xor_si256(c2[1794],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[3207],_mm256_xor_si256(c2[3347],_mm256_xor_si256(c2[2088],c2[2931]))))))))))))))))))))))))))))))))));
+     d2[266]=simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[3938],simde_mm256_xor_si256(c2[4078],simde_mm256_xor_si256(c2[2539],simde_mm256_xor_si256(c2[1134],simde_mm256_xor_si256(c2[715],simde_mm256_xor_si256(c2[730],simde_mm256_xor_si256(c2[870],simde_mm256_xor_si256(c2[2969],simde_mm256_xor_si256(c2[462],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[1588],simde_mm256_xor_si256(c2[4104],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[2016],simde_mm256_xor_si256(c2[2718],simde_mm256_xor_si256(c2[1616],simde_mm256_xor_si256(c2[1615],simde_mm256_xor_si256(c2[213],simde_mm256_xor_si256(c2[1908],simde_mm256_xor_si256(c2[2048],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[1629],simde_mm256_xor_si256(c2[3318],simde_mm256_xor_si256(c2[4162],simde_mm256_xor_si256(c2[1794],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[3207],simde_mm256_xor_si256(c2[3347],simde_mm256_xor_si256(c2[2088],c2[2931]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[273]=_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[2106],_mm256_xor_si256(c2[2246],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[1135],_mm256_xor_si256(c2[1275],_mm256_xor_si256(c2[4215],_mm256_xor_si256(c2[2817],_mm256_xor_si256(c2[2413],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[2145],_mm256_xor_si256(c2[2285],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[3264],_mm256_xor_si256(c2[1308],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[3292],_mm256_xor_si256(c2[3291],_mm256_xor_si256(c2[3584],_mm256_xor_si256(c2[3724],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[2045],_mm256_xor_si256(c2[3305],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[522],_mm256_xor_si256(c2[1359],_mm256_xor_si256(c2[3043],_mm256_xor_si256(c2[3337],_mm256_xor_si256(c2[3477],_mm256_xor_si256(c2[3192],_mm256_xor_si256(c2[3334],_mm256_xor_si256(c2[411],_mm256_xor_si256(c2[551],_mm256_xor_si256(c2[3631],_mm256_xor_si256(c2[3771],c2[128]))))))))))))))))))))))))))))))))))))))))));
+     d2[273]=simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[2106],simde_mm256_xor_si256(c2[2246],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[1135],simde_mm256_xor_si256(c2[1275],simde_mm256_xor_si256(c2[4215],simde_mm256_xor_si256(c2[2817],simde_mm256_xor_si256(c2[2413],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[2145],simde_mm256_xor_si256(c2[2285],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[3264],simde_mm256_xor_si256(c2[1308],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[3292],simde_mm256_xor_si256(c2[3291],simde_mm256_xor_si256(c2[3584],simde_mm256_xor_si256(c2[3724],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[2045],simde_mm256_xor_si256(c2[3305],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[522],simde_mm256_xor_si256(c2[1359],simde_mm256_xor_si256(c2[3043],simde_mm256_xor_si256(c2[3337],simde_mm256_xor_si256(c2[3477],simde_mm256_xor_si256(c2[3192],simde_mm256_xor_si256(c2[3334],simde_mm256_xor_si256(c2[411],simde_mm256_xor_si256(c2[551],simde_mm256_xor_si256(c2[3631],simde_mm256_xor_si256(c2[3771],c2[128]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[280]=_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[1823],_mm256_xor_si256(c2[3362],_mm256_xor_si256(c2[3642],_mm256_xor_si256(c2[3785],_mm256_xor_si256(c2[4065],_mm256_xor_si256(c2[2398],_mm256_xor_si256(c2[2678],_mm256_xor_si256(c2[859],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[4080],_mm256_xor_si256(c2[4220],_mm256_xor_si256(c2[3669],_mm256_xor_si256(c2[3949],_mm256_xor_si256(c2[1289],_mm256_xor_si256(c2[1429],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[311],_mm256_xor_si256(c2[3408],_mm256_xor_si256(c2[3688],_mm256_xor_si256(c2[4387],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[2424],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[3140],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[1038],_mm256_xor_si256(c2[1178],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[4415],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[4414],_mm256_xor_si256(c2[75],_mm256_xor_si256(c2[215],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[3448],_mm256_xor_si256(c2[4428],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[1918],_mm256_xor_si256(c2[2482],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[394],_mm256_xor_si256(c2[4315],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[4457],_mm256_xor_si256(c2[118],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[1667],_mm256_xor_si256(c2[1947],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[1251],_mm256_xor_si256(c2[1391],c2[1531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[280]=simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[1823],simde_mm256_xor_si256(c2[3362],simde_mm256_xor_si256(c2[3642],simde_mm256_xor_si256(c2[3785],simde_mm256_xor_si256(c2[4065],simde_mm256_xor_si256(c2[2398],simde_mm256_xor_si256(c2[2678],simde_mm256_xor_si256(c2[859],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[4080],simde_mm256_xor_si256(c2[4220],simde_mm256_xor_si256(c2[3669],simde_mm256_xor_si256(c2[3949],simde_mm256_xor_si256(c2[1289],simde_mm256_xor_si256(c2[1429],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[311],simde_mm256_xor_si256(c2[3408],simde_mm256_xor_si256(c2[3688],simde_mm256_xor_si256(c2[4387],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[2424],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[3140],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[1038],simde_mm256_xor_si256(c2[1178],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[4415],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[4414],simde_mm256_xor_si256(c2[75],simde_mm256_xor_si256(c2[215],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[3168],simde_mm256_xor_si256(c2[3448],simde_mm256_xor_si256(c2[4428],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[1918],simde_mm256_xor_si256(c2[2482],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[394],simde_mm256_xor_si256(c2[4315],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[4457],simde_mm256_xor_si256(c2[118],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[1667],simde_mm256_xor_si256(c2[1947],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[1251],simde_mm256_xor_si256(c2[1391],c2[1531]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[287]=_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[3644],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[3377],_mm256_xor_si256(c2[1151],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[3390],_mm256_xor_si256(c2[883],_mm256_xor_si256(c2[1023],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[3139],_mm256_xor_si256(c2[2030],_mm256_xor_si256(c2[2036],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[2329],_mm256_xor_si256(c2[2469],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[2050],_mm256_xor_si256(c2[3739],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[1937],_mm256_xor_si256(c2[2072],_mm256_xor_si256(c2[3628],_mm256_xor_si256(c2[3768],_mm256_xor_si256(c2[2509],c2[3352]))))))))))))))))))))))))))))))))));
+     d2[287]=simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[3644],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[3377],simde_mm256_xor_si256(c2[1151],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[3390],simde_mm256_xor_si256(c2[883],simde_mm256_xor_si256(c2[1023],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[3139],simde_mm256_xor_si256(c2[2030],simde_mm256_xor_si256(c2[2036],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[2329],simde_mm256_xor_si256(c2[2469],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[2050],simde_mm256_xor_si256(c2[3739],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[1937],simde_mm256_xor_si256(c2[2072],simde_mm256_xor_si256(c2[3628],simde_mm256_xor_si256(c2[3768],simde_mm256_xor_si256(c2[2509],c2[3352]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc256_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc256_byte.c
index 429a91ea16855e9574a42522f3139d3c6d853e94..cd86a0ca2265b197c76023f62df00687e951fd90 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc256_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc256_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc256_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[2739],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1798],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[4551],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4563],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[4436],_mm256_xor_si256(c2[3473],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[2739],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1798],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[4551],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4563],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[4436],simde_mm256_xor_si256(c2[3473],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))));
 
 //row: 1
-     d2[8]=_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[3379],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[2739],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1958],_mm256_xor_si256(c2[1798],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[4208],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[4551],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4563],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[4741],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[4436],_mm256_xor_si256(c2[3473],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))))))))));
+     d2[8]=simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[3379],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[2739],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1958],simde_mm256_xor_si256(c2[1798],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[4208],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[4551],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4563],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[4741],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[4436],simde_mm256_xor_si256(c2[3473],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[16]=_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[3379],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[2739],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1958],_mm256_xor_si256(c2[1798],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[4208],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[4711],_mm256_xor_si256(c2[4551],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[4563],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[4741],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[4596],_mm256_xor_si256(c2[4436],_mm256_xor_si256(c2[3473],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[4631],_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))))))))))))))))));
+     d2[16]=simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[3379],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[2739],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1958],simde_mm256_xor_si256(c2[1798],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[4208],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[4711],simde_mm256_xor_si256(c2[4551],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[4563],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[4741],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[4596],simde_mm256_xor_si256(c2[4436],simde_mm256_xor_si256(c2[3473],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[4631],simde_mm256_xor_si256(c2[4471],c2[5107]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[24]=_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[2739],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1798],_mm256_xor_si256(c2[4193],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[1809],_mm256_xor_si256(c2[4551],_mm256_xor_si256(c2[3105],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4563],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[263],_mm256_xor_si256(c2[2822],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[4436],_mm256_xor_si256(c2[3633],_mm256_xor_si256(c2[3473],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[3008],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[4471],_mm256_xor_si256(c2[148],c2[5107]))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[2739],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1798],simde_mm256_xor_si256(c2[4193],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[1809],simde_mm256_xor_si256(c2[4551],simde_mm256_xor_si256(c2[3105],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4563],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[263],simde_mm256_xor_si256(c2[2822],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[4436],simde_mm256_xor_si256(c2[3633],simde_mm256_xor_si256(c2[3473],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[3008],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[4471],simde_mm256_xor_si256(c2[148],c2[5107]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[32]=_mm256_xor_si256(c2[2562],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2087],_mm256_xor_si256(c2[3684],_mm256_xor_si256(c2[3045],_mm256_xor_si256(c2[4501],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[3861],_mm256_xor_si256(c2[2258],_mm256_xor_si256(c2[3542],_mm256_xor_si256(c2[3072],_mm256_xor_si256(c2[2912],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[51],_mm256_xor_si256(c2[1975],_mm256_xor_si256(c2[2931],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[4227],_mm256_xor_si256(c2[711],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[2325],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[3776],_mm256_xor_si256(c2[439],_mm256_xor_si256(c2[4595],_mm256_xor_si256(c2[1089],_mm256_xor_si256(c2[3649],_mm256_xor_si256(c2[3970],_mm256_xor_si256(c2[3344],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[466],c2[1110]))))))))))))))))))))))))))))))))));
+     d2[32]=simde_mm256_xor_si256(c2[2562],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2087],simde_mm256_xor_si256(c2[3684],simde_mm256_xor_si256(c2[3045],simde_mm256_xor_si256(c2[4501],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[3861],simde_mm256_xor_si256(c2[2258],simde_mm256_xor_si256(c2[3542],simde_mm256_xor_si256(c2[3072],simde_mm256_xor_si256(c2[2912],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[51],simde_mm256_xor_si256(c2[1975],simde_mm256_xor_si256(c2[2931],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[4227],simde_mm256_xor_si256(c2[711],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[2325],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[3776],simde_mm256_xor_si256(c2[439],simde_mm256_xor_si256(c2[4595],simde_mm256_xor_si256(c2[1089],simde_mm256_xor_si256(c2[3649],simde_mm256_xor_si256(c2[3970],simde_mm256_xor_si256(c2[3344],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[466],c2[1110]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[40]=_mm256_xor_si256(c2[2563],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[3685],_mm256_xor_si256(c2[1127],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[4342],_mm256_xor_si256(c2[3862],_mm256_xor_si256(c2[2259],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[3073],_mm256_xor_si256(c2[2913],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[1968],_mm256_xor_si256(c2[2932],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[4228],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[567],_mm256_xor_si256(c2[2326],_mm256_xor_si256(c2[406],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[3777],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[4596],_mm256_xor_si256(c2[5076],_mm256_xor_si256(c2[1090],_mm256_xor_si256(c2[3650],_mm256_xor_si256(c2[3971],_mm256_xor_si256(c2[3345],_mm256_xor_si256(c2[3185],_mm256_xor_si256(c2[467],c2[1111]))))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[2563],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[3685],simde_mm256_xor_si256(c2[1127],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[4342],simde_mm256_xor_si256(c2[3862],simde_mm256_xor_si256(c2[2259],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[3073],simde_mm256_xor_si256(c2[2913],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[1968],simde_mm256_xor_si256(c2[2932],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[4228],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[567],simde_mm256_xor_si256(c2[2326],simde_mm256_xor_si256(c2[406],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[3777],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[4596],simde_mm256_xor_si256(c2[5076],simde_mm256_xor_si256(c2[1090],simde_mm256_xor_si256(c2[3650],simde_mm256_xor_si256(c2[3971],simde_mm256_xor_si256(c2[3345],simde_mm256_xor_si256(c2[3185],simde_mm256_xor_si256(c2[467],c2[1111]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[48]=_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[2247],_mm256_xor_si256(c2[4324],_mm256_xor_si256(c2[3056],_mm256_xor_si256(c2[2896],_mm256_xor_si256(c2[2416],_mm256_xor_si256(c2[821],_mm256_xor_si256(c2[1635],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[3718],_mm256_xor_si256(c2[3893],_mm256_xor_si256(c2[3733],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[4228],_mm256_xor_si256(c2[2790],_mm256_xor_si256(c2[4385],_mm256_xor_si256(c2[4240],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[727],_mm256_xor_si256(c2[4418],_mm256_xor_si256(c2[4258],_mm256_xor_si256(c2[5059],_mm256_xor_si256(c2[2339],_mm256_xor_si256(c2[4113],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[4771],_mm256_xor_si256(c2[2212],_mm256_xor_si256(c2[2533],_mm256_xor_si256(c2[1907],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[4148],_mm256_xor_si256(c2[4784],c2[4624]))))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[2247],simde_mm256_xor_si256(c2[4324],simde_mm256_xor_si256(c2[3056],simde_mm256_xor_si256(c2[2896],simde_mm256_xor_si256(c2[2416],simde_mm256_xor_si256(c2[821],simde_mm256_xor_si256(c2[1635],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[3718],simde_mm256_xor_si256(c2[3893],simde_mm256_xor_si256(c2[3733],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[4228],simde_mm256_xor_si256(c2[2790],simde_mm256_xor_si256(c2[4385],simde_mm256_xor_si256(c2[4240],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[727],simde_mm256_xor_si256(c2[4418],simde_mm256_xor_si256(c2[4258],simde_mm256_xor_si256(c2[5059],simde_mm256_xor_si256(c2[2339],simde_mm256_xor_si256(c2[4113],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[4771],simde_mm256_xor_si256(c2[2212],simde_mm256_xor_si256(c2[2533],simde_mm256_xor_si256(c2[1907],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[4148],simde_mm256_xor_si256(c2[4784],c2[4624]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[56]=_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[4483],_mm256_xor_si256(c2[1445],_mm256_xor_si256(c2[4160],_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[646],_mm256_xor_si256(c2[3859],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[823],_mm256_xor_si256(c2[4499],_mm256_xor_si256(c2[1616],_mm256_xor_si256(c2[4339],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[2438],_mm256_xor_si256(c2[2278],_mm256_xor_si256(c2[4993],_mm256_xor_si256(c2[2277],_mm256_xor_si256(c2[4513],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[4528],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[4048],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[5012],_mm256_xor_si256(c2[5031],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[3585],_mm256_xor_si256(c2[1189],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[2647],_mm256_xor_si256(c2[4566],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[4406],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[5061],_mm256_xor_si256(c2[2657],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[898],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[738],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[2512],_mm256_xor_si256(c2[1717],_mm256_xor_si256(c2[3953],_mm256_xor_si256(c2[1557],_mm256_xor_si256(c2[2036],_mm256_xor_si256(c2[455],_mm256_xor_si256(c2[3170],_mm256_xor_si256(c2[3015],_mm256_xor_si256(c2[611],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[932],_mm256_xor_si256(c2[2710],_mm256_xor_si256(c2[2550],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[4951],_mm256_xor_si256(c2[2547],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[468],c2[3191]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[56]=simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[4483],simde_mm256_xor_si256(c2[1445],simde_mm256_xor_si256(c2[4160],simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[646],simde_mm256_xor_si256(c2[3859],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[823],simde_mm256_xor_si256(c2[4499],simde_mm256_xor_si256(c2[1616],simde_mm256_xor_si256(c2[4339],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[2438],simde_mm256_xor_si256(c2[2278],simde_mm256_xor_si256(c2[4993],simde_mm256_xor_si256(c2[2277],simde_mm256_xor_si256(c2[4513],simde_mm256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[4528],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[4048],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[5012],simde_mm256_xor_si256(c2[5031],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[3585],simde_mm256_xor_si256(c2[1189],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[2647],simde_mm256_xor_si256(c2[4566],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[4406],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[5061],simde_mm256_xor_si256(c2[2657],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[898],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[738],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[2512],simde_mm256_xor_si256(c2[1717],simde_mm256_xor_si256(c2[3953],simde_mm256_xor_si256(c2[1557],simde_mm256_xor_si256(c2[2036],simde_mm256_xor_si256(c2[455],simde_mm256_xor_si256(c2[3170],simde_mm256_xor_si256(c2[3015],simde_mm256_xor_si256(c2[611],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[932],simde_mm256_xor_si256(c2[2710],simde_mm256_xor
_si256(c2[2550],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[4951],simde_mm256_xor_si256(c2[2547],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[468],c2[3191]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[64]=_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[2247],_mm256_xor_si256(c2[2084],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[3521],_mm256_xor_si256(c2[2244],_mm256_xor_si256(c2[4338],_mm256_xor_si256(c2[4178],_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[2103],_mm256_xor_si256(c2[4818],_mm256_xor_si256(c2[2917],_mm256_xor_si256(c2[2757],_mm256_xor_si256(c2[4992],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[5015],_mm256_xor_si256(c2[1972],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[2768],_mm256_xor_si256(c2[551],_mm256_xor_si256(c2[391],_mm256_xor_si256(c2[4064],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[2162],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[1382],_mm256_xor_si256(c2[1222],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[4432],_mm256_xor_si256(c2[1094],_mm256_xor_si256(c2[934],_mm256_xor_si256(c2[3494],_mm256_xor_si256(c2[3815],_mm256_xor_si256(c2[3189],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[471],_mm256_xor_si256(c2[311],c2[947]))))))))))))))))))))))))))))))))))))))))));
+     d2[64]=simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[2247],simde_mm256_xor_si256(c2[2084],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[3521],simde_mm256_xor_si256(c2[2244],simde_mm256_xor_si256(c2[4338],simde_mm256_xor_si256(c2[4178],simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[2103],simde_mm256_xor_si256(c2[4818],simde_mm256_xor_si256(c2[2917],simde_mm256_xor_si256(c2[2757],simde_mm256_xor_si256(c2[4992],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[5015],simde_mm256_xor_si256(c2[1972],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[2768],simde_mm256_xor_si256(c2[551],simde_mm256_xor_si256(c2[391],simde_mm256_xor_si256(c2[4064],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[2162],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[1382],simde_mm256_xor_si256(c2[1222],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[4432],simde_mm256_xor_si256(c2[1094],simde_mm256_xor_si256(c2[934],simde_mm256_xor_si256(c2[3494],simde_mm256_xor_si256(c2[3815],simde_mm256_xor_si256(c2[3189],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[471],simde_mm256_xor_si256(c2[311],c2[947]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[72]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[802],_mm256_xor_si256(c2[4967],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[4644],_mm256_xor_si256(c2[2084],_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[1939],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[1779],_mm256_xor_si256(c2[2261],_mm256_xor_si256(c2[1299],_mm256_xor_si256(c2[658],_mm256_xor_si256(c2[4823],_mm256_xor_si256(c2[1782],_mm256_xor_si256(c2[518],_mm256_xor_si256(c2[1312],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[3555],_mm256_xor_si256(c2[2593],_mm256_xor_si256(c2[2768],_mm256_xor_si256(c2[3570],_mm256_xor_si256(c2[2608],_mm256_xor_si256(c2[375],_mm256_xor_si256(c2[4532],_mm256_xor_si256(c2[1331],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[4065],_mm256_xor_si256(c2[3111],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[1665],_mm256_xor_si256(c2[4230],_mm256_xor_si256(c2[3268],_mm256_xor_si256(c2[4085],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[4882],_mm256_xor_si256(c2[3301],_mm256_xor_si256(c2[4103],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[4896],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[2176],_mm256_xor_si256(c2[1222],_mm256_xor_si256(c2[3958],_mm256_xor_si256(c2[2996],_mm256_xor_si256(c2[2995],_mm256_xor_si256(c2[2033],_mm256_xor_si256(c2[4608],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[2370],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[2214],_mm256_xor_si256(c2[790],_mm256_xor_si256(c2[1584],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[4629],c2[3667])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[72]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[802],simde_mm256_xor_si256(c2[4967],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[4644],simde_mm256_xor_si256(c2[2084],simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[1939],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[1779],simde_mm256_xor_si256(c2[2261],simde_mm256_xor_si256(c2[1299],simde_mm256_xor_si256(c2[658],simde_mm256_xor_si256(c2[4823],simde_mm256_xor_si256(c2[1782],simde_mm256_xor_si256(c2[518],simde_mm256_xor_si256(c2[1312],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[3555],simde_mm256_xor_si256(c2[2593],simde_mm256_xor_si256(c2[2768],simde_mm256_xor_si256(c2[3570],simde_mm256_xor_si256(c2[2608],simde_mm256_xor_si256(c2[375],simde_mm256_xor_si256(c2[4532],simde_mm256_xor_si256(c2[1331],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[4065],simde_mm256_xor_si256(c2[3111],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[1665],simde_mm256_xor_si256(c2[4230],simde_mm256_xor_si256(c2[3268],simde_mm256_xor_si256(c2[4085],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[4882],simde_mm256_xor_si256(c2[3301],simde_mm256_xor_si256(c2[4103],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[4896],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[2176],simde_mm256_xor_si256(c2[1222],simde_mm256_xor_si256(c2[3958],simde_mm256_xor_si256(c2[2996],simde_mm256_xor_si256(c2[2995],simde_mm256_xor_si256(c2[2033],simde_mm256_xor_si256(c2[4608],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[2370],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[2214],simde_mm256_xor_si256(c2[790],simde_mm256_xor_si256(c2[1584],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[4629],c2[3667])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[80]=_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[4021],_mm256_xor_si256(c2[96],c2[3475])));
+     d2[80]=simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[4021],simde_mm256_xor_si256(c2[96],c2[3475])));
 
 //row: 11
-     d2[88]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[4806],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[1941],_mm256_xor_si256(c2[1461],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[4977],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[2915],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[2770],_mm256_xor_si256(c2[4694],_mm256_xor_si256(c2[531],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[1827],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[3430],_mm256_xor_si256(c2[3285],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[3303],_mm256_xor_si256(c2[4096],_mm256_xor_si256(c2[1536],_mm256_xor_si256(c2[1376],_mm256_xor_si256(c2[3158],_mm256_xor_si256(c2[2355],_mm256_xor_si256(c2[2195],_mm256_xor_si256(c2[2039],_mm256_xor_si256(c2[3808],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[3185],_mm256_xor_si256(c2[3989],_mm256_xor_si256(c2[3829],c2[3030])))))))))))))))))))))))))))))))))))));
+     d2[88]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[4806],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[1941],simde_mm256_xor_si256(c2[1461],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[4977],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[2915],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[2770],simde_mm256_xor_si256(c2[4694],simde_mm256_xor_si256(c2[531],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[1827],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[3430],simde_mm256_xor_si256(c2[3285],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_si256(c2[3303],simde_mm256_xor_si256(c2[4096],simde_mm256_xor_si256(c2[1536],simde_mm256_xor_si256(c2[1376],simde_mm256_xor_si256(c2[3158],simde_mm256_xor_si256(c2[2355],simde_mm256_xor_si256(c2[2195],simde_mm256_xor_si256(c2[2039],simde_mm256_xor_si256(c2[3808],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[3185],simde_mm256_xor_si256(c2[3989],simde_mm256_xor_si256(c2[3829],c2[3030])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[96]=_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[4979],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[4977],_mm256_xor_si256(c2[4198],_mm256_xor_si256(c2[4038],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[1329],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[3093],_mm256_xor_si256(c2[4049],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[1684],_mm256_xor_si256(c2[3443],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[1702],_mm256_xor_si256(c2[2503],_mm256_xor_si256(c2[4902],_mm256_xor_si256(c2[1557],_mm256_xor_si256(c2[594],_mm256_xor_si256(c2[2215],_mm256_xor_si256(c2[4775],_mm256_xor_si256(c2[5088],_mm256_xor_si256(c2[4470],_mm256_xor_si256(c2[4310],_mm256_xor_si256(c2[1584],c2[2228]))))))))))))))))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[4979],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[4977],simde_mm256_xor_si256(c2[4198],simde_mm256_xor_si256(c2[4038],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[1329],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[3093],simde_mm256_xor_si256(c2[4049],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[1684],simde_mm256_xor_si256(c2[3443],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[1702],simde_mm256_xor_si256(c2[2503],simde_mm256_xor_si256(c2[4902],simde_mm256_xor_si256(c2[1557],simde_mm256_xor_si256(c2[594],simde_mm256_xor_si256(c2[2215],simde_mm256_xor_si256(c2[4775],simde_mm256_xor_si256(c2[5088],simde_mm256_xor_si256(c2[4470],simde_mm256_xor_si256(c2[4310],simde_mm256_xor_si256(c2[1584],c2[2228]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[104]=_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[2241],_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[983],_mm256_xor_si256(c2[823],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[1477],_mm256_xor_si256(c2[3872],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[3735],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[1488],_mm256_xor_si256(c2[4230],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[4547],_mm256_xor_si256(c2[4387],_mm256_xor_si256(c2[4242],_mm256_xor_si256(c2[1042],_mm256_xor_si256(c2[882],_mm256_xor_si256(c2[4260],_mm256_xor_si256(c2[5061],_mm256_xor_si256(c2[2501],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[4115],_mm256_xor_si256(c2[3312],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[4773],_mm256_xor_si256(c2[2214],_mm256_xor_si256(c2[2695],_mm256_xor_si256(c2[2535],_mm256_xor_si256(c2[1089],_mm256_xor_si256(c2[1749],_mm256_xor_si256(c2[4150],_mm256_xor_si256(c2[4946],c2[4786])))))))))))))))))))))))))))))))))))));
+     d2[104]=simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[2241],simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[983],simde_mm256_xor_si256(c2[823],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[1477],simde_mm256_xor_si256(c2[3872],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[3735],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[1488],simde_mm256_xor_si256(c2[4230],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[4547],simde_mm256_xor_si256(c2[4387],simde_mm256_xor_si256(c2[4242],simde_mm256_xor_si256(c2[1042],simde_mm256_xor_si256(c2[882],simde_mm256_xor_si256(c2[4260],simde_mm256_xor_si256(c2[5061],simde_mm256_xor_si256(c2[2501],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c2[4115],simde_mm256_xor_si256(c2[3312],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[4773],simde_mm256_xor_si256(c2[2214],simde_mm256_xor_si256(c2[2695],simde_mm256_xor_si256(c2[2535],simde_mm256_xor_si256(c2[1089],simde_mm256_xor_si256(c2[1749],simde_mm256_xor_si256(c2[4150],simde_mm256_xor_si256(c2[4946],c2[4786])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[112]=_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[2567],_mm256_xor_si256(c2[1445],_mm256_xor_si256(c2[2244],_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[3841],_mm256_xor_si256(c2[3859],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[4018],_mm256_xor_si256(c2[2583],_mm256_xor_si256(c2[1616],_mm256_xor_si256(c2[2423],_mm256_xor_si256(c2[3059],_mm256_xor_si256(c2[2438],_mm256_xor_si256(c2[2278],_mm256_xor_si256(c2[3077],_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[4513],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[4528],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[2289],_mm256_xor_si256(c2[3088],_mm256_xor_si256(c2[5031],_mm256_xor_si256(c2[711],_mm256_xor_si256(c2[3585],_mm256_xor_si256(c2[4384],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[723],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[2482],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[5061],_mm256_xor_si256(c2[741],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[4101],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[3941],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[596],_mm256_xor_si256(c2[4912],_mm256_xor_si256(c2[3953],_mm256_xor_si256(c2[4752],_mm256_xor_si256(c2[455],_mm256_xor_si256(c2[1254],_mm256_xor_si256(c2[3015],_mm256_xor_si256(c2[3814],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[4135],_mm256_xor_si256(c2[2710],_mm256_xor_si256(c2[2550],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[4951],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[1427],_mm256_xor_si256(c2[468],c2[1267])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[112]=simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[2567],simde_mm256_xor_si256(c2[1445],simde_mm256_xor_si256(c2[2244],simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[3841],simde_mm256_xor_si256(c2[3859],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[4018],simde_mm256_xor_si256(c2[2583],simde_mm256_xor_si256(c2[1616],simde_mm256_xor_si256(c2[2423],simde_mm256_xor_si256(c2[3059],simde_mm256_xor_si256(c2[2438],simde_mm256_xor_si256(c2[2278],simde_mm256_xor_si256(c2[3077],simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[4513],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[4528],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[2289],simde_mm256_xor_si256(c2[3088],simde_mm256_xor_si256(c2[5031],simde_mm256_xor_si256(c2[711],simde_mm256_xor_si256(c2[3585],simde_mm256_xor_si256(c2[4384],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[723],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[2482],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[5061],simde_mm256_xor_si256(c2[741],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[4101],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[3941],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[596],simde_mm256_xor_si256(c2[4912],simde_mm256_xor_si256(c2[3953],simde_mm256_xor_si256(c2[4752],simde_mm256_xor_si256(c2[455],simde_mm256_xor_si256(c2[1254],simde_mm256_xor_si256(c2[3015],simde_mm256_xor_si256(c2[3814],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[4135],simde_mm256_xor_si256(c2[2710],simde_mm256_xor_si256(c2[2550],simde_mm256_xor
_si256(c2[3349],simde_mm256_xor_si256(c2[4951],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[1427],simde_mm256_xor_si256(c2[468],c2[1267])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[120]=_mm256_xor_si256(c2[4806],_mm256_xor_si256(c2[3685],_mm256_xor_si256(c2[4646],_mm256_xor_si256(c2[3362],_mm256_xor_si256(c2[4323],_mm256_xor_si256(c2[4967],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[1618],_mm256_xor_si256(c2[497],_mm256_xor_si256(c2[1458],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[3541],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[4195],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[1319],_mm256_xor_si256(c2[2272],_mm256_xor_si256(c2[2455],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[2295],_mm256_xor_si256(c2[3250],_mm256_xor_si256(c2[4211],_mm256_xor_si256(c2[4214],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[2790],_mm256_xor_si256(c2[391],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[1986],_mm256_xor_si256(c2[2947],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[2802],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[1859],_mm256_xor_si256(c2[2820],_mm256_xor_si256(c2[2660],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[5059],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[2675],_mm256_xor_si256(c2[759],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[2372],_mm256_xor_si256(c2[3333],_mm256_xor_si256(c2[4932],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[1095],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[4467],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[1749],_mm256_xor_si256(c2[2710],_mm256_xor_si256(c2[2385],c2[3346]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[4806],simde_mm256_xor_si256(c2[3685],simde_mm256_xor_si256(c2[4646],simde_mm256_xor_si256(c2[3362],simde_mm256_xor_si256(c2[4323],simde_mm256_xor_si256(c2[4967],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[1618],simde_mm256_xor_si256(c2[497],simde_mm256_xor_si256(c2[1458],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[3541],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[4195],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[1319],simde_mm256_xor_si256(c2[2272],simde_mm256_xor_si256(c2[2455],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[2295],simde_mm256_xor_si256(c2[3250],simde_mm256_xor_si256(c2[4211],simde_mm256_xor_si256(c2[4214],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[2790],simde_mm256_xor_si256(c2[391],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[1986],simde_mm256_xor_si256(c2[2947],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[2802],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[1859],simde_mm256_xor_si256(c2[2820],simde_mm256_xor_si256(c2[2660],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[5059],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[2675],simde_mm256_xor_si256(c2[759],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[2372],simde_mm256_xor_si256(c2[3333],simde_mm256_xor_si256(c2[4932],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[1095],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[4467],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[1749],simde_mm256_xor_si256(c2[2710],simde_mm256_xor_si256(c2[2385],c2[3346]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[128]=_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[4167],_mm256_xor_si256(c2[4004],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[3844],_mm256_xor_si256(c2[3842],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[4659],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[4499],_mm256_xor_si256(c2[979],_mm256_xor_si256(c2[4019],_mm256_xor_si256(c2[499],_mm256_xor_si256(c2[2416],_mm256_xor_si256(c2[4023],_mm256_xor_si256(c2[1782],_mm256_xor_si256(c2[3238],_mm256_xor_si256(c2[4837],_mm256_xor_si256(c2[3078],_mm256_xor_si256(c2[4677],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[1793],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[1968],_mm256_xor_si256(c2[209],_mm256_xor_si256(c2[1808],_mm256_xor_si256(c2[3892],_mm256_xor_si256(c2[2133],_mm256_xor_si256(c2[3732],_mm256_xor_si256(c2[3089],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[4385],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[2468],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[2323],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[4082],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[2501],_mm256_xor_si256(c2[742],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[3142],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[422],_mm256_xor_si256(c2[2356],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[2196],_mm256_xor_si256(c2[4753],_mm256_xor_si256(c2[1233],_mm256_xor_si256(c2[3014],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[2854],_mm256_xor_si256(c2[3815],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[4128],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[3510],_mm256_xor_si256(c2[5109],_mm256_xor_si256(c2[3350],_mm256_xor_si256(c2[4949],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[1268],_mm256_xor_si256(c2[2867],c2[2388])))))))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))))));
+     d2[128]=simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[4167],simde_mm256_xor_si256(c2[4004],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[3844],simde_mm256_xor_si256(c2[3842],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[4659],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[4499],simde_mm256_xor_si256(c2[979],simde_mm256_xor_si256(c2[4019],simde_mm256_xor_si256(c2[499],simde_mm256_xor_si256(c2[2416],simde_mm256_xor_si256(c2[4023],simde_mm256_xor_si256(c2[1782],simde_mm256_xor_si256(c2[3238],simde_mm256_xor_si256(c2[4837],simde_mm256_xor_si256(c2[3078],simde_mm256_xor_si256(c2[4677],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[1793],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[1968],simde_mm256_xor_si256(c2[209],simde_mm256_xor_si256(c2[1808],simde_mm256_xor_si256(c2[3892],simde_mm256_xor_si256(c2[2133],simde_mm256_xor_si256(c2[3732],simde_mm256_xor_si256(c2[3089],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[4385],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[2468],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[2323],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[4082],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[2501],simde_mm256_xor_si256(c2[742],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[3142],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[422],simde_mm256_xor_si256(c2[2356],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[2196],simde_mm256_xor_si256(c2[4753],simde_mm256_xor_si256(c2[1233],simde_mm256_xor_si256(c2[3014],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[2854],simde_mm256_xor_si256(c2[3815],simde_mm256_xor_si256(c2[295],simde_mm256_xor_s
i256(c2[4128],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[3510],simde_mm256_xor_si256(c2[5109],simde_mm256_xor_si256(c2[3350],simde_mm256_xor_si256(c2[4949],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[1268],simde_mm256_xor_si256(c2[2867],c2[2388])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[136]=_mm256_xor_si256(c2[4323],_mm256_xor_si256(c2[1442],_mm256_xor_si256(c2[4163],_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1127],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[1143],_mm256_xor_si256(c2[3381],_mm256_xor_si256(c2[983],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[503],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[4019],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[4823],_mm256_xor_si256(c2[4833],_mm256_xor_si256(c2[1952],_mm256_xor_si256(c2[4673],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[1797],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[1972],_mm256_xor_si256(c2[4210],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[4050],_mm256_xor_si256(c2[1015],_mm256_xor_si256(c2[3728],_mm256_xor_si256(c2[855],_mm256_xor_si256(c2[4692],_mm256_xor_si256(c2[1811],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[2307],_mm256_xor_si256(c2[4545],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[3107],_mm256_xor_si256(c2[2464],_mm256_xor_si256(c2[4710],_mm256_xor_si256(c2[4725],_mm256_xor_si256(c2[2327],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[4086],_mm256_xor_si256(c2[1205],_mm256_xor_si256(c2[4563],_mm256_xor_si256(c2[2497],_mm256_xor_si256(c2[4743],_mm256_xor_si256(c2[2337],_mm256_xor_si256(c2[4583],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[3138],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[418],_mm256_xor_si256(c2[2656],_mm256_xor_si256(c2[4598],_mm256_xor_si256(c2[2192],_mm256_xor_si256(c2[4438],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[3475],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[5088],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[2529],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[5105],_mm256_xor_si256(c2[2224],_mm256_xor_si256(c2[4945],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[4625],_mm256_xor_si256(c2[2227],_mm256_xor_si256(c2[4465],_mm256_xor_si256(c2[2871],c2[5109]))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))));
+     d2[136]=simde_mm256_xor_si256(c2[4323],simde_mm256_xor_si256(c2[1442],simde_mm256_xor_si256(c2[4163],simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1127],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[1143],simde_mm256_xor_si256(c2[3381],simde_mm256_xor_si256(c2[983],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[503],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[4019],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[4823],simde_mm256_xor_si256(c2[4833],simde_mm256_xor_si256(c2[1952],simde_mm256_xor_si256(c2[4673],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[1797],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[1972],simde_mm256_xor_si256(c2[4210],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[4050],simde_mm256_xor_si256(c2[1015],simde_mm256_xor_si256(c2[3728],simde_mm256_xor_si256(c2[855],simde_mm256_xor_si256(c2[4692],simde_mm256_xor_si256(c2[1811],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[2307],simde_mm256_xor_si256(c2[4545],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[3107],simde_mm256_xor_si256(c2[2464],simde_mm256_xor_si256(c2[4710],simde_mm256_xor_si256(c2[4725],simde_mm256_xor_si256(c2[2327],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[4086],simde_mm256_xor_si256(c2[1205],simde_mm256_xor_si256(c2[4563],simde_mm256_xor_si256(c2[2497],simde_mm256_xor_si256(c2[4743],simde_mm256_xor_si256(c2[2337],simde_mm256_xor_si256(c2[4583],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[3138],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[418],simde_mm256_xor_si256(c2[2656],simde_mm256_xor_si256(c2[4598],simde_mm256_xor_si256(c2[2192],simde_mm256_xor_si256(c2[4438],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[3475],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[5088],simde_mm256_xor_si256(c2[291],simde_mm256_x
or_si256(c2[2529],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[5105],simde_mm256_xor_si256(c2[2224],simde_mm256_xor_si256(c2[4945],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[4625],simde_mm256_xor_si256(c2[2227],simde_mm256_xor_si256(c2[4465],simde_mm256_xor_si256(c2[2871],c2[5109])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[144]=_mm256_xor_si256(c2[4486],_mm256_xor_si256(c2[422],c2[2993]));
+     d2[144]=simde_mm256_xor_si256(c2[4486],simde_mm256_xor_si256(c2[422],c2[2993]));
 
 //row: 19
-     d2[152]=_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[4962],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[3682],_mm256_xor_si256(c2[2097],_mm256_xor_si256(c2[1617],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[3216],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[2919],_mm256_xor_si256(c2[2934],_mm256_xor_si256(c2[4850],_mm256_xor_si256(c2[695],_mm256_xor_si256(c2[3429],_mm256_xor_si256(c2[1991],_mm256_xor_si256(c2[3586],_mm256_xor_si256(c2[3441],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[3459],_mm256_xor_si256(c2[4260],_mm256_xor_si256(c2[1540],_mm256_xor_si256(c2[3314],_mm256_xor_si256(c2[2359],_mm256_xor_si256(c2[3972],_mm256_xor_si256(c2[1413],_mm256_xor_si256(c2[1734],_mm256_xor_si256(c2[948],_mm256_xor_si256(c2[3349],c2[3985]))))))))))))))))))))))))))));
+     d2[152]=simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[4962],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[3682],simde_mm256_xor_si256(c2[2097],simde_mm256_xor_si256(c2[1617],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[3216],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[2919],simde_mm256_xor_si256(c2[2934],simde_mm256_xor_si256(c2[4850],simde_mm256_xor_si256(c2[695],simde_mm256_xor_si256(c2[3429],simde_mm256_xor_si256(c2[1991],simde_mm256_xor_si256(c2[3586],simde_mm256_xor_si256(c2[3441],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[3459],simde_mm256_xor_si256(c2[4260],simde_mm256_xor_si256(c2[1540],simde_mm256_xor_si256(c2[3314],simde_mm256_xor_si256(c2[2359],simde_mm256_xor_si256(c2[3972],simde_mm256_xor_si256(c2[1413],simde_mm256_xor_si256(c2[1734],simde_mm256_xor_si256(c2[948],simde_mm256_xor_si256(c2[3349],c2[3985]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[160]=_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[486],_mm256_xor_si256(c2[2083],_mm256_xor_si256(c2[2900],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[2260],_mm256_xor_si256(c2[657],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[1479],_mm256_xor_si256(c2[1319],_mm256_xor_si256(c2[3554],_mm256_xor_si256(c2[3729],_mm256_xor_si256(c2[3569],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[1330],_mm256_xor_si256(c2[4064],_mm256_xor_si256(c2[2626],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[4084],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[4262],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[2183],_mm256_xor_si256(c2[3957],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[4615],_mm256_xor_si256(c2[2048],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[1751],_mm256_xor_si256(c2[1591],_mm256_xor_si256(c2[3984],c2[4628]))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[486],simde_mm256_xor_si256(c2[2083],simde_mm256_xor_si256(c2[2900],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[2260],simde_mm256_xor_si256(c2[657],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[1479],simde_mm256_xor_si256(c2[1319],simde_mm256_xor_si256(c2[3554],simde_mm256_xor_si256(c2[3729],simde_mm256_xor_si256(c2[3569],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[1330],simde_mm256_xor_si256(c2[4064],simde_mm256_xor_si256(c2[2626],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[4084],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[4262],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[2183],simde_mm256_xor_si256(c2[3957],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[4615],simde_mm256_xor_si256(c2[2048],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[1751],simde_mm256_xor_si256(c2[1591],simde_mm256_xor_si256(c2[3984],c2[4628]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[168]=_mm256_xor_si256(c2[4166],_mm256_xor_si256(c2[3843],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[498],_mm256_xor_si256(c2[4182],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[4676],_mm256_xor_si256(c2[1952],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[1815],_mm256_xor_si256(c2[3731],_mm256_xor_si256(c2[4695],_mm256_xor_si256(c2[2310],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[4241],_mm256_xor_si256(c2[4081],_mm256_xor_si256(c2[2340],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[2195],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[2853],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[615],_mm256_xor_si256(c2[1729],_mm256_xor_si256(c2[4948],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[3026],c2[2866]))))))))))))))))))))))))))))))))))));
+     d2[168]=simde_mm256_xor_si256(c2[4166],simde_mm256_xor_si256(c2[3843],simde_mm256_xor_si256(c2[321],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[498],simde_mm256_xor_si256(c2[4182],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[4676],simde_mm256_xor_si256(c2[1952],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[1815],simde_mm256_xor_si256(c2[3731],simde_mm256_xor_si256(c2[4695],simde_mm256_xor_si256(c2[2310],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[4241],simde_mm256_xor_si256(c2[4081],simde_mm256_xor_si256(c2[2340],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[2195],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[2853],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[615],simde_mm256_xor_si256(c2[1729],simde_mm256_xor_si256(c2[4948],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[3026],c2[2866]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[176]=_mm256_xor_si256(c2[4822],c2[4993]);
+     d2[176]=simde_mm256_xor_si256(c2[4822],c2[4993]);
 
 //row: 23
-     d2[184]=_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[1815],c2[2327]));
+     d2[184]=simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[1815],c2[2327]));
 
 //row: 24
-     d2[192]=_mm256_xor_si256(c2[2257],_mm256_xor_si256(c2[1796],c2[1424]));
+     d2[192]=simde_mm256_xor_si256(c2[2257],simde_mm256_xor_si256(c2[1796],c2[1424]));
 
 //row: 25
-     d2[200]=_mm256_xor_si256(c2[647],c2[4564]);
+     d2[200]=simde_mm256_xor_si256(c2[647],c2[4564]);
 
 //row: 26
-     d2[208]=_mm256_xor_si256(c2[1445],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[2567],_mm256_xor_si256(c2[1606],_mm256_xor_si256(c2[3376],_mm256_xor_si256(c2[3216],_mm256_xor_si256(c2[2263],_mm256_xor_si256(c2[2736],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[1141],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[1955],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[834],_mm256_xor_si256(c2[3237],_mm256_xor_si256(c2[4038],_mm256_xor_si256(c2[3077],_mm256_xor_si256(c2[4672],_mm256_xor_si256(c2[4213],_mm256_xor_si256(c2[4053],_mm256_xor_si256(c2[3092],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[850],_mm256_xor_si256(c2[5008],_mm256_xor_si256(c2[1814],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[4708],_mm256_xor_si256(c2[4548],_mm256_xor_si256(c2[3587],_mm256_xor_si256(c2[3110],_mm256_xor_si256(c2[2149],_mm256_xor_si256(c2[3904],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[3744],_mm256_xor_si256(c2[4720],_mm256_xor_si256(c2[4560],_mm256_xor_si256(c2[3607],_mm256_xor_si256(c2[407],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[247],_mm256_xor_si256(c2[4738],_mm256_xor_si256(c2[4578],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[4418],_mm256_xor_si256(c2[1858],_mm256_xor_si256(c2[2659],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[4593],_mm256_xor_si256(c2[4433],_mm256_xor_si256(c2[3472],_mm256_xor_si256(c2[2677],_mm256_xor_si256(c2[3478],_mm256_xor_si256(c2[2517],_mm256_xor_si256(c2[2516],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[5091],_mm256_xor_si256(c2[4130],_mm256_xor_si256(c2[2532],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[2052],_mm256_xor_si256(c2[2853],_mm256_xor_si256(c2[1892],_mm256_xor_si256(c2[2227],_mm256_xor_si256(c2[2067],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[4628],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[3507],_mm256_xor_si256(c2[4311],_mm256_xor_si256(c2[5104]
,c2[4151])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[208]=simde_mm256_xor_si256(c2[1445],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[2567],simde_mm256_xor_si256(c2[1606],simde_mm256_xor_si256(c2[3376],simde_mm256_xor_si256(c2[3216],simde_mm256_xor_si256(c2[2263],simde_mm256_xor_si256(c2[2736],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[1141],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[1955],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[834],simde_mm256_xor_si256(c2[3237],simde_mm256_xor_si256(c2[4038],simde_mm256_xor_si256(c2[3077],simde_mm256_xor_si256(c2[4672],simde_mm256_xor_si256(c2[4213],simde_mm256_xor_si256(c2[4053],simde_mm256_xor_si256(c2[3092],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[850],simde_mm256_xor_si256(c2[5008],simde_mm256_xor_si256(c2[1814],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[4708],simde_mm256_xor_si256(c2[4548],simde_mm256_xor_si256(c2[3587],simde_mm256_xor_si256(c2[3110],simde_mm256_xor_si256(c2[2149],simde_mm256_xor_si256(c2[3904],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[3744],simde_mm256_xor_si256(c2[4720],simde_mm256_xor_si256(c2[4560],simde_mm256_xor_si256(c2[3607],simde_mm256_xor_si256(c2[407],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[247],simde_mm256_xor_si256(c2[4738],simde_mm256_xor_si256(c2[4578],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[4418],simde_mm256_xor_si256(c2[1858],simde_mm256_xor_si256(c2[2659],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[4593],simde_mm256_xor_si256(c2[4433],simde_mm256_xor_si256(c2[3472],simde_mm256_xor_si256(c2[2677],simde_mm256_xor_si256(c2[3478],simde_mm256_xor_si256(c2[2517],simde_mm256_xor_si256(c2[2516],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[5091],simde_mm256_xor_si256(c2[4130],simde_mm256_xor_s
i256(c2[2532],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[2052],simde_mm256_xor_si256(c2[2853],simde_mm256_xor_si256(c2[1892],simde_mm256_xor_si256(c2[2227],simde_mm256_xor_si256(c2[2067],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[4628],simde_mm256_xor_si256(c2[4468],simde_mm256_xor_si256(c2[3507],simde_mm256_xor_si256(c2[4311],simde_mm256_xor_si256(c2[5104],c2[4151])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[216]=_mm256_xor_si256(c2[1280],c2[3780]);
+     d2[216]=simde_mm256_xor_si256(c2[1280],c2[3780]);
 
 //row: 28
-     d2[224]=_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[835],c2[1204]));
+     d2[224]=simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256(c2[835],c2[1204]));
 
 //row: 29
-     d2[232]=_mm256_xor_si256(c2[2880],c2[4544]);
+     d2[232]=simde_mm256_xor_si256(c2[2880],c2[4544]);
 
 //row: 30
-     d2[240]=_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[2647],_mm256_xor_si256(c2[1552],c2[3346])));
+     d2[240]=simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[2647],simde_mm256_xor_si256(c2[1552],c2[3346])));
 
 //row: 31
-     d2[248]=_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[3379],_mm256_xor_si256(c2[2899],_mm256_xor_si256(c2[1456],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[1619],_mm256_xor_si256(c2[1958],_mm256_xor_si256(c2[4353],_mm256_xor_si256(c2[4193],_mm256_xor_si256(c2[4208],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[1969],_mm256_xor_si256(c2[4711],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[5028],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[4741],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[2982],_mm256_xor_si256(c2[2822],_mm256_xor_si256(c2[4596],_mm256_xor_si256(c2[3793],_mm256_xor_si256(c2[3633],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[2695],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[3008],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[4631],_mm256_xor_si256(c2[308],c2[148])))))))))))))))))))))))))))))))))));
+     d2[248]=simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[3379],simde_mm256_xor_si256(c2[2899],simde_mm256_xor_si256(c2[1456],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[1619],simde_mm256_xor_si256(c2[1958],simde_mm256_xor_si256(c2[4353],simde_mm256_xor_si256(c2[4193],simde_mm256_xor_si256(c2[4208],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[1969],simde_mm256_xor_si256(c2[4711],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[5028],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[4741],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[2982],simde_mm256_xor_si256(c2[2822],simde_mm256_xor_si256(c2[4596],simde_mm256_xor_si256(c2[3793],simde_mm256_xor_si256(c2[3633],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[2695],simde_mm256_xor_si256(c2[3168],simde_mm256_xor_si256(c2[3008],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[4631],simde_mm256_xor_si256(c2[308],c2[148])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[256]=_mm256_xor_si256(c2[2405],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[2082],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[3527],_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[4336],_mm256_xor_si256(c2[4176],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[2101],_mm256_xor_si256(c2[2915],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[4998],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[5013],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[1810],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[4070],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[561],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[2160],_mm256_xor_si256(c2[2001],_mm256_xor_si256(c2[579],_mm256_xor_si256(c2[419],_mm256_xor_si256(c2[1380],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[3619],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[4438],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[932],_mm256_xor_si256(c2[3492],_mm256_xor_si256(c2[3813],_mm256_xor_si256(c2[3187],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[309],c2[945]))))))))))))))))))))))))))))))))))))))))));
+     d2[256]=simde_mm256_xor_si256(c2[2405],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[2082],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[3527],simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[4336],simde_mm256_xor_si256(c2[4176],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[2101],simde_mm256_xor_si256(c2[2915],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[4998],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[5013],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[1810],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[4070],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[561],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[2160],simde_mm256_xor_si256(c2[2001],simde_mm256_xor_si256(c2[579],simde_mm256_xor_si256(c2[419],simde_mm256_xor_si256(c2[1380],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[3619],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[4438],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[932],simde_mm256_xor_si256(c2[3492],simde_mm256_xor_si256(c2[3813],simde_mm256_xor_si256(c2[3187],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[309],c2[945]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[264]=_mm256_xor_si256(c2[3047],_mm256_xor_si256(c2[2724],_mm256_xor_si256(c2[4321],_mm256_xor_si256(c2[4978],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[3557],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[2612],_mm256_xor_si256(c2[3568],_mm256_xor_si256(c2[1191],_mm256_xor_si256(c2[4864],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[1203],_mm256_xor_si256(c2[2962],_mm256_xor_si256(c2[1221],_mm256_xor_si256(c2[2022],_mm256_xor_si256(c2[4421],_mm256_xor_si256(c2[1076],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[1734],_mm256_xor_si256(c2[4294],_mm256_xor_si256(c2[4615],_mm256_xor_si256(c2[3829],_mm256_xor_si256(c2[1111],c2[1747]))))))))))))))))))))))))))));
+     d2[264]=simde_mm256_xor_si256(c2[3047],simde_mm256_xor_si256(c2[2724],simde_mm256_xor_si256(c2[4321],simde_mm256_xor_si256(c2[4978],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[3557],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[2612],simde_mm256_xor_si256(c2[3568],simde_mm256_xor_si256(c2[1191],simde_mm256_xor_si256(c2[4864],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[1203],simde_mm256_xor_si256(c2[2962],simde_mm256_xor_si256(c2[1221],simde_mm256_xor_si256(c2[2022],simde_mm256_xor_si256(c2[4421],simde_mm256_xor_si256(c2[1076],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[1734],simde_mm256_xor_si256(c2[4294],simde_mm256_xor_si256(c2[4615],simde_mm256_xor_si256(c2[3829],simde_mm256_xor_si256(c2[1111],c2[1747]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[272]=_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[4642],_mm256_xor_si256(c2[1921],_mm256_xor_si256(c2[4487],_mm256_xor_si256(c2[4327],_mm256_xor_si256(c2[1606],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[3203],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[1462],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[982],_mm256_xor_si256(c2[3380],_mm256_xor_si256(c2[1937],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[1777],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[2439],_mm256_xor_si256(c2[4834],_mm256_xor_si256(c2[2276],_mm256_xor_si256(c2[4674],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[2291],_mm256_xor_si256(c2[4689],_mm256_xor_si256(c2[4375],_mm256_xor_si256(c2[4215],_mm256_xor_si256(c2[1494],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[2450],_mm256_xor_si256(c2[2946],_mm256_xor_si256(c2[2786],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[3746],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[2951],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[2806],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[1844],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[2816],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[3777],_mm256_xor_si256(c2[3617],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[3463],_mm256_xor_si256(c2[897],_mm256_xor_si256(c2[3303],_mm256_xor_si256(c2[2839],_mm256_xor_si256(c2[2679],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[4274],_mm256_xor_si256(c2[1716],_mm256_xor_si256(c2[4114],_mm256_xor_si256(c2[3489],_mm256_xor_si256(c2[3329],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[3168],_mm256_xor_si256(c2[3649],_mm256_xor_si256(c2[1091],_mm256_xor_si256(c2[3489],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[305],_mm256_xor_si256(c2[2711],_mm256_xor_si256(c2[2866],_mm256_xor_si256(c2[2706],_mm256_xor_si256(c2[5104],_mm256_xor_si256(c2[789],_mm256_xor_si256(c2[3350],c2[629]))))))))))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[272]=simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[4642],simde_mm256_xor_si256(c2[1921],simde_mm256_xor_si256(c2[4487],simde_mm256_xor_si256(c2[4327],simde_mm256_xor_si256(c2[1606],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[3203],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[1462],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[982],simde_mm256_xor_si256(c2[3380],simde_mm256_xor_si256(c2[1937],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[1777],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[2439],simde_mm256_xor_si256(c2[4834],simde_mm256_xor_si256(c2[2276],simde_mm256_xor_si256(c2[4674],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[2291],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[4375],simde_mm256_xor_si256(c2[4215],simde_mm256_xor_si256(c2[1494],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[2450],simde_mm256_xor_si256(c2[2946],simde_mm256_xor_si256(c2[2786],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[3746],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[2951],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[2806],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[1844],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[2816],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[3777],simde_mm256_xor_si256(c2[3617],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[3463],simde_mm256_xor_si256(c2[897],simde_mm256_xor_si256(c2[3303],simde_mm256_xor_si256(c2[2839],simde_mm256_xor_si256(c2[2679],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[4274],simde_mm256_xor_si256(c2[1716],simde_mm256_xor_si256(c2[4114],simde_mm256_xor_si256(c2[3489],simde_mm256_xor_si256(c2[3329],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si25
6(c2[3168],simde_mm256_xor_si256(c2[3649],simde_mm256_xor_si256(c2[1091],simde_mm256_xor_si256(c2[3489],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[305],simde_mm256_xor_si256(c2[2711],simde_mm256_xor_si256(c2[2866],simde_mm256_xor_si256(c2[2706],simde_mm256_xor_si256(c2[5104],simde_mm256_xor_si256(c2[789],simde_mm256_xor_si256(c2[3350],c2[629]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[280]=_mm256_xor_si256(c2[1282],_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[807],_mm256_xor_si256(c2[2404],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[3061],_mm256_xor_si256(c2[2581],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[4017],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[1632],_mm256_xor_si256(c2[3875],_mm256_xor_si256(c2[4050],_mm256_xor_si256(c2[3890],_mm256_xor_si256(c2[695],_mm256_xor_si256(c2[1651],_mm256_xor_si256(c2[4385],_mm256_xor_si256(c2[2947],_mm256_xor_si256(c2[4550],_mm256_xor_si256(c2[4405],_mm256_xor_si256(c2[1045],_mm256_xor_si256(c2[1361],_mm256_xor_si256(c2[4583],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[2496],_mm256_xor_si256(c2[4278],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[4928],_mm256_xor_si256(c2[2369],_mm256_xor_si256(c2[2690],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[4305],c2[4949]))))))))))))))))))))))))))))))))));
+     d2[280]=simde_mm256_xor_si256(c2[1282],simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[807],simde_mm256_xor_si256(c2[2404],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[3061],simde_mm256_xor_si256(c2[2581],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[4017],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[1632],simde_mm256_xor_si256(c2[3875],simde_mm256_xor_si256(c2[4050],simde_mm256_xor_si256(c2[3890],simde_mm256_xor_si256(c2[695],simde_mm256_xor_si256(c2[1651],simde_mm256_xor_si256(c2[4385],simde_mm256_xor_si256(c2[2947],simde_mm256_xor_si256(c2[4550],simde_mm256_xor_si256(c2[4405],simde_mm256_xor_si256(c2[1045],simde_mm256_xor_si256(c2[1361],simde_mm256_xor_si256(c2[4583],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[2496],simde_mm256_xor_si256(c2[4278],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[4928],simde_mm256_xor_si256(c2[2369],simde_mm256_xor_si256(c2[2690],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[4305],c2[4949]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[288]=_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[993],c2[4276]));
+     d2[288]=simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[993],c2[4276]));
 
 //row: 37
-     d2[296]=_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[4964],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[4641],_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[1127],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[1776],_mm256_xor_si256(c2[1938],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[4980],_mm256_xor_si256(c2[343],_mm256_xor_si256(c2[4820],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[2758],_mm256_xor_si256(c2[3232],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[3255],_mm256_xor_si256(c2[2613],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[4529],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[3750],_mm256_xor_si256(c2[3108],_mm256_xor_si256(c2[2304],_mm256_xor_si256(c2[1670],_mm256_xor_si256(c2[3425],_mm256_xor_si256(c2[3907],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[3762],_mm256_xor_si256(c2[3120],_mm256_xor_si256(c2[5047],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[3138],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[1379],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[3635],_mm256_xor_si256(c2[2993],_mm256_xor_si256(c2[2198],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[2038],_mm256_xor_si256(c2[4293],_mm256_xor_si256(c2[3651],_mm256_xor_si256(c2[1734],_mm256_xor_si256(c2[1092],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[2055],_mm256_xor_si256(c2[1413],_mm256_xor_si256(c2[1269],_mm256_xor_si256(c2[627],_mm256_xor_si256(c2[3670],_mm256_xor_si256(c2[3028],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[4306],c2[3664])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[296]=simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[4964],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[4641],simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[1127],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[1776],simde_mm256_xor_si256(c2[1938],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[4980],simde_mm256_xor_si256(c2[343],simde_mm256_xor_si256(c2[4820],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[2758],simde_mm256_xor_si256(c2[3232],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[3255],simde_mm256_xor_si256(c2[2613],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[4529],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[3750],simde_mm256_xor_si256(c2[3108],simde_mm256_xor_si256(c2[2304],simde_mm256_xor_si256(c2[1670],simde_mm256_xor_si256(c2[3425],simde_mm256_xor_si256(c2[3907],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[3762],simde_mm256_xor_si256(c2[3120],simde_mm256_xor_si256(c2[5047],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[3138],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[1379],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[3635],simde_mm256_xor_si256(c2[2993],simde_mm256_xor_si256(c2[2198],simde_mm256_xor_si256(c2[2672],simde_mm256_xor_si256(c2[2038],simde_mm256_xor_si256(c2[4293],simde_mm256_xor_si256(c2[3651],simde_mm256_xor_si256(c2[1734],simde_mm256_xor_si256(c2[1092],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[2055],simde_mm256_xor_si256(c2[1413],simde_mm256_xor_si256(c2[1269],simde_mm256_xor_si256(c2[627],simde_mm256_xor_si256(c2[3670],simde_mm256_xor_si256(c2[3028],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[4306],c2[3664])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[304]=_mm256_xor_si256(c2[2401],_mm256_xor_si256(c2[2241],_mm256_xor_si256(c2[1926],_mm256_xor_si256(c2[3523],_mm256_xor_si256(c2[4340],_mm256_xor_si256(c2[4180],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[2097],_mm256_xor_si256(c2[4976],_mm256_xor_si256(c2[2919],_mm256_xor_si256(c2[2759],_mm256_xor_si256(c2[4994],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[5009],_mm256_xor_si256(c2[1814],_mm256_xor_si256(c2[2770],_mm256_xor_si256(c2[385],_mm256_xor_si256(c2[4066],_mm256_xor_si256(c2[550],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[2164],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[1216],_mm256_xor_si256(c2[3623],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[4434],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[3488],_mm256_xor_si256(c2[3809],_mm256_xor_si256(c2[3191],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[305],c2[949]))))))))))))))))))))))))))))))))));
+     d2[304]=simde_mm256_xor_si256(c2[2401],simde_mm256_xor_si256(c2[2241],simde_mm256_xor_si256(c2[1926],simde_mm256_xor_si256(c2[3523],simde_mm256_xor_si256(c2[4340],simde_mm256_xor_si256(c2[4180],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[2097],simde_mm256_xor_si256(c2[4976],simde_mm256_xor_si256(c2[2919],simde_mm256_xor_si256(c2[2759],simde_mm256_xor_si256(c2[4994],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[5009],simde_mm256_xor_si256(c2[1814],simde_mm256_xor_si256(c2[2770],simde_mm256_xor_si256(c2[385],simde_mm256_xor_si256(c2[4066],simde_mm256_xor_si256(c2[550],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[2164],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[1216],simde_mm256_xor_si256(c2[3623],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[4434],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[3488],simde_mm256_xor_si256(c2[3809],simde_mm256_xor_si256(c2[3191],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[305],c2[949]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[312]=_mm256_xor_si256(c2[1761],_mm256_xor_si256(c2[1601],_mm256_xor_si256(c2[1446],_mm256_xor_si256(c2[1286],_mm256_xor_si256(c2[2883],_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[3060],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[2279],_mm256_xor_si256(c2[2119],_mm256_xor_si256(c2[4354],_mm256_xor_si256(c2[4529],_mm256_xor_si256(c2[4369],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[1174],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[5024],_mm256_xor_si256(c2[4864],_mm256_xor_si256(c2[3426],_mm256_xor_si256(c2[5029],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[5062],_mm256_xor_si256(c2[4902],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[4917],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[3794],_mm256_xor_si256(c2[2037],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[2848],_mm256_xor_si256(c2[3169],_mm256_xor_si256(c2[2551],_mm256_xor_si256(c2[2391],_mm256_xor_si256(c2[4944],_mm256_xor_si256(c2[4784],c2[309]))))))))))))))))))))))))))))))))))))))))));
+     d2[312]=simde_mm256_xor_si256(c2[1761],simde_mm256_xor_si256(c2[1601],simde_mm256_xor_si256(c2[1446],simde_mm256_xor_si256(c2[1286],simde_mm256_xor_si256(c2[2883],simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[3060],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[2279],simde_mm256_xor_si256(c2[2119],simde_mm256_xor_si256(c2[4354],simde_mm256_xor_si256(c2[4529],simde_mm256_xor_si256(c2[4369],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[1174],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[5024],simde_mm256_xor_si256(c2[4864],simde_mm256_xor_si256(c2[3426],simde_mm256_xor_si256(c2[5029],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[5062],simde_mm256_xor_si256(c2[4902],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[2983],simde_mm256_xor_si256(c2[4917],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[3794],simde_mm256_xor_si256(c2[2037],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[2848],simde_mm256_xor_si256(c2[3169],simde_mm256_xor_si256(c2[2551],simde_mm256_xor_si256(c2[2391],simde_mm256_xor_si256(c2[4944],simde_mm256_xor_si256(c2[4784],c2[309]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[320]=_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[2727],_mm256_xor_si256(c2[4800],_mm256_xor_si256(c2[4324],_mm256_xor_si256(c2[1286],_mm256_xor_si256(c2[4981],_mm256_xor_si256(c2[1943],_mm256_xor_si256(c2[4501],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[2898],_mm256_xor_si256(c2[4979],_mm256_xor_si256(c2[3552],_mm256_xor_si256(c2[514],_mm256_xor_si256(c2[2917],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[2757],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[2772],_mm256_xor_si256(c2[2615],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[3571],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[3267],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[3584],_mm256_xor_si256(c2[1351],_mm256_xor_si256(c2[3424],_mm256_xor_si256(c2[1206],_mm256_xor_si256(c2[3287],_mm256_xor_si256(c2[87],_mm256_xor_si256(c2[2965],_mm256_xor_si256(c2[5046],_mm256_xor_si256(c2[1216],_mm256_xor_si256(c2[3297],_mm256_xor_si256(c2[2017],_mm256_xor_si256(c2[4098],_mm256_xor_si256(c2[1538],_mm256_xor_si256(c2[4416],_mm256_xor_si256(c2[1378],_mm256_xor_si256(c2[1079],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[2357],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[2197],_mm256_xor_si256(c2[1729],_mm256_xor_si256(c2[3810],_mm256_xor_si256(c2[4289],_mm256_xor_si256(c2[1251],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[4610],_mm256_xor_si256(c2[1572],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[786],_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[3187],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[1750],c2[3831]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[320]=simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[2727],simde_mm256_xor_si256(c2[4800],simde_mm256_xor_si256(c2[4324],simde_mm256_xor_si256(c2[1286],simde_mm256_xor_si256(c2[4981],simde_mm256_xor_si256(c2[1943],simde_mm256_xor_si256(c2[4501],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[2898],simde_mm256_xor_si256(c2[4979],simde_mm256_xor_si256(c2[3552],simde_mm256_xor_si256(c2[514],simde_mm256_xor_si256(c2[2917],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[2757],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[2772],simde_mm256_xor_si256(c2[2615],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[3571],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[3267],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[3584],simde_mm256_xor_si256(c2[1351],simde_mm256_xor_si256(c2[3424],simde_mm256_xor_si256(c2[1206],simde_mm256_xor_si256(c2[3287],simde_mm256_xor_si256(c2[87],simde_mm256_xor_si256(c2[2965],simde_mm256_xor_si256(c2[5046],simde_mm256_xor_si256(c2[1216],simde_mm256_xor_si256(c2[3297],simde_mm256_xor_si256(c2[2017],simde_mm256_xor_si256(c2[4098],simde_mm256_xor_si256(c2[1538],simde_mm256_xor_si256(c2[4416],simde_mm256_xor_si256(c2[1378],simde_mm256_xor_si256(c2[1079],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[2357],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[2197],simde_mm256_xor_si256(c2[1729],simde_mm256_xor_si256(c2[3810],simde_mm256_xor_si256(c2[4289],simde_mm256_xor_si256(c2[1251],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[4610],simde_mm256_xor_si256(c2[1572],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[786],simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[3187],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[1750],c2[3831]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[328]=_mm256_xor_si256(c2[4963],_mm256_xor_si256(c2[4803],_mm256_xor_si256(c2[4480],_mm256_xor_si256(c2[966],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[1623],_mm256_xor_si256(c2[1143],_mm256_xor_si256(c2[4659],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[2437],_mm256_xor_si256(c2[2612],_mm256_xor_si256(c2[2452],_mm256_xor_si256(c2[4368],_mm256_xor_si256(c2[213],_mm256_xor_si256(c2[2947],_mm256_xor_si256(c2[1509],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[2967],_mm256_xor_si256(c2[4726],_mm256_xor_si256(c2[887],_mm256_xor_si256(c2[3137],_mm256_xor_si256(c2[2977],_mm256_xor_si256(c2[3778],_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[2832],_mm256_xor_si256(c2[1877],_mm256_xor_si256(c2[3490],_mm256_xor_si256(c2[931],_mm256_xor_si256(c2[1252],_mm256_xor_si256(c2[626],_mm256_xor_si256(c2[466],_mm256_xor_si256(c2[2867],c2[3511]))))))))))))))))))))))))))))))))));
+     d2[328]=simde_mm256_xor_si256(c2[4963],simde_mm256_xor_si256(c2[4803],simde_mm256_xor_si256(c2[4480],simde_mm256_xor_si256(c2[966],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[1623],simde_mm256_xor_si256(c2[1143],simde_mm256_xor_si256(c2[4659],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[2437],simde_mm256_xor_si256(c2[2612],simde_mm256_xor_si256(c2[2452],simde_mm256_xor_si256(c2[4368],simde_mm256_xor_si256(c2[213],simde_mm256_xor_si256(c2[2947],simde_mm256_xor_si256(c2[1509],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[2967],simde_mm256_xor_si256(c2[4726],simde_mm256_xor_si256(c2[887],simde_mm256_xor_si256(c2[3137],simde_mm256_xor_si256(c2[2977],simde_mm256_xor_si256(c2[3778],simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[2832],simde_mm256_xor_si256(c2[1877],simde_mm256_xor_si256(c2[3490],simde_mm256_xor_si256(c2[931],simde_mm256_xor_si256(c2[1252],simde_mm256_xor_si256(c2[626],simde_mm256_xor_si256(c2[466],simde_mm256_xor_si256(c2[2867],c2[3511]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc288_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc288_byte.c
index 99b257f4b054b43f688a49ebcbec70723bc7d596..9e349d5424cdcd26c6f56d8a7f61028540d93706 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc288_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc288_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc288_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5402],_mm256_xor_si256(c2[4518],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[3637],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[415],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[3530],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[3007],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[5727],_mm256_xor_si256(c2[5565],_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5402],simde_mm256_xor_si256(c2[4518],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[3637],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[415],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3530],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[3007],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[5727],simde_mm256_xor_si256(c2[5565],simde_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))));
 
 //row: 1
-     d2[9]=_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5402],_mm256_xor_si256(c2[4698],_mm256_xor_si256(c2[4518],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[3817],_mm256_xor_si256(c2[3637],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[415],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[3531],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[3530],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[3007],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[5727],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[5565],_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))))))))));
+     d2[9]=simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5402],simde_mm256_xor_si256(c2[4698],simde_mm256_xor_si256(c2[4518],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[3817],simde_mm256_xor_si256(c2[3637],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[415],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[3531],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3530],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[3007],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[5727],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[5565],simde_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[18]=_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5402],_mm256_xor_si256(c2[4698],_mm256_xor_si256(c2[4518],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[3817],_mm256_xor_si256(c2[3637],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[415],_mm256_xor_si256(c2[5456],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[3531],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[3710],_mm256_xor_si256(c2[3530],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[3007],_mm256_xor_si256(c2[1046],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[5727],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[5565],_mm256_xor_si256(c2[3403],_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5402],simde_mm256_xor_si256(c2[4698],simde_mm256_xor_si256(c2[4518],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[3817],simde_mm256_xor_si256(c2[3637],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[415],simde_mm256_xor_si256(c2[5456],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[3531],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3710],simde_mm256_xor_si256(c2[3530],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[3007],simde_mm256_xor_si256(c2[1046],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[5727],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[5565],simde_mm256_xor_si256(c2[3403],simde_mm256_xor_si256(c2[3223],c2[2684]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[27]=_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[5402],_mm256_xor_si256(c2[4518],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[3637],_mm256_xor_si256(c2[4539],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[415],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[4916],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[4032],_mm256_xor_si256(c2[3852],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[3530],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[3187],_mm256_xor_si256(c2[3007],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[5727],_mm256_xor_si256(c2[5565],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[2864],c2[2684]))))))))))))))))))))))))))))))))));
+     d2[27]=simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[5402],simde_mm256_xor_si256(c2[4518],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[3637],simde_mm256_xor_si256(c2[4539],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[415],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[4916],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[4032],simde_mm256_xor_si256(c2[3852],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[3530],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[3187],simde_mm256_xor_si256(c2[3007],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[5727],simde_mm256_xor_si256(c2[5565],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[2864],c2[2684]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[36]=_mm256_xor_si256(c2[3961],_mm256_xor_si256(c2[3781],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[2180],_mm256_xor_si256(c2[5062],_mm256_xor_si256(c2[1823],_mm256_xor_si256(c2[4338],_mm256_xor_si256(c2[1479],_mm256_xor_si256(c2[1299],_mm256_xor_si256(c2[2021],_mm256_xor_si256(c2[4016],_mm256_xor_si256(c2[3836],_mm256_xor_si256(c2[2938],_mm256_xor_si256(c2[2578],_mm256_xor_si256(c2[4756],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[3693],_mm256_xor_si256(c2[4051],_mm256_xor_si256(c2[1193],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[1192],_mm256_xor_si256(c2[4971],_mm256_xor_si256(c2[3730],_mm256_xor_si256(c2[669],_mm256_xor_si256(c2[4287],_mm256_xor_si256(c2[329],_mm256_xor_si256(c2[3389],_mm256_xor_si256(c2[3407],_mm256_xor_si256(c2[3227],_mm256_xor_si256(c2[885],c2[346]))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[3961],simde_mm256_xor_si256(c2[3781],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[2180],simde_mm256_xor_si256(c2[5062],simde_mm256_xor_si256(c2[1823],simde_mm256_xor_si256(c2[4338],simde_mm256_xor_si256(c2[1479],simde_mm256_xor_si256(c2[1299],simde_mm256_xor_si256(c2[2021],simde_mm256_xor_si256(c2[4016],simde_mm256_xor_si256(c2[3836],simde_mm256_xor_si256(c2[2938],simde_mm256_xor_si256(c2[2578],simde_mm256_xor_si256(c2[4756],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[3693],simde_mm256_xor_si256(c2[4051],simde_mm256_xor_si256(c2[1193],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[1192],simde_mm256_xor_si256(c2[4971],simde_mm256_xor_si256(c2[3730],simde_mm256_xor_si256(c2[669],simde_mm256_xor_si256(c2[4287],simde_mm256_xor_si256(c2[329],simde_mm256_xor_si256(c2[3389],simde_mm256_xor_si256(c2[3407],simde_mm256_xor_si256(c2[3227],simde_mm256_xor_si256(c2[885],c2[346]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[45]=_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[5423],_mm256_xor_si256(c2[2182],_mm256_xor_si256(c2[5079],_mm256_xor_si256(c2[4899],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[1857],_mm256_xor_si256(c2[1677],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[419],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[3677],_mm256_xor_si256(c2[5114],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[1892],_mm256_xor_si256(c2[3692],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[4613],_mm256_xor_si256(c2[4792],_mm256_xor_si256(c2[2812],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[4269],_mm256_xor_si256(c2[1750],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[3929],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[4485],c2[3946]))))))))))))))))))))))))))))))))))));
+     d2[45]=simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[5423],simde_mm256_xor_si256(c2[2182],simde_mm256_xor_si256(c2[5079],simde_mm256_xor_si256(c2[4899],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[1857],simde_mm256_xor_si256(c2[1677],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[419],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[3677],simde_mm256_xor_si256(c2[5114],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[1892],simde_mm256_xor_si256(c2[3692],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[4613],simde_mm256_xor_si256(c2[4792],simde_mm256_xor_si256(c2[2812],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[4269],simde_mm256_xor_si256(c2[1750],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[3929],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[4485],c2[3946]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[54]=_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[5224],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[923],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[3625],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[584],_mm256_xor_si256(c2[2579],_mm256_xor_si256(c2[2399],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[1141],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[4399],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[2256],_mm256_xor_si256(c2[2614],_mm256_xor_si256(c2[4232],_mm256_xor_si256(c2[5515],_mm256_xor_si256(c2[5335],_mm256_xor_si256(c2[5514],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[2293],_mm256_xor_si256(c2[4991],_mm256_xor_si256(c2[2109],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[4651],_mm256_xor_si256(c2[1952],_mm256_xor_si256(c2[1970],_mm256_xor_si256(c2[1790],_mm256_xor_si256(c2[5207],_mm256_xor_si256(c2[4668],c2[1066]))))))))))))))))))))))))))))))))))));
+     d2[54]=simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[5224],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[923],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[3625],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[584],simde_mm256_xor_si256(c2[2579],simde_mm256_xor_si256(c2[2399],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[1141],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[4399],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[2256],simde_mm256_xor_si256(c2[2614],simde_mm256_xor_si256(c2[4232],simde_mm256_xor_si256(c2[5515],simde_mm256_xor_si256(c2[5335],simde_mm256_xor_si256(c2[5514],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[2293],simde_mm256_xor_si256(c2[4991],simde_mm256_xor_si256(c2[2109],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[4651],simde_mm256_xor_si256(c2[1952],simde_mm256_xor_si256(c2[1970],simde_mm256_xor_si256(c2[1790],simde_mm256_xor_si256(c2[5207],simde_mm256_xor_si256(c2[4668],c2[1066]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[63]=_mm256_xor_si256(c2[2703],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[5403],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[1806],_mm256_xor_si256(c2[5584],_mm256_xor_si256(c2[1102],_mm256_xor_si256(c2[922],_mm256_xor_si256(c2[4700],_mm256_xor_si256(c2[3804],_mm256_xor_si256(c2[1823],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[4523],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[3819],_mm256_xor_si256(c2[763],_mm256_xor_si256(c2[4721],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[2758],_mm256_xor_si256(c2[2578],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[5458],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[5098],_mm256_xor_si256(c2[3498],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[4578],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[4214],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[2435],_mm256_xor_si256(c2[454],_mm256_xor_si256(c2[2793],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[1354],_mm256_xor_si256(c2[5694],_mm256_xor_si256(c2[5514],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[5693],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[1912],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[2472],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[5170],_mm256_xor_si256(c2[3369],_mm256_xor_si256(c2[3189],_mm256_xor_si256(c2[667],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[1048],_mm256_xor_si256(c2[4830],_mm256_xor_si256(c2[2849],_mm256_xor_si256(c2[2131],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[2149],_mm256_xor_si256(c2[1969],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[5386],_mm256_xor_si256(c2[3405],_mm256_xor_si256(c2[4847],_mm256_xor_si256(c2[3046],c2[2866]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[63]=simde_mm256_xor_si256(c2[2703],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[5403],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[1806],simde_mm256_xor_si256(c2[5584],simde_mm256_xor_si256(c2[1102],simde_mm256_xor_si256(c2[922],simde_mm256_xor_si256(c2[4700],simde_mm256_xor_si256(c2[3804],simde_mm256_xor_si256(c2[1823],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[4523],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[3819],simde_mm256_xor_si256(c2[763],simde_mm256_xor_si256(c2[4721],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[2758],simde_mm256_xor_si256(c2[2578],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[5458],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[5098],simde_mm256_xor_si256(c2[3498],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[4578],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[4214],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[2435],simde_mm256_xor_si256(c2[454],simde_mm256_xor_si256(c2[2793],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[1354],simde_mm256_xor_si256(c2[5694],simde_mm256_xor_si256(c2[5514],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[5693],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[1912],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[2472],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[5170],simde_mm256_xor_si256(c2[3369],simde_mm256_xor_si256(c2[3189],simde_mm256_xor_si256(c2[667],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[1048],simde_mm256_xor_si256(c2[4830],simde_mm256_xor_si256(c2[2849],simde_mm256_xor_si256(c2[2131],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[2149],simde_mm256_xor_si25
6(c2[1969],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[5386],simde_mm256_xor_si256(c2[3405],simde_mm256_xor_si256(c2[4847],simde_mm256_xor_si256(c2[3046],c2[2866]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[72]=_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[3261],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[2724],_mm256_xor_si256(c2[920],_mm256_xor_si256(c2[2380],_mm256_xor_si256(c2[2200],_mm256_xor_si256(c2[2922],_mm256_xor_si256(c2[4917],_mm256_xor_si256(c2[4737],_mm256_xor_si256(c2[4019],_mm256_xor_si256(c2[3839],_mm256_xor_si256(c2[3479],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[5657],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[2415],_mm256_xor_si256(c2[4774],_mm256_xor_si256(c2[4594],_mm256_xor_si256(c2[4952],_mm256_xor_si256(c2[2094],_mm256_xor_si256(c2[1914],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[2093],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[4811],_mm256_xor_si256(c2[4631],_mm256_xor_si256(c2[1570],_mm256_xor_si256(c2[5368],_mm256_xor_si256(c2[5188],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[4290],_mm256_xor_si256(c2[4308],_mm256_xor_si256(c2[4128],_mm256_xor_si256(c2[1966],_mm256_xor_si256(c2[1786],c2[1247]))))))))))))))))))))))))))))))))))))))))));
+     d2[72]=simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[3261],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[2724],simde_mm256_xor_si256(c2[920],simde_mm256_xor_si256(c2[2380],simde_mm256_xor_si256(c2[2200],simde_mm256_xor_si256(c2[2922],simde_mm256_xor_si256(c2[4917],simde_mm256_xor_si256(c2[4737],simde_mm256_xor_si256(c2[4019],simde_mm256_xor_si256(c2[3839],simde_mm256_xor_si256(c2[3479],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[5657],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[2415],simde_mm256_xor_si256(c2[4774],simde_mm256_xor_si256(c2[4594],simde_mm256_xor_si256(c2[4952],simde_mm256_xor_si256(c2[2094],simde_mm256_xor_si256(c2[1914],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[2093],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[4811],simde_mm256_xor_si256(c2[4631],simde_mm256_xor_si256(c2[1570],simde_mm256_xor_si256(c2[5368],simde_mm256_xor_si256(c2[5188],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[4290],simde_mm256_xor_si256(c2[4308],simde_mm256_xor_si256(c2[4128],simde_mm256_xor_si256(c2[1966],simde_mm256_xor_si256(c2[1786],c2[1247]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[81]=_mm256_xor_si256(c2[4680],_mm256_xor_si256(c2[3601],_mm256_xor_si256(c2[3421],_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[3963],_mm256_xor_si256(c2[2704],_mm256_xor_si256(c2[3079],_mm256_xor_si256(c2[2000],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[4702],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[2198],_mm256_xor_si256(c2[1119],_mm256_xor_si256(c2[939],_mm256_xor_si256(c2[2920],_mm256_xor_si256(c2[1661],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[3656],_mm256_xor_si256(c2[3476],_mm256_xor_si256(c2[3837],_mm256_xor_si256(c2[2578],_mm256_xor_si256(c2[3477],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[5655],_mm256_xor_si256(c2[4396],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[5476],_mm256_xor_si256(c2[2413],_mm256_xor_si256(c2[1154],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[3333],_mm256_xor_si256(c2[4950],_mm256_xor_si256(c2[3691],_mm256_xor_si256(c2[1912],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[653],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[111],_mm256_xor_si256(c2[4611],_mm256_xor_si256(c2[4629],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[1568],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[5186],_mm256_xor_si256(c2[3927],_mm256_xor_si256(c2[1228],_mm256_xor_si256(c2[5728],_mm256_xor_si256(c2[4288],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[4126],_mm256_xor_si256(c2[3047],_mm256_xor_si256(c2[2867],_mm256_xor_si256(c2[1784],_mm256_xor_si256(c2[525],_mm256_xor_si256(c2[1245],c2[5745])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[81]=simde_mm256_xor_si256(c2[4680],simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[3421],simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[3963],simde_mm256_xor_si256(c2[2704],simde_mm256_xor_si256(c2[3079],simde_mm256_xor_si256(c2[2000],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[4702],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[2198],simde_mm256_xor_si256(c2[1119],simde_mm256_xor_si256(c2[939],simde_mm256_xor_si256(c2[2920],simde_mm256_xor_si256(c2[1661],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[3476],simde_mm256_xor_si256(c2[3837],simde_mm256_xor_si256(c2[2578],simde_mm256_xor_si256(c2[3477],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[5655],simde_mm256_xor_si256(c2[4396],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[5476],simde_mm256_xor_si256(c2[2413],simde_mm256_xor_si256(c2[1154],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[3333],simde_mm256_xor_si256(c2[4950],simde_mm256_xor_si256(c2[3691],simde_mm256_xor_si256(c2[1912],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[653],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[111],simde_mm256_xor_si256(c2[4611],simde_mm256_xor_si256(c2[4629],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[1568],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[5186],simde_mm256_xor_si256(c2[3927],simde_mm256_xor_si256(c2[1228],simde_mm256_xor_si256(c2[5728],simde_mm256_xor_si256(c2[4288],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[4126],simde_mm256_xor_si256(c2[3047],simde_mm256_xor_si256(c2[2867],simde_mm256_xor_si256(c2[1784],simde_mm256_xor_si256(c2[525],simde_mm256_xor_si256(c2[1245],c2[5745])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[90]=_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[1639],_mm256_xor_si256(c2[1011],c2[129])));
+     d2[90]=simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[1639],simde_mm256_xor_si256(c2[1011],c2[129])));
 
 //row: 11
-     d2[99]=_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[3242],_mm256_xor_si256(c2[5404],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[4520],_mm256_xor_si256(c2[1643],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[4163],_mm256_xor_si256(c2[3639],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[5278],_mm256_xor_si256(c2[4918],_mm256_xor_si256(c2[1337],_mm256_xor_si256(c2[2417],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[3854],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[3353],_mm256_xor_si256(c2[3532],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[1552],_mm256_xor_si256(c2[311],_mm256_xor_si256(c2[3189],_mm256_xor_si256(c2[3009],_mm256_xor_si256(c2[5526],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[2669],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[5729],_mm256_xor_si256(c2[5567],_mm256_xor_si256(c2[3225],_mm256_xor_si256(c2[2866],_mm256_xor_si256(c2[2686],c2[3765])))))))))))))))))))))))))))))))))))));
+     d2[99]=simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[3242],simde_mm256_xor_si256(c2[5404],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[4520],simde_mm256_xor_si256(c2[1643],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[4163],simde_mm256_xor_si256(c2[3639],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[5278],simde_mm256_xor_si256(c2[4918],simde_mm256_xor_si256(c2[1337],simde_mm256_xor_si256(c2[2417],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[3854],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[3353],simde_mm256_xor_si256(c2[3532],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[1552],simde_mm256_xor_si256(c2[311],simde_mm256_xor_si256(c2[3189],simde_mm256_xor_si256(c2[3009],simde_mm256_xor_si256(c2[5526],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[2669],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[5729],simde_mm256_xor_si256(c2[5567],simde_mm256_xor_si256(c2[3225],simde_mm256_xor_si256(c2[2866],simde_mm256_xor_si256(c2[2686],c2[3765])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[108]=_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[187],_mm256_xor_si256(c2[5242],_mm256_xor_si256(c2[5062],_mm256_xor_si256(c2[2185],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[4181],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[959],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[5460],_mm256_xor_si256(c2[4737],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[2959],_mm256_xor_si256(c2[4396],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[1174],_mm256_xor_si256(c2[4075],_mm256_xor_si256(c2[3895],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[2094],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[3551],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[3211],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[3767],c2[3228]))))))))))))))))))))))))))))))))));
+     d2[108]=simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[187],simde_mm256_xor_si256(c2[5242],simde_mm256_xor_si256(c2[5062],simde_mm256_xor_si256(c2[2185],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[4181],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[959],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[5460],simde_mm256_xor_si256(c2[4737],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[2959],simde_mm256_xor_si256(c2[4396],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[1174],simde_mm256_xor_si256(c2[4075],simde_mm256_xor_si256(c2[3895],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[2094],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[3551],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[3211],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[3767],c2[3228]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[117]=_mm256_xor_si256(c2[3061],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[2162],_mm256_xor_si256(c2[1460],_mm256_xor_si256(c2[4342],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[1103],_mm256_xor_si256(c2[919],_mm256_xor_si256(c2[579],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[1301],_mm256_xor_si256(c2[3116],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[1858],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[5116],_mm256_xor_si256(c2[974],_mm256_xor_si256(c2[794],_mm256_xor_si256(c2[2973],_mm256_xor_si256(c2[3511],_mm256_xor_si256(c2[3331],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[472],_mm256_xor_si256(c2[4431],_mm256_xor_si256(c2[4251],_mm256_xor_si256(c2[3010],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[5708],_mm256_xor_si256(c2[3567],_mm256_xor_si256(c2[5368],_mm256_xor_si256(c2[2849],_mm256_xor_si256(c2[2669],_mm256_xor_si256(c2[5545],_mm256_xor_si256(c2[2507],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[5565],c2[5385])))))))))))))))))))))))))))))))))))));
+     d2[117]=simde_mm256_xor_si256(c2[3061],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[2162],simde_mm256_xor_si256(c2[1460],simde_mm256_xor_si256(c2[4342],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[1103],simde_mm256_xor_si256(c2[919],simde_mm256_xor_si256(c2[579],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[1301],simde_mm256_xor_si256(c2[3116],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[1858],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[5116],simde_mm256_xor_si256(c2[974],simde_mm256_xor_si256(c2[794],simde_mm256_xor_si256(c2[2973],simde_mm256_xor_si256(c2[3511],simde_mm256_xor_si256(c2[3331],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[472],simde_mm256_xor_si256(c2[4431],simde_mm256_xor_si256(c2[4251],simde_mm256_xor_si256(c2[3010],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[5708],simde_mm256_xor_si256(c2[3567],simde_mm256_xor_si256(c2[5368],simde_mm256_xor_si256(c2[2849],simde_mm256_xor_si256(c2[2669],simde_mm256_xor_si256(c2[5545],simde_mm256_xor_si256(c2[2507],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[5565],c2[5385])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[126]=_mm256_xor_si256(c2[904],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[3604],_mm256_xor_si256(c2[3423],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[5585],_mm256_xor_si256(c2[5062],_mm256_xor_si256(c2[4882],_mm256_xor_si256(c2[4701],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[4525],_mm256_xor_si256(c2[4524],_mm256_xor_si256(c2[4344],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[4181],_mm256_xor_si256(c2[4001],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[4723],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[959],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[598],_mm256_xor_si256(c2[5640],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[5099],_mm256_xor_si256(c2[1699],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[2779],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[4216],_mm256_xor_si256(c2[4215],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[455],_mm256_xor_si256(c2[994],_mm256_xor_si256(c2[993],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[3895],_mm256_xor_si256(c2[3715],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[3894],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[1914],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[1733],_mm256_xor_si256(c2[3529],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[3371],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[3190],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[1049],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[331],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[5748],_mm256_xor_si256(c2[3587],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[3048],_mm256_xor_si256(c2[3047],c2[2867])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[126]=simde_mm256_xor_si256(c2[904],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[3604],simde_mm256_xor_si256(c2[3423],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[5585],simde_mm256_xor_si256(c2[5062],simde_mm256_xor_si256(c2[4882],simde_mm256_xor_si256(c2[4701],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[4525],simde_mm256_xor_si256(c2[4524],simde_mm256_xor_si256(c2[4344],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[4181],simde_mm256_xor_si256(c2[4001],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[4723],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[959],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[598],simde_mm256_xor_si256(c2[5640],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[5099],simde_mm256_xor_si256(c2[1699],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[2779],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[4216],simde_mm256_xor_si256(c2[4215],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[455],simde_mm256_xor_si256(c2[994],simde_mm256_xor_si256(c2[993],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[3895],simde_mm256_xor_si256(c2[3715],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[3894],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[1914],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[1733],simde_mm256_xor_si256(c2[3529],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[3371],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[3190],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[1049],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[331],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c
2[5748],simde_mm256_xor_si256(c2[3587],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[3048],simde_mm256_xor_si256(c2[3047],c2[2867])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[135]=_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[3243],_mm256_xor_si256(c2[3244],_mm256_xor_si256(c2[5405],_mm256_xor_si256(c2[5406],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[4521],_mm256_xor_si256(c2[4702],_mm256_xor_si256(c2[4522],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[1645],_mm256_xor_si256(c2[4164],_mm256_xor_si256(c2[4165],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[3821],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[4362],_mm256_xor_si256(c2[4363],_mm256_xor_si256(c2[418],_mm256_xor_si256(c2[599],_mm256_xor_si256(c2[419],_mm256_xor_si256(c2[5279],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[4919],_mm256_xor_si256(c2[4920],_mm256_xor_si256(c2[1338],_mm256_xor_si256(c2[1339],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[2419],_mm256_xor_si256(c2[3855],_mm256_xor_si256(c2[3856],_mm256_xor_si256(c2[275],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[634],_mm256_xor_si256(c2[3354],_mm256_xor_si256(c2[3535],_mm256_xor_si256(c2[3355],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[1553],_mm256_xor_si256(c2[1554],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[3010],_mm256_xor_si256(c2[3011],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[870],_mm256_xor_si256(c2[2670],_mm256_xor_si256(c2[2671],_mm256_xor_si256(c2[5730],_mm256_xor_si256(c2[5731],_mm256_xor_si256(c2[5568],_mm256_xor_si256(c2[5749],_mm256_xor_si256(c2[5569],_mm256_xor_si256(c2[3226],_mm256_xor_si256(c2[3227],_mm256_xor_si256(c2[2687],c2[2688]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[135]=simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[3243],simde_mm256_xor_si256(c2[3244],simde_mm256_xor_si256(c2[5405],simde_mm256_xor_si256(c2[5406],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[4521],simde_mm256_xor_si256(c2[4702],simde_mm256_xor_si256(c2[4522],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[1645],simde_mm256_xor_si256(c2[4164],simde_mm256_xor_si256(c2[4165],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[3821],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[4362],simde_mm256_xor_si256(c2[4363],simde_mm256_xor_si256(c2[418],simde_mm256_xor_si256(c2[599],simde_mm256_xor_si256(c2[419],simde_mm256_xor_si256(c2[5279],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[4919],simde_mm256_xor_si256(c2[4920],simde_mm256_xor_si256(c2[1338],simde_mm256_xor_si256(c2[1339],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[2419],simde_mm256_xor_si256(c2[3855],simde_mm256_xor_si256(c2[3856],simde_mm256_xor_si256(c2[275],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[634],simde_mm256_xor_si256(c2[3354],simde_mm256_xor_si256(c2[3535],simde_mm256_xor_si256(c2[3355],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[1553],simde_mm256_xor_si256(c2[1554],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[3010],simde_mm256_xor_si256(c2[3011],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[870],simde_mm256_xor_si256(c2[2670],simde_mm256_xor_si256(c2[2671],simde_mm256_xor_si256(c2[5730],simde_mm256_xor_si256(c2[5731],simde_mm256_xor_si256(c2[5568],simde_mm256_xor_si256(c2[5749],simde_mm256_xor_si256(c2[5569],simde_mm256_xor_si256(c2[3226],simde_mm256_xor_si256(c2[3227],simde_mm256_xor_si256(c2[2687],c2[2688]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[144]=_mm256_xor_si256(c2[5580],_mm256_xor_si256(c2[5400],_mm256_xor_si256(c2[1980],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[4860],_mm256_xor_si256(c2[4680],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[3979],_mm256_xor_si256(c2[3799],_mm256_xor_si256(c2[379],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[922],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[3442],_mm256_xor_si256(c2[5601],_mm256_xor_si256(c2[2899],_mm256_xor_si256(c2[3098],_mm256_xor_si256(c2[2918],_mm256_xor_si256(c2[5257],_mm256_xor_si256(c2[5077],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[5455],_mm256_xor_si256(c2[2035],_mm256_xor_si256(c2[1855],_mm256_xor_si256(c2[4557],_mm256_xor_si256(c2[1137],_mm256_xor_si256(c2[957],_mm256_xor_si256(c2[4197],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[2955],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[1696],_mm256_xor_si256(c2[3855],_mm256_xor_si256(c2[3133],_mm256_xor_si256(c2[5292],_mm256_xor_si256(c2[5312],_mm256_xor_si256(c2[1892],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[5670],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[2812],_mm256_xor_si256(c2[2632],_mm256_xor_si256(c2[4971],_mm256_xor_si256(c2[4791],_mm256_xor_si256(c2[2811],_mm256_xor_si256(c2[5150],_mm256_xor_si256(c2[4970],_mm256_xor_si256(c2[831],_mm256_xor_si256(c2[2990],_mm256_xor_si256(c2[5349],_mm256_xor_si256(c2[1929],_mm256_xor_si256(c2[1749],_mm256_xor_si256(c2[2288],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[147],_mm256_xor_si256(c2[2486],_mm256_xor_si256(c2[2306],_mm256_xor_si256(c2[1948],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[5008],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[5026],_mm256_xor_si256(c2[4846],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[1246],_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[4843],_mm256_xor_si256(c2[4663],_mm256_xor_si256(c2[1965],_mm256_xor_si256(c2[4124],c2[1782]))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))));
+     d2[144]=simde_mm256_xor_si256(c2[5580],simde_mm256_xor_si256(c2[5400],simde_mm256_xor_si256(c2[1980],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[4860],simde_mm256_xor_si256(c2[4680],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[3979],simde_mm256_xor_si256(c2[3799],simde_mm256_xor_si256(c2[379],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[922],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[3442],simde_mm256_xor_si256(c2[5601],simde_mm256_xor_si256(c2[2899],simde_mm256_xor_si256(c2[3098],simde_mm256_xor_si256(c2[2918],simde_mm256_xor_si256(c2[5257],simde_mm256_xor_si256(c2[5077],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[5455],simde_mm256_xor_si256(c2[2035],simde_mm256_xor_si256(c2[1855],simde_mm256_xor_si256(c2[4557],simde_mm256_xor_si256(c2[1137],simde_mm256_xor_si256(c2[957],simde_mm256_xor_si256(c2[4197],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[2955],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[1696],simde_mm256_xor_si256(c2[3855],simde_mm256_xor_si256(c2[3133],simde_mm256_xor_si256(c2[5292],simde_mm256_xor_si256(c2[5312],simde_mm256_xor_si256(c2[1892],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[5670],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[2812],simde_mm256_xor_si256(c2[2632],simde_mm256_xor_si256(c2[4971],simde_mm256_xor_si256(c2[4791],simde_mm256_xor_si256(c2[2811],simde_mm256_xor_si256(c2[5150],simde_mm256_xor_si256(c2[4970],simde_mm256_xor_si256(c2[831],simde_mm256_xor_si256(c2[2990],simde_mm256_xor_si256(c2[5349],simde_mm256_xor_si256(c2[1929],simde_mm256_xor_si256(c2[1749],simde_mm256_xor_si256(c2[2288],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[147],simde_mm256_xor_si256(c2[2486],simde_mm256_xor_si256(c2[2306],simde_mm256_xor_si256(c2[1948],simde_mm256_xor_si256(c2[4107],simde_mm256_
xor_si256(c2[5008],simde_mm256_xor_si256(c2[1408],simde_mm256_xor_si256(c2[5026],simde_mm256_xor_si256(c2[4846],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[1246],simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[4843],simde_mm256_xor_si256(c2[4663],simde_mm256_xor_si256(c2[1965],simde_mm256_xor_si256(c2[4124],c2[1782])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[153]=_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[2341],_mm256_xor_si256(c2[2161],_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[5221],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[5584],_mm256_xor_si256(c2[1444],_mm256_xor_si256(c2[4880],_mm256_xor_si256(c2[4700],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[1823],_mm256_xor_si256(c2[3442],_mm256_xor_si256(c2[4343],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[3999],_mm256_xor_si256(c2[3819],_mm256_xor_si256(c2[5618],_mm256_xor_si256(c2[5438],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[2396],_mm256_xor_si256(c2[2216],_mm256_xor_si256(c2[5458],_mm256_xor_si256(c2[1498],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[5098],_mm256_xor_si256(c2[958],_mm256_xor_si256(c2[1517],_mm256_xor_si256(c2[3316],_mm256_xor_si256(c2[3136],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[4216],_mm256_xor_si256(c2[4034],_mm256_xor_si256(c2[5653],_mm256_xor_si256(c2[454],_mm256_xor_si256(c2[2253],_mm256_xor_si256(c2[2073],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[2431],_mm256_xor_si256(c2[2071],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[5332],_mm256_xor_si256(c2[5152],_mm256_xor_si256(c2[3712],_mm256_xor_si256(c2[5511],_mm256_xor_si256(c2[5331],_mm256_xor_si256(c2[1732],_mm256_xor_si256(c2[3351],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[2290],_mm256_xor_si256(c2[2110],_mm256_xor_si256(c2[3189],_mm256_xor_si256(c2[4808],_mm256_xor_si256(c2[1048],_mm256_xor_si256(c2[2847],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[2849],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[1787],_mm256_xor_si256(c2[1607],_mm256_xor_si256(c2[3405],_mm256_xor_si256(c2[5204],_mm256_xor_si256(c2[5024],_mm256_xor_si256(c2[2866],c2[4485]))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))));
+     d2[153]=simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[2341],simde_mm256_xor_si256(c2[2161],simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[5221],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[5584],simde_mm256_xor_si256(c2[1444],simde_mm256_xor_si256(c2[4880],simde_mm256_xor_si256(c2[4700],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[1823],simde_mm256_xor_si256(c2[3442],simde_mm256_xor_si256(c2[4343],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[3999],simde_mm256_xor_si256(c2[3819],simde_mm256_xor_si256(c2[5618],simde_mm256_xor_si256(c2[5438],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[2396],simde_mm256_xor_si256(c2[2216],simde_mm256_xor_si256(c2[5458],simde_mm256_xor_si256(c2[1498],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[5098],simde_mm256_xor_si256(c2[958],simde_mm256_xor_si256(c2[1517],simde_mm256_xor_si256(c2[3316],simde_mm256_xor_si256(c2[3136],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[4216],simde_mm256_xor_si256(c2[4034],simde_mm256_xor_si256(c2[5653],simde_mm256_xor_si256(c2[454],simde_mm256_xor_si256(c2[2253],simde_mm256_xor_si256(c2[2073],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[2431],simde_mm256_xor_si256(c2[2071],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[5332],simde_mm256_xor_si256(c2[5152],simde_mm256_xor_si256(c2[3712],simde_mm256_xor_si256(c2[5511],simde_mm256_xor_si256(c2[5331],simde_mm256_xor_si256(c2[1732],simde_mm256_xor_si256(c2[3351],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[2290],simde_mm256_xor_si256(c2[2110],simde_mm256_xor_si256(c2[3189],simde_mm256_xor_si256(c2[4808],simde_mm256_xor_si256(c2[1048],simde_mm256_xor_si256(c2[2847],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[2849],simde_mm256_xo
r_si256(c2[4468],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[1787],simde_mm256_xor_si256(c2[1607],simde_mm256_xor_si256(c2[3405],simde_mm256_xor_si256(c2[5204],simde_mm256_xor_si256(c2[5024],simde_mm256_xor_si256(c2[2866],c2[4485])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[162]=_mm256_xor_si256(c2[3060],_mm256_xor_si256(c2[1911],c2[2650]));
+     d2[162]=simde_mm256_xor_si256(c2[3060],simde_mm256_xor_si256(c2[1911],c2[2650]));
 
 //row: 19
-     d2[171]=_mm256_xor_si256(c2[1981],_mm256_xor_si256(c2[4861],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[2702],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[3262],_mm256_xor_si256(c2[23],_mm256_xor_si256(c2[5058],_mm256_xor_si256(c2[5258],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[2036],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[778],_mm256_xor_si256(c2[2956],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[5473],_mm256_xor_si256(c2[1893],_mm256_xor_si256(c2[2251],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[5151],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[1930],_mm256_xor_si256(c2[4628],_mm256_xor_si256(c2[2487],_mm256_xor_si256(c2[4288],_mm256_xor_si256(c2[1589],_mm256_xor_si256(c2[1427],_mm256_xor_si256(c2[4844],c2[4305]))))))))))))))))))))))))))));
+     d2[171]=simde_mm256_xor_si256(c2[1981],simde_mm256_xor_si256(c2[4861],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[2702],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[3262],simde_mm256_xor_si256(c2[23],simde_mm256_xor_si256(c2[5058],simde_mm256_xor_si256(c2[5258],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[2036],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[778],simde_mm256_xor_si256(c2[2956],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[5473],simde_mm256_xor_si256(c2[1893],simde_mm256_xor_si256(c2[2251],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[5151],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[1930],simde_mm256_xor_si256(c2[4628],simde_mm256_xor_si256(c2[2487],simde_mm256_xor_si256(c2[4288],simde_mm256_xor_si256(c2[1589],simde_mm256_xor_si256(c2[1427],simde_mm256_xor_si256(c2[4844],c2[4305]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[180]=_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[2882],_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[1461],_mm256_xor_si256(c2[1281],_mm256_xor_si256(c2[4163],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[1122],_mm256_xor_si256(c2[3117],_mm256_xor_si256(c2[2937],_mm256_xor_si256(c2[2039],_mm256_xor_si256(c2[1679],_mm256_xor_si256(c2[3857],_mm256_xor_si256(c2[4937],_mm256_xor_si256(c2[615],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[2794],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[4072],_mm256_xor_si256(c2[2831],_mm256_xor_si256(c2[5529],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[5189],_mm256_xor_si256(c2[2490],_mm256_xor_si256(c2[2508],_mm256_xor_si256(c2[2328],_mm256_xor_si256(c2[5745],c2[5206]))))))))))))))))))))))))))))))))));
+     d2[180]=simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[2882],simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[1461],simde_mm256_xor_si256(c2[1281],simde_mm256_xor_si256(c2[4163],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[1122],simde_mm256_xor_si256(c2[3117],simde_mm256_xor_si256(c2[2937],simde_mm256_xor_si256(c2[2039],simde_mm256_xor_si256(c2[1679],simde_mm256_xor_si256(c2[3857],simde_mm256_xor_si256(c2[4937],simde_mm256_xor_si256(c2[615],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[2794],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[4072],simde_mm256_xor_si256(c2[2831],simde_mm256_xor_si256(c2[5529],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[5189],simde_mm256_xor_si256(c2[2490],simde_mm256_xor_si256(c2[2508],simde_mm256_xor_si256(c2[2328],simde_mm256_xor_si256(c2[5745],c2[5206]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[189]=_mm256_xor_si256(c2[3422],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[4703],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[1842],_mm256_xor_si256(c2[1662],_mm256_xor_si256(c2[3477],_mm256_xor_si256(c2[2579],_mm256_xor_si256(c2[2219],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[5477],_mm256_xor_si256(c2[1335],_mm256_xor_si256(c2[1155],_mm256_xor_si256(c2[3334],_mm256_xor_si256(c2[3872],_mm256_xor_si256(c2[3692],_mm256_xor_si256(c2[654],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[4792],_mm256_xor_si256(c2[4612],_mm256_xor_si256(c2[3371],_mm256_xor_si256(c2[490],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[3928],_mm256_xor_si256(c2[5729],_mm256_xor_si256(c2[3210],_mm256_xor_si256(c2[3030],_mm256_xor_si256(c2[2126],_mm256_xor_si256(c2[2868],_mm256_xor_si256(c2[526],_mm256_xor_si256(c2[167],c2[5746]))))))))))))))))))))))))))))))))))));
+     d2[189]=simde_mm256_xor_si256(c2[3422],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[4703],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[1842],simde_mm256_xor_si256(c2[1662],simde_mm256_xor_si256(c2[3477],simde_mm256_xor_si256(c2[2579],simde_mm256_xor_si256(c2[2219],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[5477],simde_mm256_xor_si256(c2[1335],simde_mm256_xor_si256(c2[1155],simde_mm256_xor_si256(c2[3334],simde_mm256_xor_si256(c2[3872],simde_mm256_xor_si256(c2[3692],simde_mm256_xor_si256(c2[654],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[4792],simde_mm256_xor_si256(c2[4612],simde_mm256_xor_si256(c2[3371],simde_mm256_xor_si256(c2[490],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[3928],simde_mm256_xor_si256(c2[5729],simde_mm256_xor_si256(c2[3210],simde_mm256_xor_si256(c2[3030],simde_mm256_xor_si256(c2[2126],simde_mm256_xor_si256(c2[2868],simde_mm256_xor_si256(c2[526],simde_mm256_xor_si256(c2[167],c2[5746]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[198]=_mm256_xor_si256(c2[3979],c2[760]);
+     d2[198]=simde_mm256_xor_si256(c2[3979],c2[760]);
 
 //row: 23
-     d2[207]=_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[3477],c2[4411]));
+     d2[207]=simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[3477],c2[4411]));
 
 //row: 24
-     d2[216]=_mm256_xor_si256(c2[5418],_mm256_xor_si256(c2[1837],c2[1065]));
+     d2[216]=simde_mm256_xor_si256(c2[5418],simde_mm256_xor_si256(c2[1837],c2[1065]));
 
 //row: 25
-     d2[225]=_mm256_xor_si256(c2[4],c2[5671]);
+     d2[225]=simde_mm256_xor_si256(c2[4],c2[5671]);
 
 //row: 26
-     d2[234]=_mm256_xor_si256(c2[1263],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[4143],_mm256_xor_si256(c2[3963],_mm256_xor_si256(c2[4144],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[547],_mm256_xor_si256(c2[5421],_mm256_xor_si256(c2[5241],_mm256_xor_si256(c2[5422],_mm256_xor_si256(c2[2364],_mm256_xor_si256(c2[2545],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[5245],_mm256_xor_si256(c2[5065],_mm256_xor_si256(c2[4540],_mm256_xor_si256(c2[4360],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[5082],_mm256_xor_si256(c2[5443],_mm256_xor_si256(c2[5263],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[1318],_mm256_xor_si256(c2[1138],_mm256_xor_si256(c2[1319],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[5639],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[2238],_mm256_xor_si256(c2[2058],_mm256_xor_si256(c2[2239],_mm256_xor_si256(c2[3138],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[4575],_mm256_xor_si256(c2[4936],_mm256_xor_si256(c2[4756],_mm256_xor_si256(c2[1175],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[1353],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[4254],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[4255],_mm256_xor_si256(c2[4433],_mm256_xor_si256(c2[4253],_mm256_xor_si256(c2[4434],_mm256_xor_si256(c2[2273],_mm256_xor_si256(c2[2634],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[1212],_mm256_xor_si256(c2[1032],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[3730],_mm256_xor_si256(c2[4091],_mm256_xor_si256(c2[3911],_mm256_xor_si256(c2[5166],_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[1589],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[3390],_mm256_xor_si256(c2[3571],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[1052],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[709],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[710],_mm256_xor_si256(c2[4126],_mm256_xor_si256(c2[3946],_mm256_xor_si256(c2[4127],_mm256_xor_si256(c2[3407],_mm256_xor_si256(c2[376
8],c2[3588])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[234]=simde_mm256_xor_si256(c2[1263],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[4143],simde_mm256_xor_si256(c2[3963],simde_mm256_xor_si256(c2[4144],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[547],simde_mm256_xor_si256(c2[5421],simde_mm256_xor_si256(c2[5241],simde_mm256_xor_si256(c2[5422],simde_mm256_xor_si256(c2[2364],simde_mm256_xor_si256(c2[2545],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[5245],simde_mm256_xor_si256(c2[5065],simde_mm256_xor_si256(c2[4540],simde_mm256_xor_si256(c2[4360],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[5082],simde_mm256_xor_si256(c2[5443],simde_mm256_xor_si256(c2[5263],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[1318],simde_mm256_xor_si256(c2[1138],simde_mm256_xor_si256(c2[1319],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[5639],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[2238],simde_mm256_xor_si256(c2[2058],simde_mm256_xor_si256(c2[2239],simde_mm256_xor_si256(c2[3138],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[4575],simde_mm256_xor_si256(c2[4936],simde_mm256_xor_si256(c2[4756],simde_mm256_xor_si256(c2[1175],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[1353],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[4254],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[4255],simde_mm256_xor_si256(c2[4433],simde_mm256_xor_si256(c2[4253],simde_mm256_xor_si256(c2[4434],simde_mm256_xor_si256(c2[2273],simde_mm256_xor_si256(c2[2634],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[1212],simde_mm256_xor_si256(c2[1032],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[3730],simde_mm256_xor_si256(c2[4091],simde_mm256_xor_si256(c2[3911],simde_mm256_xor_si256(c2[5166],simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[1589],simde_mm256_xor_si256(c2[1770],simde_mm25
6_xor_si256(c2[3390],simde_mm256_xor_si256(c2[3571],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[1052],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[709],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[710],simde_mm256_xor_si256(c2[4126],simde_mm256_xor_si256(c2[3946],simde_mm256_xor_si256(c2[4127],simde_mm256_xor_si256(c2[3407],simde_mm256_xor_si256(c2[3768],c2[3588])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[243]=_mm256_xor_si256(c2[2340],c2[1908]);
+     d2[243]=simde_mm256_xor_si256(c2[2340],c2[1908]);
 
 //row: 28
-     d2[252]=_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[2378],c2[2071]));
+     d2[252]=simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[2378],c2[2071]));
 
 //row: 29
-     d2[261]=_mm256_xor_si256(c2[904],c2[4572]);
+     d2[261]=simde_mm256_xor_si256(c2[904],c2[4572]);
 
 //row: 30
-     d2[270]=_mm256_xor_si256(c2[4178],_mm256_xor_si256(c2[4411],_mm256_xor_si256(c2[1569],c2[1244])));
+     d2[270]=simde_mm256_xor_si256(c2[4178],simde_mm256_xor_si256(c2[4411],simde_mm256_xor_si256(c2[1569],c2[1244])));
 
 //row: 31
-     d2[279]=_mm256_xor_si256(c2[2344],_mm256_xor_si256(c2[5224],_mm256_xor_si256(c2[1627],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[3625],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[2900],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[584],_mm256_xor_si256(c2[2399],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[1141],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[4399],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[2256],_mm256_xor_si256(c2[2794],_mm256_xor_si256(c2[2614],_mm256_xor_si256(c2[5335],_mm256_xor_si256(c2[5514],_mm256_xor_si256(c2[3714],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[2293],_mm256_xor_si256(c2[5171],_mm256_xor_si256(c2[4991],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[4651],_mm256_xor_si256(c2[2132],_mm256_xor_si256(c2[1952],_mm256_xor_si256(c2[1790],_mm256_xor_si256(c2[5207],_mm256_xor_si256(c2[4848],c2[4668])))))))))))))))))))))))))))))))))));
+     d2[279]=simde_mm256_xor_si256(c2[2344],simde_mm256_xor_si256(c2[5224],simde_mm256_xor_si256(c2[1627],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[3625],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[2900],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[584],simde_mm256_xor_si256(c2[2399],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[1141],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[4399],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[2256],simde_mm256_xor_si256(c2[2794],simde_mm256_xor_si256(c2[2614],simde_mm256_xor_si256(c2[5335],simde_mm256_xor_si256(c2[5514],simde_mm256_xor_si256(c2[3714],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[2293],simde_mm256_xor_si256(c2[5171],simde_mm256_xor_si256(c2[4991],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[4651],simde_mm256_xor_si256(c2[2132],simde_mm256_xor_si256(c2[1952],simde_mm256_xor_si256(c2[1790],simde_mm256_xor_si256(c2[5207],simde_mm256_xor_si256(c2[4848],c2[4668])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[288]=_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[5423],_mm256_xor_si256(c2[5079],_mm256_xor_si256(c2[4899],_mm256_xor_si256(c2[5621],_mm256_xor_si256(c2[1857],_mm256_xor_si256(c2[1677],_mm256_xor_si256(c2[959],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[419],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[3677],_mm256_xor_si256(c2[5114],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[1534],_mm256_xor_si256(c2[1892],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[4613],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[4792],_mm256_xor_si256(c2[2812],_mm256_xor_si256(c2[1751],_mm256_xor_si256(c2[1571],_mm256_xor_si256(c2[4269],_mm256_xor_si256(c2[2308],_mm256_xor_si256(c2[2128],_mm256_xor_si256(c2[3929],_mm256_xor_si256(c2[1230],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[4485],c2[3946]))))))))))))))))))))))))))))))))))))))))));
+     d2[288]=simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[5423],simde_mm256_xor_si256(c2[5079],simde_mm256_xor_si256(c2[4899],simde_mm256_xor_si256(c2[5621],simde_mm256_xor_si256(c2[1857],simde_mm256_xor_si256(c2[1677],simde_mm256_xor_si256(c2[959],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[419],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[3677],simde_mm256_xor_si256(c2[5114],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[1534],simde_mm256_xor_si256(c2[1892],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[4613],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[4792],simde_mm256_xor_si256(c2[2812],simde_mm256_xor_si256(c2[1751],simde_mm256_xor_si256(c2[1571],simde_mm256_xor_si256(c2[4269],simde_mm256_xor_si256(c2[2308],simde_mm256_xor_si256(c2[2128],simde_mm256_xor_si256(c2[3929],simde_mm256_xor_si256(c2[1230],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[4485],c2[3946]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[297]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[4878],_mm256_xor_si256(c2[2001],_mm256_xor_si256(c2[4521],_mm256_xor_si256(c2[3997],_mm256_xor_si256(c2[4719],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[5636],_mm256_xor_si256(c2[5276],_mm256_xor_si256(c2[1695],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[4212],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[990],_mm256_xor_si256(c2[3711],_mm256_xor_si256(c2[3890],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[669],_mm256_xor_si256(c2[3367],_mm256_xor_si256(c2[1206],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[3027],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[3583],c2[3044]))))))))))))))))))))))))))));
+     d2[297]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[4878],simde_mm256_xor_si256(c2[2001],simde_mm256_xor_si256(c2[4521],simde_mm256_xor_si256(c2[3997],simde_mm256_xor_si256(c2[4719],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[5636],simde_mm256_xor_si256(c2[5276],simde_mm256_xor_si256(c2[1695],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[4212],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[990],simde_mm256_xor_si256(c2[3711],simde_mm256_xor_si256(c2[3890],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[669],simde_mm256_xor_si256(c2[3367],simde_mm256_xor_si256(c2[1206],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[3027],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[3583],c2[3044]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[306]=_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[4860],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[2161],_mm256_xor_si256(c2[1981],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[4143],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[3439],_mm256_xor_si256(c2[3259],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[2902],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[1645],_mm256_xor_si256(c2[2558],_mm256_xor_si256(c2[2378],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[2023],_mm256_xor_si256(c2[1843],_mm256_xor_si256(c2[5095],_mm256_xor_si256(c2[4915],_mm256_xor_si256(c2[3658],_mm256_xor_si256(c2[4197],_mm256_xor_si256(c2[4017],_mm256_xor_si256(c2[2760],_mm256_xor_si256(c2[3657],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[4578],_mm256_xor_si256(c2[1156],_mm256_xor_si256(c2[5658],_mm256_xor_si256(c2[2593],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[1336],_mm256_xor_si256(c2[4952],_mm256_xor_si256(c2[4772],_mm256_xor_si256(c2[3515],_mm256_xor_si256(c2[5130],_mm256_xor_si256(c2[4053],_mm256_xor_si256(c2[3873],_mm256_xor_si256(c2[2272],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[835],_mm256_xor_si256(c2[2451],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[4793],_mm256_xor_si256(c2[4989],_mm256_xor_si256(c2[4809],_mm256_xor_si256(c2[3552],_mm256_xor_si256(c2[1748],_mm256_xor_si256(c2[671],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[5366],_mm256_xor_si256(c2[4109],_mm256_xor_si256(c2[1408],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[3391],_mm256_xor_si256(c2[3211],_mm256_xor_si256(c2[4486],_mm256_xor_si256(c2[4306],_mm256_xor_si256(c2[3049],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[707],_mm256_xor_si256(c2[1425],_mm256_xor_si256(c2[348],c2[168]))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[306]=simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[4860],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[2161],simde_mm256_xor_si256(c2[1981],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[4143],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[3439],simde_mm256_xor_si256(c2[3259],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[2902],simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[1645],simde_mm256_xor_si256(c2[2558],simde_mm256_xor_si256(c2[2378],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[2023],simde_mm256_xor_si256(c2[1843],simde_mm256_xor_si256(c2[5095],simde_mm256_xor_si256(c2[4915],simde_mm256_xor_si256(c2[3658],simde_mm256_xor_si256(c2[4197],simde_mm256_xor_si256(c2[4017],simde_mm256_xor_si256(c2[2760],simde_mm256_xor_si256(c2[3657],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[4578],simde_mm256_xor_si256(c2[1156],simde_mm256_xor_si256(c2[5658],simde_mm256_xor_si256(c2[2593],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[1336],simde_mm256_xor_si256(c2[4952],simde_mm256_xor_si256(c2[4772],simde_mm256_xor_si256(c2[3515],simde_mm256_xor_si256(c2[5130],simde_mm256_xor_si256(c2[4053],simde_mm256_xor_si256(c2[3873],simde_mm256_xor_si256(c2[2272],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[835],simde_mm256_xor_si256(c2[2451],simde_mm256_xor_si256(c2[2271],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[4793],simde_mm256_xor_si256(c2[4989],simde_mm256_xor_si256(c2[4809],simde_mm256_xor_si256(c2[3552],simde_mm256_xor_si256(c2[1748],simde_mm256_xor_si256(c2[671],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[5366],simde_mm256_xor_si256(c2[4109],simde_mm256_xor_si256(c2[1408],simde_mm256_
xor_si256(c2[151],simde_mm256_xor_si256(c2[4468],simde_mm256_xor_si256(c2[3391],simde_mm256_xor_si256(c2[3211],simde_mm256_xor_si256(c2[4486],simde_mm256_xor_si256(c2[4306],simde_mm256_xor_si256(c2[3049],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[707],simde_mm256_xor_si256(c2[1425],simde_mm256_xor_si256(c2[348],c2[168]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[315]=_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[5582],_mm256_xor_si256(c2[2703],_mm256_xor_si256(c2[4865],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[3981],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[5059],_mm256_xor_si256(c2[3280],_mm256_xor_si256(c2[3100],_mm256_xor_si256(c2[3822],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[5637],_mm256_xor_si256(c2[4739],_mm256_xor_si256(c2[4379],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[1878],_mm256_xor_si256(c2[3315],_mm256_xor_si256(c2[5494],_mm256_xor_si256(c2[93],_mm256_xor_si256(c2[1714],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[2993],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[5531],_mm256_xor_si256(c2[2470],_mm256_xor_si256(c2[329],_mm256_xor_si256(c2[2130],_mm256_xor_si256(c2[5190],_mm256_xor_si256(c2[5208],_mm256_xor_si256(c2[5028],_mm256_xor_si256(c2[2686],c2[2147]))))))))))))))))))))))))))))))))));
+     d2[315]=simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[5582],simde_mm256_xor_si256(c2[2703],simde_mm256_xor_si256(c2[4865],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[3981],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[3624],simde_mm256_xor_si256(c2[5059],simde_mm256_xor_si256(c2[3280],simde_mm256_xor_si256(c2[3100],simde_mm256_xor_si256(c2[3822],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[5637],simde_mm256_xor_si256(c2[4739],simde_mm256_xor_si256(c2[4379],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[1878],simde_mm256_xor_si256(c2[3315],simde_mm256_xor_si256(c2[5494],simde_mm256_xor_si256(c2[93],simde_mm256_xor_si256(c2[1714],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[2993],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[5531],simde_mm256_xor_si256(c2[2470],simde_mm256_xor_si256(c2[329],simde_mm256_xor_si256(c2[2130],simde_mm256_xor_si256(c2[5190],simde_mm256_xor_si256(c2[5208],simde_mm256_xor_si256(c2[5028],simde_mm256_xor_si256(c2[2686],c2[2147]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[324]=_mm256_xor_si256(c2[4503],_mm256_xor_si256(c2[220],c2[4806]));
+     d2[324]=simde_mm256_xor_si256(c2[4503],simde_mm256_xor_si256(c2[220],c2[4806]));
 
 //row: 37
-     d2[333]=_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[4681],_mm256_xor_si256(c2[3423],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[5585],_mm256_xor_si256(c2[3964],_mm256_xor_si256(c2[4701],_mm256_xor_si256(c2[3080],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[4344],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[2723],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[2199],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[2921],_mm256_xor_si256(c2[598],_mm256_xor_si256(c2[4736],_mm256_xor_si256(c2[5459],_mm256_xor_si256(c2[3838],_mm256_xor_si256(c2[5099],_mm256_xor_si256(c2[3478],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[5656],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[977],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[2594],_mm256_xor_si256(c2[2414],_mm256_xor_si256(c2[455],_mm256_xor_si256(c2[4593],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[5131],_mm256_xor_si256(c2[4951],_mm256_xor_si256(c2[3534],_mm256_xor_si256(c2[1913],_mm256_xor_si256(c2[3713],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[1733],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[4630],_mm256_xor_si256(c2[3190],_mm256_xor_si256(c2[1749],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[1049],_mm256_xor_si256(c2[5187],_mm256_xor_si256(c2[2850],_mm256_xor_si256(c2[1229],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[4469],_mm256_xor_si256(c2[4289],_mm256_xor_si256(c2[5748],_mm256_xor_si256(c2[4127],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[1785],_mm256_xor_si256(c2[2867],_mm256_xor_si256(c2[1426],c2[1246])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[333]=simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[4681],simde_mm256_xor_si256(c2[3423],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[5585],simde_mm256_xor_si256(c2[3964],simde_mm256_xor_si256(c2[4701],simde_mm256_xor_si256(c2[3080],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[4344],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[2723],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[2199],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[2921],simde_mm256_xor_si256(c2[598],simde_mm256_xor_si256(c2[4736],simde_mm256_xor_si256(c2[5459],simde_mm256_xor_si256(c2[3838],simde_mm256_xor_si256(c2[5099],simde_mm256_xor_si256(c2[3478],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[5656],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[977],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[2594],simde_mm256_xor_si256(c2[2414],simde_mm256_xor_si256(c2[455],simde_mm256_xor_si256(c2[4593],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[5131],simde_mm256_xor_si256(c2[4951],simde_mm256_xor_si256(c2[3534],simde_mm256_xor_si256(c2[1913],simde_mm256_xor_si256(c2[3713],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[1733],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[4630],simde_mm256_xor_si256(c2[3190],simde_mm256_xor_si256(c2[1749],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[1049],simde_mm256_xor_si256(c2[5187],simde_mm256_xor_si256(c2[2850],simde_mm256_xor_si256(c2[1229],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[4469],simde_mm256_xor_si256(c2[4289],simde_mm256_xor_si256(c2[5748],simde_mm256_xor_si256(c2[4127],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[1785],simde_mm256_xor_si256(c2[2867],simde_mm256_xor_si256(c2[1426],c2[1246])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[342]=_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[5582],_mm256_xor_si256(c2[4878],_mm256_xor_si256(c2[4698],_mm256_xor_si256(c2[1821],_mm256_xor_si256(c2[4341],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[3997],_mm256_xor_si256(c2[3817],_mm256_xor_si256(c2[4539],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[5456],_mm256_xor_si256(c2[5096],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[2595],_mm256_xor_si256(c2[4032],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[3711],_mm256_xor_si256(c2[3531],_mm256_xor_si256(c2[3710],_mm256_xor_si256(c2[1730],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[3187],_mm256_xor_si256(c2[1046],_mm256_xor_si256(c2[2847],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[3403],c2[2864]))))))))))))))))))))))))))))))))));
+     d2[342]=simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[5582],simde_mm256_xor_si256(c2[4878],simde_mm256_xor_si256(c2[4698],simde_mm256_xor_si256(c2[1821],simde_mm256_xor_si256(c2[4341],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[3997],simde_mm256_xor_si256(c2[3817],simde_mm256_xor_si256(c2[4539],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[5456],simde_mm256_xor_si256(c2[5096],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[2595],simde_mm256_xor_si256(c2[4032],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[3711],simde_mm256_xor_si256(c2[3531],simde_mm256_xor_si256(c2[3710],simde_mm256_xor_si256(c2[1730],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[3187],simde_mm256_xor_si256(c2[1046],simde_mm256_xor_si256(c2[2847],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[3403],c2[2864]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[351]=_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[2167],_mm256_xor_si256(c2[4143],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[4165],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[582],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[3119],_mm256_xor_si256(c2[2939],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[1681],_mm256_xor_si256(c2[4039],_mm256_xor_si256(c2[3859],_mm256_xor_si256(c2[4939],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[2976],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[3154],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[475],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[4074],_mm256_xor_si256(c2[3013],_mm256_xor_si256(c2[2833],_mm256_xor_si256(c2[5531],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[3570],_mm256_xor_si256(c2[3390],_mm256_xor_si256(c2[5191],_mm256_xor_si256(c2[2492],_mm256_xor_si256(c2[2510],_mm256_xor_si256(c2[2330],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[5747],c2[5208]))))))))))))))))))))))))))))))))))))))))));
+     d2[351]=simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[2167],simde_mm256_xor_si256(c2[4143],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[4165],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[582],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[3119],simde_mm256_xor_si256(c2[2939],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[1681],simde_mm256_xor_si256(c2[4039],simde_mm256_xor_si256(c2[3859],simde_mm256_xor_si256(c2[4939],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[2976],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[3154],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[475],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[4074],simde_mm256_xor_si256(c2[3013],simde_mm256_xor_si256(c2[2833],simde_mm256_xor_si256(c2[5531],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[3570],simde_mm256_xor_si256(c2[3390],simde_mm256_xor_si256(c2[5191],simde_mm256_xor_si256(c2[2492],simde_mm256_xor_si256(c2[2510],simde_mm256_xor_si256(c2[2330],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[5747],c2[5208]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[360]=_mm256_xor_si256(c2[1982],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[4862],_mm256_xor_si256(c2[4323],_mm256_xor_si256(c2[1265],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[5601],_mm256_xor_si256(c2[3263],_mm256_xor_si256(c2[2724],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[5424],_mm256_xor_si256(c2[5244],_mm256_xor_si256(c2[5259],_mm256_xor_si256(c2[4720],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[5622],_mm256_xor_si256(c2[5442],_mm256_xor_si256(c2[1116],_mm256_xor_si256(c2[2037],_mm256_xor_si256(c2[1498],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[2418],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[3498],_mm256_xor_si256(c2[5474],_mm256_xor_si256(c2[5115],_mm256_xor_si256(c2[4935],_mm256_xor_si256(c2[1894],_mm256_xor_si256(c2[1355],_mm256_xor_si256(c2[2252],_mm256_xor_si256(c2[1893],_mm256_xor_si256(c2[1713],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[4434],_mm256_xor_si256(c2[5152],_mm256_xor_si256(c2[4613],_mm256_xor_si256(c2[3172],_mm256_xor_si256(c2[2813],_mm256_xor_si256(c2[2633],_mm256_xor_si256(c2[1931],_mm256_xor_si256(c2[1392],_mm256_xor_si256(c2[4629],_mm256_xor_si256(c2[4270],_mm256_xor_si256(c2[4090],_mm256_xor_si256(c2[2488],_mm256_xor_si256(c2[1949],_mm256_xor_si256(c2[4289],_mm256_xor_si256(c2[3750],_mm256_xor_si256(c2[1590],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[1051],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[4306],_mm256_xor_si256(c2[4306],_mm256_xor_si256(c2[3947],c2[3767]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[360]=simde_mm256_xor_si256(c2[1982],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[4862],simde_mm256_xor_si256(c2[4323],simde_mm256_xor_si256(c2[1265],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[5601],simde_mm256_xor_si256(c2[3263],simde_mm256_xor_si256(c2[2724],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[5424],simde_mm256_xor_si256(c2[5244],simde_mm256_xor_si256(c2[5259],simde_mm256_xor_si256(c2[4720],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[5622],simde_mm256_xor_si256(c2[5442],simde_mm256_xor_si256(c2[1116],simde_mm256_xor_si256(c2[2037],simde_mm256_xor_si256(c2[1498],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[2418],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[3498],simde_mm256_xor_si256(c2[5474],simde_mm256_xor_si256(c2[5115],simde_mm256_xor_si256(c2[4935],simde_mm256_xor_si256(c2[1894],simde_mm256_xor_si256(c2[1355],simde_mm256_xor_si256(c2[2252],simde_mm256_xor_si256(c2[1893],simde_mm256_xor_si256(c2[1713],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[4434],simde_mm256_xor_si256(c2[5152],simde_mm256_xor_si256(c2[4613],simde_mm256_xor_si256(c2[3172],simde_mm256_xor_si256(c2[2813],simde_mm256_xor_si256(c2[2633],simde_mm256_xor_si256(c2[1931],simde_mm256_xor_si256(c2[1392],simde_mm256_xor_si256(c2[4629],simde_mm256_xor_si256(c2[4270],simde_mm256_xor_si256(c2[4090],simde_mm256_xor_si256(c2[2488],simde_mm256_xor_si256(c2[1949],simde_mm256_xor_si256(c2[4289],simde_mm256_xor_si256(c2[3750],simde_mm256_xor_si256(c2[1590],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[1051],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[4306],simde_mm256_xor_si256(c2[4306],simde_mm256_xor_si256(c2[3947],c2[3767]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[369]=_mm256_xor_si256(c2[3960],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[2359],_mm256_xor_si256(c2[2179],_mm256_xor_si256(c2[5061],_mm256_xor_si256(c2[1822],_mm256_xor_si256(c2[2899],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1298],_mm256_xor_si256(c2[2020],_mm256_xor_si256(c2[4015],_mm256_xor_si256(c2[3835],_mm256_xor_si256(c2[2937],_mm256_xor_si256(c2[2577],_mm256_xor_si256(c2[4755],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[3692],_mm256_xor_si256(c2[4050],_mm256_xor_si256(c2[2791],_mm256_xor_si256(c2[1192],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[1191],_mm256_xor_si256(c2[4970],_mm256_xor_si256(c2[3729],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[4286],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[3226],_mm256_xor_si256(c2[884],c2[345]))))))))))))))))))))))))))))))))));
+     d2[369]=simde_mm256_xor_si256(c2[3960],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[2359],simde_mm256_xor_si256(c2[2179],simde_mm256_xor_si256(c2[5061],simde_mm256_xor_si256(c2[1822],simde_mm256_xor_si256(c2[2899],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1298],simde_mm256_xor_si256(c2[2020],simde_mm256_xor_si256(c2[4015],simde_mm256_xor_si256(c2[3835],simde_mm256_xor_si256(c2[2937],simde_mm256_xor_si256(c2[2577],simde_mm256_xor_si256(c2[4755],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[3692],simde_mm256_xor_si256(c2[4050],simde_mm256_xor_si256(c2[2791],simde_mm256_xor_si256(c2[1192],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[1191],simde_mm256_xor_si256(c2[4970],simde_mm256_xor_si256(c2[3729],simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[4286],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[3226],simde_mm256_xor_si256(c2[884],c2[345]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc320_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc320_byte.c
index d063f5a3bc3adc4d1b61612bd0241b031e25fdb8..2fcdf136fcf0a15d127625a5faf8e7d7674e229c 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc320_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc320_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc320_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[6269],_mm256_xor_si256(c2[5463],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[6349],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[6269],simde_mm256_xor_si256(c2[5463],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[6349],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))));
 
 //row: 1
-     d2[10]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[6269],_mm256_xor_si256(c2[5463],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[6349],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))))))))));
+     d2[10]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[6269],simde_mm256_xor_si256(c2[5463],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[6349],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[20]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[1804],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[6269],_mm256_xor_si256(c2[5663],_mm256_xor_si256(c2[5463],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[4922],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[6349],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[4781],_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))))))))))))))))));
+     d2[20]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[1804],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[6269],simde_mm256_xor_si256(c2[5663],simde_mm256_xor_si256(c2[5463],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[4922],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[6349],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[4781],simde_mm256_xor_si256(c2[4581],c2[2781]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[30]=_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[1221],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[6269],_mm256_xor_si256(c2[5463],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[1283],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[3704],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[6349],_mm256_xor_si256(c2[5141],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[2981],c2[2781]))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[1221],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[6269],simde_mm256_xor_si256(c2[5463],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[1283],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[3704],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[6349],simde_mm256_xor_si256(c2[5141],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[2981],c2[2781]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[40]=_mm256_xor_si256(c2[5804],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[3205],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[5824],_mm256_xor_si256(c2[5624],_mm256_xor_si256(c2[5227],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[1624],_mm256_xor_si256(c2[5844],_mm256_xor_si256(c2[5644],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[5864],_mm256_xor_si256(c2[5664],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[6267],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[488],_mm256_xor_si256(c2[5704],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[5924],_mm256_xor_si256(c2[5724],_mm256_xor_si256(c2[4127],_mm256_xor_si256(c2[1725],_mm256_xor_si256(c2[5744],_mm256_xor_si256(c2[4346],_mm256_xor_si256(c2[5764],_mm256_xor_si256(c2[1768],_mm256_xor_si256(c2[3566],_mm256_xor_si256(c2[5984],_mm256_xor_si256(c2[5784],_mm256_xor_si256(c2[3986],c2[2186]))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[5804],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[3205],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[5824],simde_mm256_xor_si256(c2[5624],simde_mm256_xor_si256(c2[5227],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[1624],simde_mm256_xor_si256(c2[5844],simde_mm256_xor_si256(c2[5644],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[5864],simde_mm256_xor_si256(c2[5664],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[6267],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[488],simde_mm256_xor_si256(c2[5704],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[5924],simde_mm256_xor_si256(c2[5724],simde_mm256_xor_si256(c2[4127],simde_mm256_xor_si256(c2[1725],simde_mm256_xor_si256(c2[5744],simde_mm256_xor_si256(c2[4346],simde_mm256_xor_si256(c2[5764],simde_mm256_xor_si256(c2[1768],simde_mm256_xor_si256(c2[3566],simde_mm256_xor_si256(c2[5984],simde_mm256_xor_si256(c2[5784],simde_mm256_xor_si256(c2[3986],c2[2186]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[50]=_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[6201],_mm256_xor_si256(c2[1606],_mm256_xor_si256(c2[3802],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[6221],_mm256_xor_si256(c2[5824],_mm256_xor_si256(c2[1023],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[6241],_mm256_xor_si256(c2[2842],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[6261],_mm256_xor_si256(c2[5465],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[6281],_mm256_xor_si256(c2[1682],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[6301],_mm256_xor_si256(c2[3506],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[6321],_mm256_xor_si256(c2[4724],_mm256_xor_si256(c2[2322],_mm256_xor_si256(c2[6341],_mm256_xor_si256(c2[4943],_mm256_xor_si256(c2[2744],_mm256_xor_si256(c2[6361],_mm256_xor_si256(c2[2365],_mm256_xor_si256(c2[4163],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[6381],_mm256_xor_si256(c2[4583],c2[2783]))))))))))))))))))))))))))))))))))));
+     d2[50]=simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[6201],simde_mm256_xor_si256(c2[1606],simde_mm256_xor_si256(c2[3802],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[6221],simde_mm256_xor_si256(c2[5824],simde_mm256_xor_si256(c2[1023],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[6241],simde_mm256_xor_si256(c2[2842],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[6261],simde_mm256_xor_si256(c2[5465],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[6281],simde_mm256_xor_si256(c2[1682],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[6301],simde_mm256_xor_si256(c2[3506],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[6321],simde_mm256_xor_si256(c2[4724],simde_mm256_xor_si256(c2[2322],simde_mm256_xor_si256(c2[6341],simde_mm256_xor_si256(c2[4943],simde_mm256_xor_si256(c2[2744],simde_mm256_xor_si256(c2[6361],simde_mm256_xor_si256(c2[2365],simde_mm256_xor_si256(c2[4163],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[6381],simde_mm256_xor_si256(c2[4583],c2[2783]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[60]=_mm256_xor_si256(c2[4004],_mm256_xor_si256(c2[3804],_mm256_xor_si256(c2[5608],_mm256_xor_si256(c2[1405],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[3824],_mm256_xor_si256(c2[3427],_mm256_xor_si256(c2[5025],_mm256_xor_si256(c2[4044],_mm256_xor_si256(c2[3844],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[4064],_mm256_xor_si256(c2[3864],_mm256_xor_si256(c2[3068],_mm256_xor_si256(c2[4467],_mm256_xor_si256(c2[3884],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[5087],_mm256_xor_si256(c2[3904],_mm256_xor_si256(c2[1109],_mm256_xor_si256(c2[5703],_mm256_xor_si256(c2[4124],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[2327],_mm256_xor_si256(c2[6324],_mm256_xor_si256(c2[3944],_mm256_xor_si256(c2[2546],_mm256_xor_si256(c2[743],_mm256_xor_si256(c2[3964],_mm256_xor_si256(c2[6367],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[4184],_mm256_xor_si256(c2[3984],_mm256_xor_si256(c2[2186],_mm256_xor_si256(c2[386],c2[2781]))))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[4004],simde_mm256_xor_si256(c2[3804],simde_mm256_xor_si256(c2[5608],simde_mm256_xor_si256(c2[1405],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[3824],simde_mm256_xor_si256(c2[3427],simde_mm256_xor_si256(c2[5025],simde_mm256_xor_si256(c2[4044],simde_mm256_xor_si256(c2[3844],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[4064],simde_mm256_xor_si256(c2[3864],simde_mm256_xor_si256(c2[3068],simde_mm256_xor_si256(c2[4467],simde_mm256_xor_si256(c2[3884],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[5087],simde_mm256_xor_si256(c2[3904],simde_mm256_xor_si256(c2[1109],simde_mm256_xor_si256(c2[5703],simde_mm256_xor_si256(c2[4124],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[2327],simde_mm256_xor_si256(c2[6324],simde_mm256_xor_si256(c2[3944],simde_mm256_xor_si256(c2[2546],simde_mm256_xor_si256(c2[743],simde_mm256_xor_si256(c2[3964],simde_mm256_xor_si256(c2[6367],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[4184],simde_mm256_xor_si256(c2[3984],simde_mm256_xor_si256(c2[2186],simde_mm256_xor_si256(c2[386],c2[2781]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[70]=_mm256_xor_si256(c2[3],_mm256_xor_si256(c2[6202],_mm256_xor_si256(c2[2602],_mm256_xor_si256(c2[1607],_mm256_xor_si256(c2[4406],_mm256_xor_si256(c2[3803],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[23],_mm256_xor_si256(c2[6222],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[5825],_mm256_xor_si256(c2[2225],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[4023],_mm256_xor_si256(c2[3823],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[6242],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[2843],_mm256_xor_si256(c2[5842],_mm256_xor_si256(c2[5642],_mm256_xor_si256(c2[63],_mm256_xor_si256(c2[6262],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[5466],_mm256_xor_si256(c2[1866],_mm256_xor_si256(c2[466],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[6282],_mm256_xor_si256(c2[2682],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[4482],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[4085],_mm256_xor_si256(c2[3885],_mm256_xor_si256(c2[6302],_mm256_xor_si256(c2[2702],_mm256_xor_si256(c2[3507],_mm256_xor_si256(c2[107],_mm256_xor_si256(c2[6306],_mm256_xor_si256(c2[2701],_mm256_xor_si256(c2[123],_mm256_xor_si256(c2[6322],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[4725],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[2323],_mm256_xor_si256(c2[5322],_mm256_xor_si256(c2[5122],_mm256_xor_si256(c2[6342],_mm256_xor_si256(c2[2742],_mm256_xor_si256(c2[4944],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[4144],_mm256_xor_si256(c2[6362],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[2366],_mm256_xor_si256(c2[5165],_mm256_xor_si256(c2[4164],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[6382],_mm256_xor_si256(c2[2782],_mm256_xor_si256(c2[4584],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[2784],_mm256_xor_si256(c2[5783],c2[5583]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[70]=simde_mm256_xor_si256(c2[3],simde_mm256_xor_si256(c2[6202],simde_mm256_xor_si256(c2[2602],simde_mm256_xor_si256(c2[1607],simde_mm256_xor_si256(c2[4406],simde_mm256_xor_si256(c2[3803],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[23],simde_mm256_xor_si256(c2[6222],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[5825],simde_mm256_xor_si256(c2[2225],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[4023],simde_mm256_xor_si256(c2[3823],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[6242],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[2843],simde_mm256_xor_si256(c2[5842],simde_mm256_xor_si256(c2[5642],simde_mm256_xor_si256(c2[63],simde_mm256_xor_si256(c2[6262],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[5466],simde_mm256_xor_si256(c2[1866],simde_mm256_xor_si256(c2[466],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[6282],simde_mm256_xor_si256(c2[2682],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[4482],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[4085],simde_mm256_xor_si256(c2[3885],simde_mm256_xor_si256(c2[6302],simde_mm256_xor_si256(c2[2702],simde_mm256_xor_si256(c2[3507],simde_mm256_xor_si256(c2[107],simde_mm256_xor_si256(c2[6306],simde_mm256_xor_si256(c2[2701],simde_mm256_xor_si256(c2[123],simde_mm256_xor_si256(c2[6322],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[4725],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[2323],simde_mm256_xor_si256(c2[5322],simde_mm256_xor_si256(c2[5122],simde_mm256_xor_si256(c2[6342],simde_mm256_xor_si256(c2[2742],simde_mm256_xor_si256(c2[4944],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[4144],simde_mm256_xor_si256(c2[6362],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[2366],simde_mm256_xor_si256(c2[5165],simde_mm256_xor_si256(c2[4164],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256
(c2[6382],simde_mm256_xor_si256(c2[2782],simde_mm256_xor_si256(c2[4584],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[2784],simde_mm256_xor_si256(c2[5783],c2[5583]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[80]=_mm256_xor_si256(c2[4602],_mm256_xor_si256(c2[4402],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[6206],_mm256_xor_si256(c2[2003],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[4622],_mm256_xor_si256(c2[4422],_mm256_xor_si256(c2[4025],_mm256_xor_si256(c2[5623],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[4642],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[1043],_mm256_xor_si256(c2[4662],_mm256_xor_si256(c2[4462],_mm256_xor_si256(c2[3866],_mm256_xor_si256(c2[3666],_mm256_xor_si256(c2[5065],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[4482],_mm256_xor_si256(c2[6282],_mm256_xor_si256(c2[5685],_mm256_xor_si256(c2[4702],_mm256_xor_si256(c2[4502],_mm256_xor_si256(c2[1707],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[4522],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[2925],_mm256_xor_si256(c2[523],_mm256_xor_si256(c2[4742],_mm256_xor_si256(c2[4542],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[4762],_mm256_xor_si256(c2[4562],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[2364],_mm256_xor_si256(c2[4782],_mm256_xor_si256(c2[4582],_mm256_xor_si256(c2[2984],_mm256_xor_si256(c2[2784],c2[984]))))))))))))))))))))))))))))))))))))))))));
+     d2[80]=simde_mm256_xor_si256(c2[4602],simde_mm256_xor_si256(c2[4402],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[6206],simde_mm256_xor_si256(c2[2003],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[4622],simde_mm256_xor_si256(c2[4422],simde_mm256_xor_si256(c2[4025],simde_mm256_xor_si256(c2[5623],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[4642],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[1043],simde_mm256_xor_si256(c2[4662],simde_mm256_xor_si256(c2[4462],simde_mm256_xor_si256(c2[3866],simde_mm256_xor_si256(c2[3666],simde_mm256_xor_si256(c2[5065],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[4482],simde_mm256_xor_si256(c2[6282],simde_mm256_xor_si256(c2[5685],simde_mm256_xor_si256(c2[4702],simde_mm256_xor_si256(c2[4502],simde_mm256_xor_si256(c2[1707],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[4522],simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[2925],simde_mm256_xor_si256(c2[523],simde_mm256_xor_si256(c2[4742],simde_mm256_xor_si256(c2[4542],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[4762],simde_mm256_xor_si256(c2[4562],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[2364],simde_mm256_xor_si256(c2[4782],simde_mm256_xor_si256(c2[4582],simde_mm256_xor_si256(c2[2984],simde_mm256_xor_si256(c2[2784],c2[984]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[90]=_mm256_xor_si256(c2[3601],_mm256_xor_si256(c2[4202],_mm256_xor_si256(c2[4002],_mm256_xor_si256(c2[5405],_mm256_xor_si256(c2[5806],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[4222],_mm256_xor_si256(c2[4022],_mm256_xor_si256(c2[3224],_mm256_xor_si256(c2[3625],_mm256_xor_si256(c2[4822],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[4242],_mm256_xor_si256(c2[4042],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[3661],_mm256_xor_si256(c2[4262],_mm256_xor_si256(c2[4062],_mm256_xor_si256(c2[2865],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[4264],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[3681],_mm256_xor_si256(c2[4082],_mm256_xor_si256(c2[5481],_mm256_xor_si256(c2[5882],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[4102],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[1307],_mm256_xor_si256(c2[3721],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4122],_mm256_xor_si256(c2[2124],_mm256_xor_si256(c2[2525],_mm256_xor_si256(c2[6121],_mm256_xor_si256(c2[123],_mm256_xor_si256(c2[3741],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[2343],_mm256_xor_si256(c2[2744],_mm256_xor_si256(c2[3761],_mm256_xor_si256(c2[4162],_mm256_xor_si256(c2[6164],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[3781],_mm256_xor_si256(c2[4382],_mm256_xor_si256(c2[4182],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[2384],_mm256_xor_si256(c2[183],c2[584])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[90]=simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[4202],simde_mm256_xor_si256(c2[4002],simde_mm256_xor_si256(c2[5405],simde_mm256_xor_si256(c2[5806],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[4222],simde_mm256_xor_si256(c2[4022],simde_mm256_xor_si256(c2[3224],simde_mm256_xor_si256(c2[3625],simde_mm256_xor_si256(c2[4822],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[4242],simde_mm256_xor_si256(c2[4042],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[3661],simde_mm256_xor_si256(c2[4262],simde_mm256_xor_si256(c2[4062],simde_mm256_xor_si256(c2[2865],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[4264],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[3681],simde_mm256_xor_si256(c2[4082],simde_mm256_xor_si256(c2[5481],simde_mm256_xor_si256(c2[5882],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[4102],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[1307],simde_mm256_xor_si256(c2[3721],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4122],simde_mm256_xor_si256(c2[2124],simde_mm256_xor_si256(c2[2525],simde_mm256_xor_si256(c2[6121],simde_mm256_xor_si256(c2[123],simde_mm256_xor_si256(c2[3741],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[2343],simde_mm256_xor_si256(c2[2744],simde_mm256_xor_si256(c2[3761],simde_mm256_xor_si256(c2[4162],simde_mm256_xor_si256(c2[6164],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[3781],simde_mm256_xor_si256(c2[4382],simde_mm256_xor_si256(c2[4182],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[2384],simde_mm256_xor_si256(c2[183],c2[584])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[100]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[5724],c2[4140])));
+     d2[100]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[5724],c2[4140])));
 
 //row: 11
-     d2[110]=_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[4200],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[6222],_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[1421],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[3440],_mm256_xor_si256(c2[3240],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[5863],_mm256_xor_si256(c2[863],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[2080],_mm256_xor_si256(c2[1683],_mm256_xor_si256(c2[1483],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[4104],_mm256_xor_si256(c2[3904],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[5122],_mm256_xor_si256(c2[2920],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[5541],_mm256_xor_si256(c2[5341],_mm256_xor_si256(c2[1540],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[2763],_mm256_xor_si256(c2[4761],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[4981],_mm256_xor_si256(c2[3381],_mm256_xor_si256(c2[3181],c2[980])))))))))))))))))))))))))))))))))))));
+     d2[110]=simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[4200],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[6222],simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[1421],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[3440],simde_mm256_xor_si256(c2[3240],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[5863],simde_mm256_xor_si256(c2[863],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[2080],simde_mm256_xor_si256(c2[1683],simde_mm256_xor_si256(c2[1483],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[4104],simde_mm256_xor_si256(c2[3904],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[5122],simde_mm256_xor_si256(c2[2920],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[5541],simde_mm256_xor_si256(c2[5341],simde_mm256_xor_si256(c2[1540],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[2763],simde_mm256_xor_si256(c2[4761],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[4981],simde_mm256_xor_si256(c2[3381],simde_mm256_xor_si256(c2[3181],c2[980])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[120]=_mm256_xor_si256(c2[3201],_mm256_xor_si256(c2[3001],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[3021],_mm256_xor_si256(c2[2624],_mm256_xor_si256(c2[4222],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3241],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[6041],_mm256_xor_si256(c2[3261],_mm256_xor_si256(c2[3061],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[3664],_mm256_xor_si256(c2[3463],_mm256_xor_si256(c2[3081],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[4284],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[3321],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[5521],_mm256_xor_si256(c2[3141],_mm256_xor_si256(c2[1743],_mm256_xor_si256(c2[3161],_mm256_xor_si256(c2[5564],_mm256_xor_si256(c2[963],_mm256_xor_si256(c2[3381],_mm256_xor_si256(c2[3181],_mm256_xor_si256(c2[1383],c2[5982]))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[3201],simde_mm256_xor_si256(c2[3001],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[3021],simde_mm256_xor_si256(c2[2624],simde_mm256_xor_si256(c2[4222],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3241],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[6041],simde_mm256_xor_si256(c2[3261],simde_mm256_xor_si256(c2[3061],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[3664],simde_mm256_xor_si256(c2[3463],simde_mm256_xor_si256(c2[3081],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[4284],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[3321],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[5521],simde_mm256_xor_si256(c2[3141],simde_mm256_xor_si256(c2[1743],simde_mm256_xor_si256(c2[3161],simde_mm256_xor_si256(c2[5564],simde_mm256_xor_si256(c2[963],simde_mm256_xor_si256(c2[3381],simde_mm256_xor_si256(c2[3181],simde_mm256_xor_si256(c2[1383],c2[5982]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[130]=_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[2601],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[5020],_mm256_xor_si256(c2[4623],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[6221],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[5040],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[1641],_mm256_xor_si256(c2[5060],_mm256_xor_si256(c2[4264],_mm256_xor_si256(c2[5663],_mm256_xor_si256(c2[5080],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[6283],_mm256_xor_si256(c2[5100],_mm256_xor_si256(c2[2505],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[5120],_mm256_xor_si256(c2[3523],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[1121],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[3742],_mm256_xor_si256(c2[5160],_mm256_xor_si256(c2[1164],_mm256_xor_si256(c2[3162],_mm256_xor_si256(c2[2962],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[5180],_mm256_xor_si256(c2[3382],_mm256_xor_si256(c2[1782],c2[1582])))))))))))))))))))))))))))))))))))));
+     d2[130]=simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[2601],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[5020],simde_mm256_xor_si256(c2[4623],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[6221],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[5040],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[1641],simde_mm256_xor_si256(c2[5060],simde_mm256_xor_si256(c2[4264],simde_mm256_xor_si256(c2[5663],simde_mm256_xor_si256(c2[5080],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[6283],simde_mm256_xor_si256(c2[5100],simde_mm256_xor_si256(c2[2505],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[5120],simde_mm256_xor_si256(c2[3523],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[1121],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[3742],simde_mm256_xor_si256(c2[5160],simde_mm256_xor_si256(c2[1164],simde_mm256_xor_si256(c2[3162],simde_mm256_xor_si256(c2[2962],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[5180],simde_mm256_xor_si256(c2[3382],simde_mm256_xor_si256(c2[1782],c2[1582])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[140]=_mm256_xor_si256(c2[5001],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[2801],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[4821],_mm256_xor_si256(c2[5220],_mm256_xor_si256(c2[4424],_mm256_xor_si256(c2[4823],_mm256_xor_si256(c2[6022],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[4841],_mm256_xor_si256(c2[5240],_mm256_xor_si256(c2[1442],_mm256_xor_si256(c2[2041],_mm256_xor_si256(c2[1841],_mm256_xor_si256(c2[5061],_mm256_xor_si256(c2[4861],_mm256_xor_si256(c2[5260],_mm256_xor_si256(c2[4065],_mm256_xor_si256(c2[4464],_mm256_xor_si256(c2[5464],_mm256_xor_si256(c2[5863],_mm256_xor_si256(c2[4881],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[681],_mm256_xor_si256(c2[6084],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[4901],_mm256_xor_si256(c2[5300],_mm256_xor_si256(c2[2106],_mm256_xor_si256(c2[2705],_mm256_xor_si256(c2[2505],_mm256_xor_si256(c2[5121],_mm256_xor_si256(c2[4921],_mm256_xor_si256(c2[5320],_mm256_xor_si256(c2[3324],_mm256_xor_si256(c2[3723],_mm256_xor_si256(c2[922],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[2124],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[5340],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[4961],_mm256_xor_si256(c2[5360],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[1364],_mm256_xor_si256(c2[2763],_mm256_xor_si256(c2[3362],_mm256_xor_si256(c2[3162],_mm256_xor_si256(c2[5181],_mm256_xor_si256(c2[4981],_mm256_xor_si256(c2[5380],_mm256_xor_si256(c2[3183],_mm256_xor_si256(c2[3582],_mm256_xor_si256(c2[1383],_mm256_xor_si256(c2[1982],c2[1782])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[140]=simde_mm256_xor_si256(c2[5001],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[2801],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[4821],simde_mm256_xor_si256(c2[5220],simde_mm256_xor_si256(c2[4424],simde_mm256_xor_si256(c2[4823],simde_mm256_xor_si256(c2[6022],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[4841],simde_mm256_xor_si256(c2[5240],simde_mm256_xor_si256(c2[1442],simde_mm256_xor_si256(c2[2041],simde_mm256_xor_si256(c2[1841],simde_mm256_xor_si256(c2[5061],simde_mm256_xor_si256(c2[4861],simde_mm256_xor_si256(c2[5260],simde_mm256_xor_si256(c2[4065],simde_mm256_xor_si256(c2[4464],simde_mm256_xor_si256(c2[5464],simde_mm256_xor_si256(c2[5863],simde_mm256_xor_si256(c2[4881],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[681],simde_mm256_xor_si256(c2[6084],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[4901],simde_mm256_xor_si256(c2[5300],simde_mm256_xor_si256(c2[2106],simde_mm256_xor_si256(c2[2705],simde_mm256_xor_si256(c2[2505],simde_mm256_xor_si256(c2[5121],simde_mm256_xor_si256(c2[4921],simde_mm256_xor_si256(c2[5320],simde_mm256_xor_si256(c2[3324],simde_mm256_xor_si256(c2[3723],simde_mm256_xor_si256(c2[922],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[2124],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[5340],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[4961],simde_mm256_xor_si256(c2[5360],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[1364],simde_mm256_xor_si256(c2[2763],simde_mm256_xor_si256(c2[3362],simde_mm256_xor_si256(c2[3162],simde_mm256_xor_si256(c2[5181],simde_mm256_xor_si256(c2[4981],simde_mm256_xor_
si256(c2[5380],simde_mm256_xor_si256(c2[3183],simde_mm256_xor_si256(c2[3582],simde_mm256_xor_si256(c2[1383],simde_mm256_xor_si256(c2[1982],c2[1782])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[150]=_mm256_xor_si256(c2[1602],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[3406],_mm256_xor_si256(c2[2207],_mm256_xor_si256(c2[5602],_mm256_xor_si256(c2[4403],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[623],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[1225],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[2823],_mm256_xor_si256(c2[1624],_mm256_xor_si256(c2[1642],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[443],_mm256_xor_si256(c2[4642],_mm256_xor_si256(c2[3443],_mm256_xor_si256(c2[1662],_mm256_xor_si256(c2[663],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[6066],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[1066],_mm256_xor_si256(c2[1682],_mm256_xor_si256(c2[483],_mm256_xor_si256(c2[3482],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[2885],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1702],_mm256_xor_si256(c2[503],_mm256_xor_si256(c2[5306],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[723],_mm256_xor_si256(c2[523],_mm256_xor_si256(c2[125],_mm256_xor_si256(c2[5325],_mm256_xor_si256(c2[4122],_mm256_xor_si256(c2[2923],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[543],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[5544],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[563],_mm256_xor_si256(c2[4165],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[5963],_mm256_xor_si256(c2[4764],_mm256_xor_si256(c2[1782],_mm256_xor_si256(c2[783],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[6383],_mm256_xor_si256(c2[5184],_mm256_xor_si256(c2[4583],c2[3384]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[150]=simde_mm256_xor_si256(c2[1602],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[3406],simde_mm256_xor_si256(c2[2207],simde_mm256_xor_si256(c2[5602],simde_mm256_xor_si256(c2[4403],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[623],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[1225],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[2823],simde_mm256_xor_si256(c2[1624],simde_mm256_xor_si256(c2[1642],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[443],simde_mm256_xor_si256(c2[4642],simde_mm256_xor_si256(c2[3443],simde_mm256_xor_si256(c2[1662],simde_mm256_xor_si256(c2[663],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[6066],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[1066],simde_mm256_xor_si256(c2[1682],simde_mm256_xor_si256(c2[483],simde_mm256_xor_si256(c2[3482],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[2885],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1702],simde_mm256_xor_si256(c2[503],simde_mm256_xor_si256(c2[5306],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[723],simde_mm256_xor_si256(c2[523],simde_mm256_xor_si256(c2[125],simde_mm256_xor_si256(c2[5325],simde_mm256_xor_si256(c2[4122],simde_mm256_xor_si256(c2[2923],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[543],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[5544],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[563],simde_mm256_xor_si256(c2[4165],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[5963],simde_mm256_xor_si256(c2[4764],simde_mm256_xor_si256(c2[1782],simde_mm256_xor_si256(c2[783],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[6383],simde_mm256_xor_si256(c2[5184],simde_mm256_xor_si256(c2[4583],c2[3384]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[160]=_mm256_xor_si256(c2[3003],_mm256_xor_si256(c2[2803],_mm256_xor_si256(c2[3004],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[4607],_mm256_xor_si256(c2[4808],_mm256_xor_si256(c2[4608],_mm256_xor_si256(c2[404],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[3023],_mm256_xor_si256(c2[2823],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[2427],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[4025],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3043],_mm256_xor_si256(c2[2843],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[5843],_mm256_xor_si256(c2[5844],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[2863],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[2067],_mm256_xor_si256(c2[2268],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[3466],_mm256_xor_si256(c2[3467],_mm256_xor_si256(c2[2883],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[4684],_mm256_xor_si256(c2[4086],_mm256_xor_si256(c2[4087],_mm256_xor_si256(c2[2903],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[2923],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[1326],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[1327],_mm256_xor_si256(c2[5323],_mm256_xor_si256(c2[5324],_mm256_xor_si256(c2[2943],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[1545],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[2963],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2964],_mm256_xor_si256(c2[5366],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[765],_mm256_xor_si256(c2[766],_mm256_xor_si256(c2[3183],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[2984],_mm256_xor_si256(c2[1185],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[5784],_mm256_xor_si256(c2[5785],c2[3182]))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))))))))));
+     d2[160]=simde_mm256_xor_si256(c2[3003],simde_mm256_xor_si256(c2[2803],simde_mm256_xor_si256(c2[3004],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[4607],simde_mm256_xor_si256(c2[4808],simde_mm256_xor_si256(c2[4608],simde_mm256_xor_si256(c2[404],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[3023],simde_mm256_xor_si256(c2[2823],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[2427],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[4025],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3043],simde_mm256_xor_si256(c2[2843],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[5843],simde_mm256_xor_si256(c2[5844],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[2863],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[2067],simde_mm256_xor_si256(c2[2268],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[3466],simde_mm256_xor_si256(c2[3467],simde_mm256_xor_si256(c2[2883],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[4684],simde_mm256_xor_si256(c2[4086],simde_mm256_xor_si256(c2[4087],simde_mm256_xor_si256(c2[2903],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[2923],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[1326],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[1327],simde_mm256_xor_si256(c2[5323],simde_mm256_xor_si256(c2[5324],simde_mm256_xor_si256(c2[2943],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[1545],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[2963],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2964],simde_mm256_xor_si256(c2[5366],simde_mm256_xor_si256(c2[5367],simde_mm
256_xor_si256(c2[765],simde_mm256_xor_si256(c2[766],simde_mm256_xor_si256(c2[3183],simde_mm256_xor_si256(c2[2983],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[2984],simde_mm256_xor_si256(c2[1185],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[5784],simde_mm256_xor_si256(c2[5785],c2[3182])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[170]=_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[3400],_mm256_xor_si256(c2[4207],_mm256_xor_si256(c2[5404],_mm256_xor_si256(c2[5204],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[1001],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[2423],_mm256_xor_si256(c2[3620],_mm256_xor_si256(c2[3420],_mm256_xor_si256(c2[2026],_mm256_xor_si256(c2[3023],_mm256_xor_si256(c2[3624],_mm256_xor_si256(c2[4621],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[2443],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[3440],_mm256_xor_si256(c2[5443],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[2663],_mm256_xor_si256(c2[2463],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[3460],_mm256_xor_si256(c2[1667],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[2664],_mm256_xor_si256(c2[3066],_mm256_xor_si256(c2[4063],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[3480],_mm256_xor_si256(c2[4283],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[3686],_mm256_xor_si256(c2[4683],_mm256_xor_si256(c2[2503],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[3500],_mm256_xor_si256(c2[6107],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[4900],_mm256_xor_si256(c2[2723],_mm256_xor_si256(c2[2523],_mm256_xor_si256(c2[3720],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[2123],_mm256_xor_si256(c2[1923],_mm256_xor_si256(c2[4923],_mm256_xor_si256(c2[5920],_mm256_xor_si256(c2[2543],_mm256_xor_si256(c2[3740],_mm256_xor_si256(c2[3540],_mm256_xor_si256(c2[1145],_mm256_xor_si256(c2[2142],_mm256_xor_si256(c2[2563],_mm256_xor_si256(c2[3760],_mm256_xor_si256(c2[3560],_mm256_xor_si256(c2[4966],_mm256_xor_si256(c2[5963],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[2783],_mm256_xor_si256(c2[2583],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[3580],_mm256_xor_si256(c2[785],_mm256_xor_si256(c2[1982],_mm256_xor_si256(c2[1782],_mm256_xor_si256(c2[5384],c2[6381])))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))))))))))));
+     d2[170]=simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[3400],simde_mm256_xor_si256(c2[4207],simde_mm256_xor_si256(c2[5404],simde_mm256_xor_si256(c2[5204],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[1001],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[2423],simde_mm256_xor_si256(c2[3620],simde_mm256_xor_si256(c2[3420],simde_mm256_xor_si256(c2[2026],simde_mm256_xor_si256(c2[3023],simde_mm256_xor_si256(c2[3624],simde_mm256_xor_si256(c2[4621],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[2443],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[3440],simde_mm256_xor_si256(c2[5443],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[2663],simde_mm256_xor_si256(c2[2463],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[3460],simde_mm256_xor_si256(c2[1667],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[2664],simde_mm256_xor_si256(c2[3066],simde_mm256_xor_si256(c2[4063],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[3480],simde_mm256_xor_si256(c2[4283],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[3686],simde_mm256_xor_si256(c2[4683],simde_mm256_xor_si256(c2[2503],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[3500],simde_mm256_xor_si256(c2[6107],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[4900],simde_mm256_xor_si256(c2[2723],simde_mm256_xor_si256(c2[2523],simde_mm256_xor_si256(c2[3720],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[2123],simde_mm256_xor_si256(c2[1923],simde_mm256_xor_si256(c2[4923],simde_mm256_xor_si256(c2[5920],simde_mm256_xor_si256(c2[2543],simde_mm256_xor_si256(c2[3740],simde_mm256_xor_si256(c2[3540],simde_mm256_xor_si256(c2[1145],simde_mm256_xor_si256(c2[2142],simde_mm256_xor_si256(c2[2563],simde_mm256_xor_si256(c2[3760],simde_mm256_xor_si256(c2[3560],simde_mm256_xor_si256(c2[4966],simde_mm256
_xor_si256(c2[5963],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[2783],simde_mm256_xor_si256(c2[2583],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[3580],simde_mm256_xor_si256(c2[785],simde_mm256_xor_si256(c2[1982],simde_mm256_xor_si256(c2[1782],simde_mm256_xor_si256(c2[5384],c2[6381])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[180]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3720],c2[4542]));
+     d2[180]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3720],c2[4542]));
 
 //row: 19
-     d2[190]=_mm256_xor_si256(c2[5004],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[2605],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[5024],_mm256_xor_si256(c2[4627],_mm256_xor_si256(c2[6225],_mm256_xor_si256(c2[6024],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[1645],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[4268],_mm256_xor_si256(c2[5667],_mm256_xor_si256(c2[5084],_mm256_xor_si256(c2[485],_mm256_xor_si256(c2[6287],_mm256_xor_si256(c2[5104],_mm256_xor_si256(c2[2309],_mm256_xor_si256(c2[5124],_mm256_xor_si256(c2[3527],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[5144],_mm256_xor_si256(c2[3746],_mm256_xor_si256(c2[5164],_mm256_xor_si256(c2[1168],_mm256_xor_si256(c2[2966],_mm256_xor_si256(c2[5184],_mm256_xor_si256(c2[3386],c2[1586]))))))))))))))))))))))))))));
+     d2[190]=simde_mm256_xor_si256(c2[5004],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[2605],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[5024],simde_mm256_xor_si256(c2[4627],simde_mm256_xor_si256(c2[6225],simde_mm256_xor_si256(c2[6024],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_si256(c2[1645],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[4268],simde_mm256_xor_si256(c2[5667],simde_mm256_xor_si256(c2[5084],simde_mm256_xor_si256(c2[485],simde_mm256_xor_si256(c2[6287],simde_mm256_xor_si256(c2[5104],simde_mm256_xor_si256(c2[2309],simde_mm256_xor_si256(c2[5124],simde_mm256_xor_si256(c2[3527],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[5144],simde_mm256_xor_si256(c2[3746],simde_mm256_xor_si256(c2[5164],simde_mm256_xor_si256(c2[1168],simde_mm256_xor_si256(c2[2966],simde_mm256_xor_si256(c2[5184],simde_mm256_xor_si256(c2[3386],c2[1586]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[200]=_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[1403],_mm256_xor_si256(c2[3207],_mm256_xor_si256(c2[5403],_mm256_xor_si256(c2[1623],_mm256_xor_si256(c2[1423],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[2624],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1643],_mm256_xor_si256(c2[1443],_mm256_xor_si256(c2[4443],_mm256_xor_si256(c2[1663],_mm256_xor_si256(c2[1463],_mm256_xor_si256(c2[667],_mm256_xor_si256(c2[2066],_mm256_xor_si256(c2[1483],_mm256_xor_si256(c2[3283],_mm256_xor_si256(c2[2686],_mm256_xor_si256(c2[4084],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[1723],_mm256_xor_si256(c2[1523],_mm256_xor_si256(c2[6325],_mm256_xor_si256(c2[3923],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[5764],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[1583],_mm256_xor_si256(c2[6184],c2[4384]))))))))))))))))))))))))))))))))));
+     d2[200]=simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[1403],simde_mm256_xor_si256(c2[3207],simde_mm256_xor_si256(c2[5403],simde_mm256_xor_si256(c2[1623],simde_mm256_xor_si256(c2[1423],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[2624],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1643],simde_mm256_xor_si256(c2[1443],simde_mm256_xor_si256(c2[4443],simde_mm256_xor_si256(c2[1663],simde_mm256_xor_si256(c2[1463],simde_mm256_xor_si256(c2[667],simde_mm256_xor_si256(c2[2066],simde_mm256_xor_si256(c2[1483],simde_mm256_xor_si256(c2[3283],simde_mm256_xor_si256(c2[2686],simde_mm256_xor_si256(c2[4084],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[1723],simde_mm256_xor_si256(c2[1523],simde_mm256_xor_si256(c2[6325],simde_mm256_xor_si256(c2[3923],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[5764],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[1583],simde_mm256_xor_si256(c2[6184],c2[4384]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[210]=_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[1805],_mm256_xor_si256(c2[4001],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[6023],_mm256_xor_si256(c2[1422],_mm256_xor_si256(c2[1222],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[3241],_mm256_xor_si256(c2[3041],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[5664],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[1881],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[3905],_mm256_xor_si256(c2[3705],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[4923],_mm256_xor_si256(c2[2721],_mm256_xor_si256(c2[2521],_mm256_xor_si256(c2[141],_mm256_xor_si256(c2[5342],_mm256_xor_si256(c2[5142],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[2564],_mm256_xor_si256(c2[4562],_mm256_xor_si256(c2[4362],_mm256_xor_si256(c2[3560],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[4782],_mm256_xor_si256(c2[3182],c2[2982]))))))))))))))))))))))))))))))))))));
+     d2[210]=simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[1805],simde_mm256_xor_si256(c2[4001],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[6023],simde_mm256_xor_si256(c2[1422],simde_mm256_xor_si256(c2[1222],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[3241],simde_mm256_xor_si256(c2[3041],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[5664],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[1881],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[3905],simde_mm256_xor_si256(c2[3705],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[4923],simde_mm256_xor_si256(c2[2721],simde_mm256_xor_si256(c2[2521],simde_mm256_xor_si256(c2[141],simde_mm256_xor_si256(c2[5342],simde_mm256_xor_si256(c2[5142],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[2564],simde_mm256_xor_si256(c2[4562],simde_mm256_xor_si256(c2[4362],simde_mm256_xor_si256(c2[3560],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[4782],simde_mm256_xor_si256(c2[3182],c2[2982]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[220]=_mm256_xor_si256(c2[20],c2[840]);
+     d2[220]=simde_mm256_xor_si256(c2[20],c2[840]);
 
 //row: 23
-     d2[230]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2262],c2[6104]));
+     d2[230]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2262],c2[6104]));
 
 //row: 24
-     d2[240]=_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1042],c2[4782]));
+     d2[240]=simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1042],c2[4782]));
 
 //row: 25
-     d2[250]=_mm256_xor_si256(c2[0],c2[302]);
+     d2[250]=simde_mm256_xor_si256(c2[0],c2[302]);
 
 //row: 26
-     d2[260]=_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[4404],_mm256_xor_si256(c2[4204],_mm256_xor_si256(c2[3004],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[5200],_mm256_xor_si256(c2[2620],_mm256_xor_si256(c2[2420],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[2023],_mm256_xor_si256(c2[823],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[2621],_mm256_xor_si256(c2[2421],_mm256_xor_si256(c2[2640],_mm256_xor_si256(c2[2440],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[5440],_mm256_xor_si256(c2[4440],_mm256_xor_si256(c2[4240],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[2660],_mm256_xor_si256(c2[2460],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[1863],_mm256_xor_si256(c2[2680],_mm256_xor_si256(c2[2480],_mm256_xor_si256(c2[1280],_mm256_xor_si256(c2[4280],_mm256_xor_si256(c2[3080],_mm256_xor_si256(c2[3683],_mm256_xor_si256(c2[2683],_mm256_xor_si256(c2[2483],_mm256_xor_si256(c2[2700],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[6104],_mm256_xor_si256(c2[5104],_mm256_xor_si256(c2[4904],_mm256_xor_si256(c2[2720],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[923],_mm256_xor_si256(c2[6122],_mm256_xor_si256(c2[4920],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[3720],_mm256_xor_si256(c2[2740],_mm256_xor_si256(c2[2540],_mm256_xor_si256(c2[1340],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[142],_mm256_xor_si256(c2[6341],_mm256_xor_si256(c2[943],_mm256_xor_si256(c2[2760],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[1360],_mm256_xor_si256(c2[4963],_mm256_xor_si256(c2[3763],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[5761],_mm256_xor_si256(c2[5561],_mm256_xor_si256(c2[2780],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[1380],_mm256_xor_si256(c2[982],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[5981],_mm256_xor_si256(c2[5381],_mm256_xor_si256(c2[438
1],c2[4181])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[260]=simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[4404],simde_mm256_xor_si256(c2[4204],simde_mm256_xor_si256(c2[3004],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[5200],simde_mm256_xor_si256(c2[2620],simde_mm256_xor_si256(c2[2420],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[2023],simde_mm256_xor_si256(c2[823],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[2621],simde_mm256_xor_si256(c2[2421],simde_mm256_xor_si256(c2[2640],simde_mm256_xor_si256(c2[2440],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[5440],simde_mm256_xor_si256(c2[4440],simde_mm256_xor_si256(c2[4240],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[2660],simde_mm256_xor_si256(c2[2460],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[1863],simde_mm256_xor_si256(c2[2680],simde_mm256_xor_si256(c2[2480],simde_mm256_xor_si256(c2[1280],simde_mm256_xor_si256(c2[4280],simde_mm256_xor_si256(c2[3080],simde_mm256_xor_si256(c2[3683],simde_mm256_xor_si256(c2[2683],simde_mm256_xor_si256(c2[2483],simde_mm256_xor_si256(c2[2700],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[6104],simde_mm256_xor_si256(c2[5104],simde_mm256_xor_si256(c2[4904],simde_mm256_xor_si256(c2[2720],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[923],simde_mm256_xor_si256(c2[6122],simde_mm256_xor_si256(c2[4920],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[3720],simde_mm256_xor_si256(c2[2740],simde_mm256_xor_si256(c2[2540],simde_mm256_xor_si256(c2[1340],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[142],simde_mm256_xor_si256(c2[6341],simde_mm256_xor_si256(c2[943],simde_mm256_xor_si256(c2[2760],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[1360],simde_mm256_
xor_si256(c2[4963],simde_mm256_xor_si256(c2[3763],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[5761],simde_mm256_xor_si256(c2[5561],simde_mm256_xor_si256(c2[2780],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[1380],simde_mm256_xor_si256(c2[982],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[5981],simde_mm256_xor_si256(c2[5381],simde_mm256_xor_si256(c2[4381],c2[4181])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[270]=_mm256_xor_si256(c2[0],c2[121]);
+     d2[270]=simde_mm256_xor_si256(c2[0],c2[121]);
 
 //row: 28
-     d2[280]=_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[6043],c2[2903]));
+     d2[280]=simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[6043],c2[2903]));
 
 //row: 29
-     d2[290]=_mm256_xor_si256(c2[0],c2[5284]);
+     d2[290]=simde_mm256_xor_si256(c2[0],c2[5284]);
 
 //row: 30
-     d2[300]=_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[3941],c2[1384])));
+     d2[300]=simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[3941],c2[1384])));
 
 //row: 31
-     d2[310]=_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[1401],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[3423],_mm256_xor_si256(c2[5221],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[441],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[4463],_mm256_xor_si256(c2[3880],_mm256_xor_si256(c2[5680],_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[5083],_mm256_xor_si256(c2[3900],_mm256_xor_si256(c2[1305],_mm256_xor_si256(c2[1105],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[2323],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[6320],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[2742],_mm256_xor_si256(c2[2542],_mm256_xor_si256(c2[3960],_mm256_xor_si256(c2[6363],_mm256_xor_si256(c2[1962],_mm256_xor_si256(c2[1762],_mm256_xor_si256(c2[3980],_mm256_xor_si256(c2[2182],_mm256_xor_si256(c2[582],c2[382])))))))))))))))))))))))))))))))))));
+     d2[310]=simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[1401],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[3423],simde_mm256_xor_si256(c2[5221],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[441],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[4463],simde_mm256_xor_si256(c2[3880],simde_mm256_xor_si256(c2[5680],simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[5083],simde_mm256_xor_si256(c2[3900],simde_mm256_xor_si256(c2[1305],simde_mm256_xor_si256(c2[1105],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[2323],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[6320],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[2742],simde_mm256_xor_si256(c2[2542],simde_mm256_xor_si256(c2[3960],simde_mm256_xor_si256(c2[6363],simde_mm256_xor_si256(c2[1962],simde_mm256_xor_si256(c2[1762],simde_mm256_xor_si256(c2[3980],simde_mm256_xor_si256(c2[2182],simde_mm256_xor_si256(c2[582],c2[382])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[320]=_mm256_xor_si256(c2[5203],_mm256_xor_si256(c2[5003],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[2604],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[5223],_mm256_xor_si256(c2[5023],_mm256_xor_si256(c2[4626],_mm256_xor_si256(c2[6224],_mm256_xor_si256(c2[5243],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[5263],_mm256_xor_si256(c2[5063],_mm256_xor_si256(c2[4467],_mm256_xor_si256(c2[4267],_mm256_xor_si256(c2[5666],_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[5083],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[6286],_mm256_xor_si256(c2[5303],_mm256_xor_si256(c2[5103],_mm256_xor_si256(c2[2308],_mm256_xor_si256(c2[4100],_mm256_xor_si256(c2[5323],_mm256_xor_si256(c2[5123],_mm256_xor_si256(c2[3726],_mm256_xor_si256(c2[3526],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[5343],_mm256_xor_si256(c2[5143],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[5363],_mm256_xor_si256(c2[5163],_mm256_xor_si256(c2[1167],_mm256_xor_si256(c2[2965],_mm256_xor_si256(c2[5383],_mm256_xor_si256(c2[5183],_mm256_xor_si256(c2[3585],_mm256_xor_si256(c2[3385],c2[1585]))))))))))))))))))))))))))))))))))))))))));
+     d2[320]=simde_mm256_xor_si256(c2[5203],simde_mm256_xor_si256(c2[5003],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[2604],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[5223],simde_mm256_xor_si256(c2[5023],simde_mm256_xor_si256(c2[4626],simde_mm256_xor_si256(c2[6224],simde_mm256_xor_si256(c2[5243],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[5263],simde_mm256_xor_si256(c2[5063],simde_mm256_xor_si256(c2[4467],simde_mm256_xor_si256(c2[4267],simde_mm256_xor_si256(c2[5666],simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[5083],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[6286],simde_mm256_xor_si256(c2[5303],simde_mm256_xor_si256(c2[5103],simde_mm256_xor_si256(c2[2308],simde_mm256_xor_si256(c2[4100],simde_mm256_xor_si256(c2[5323],simde_mm256_xor_si256(c2[5123],simde_mm256_xor_si256(c2[3726],simde_mm256_xor_si256(c2[3526],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[5343],simde_mm256_xor_si256(c2[5143],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[5363],simde_mm256_xor_si256(c2[5163],simde_mm256_xor_si256(c2[1167],simde_mm256_xor_si256(c2[2965],simde_mm256_xor_si256(c2[5383],simde_mm256_xor_si256(c2[5183],simde_mm256_xor_si256(c2[3585],simde_mm256_xor_si256(c2[3385],c2[1585]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[330]=_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[4204],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[2420],_mm256_xor_si256(c2[2023],_mm256_xor_si256(c2[3621],_mm256_xor_si256(c2[2440],_mm256_xor_si256(c2[5440],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[2460],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[2480],_mm256_xor_si256(c2[4280],_mm256_xor_si256(c2[3683],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[6104],_mm256_xor_si256(c2[2520],_mm256_xor_si256(c2[923],_mm256_xor_si256(c2[4920],_mm256_xor_si256(c2[2540],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[4942],_mm256_xor_si256(c2[2560],_mm256_xor_si256(c2[4963],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[782],c2[5381]))))))))))))))))))))))))))));
+     d2[330]=simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[4204],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[2420],simde_mm256_xor_si256(c2[2023],simde_mm256_xor_si256(c2[3621],simde_mm256_xor_si256(c2[2440],simde_mm256_xor_si256(c2[5440],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[2460],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[2480],simde_mm256_xor_si256(c2[4280],simde_mm256_xor_si256(c2[3683],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[6104],simde_mm256_xor_si256(c2[2520],simde_mm256_xor_si256(c2[923],simde_mm256_xor_si256(c2[4920],simde_mm256_xor_si256(c2[2540],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[4942],simde_mm256_xor_si256(c2[2560],simde_mm256_xor_si256(c2[4963],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[782],c2[5381]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[340]=_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[2602],_mm256_xor_si256(c2[5604],_mm256_xor_si256(c2[5404],_mm256_xor_si256(c2[4406],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[3620],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[2225],_mm256_xor_si256(c2[4821],_mm256_xor_si256(c2[4023],_mm256_xor_si256(c2[3823],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[5842],_mm256_xor_si256(c2[5642],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[2662],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[1866],_mm256_xor_si256(c2[4263],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[3880],_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[2682],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[4482],_mm256_xor_si256(c2[4883],_mm256_xor_si256(c2[4085],_mm256_xor_si256(c2[3885],_mm256_xor_si256(c2[3900],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[2702],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[107],_mm256_xor_si256(c2[6306],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[3720],_mm256_xor_si256(c2[2722],_mm256_xor_si256(c2[2323],_mm256_xor_si256(c2[2123],_mm256_xor_si256(c2[1125],_mm256_xor_si256(c2[6120],_mm256_xor_si256(c2[5322],_mm256_xor_si256(c2[5122],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[3740],_mm256_xor_si256(c2[2742],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[3960],_mm256_xor_si256(c2[3760],_mm256_xor_si256(c2[2762],_mm256_xor_si256(c2[6163],_mm256_xor_si256(c2[5165],_mm256_xor_si256(c2[1562],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[3980],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[2782],_mm256_xor_si256(c2[2182],_mm256_xor_si256(c2[1982],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[5783],c2[5583])))))))))))))
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[340]=simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[2602],simde_mm256_xor_si256(c2[5604],simde_mm256_xor_si256(c2[5404],simde_mm256_xor_si256(c2[4406],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[3620],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[2225],simde_mm256_xor_si256(c2[4821],simde_mm256_xor_si256(c2[4023],simde_mm256_xor_si256(c2[3823],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[5842],simde_mm256_xor_si256(c2[5642],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[2662],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[1866],simde_mm256_xor_si256(c2[4263],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[3880],simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[2682],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[4482],simde_mm256_xor_si256(c2[4883],simde_mm256_xor_si256(c2[4085],simde_mm256_xor_si256(c2[3885],simde_mm256_xor_si256(c2[3900],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[2702],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[107],simde_mm256_xor_si256(c2[6306],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[3720],simde_mm256_xor_si256(c2[2722],simde_mm256_xor_si256(c2[2323],simde_mm256_xor_si256(c2[2123],simde_mm256_xor_si256(c2[1125],simde_mm256_xor_si256(c2[6120],simde_mm256_xor_si256(c2[5322],simde_mm256_xor_si256(c2[5122],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[3740],simde_mm256_xor_si256(c2[2742],simde_mm256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[3960],simde_mm256_xor_si256(c2[3760],simde_mm256_xor_si256(c2[2762],simde_mm256_xor_si256(c2[6163],simde_mm2
56_xor_si256(c2[5165],simde_mm256_xor_si256(c2[1562],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[3980],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[2782],simde_mm256_xor_si256(c2[2182],simde_mm256_xor_si256(c2[1982],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[5783],c2[5583]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[350]=_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[1020],_mm256_xor_si256(c2[623],_mm256_xor_si256(c2[2221],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1240],_mm256_xor_si256(c2[1040],_mm256_xor_si256(c2[4040],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[1060],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[1663],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[2880],_mm256_xor_si256(c2[2283],_mm256_xor_si256(c2[1100],_mm256_xor_si256(c2[4704],_mm256_xor_si256(c2[5904],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1120],_mm256_xor_si256(c2[5922],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[1140],_mm256_xor_si256(c2[6141],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[3563],_mm256_xor_si256(c2[5361],_mm256_xor_si256(c2[1380],_mm256_xor_si256(c2[1180],_mm256_xor_si256(c2[5781],c2[3981]))))))))))))))))))))))))))))))))));
+     d2[350]=simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[1020],simde_mm256_xor_si256(c2[623],simde_mm256_xor_si256(c2[2221],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1240],simde_mm256_xor_si256(c2[1040],simde_mm256_xor_si256(c2[4040],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[1060],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[1663],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[2880],simde_mm256_xor_si256(c2[2283],simde_mm256_xor_si256(c2[1100],simde_mm256_xor_si256(c2[4704],simde_mm256_xor_si256(c2[5904],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1120],simde_mm256_xor_si256(c2[5922],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[1140],simde_mm256_xor_si256(c2[6141],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[3563],simde_mm256_xor_si256(c2[5361],simde_mm256_xor_si256(c2[1380],simde_mm256_xor_si256(c2[1180],simde_mm256_xor_si256(c2[5781],c2[3981]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[360]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[6241],c2[3742]));
+     d2[360]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[6241],c2[3742]));
 
 //row: 37
-     d2[370]=_mm256_xor_si256(c2[6209],_mm256_xor_si256(c2[3004],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[4808],_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[3024],_mm256_xor_si256(c2[5822],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[6249],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[6244],_mm256_xor_si256(c2[6044],_mm256_xor_si256(c2[6269],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[5463],_mm256_xor_si256(c2[2268],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[3667],_mm256_xor_si256(c2[6289],_mm256_xor_si256(c2[3084],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[4487],_mm256_xor_si256(c2[4287],_mm256_xor_si256(c2[6309],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[3504],_mm256_xor_si256(c2[509],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[6329],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[4722],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[2320],_mm256_xor_si256(c2[5724],_mm256_xor_si256(c2[5524],_mm256_xor_si256(c2[6349],_mm256_xor_si256(c2[3144],_mm256_xor_si256(c2[4941],_mm256_xor_si256(c2[1946],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[6369],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[5567],_mm256_xor_si256(c2[4161],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[966],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[3184],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[2781],_mm256_xor_si256(c2[6185],c2[5985])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[370]=simde_mm256_xor_si256(c2[6209],simde_mm256_xor_si256(c2[3004],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[4808],simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[3024],simde_mm256_xor_si256(c2[5822],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[6249],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[6244],simde_mm256_xor_si256(c2[6044],simde_mm256_xor_si256(c2[6269],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[5463],simde_mm256_xor_si256(c2[2268],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[3667],simde_mm256_xor_si256(c2[6289],simde_mm256_xor_si256(c2[3084],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[4487],simde_mm256_xor_si256(c2[4287],simde_mm256_xor_si256(c2[6309],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[3504],simde_mm256_xor_si256(c2[509],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[6329],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[4722],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[2320],simde_mm256_xor_si256(c2[5724],simde_mm256_xor_si256(c2[5524],simde_mm256_xor_si256(c2[6349],simde_mm256_xor_si256(c2[3144],simde_mm256_xor_si256(c2[4941],simde_mm256_xor_si256(c2[1946],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[6369],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[5567],simde_mm256_xor_si256(c2[4161],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[966],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[3184],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[2781],simde_mm256_xor_si256(c2[6185],c2[5985])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[380]=_mm256_xor_si256(c2[3800],_mm256_xor_si256(c2[3600],_mm256_xor_si256(c2[5404],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[3820],_mm256_xor_si256(c2[3620],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[4821],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[3840],_mm256_xor_si256(c2[3640],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[3860],_mm256_xor_si256(c2[3660],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[4263],_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[5480],_mm256_xor_si256(c2[4883],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[5902],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[3720],_mm256_xor_si256(c2[2123],_mm256_xor_si256(c2[6120],_mm256_xor_si256(c2[3740],_mm256_xor_si256(c2[2342],_mm256_xor_si256(c2[3760],_mm256_xor_si256(c2[6163],_mm256_xor_si256(c2[1562],_mm256_xor_si256(c2[3980],_mm256_xor_si256(c2[3780],_mm256_xor_si256(c2[1982],c2[182]))))))))))))))))))))))))))))))))));
+     d2[380]=simde_mm256_xor_si256(c2[3800],simde_mm256_xor_si256(c2[3600],simde_mm256_xor_si256(c2[5404],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[3820],simde_mm256_xor_si256(c2[3620],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[4821],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[3840],simde_mm256_xor_si256(c2[3640],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[3860],simde_mm256_xor_si256(c2[3660],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[4263],simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[5480],simde_mm256_xor_si256(c2[4883],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[5902],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[3720],simde_mm256_xor_si256(c2[2123],simde_mm256_xor_si256(c2[6120],simde_mm256_xor_si256(c2[3740],simde_mm256_xor_si256(c2[2342],simde_mm256_xor_si256(c2[3760],simde_mm256_xor_si256(c2[6163],simde_mm256_xor_si256(c2[1562],simde_mm256_xor_si256(c2[3980],simde_mm256_xor_si256(c2[3780],simde_mm256_xor_si256(c2[1982],c2[182]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[390]=_mm256_xor_si256(c2[2004],_mm256_xor_si256(c2[1804],_mm256_xor_si256(c2[3808],_mm256_xor_si256(c2[3608],_mm256_xor_si256(c2[5804],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[1427],_mm256_xor_si256(c2[3025],_mm256_xor_si256(c2[2044],_mm256_xor_si256(c2[1844],_mm256_xor_si256(c2[4844],_mm256_xor_si256(c2[2064],_mm256_xor_si256(c2[1864],_mm256_xor_si256(c2[1268],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[2467],_mm256_xor_si256(c2[2084],_mm256_xor_si256(c2[1884],_mm256_xor_si256(c2[3684],_mm256_xor_si256(c2[3087],_mm256_xor_si256(c2[2104],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[5508],_mm256_xor_si256(c2[2124],_mm256_xor_si256(c2[1924],_mm256_xor_si256(c2[527],_mm256_xor_si256(c2[327],_mm256_xor_si256(c2[4324],_mm256_xor_si256(c2[2144],_mm256_xor_si256(c2[1944],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[4940],_mm256_xor_si256(c2[2164],_mm256_xor_si256(c2[1964],_mm256_xor_si256(c2[4367],_mm256_xor_si256(c2[6165],_mm256_xor_si256(c2[2184],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[186],c2[4785]))))))))))))))))))))))))))))))))))))))))));
+     d2[390]=simde_mm256_xor_si256(c2[2004],simde_mm256_xor_si256(c2[1804],simde_mm256_xor_si256(c2[3808],simde_mm256_xor_si256(c2[3608],simde_mm256_xor_si256(c2[5804],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[1427],simde_mm256_xor_si256(c2[3025],simde_mm256_xor_si256(c2[2044],simde_mm256_xor_si256(c2[1844],simde_mm256_xor_si256(c2[4844],simde_mm256_xor_si256(c2[2064],simde_mm256_xor_si256(c2[1864],simde_mm256_xor_si256(c2[1268],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[2467],simde_mm256_xor_si256(c2[2084],simde_mm256_xor_si256(c2[1884],simde_mm256_xor_si256(c2[3684],simde_mm256_xor_si256(c2[3087],simde_mm256_xor_si256(c2[2104],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[5508],simde_mm256_xor_si256(c2[2124],simde_mm256_xor_si256(c2[1924],simde_mm256_xor_si256(c2[527],simde_mm256_xor_si256(c2[327],simde_mm256_xor_si256(c2[4324],simde_mm256_xor_si256(c2[2144],simde_mm256_xor_si256(c2[1944],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[4940],simde_mm256_xor_si256(c2[2164],simde_mm256_xor_si256(c2[1964],simde_mm256_xor_si256(c2[4367],simde_mm256_xor_si256(c2[6165],simde_mm256_xor_si256(c2[2184],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[186],c2[4785]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[400]=_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[2804],_mm256_xor_si256(c2[2405],_mm256_xor_si256(c2[4608],_mm256_xor_si256(c2[4601],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[621],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[2427],_mm256_xor_si256(c2[1822],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[4025],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[3641],_mm256_xor_si256(c2[6044],_mm256_xor_si256(c2[5844],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[2864],_mm256_xor_si256(c2[6264],_mm256_xor_si256(c2[2068],_mm256_xor_si256(c2[1264],_mm256_xor_si256(c2[3467],_mm256_xor_si256(c2[681],_mm256_xor_si256(c2[2884],_mm256_xor_si256(c2[2481],_mm256_xor_si256(c2[4684],_mm256_xor_si256(c2[1884],_mm256_xor_si256(c2[4287],_mm256_xor_si256(c2[4087],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[4305],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[5523],_mm256_xor_si256(c2[1327],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[5524],_mm256_xor_si256(c2[5324],_mm256_xor_si256(c2[741],_mm256_xor_si256(c2[2944],_mm256_xor_si256(c2[5742],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[1546],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[2964],_mm256_xor_si256(c2[3164],_mm256_xor_si256(c2[5367],_mm256_xor_si256(c2[4962],_mm256_xor_si256(c2[966],_mm256_xor_si256(c2[766],_mm256_xor_si256(c2[781],_mm256_xor_si256(c2[2984],_mm256_xor_si256(c2[5382],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[3582],_mm256_xor_si256(c2[5985],c2[5785]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[400]=simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[2804],simde_mm256_xor_si256(c2[2405],simde_mm256_xor_si256(c2[4608],simde_mm256_xor_si256(c2[4601],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[621],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[2427],simde_mm256_xor_si256(c2[1822],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[4025],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[3641],simde_mm256_xor_si256(c2[6044],simde_mm256_xor_si256(c2[5844],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[2864],simde_mm256_xor_si256(c2[6264],simde_mm256_xor_si256(c2[2068],simde_mm256_xor_si256(c2[1264],simde_mm256_xor_si256(c2[3467],simde_mm256_xor_si256(c2[681],simde_mm256_xor_si256(c2[2884],simde_mm256_xor_si256(c2[2481],simde_mm256_xor_si256(c2[4684],simde_mm256_xor_si256(c2[1884],simde_mm256_xor_si256(c2[4287],simde_mm256_xor_si256(c2[4087],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[4305],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[5523],simde_mm256_xor_si256(c2[1327],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[5524],simde_mm256_xor_si256(c2[5324],simde_mm256_xor_si256(c2[741],simde_mm256_xor_si256(c2[2944],simde_mm256_xor_si256(c2[5742],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[1546],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[2964],simde_mm256_xor_si256(c2[3164],simde_mm256_xor_si256(c2[5367],simde_mm256_xor_si256(c2[4962],simde_mm256_xor_si256(c2[966],simde_mm256_xor_si256(c2[766],simde_mm256_xor_si256(c2[781],simde_mm256_xor_si256(c2[2984],simde_mm256_xor_si256(c2[5382],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[3582],simde_mm256_xor_si256(c2[5985],c2[5785]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[410]=_mm256_xor_si256(c2[4601],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[6205],_mm256_xor_si256(c2[2002],_mm256_xor_si256(c2[4621],_mm256_xor_si256(c2[4421],_mm256_xor_si256(c2[4024],_mm256_xor_si256(c2[5622],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[4641],_mm256_xor_si256(c2[4441],_mm256_xor_si256(c2[1042],_mm256_xor_si256(c2[4661],_mm256_xor_si256(c2[4461],_mm256_xor_si256(c2[3665],_mm256_xor_si256(c2[5064],_mm256_xor_si256(c2[4481],_mm256_xor_si256(c2[6281],_mm256_xor_si256(c2[5684],_mm256_xor_si256(c2[4501],_mm256_xor_si256(c2[1706],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[4721],_mm256_xor_si256(c2[4521],_mm256_xor_si256(c2[2924],_mm256_xor_si256(c2[522],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[3143],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[4781],_mm256_xor_si256(c2[4581],_mm256_xor_si256(c2[2783],c2[983]))))))))))))))))))))))))))))))))));
+     d2[410]=simde_mm256_xor_si256(c2[4601],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[6205],simde_mm256_xor_si256(c2[2002],simde_mm256_xor_si256(c2[4621],simde_mm256_xor_si256(c2[4421],simde_mm256_xor_si256(c2[4024],simde_mm256_xor_si256(c2[5622],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[4641],simde_mm256_xor_si256(c2[4441],simde_mm256_xor_si256(c2[1042],simde_mm256_xor_si256(c2[4661],simde_mm256_xor_si256(c2[4461],simde_mm256_xor_si256(c2[3665],simde_mm256_xor_si256(c2[5064],simde_mm256_xor_si256(c2[4481],simde_mm256_xor_si256(c2[6281],simde_mm256_xor_si256(c2[5684],simde_mm256_xor_si256(c2[4501],simde_mm256_xor_si256(c2[1706],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[4721],simde_mm256_xor_si256(c2[4521],simde_mm256_xor_si256(c2[2924],simde_mm256_xor_si256(c2[522],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[3143],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[4781],simde_mm256_xor_si256(c2[4581],simde_mm256_xor_si256(c2[2783],c2[983]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc32_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc32_byte.c
index d28f5501d425ea2a5ac64d71e6cdccdcb66a53c8..6356cfb3919847bca10d2232a5da496f299eda4a 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc32_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc32_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc32_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))));
 
 //row: 1
-     d2[1]=_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))))))))));
+     d2[1]=simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[2]=_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))))))))))))))))));
+     d2[2]=simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[342],c2[390]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[3]=_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[106],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[18],c2[390]))))))))))))))))))))))))))))))))));
+     d2[3]=simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[106],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[18],c2[390]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[4]=_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[42],c2[90]))))))))))))))))))))))))))))))))));
+     d2[4]=simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[42],c2[90]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[5]=_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[42],c2[90]))))))))))))))))))))))))))))))))))));
+     d2[5]=simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[42],c2[90]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[6]=_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[366],c2[354]))))))))))))))))))))))))))))))))))));
+     d2[6]=simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[366],c2[354]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[7]=_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[130],_mm256_xor_si256(c2[334],_mm256_xor_si256(c2[238],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[42],c2[246]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[7]=simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[130],simde_mm256_xor_si256(c2[334],simde_mm256_xor_si256(c2[238],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2
[378],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[42],c2[246]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[8]=_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))))))))))));
+     d2[8]=simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[9]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[238],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[354],c2[282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[9]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[238],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[354],c2[282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[10]=_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[12],c2[266])));
+     d2[10]=simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[12],c2[266])));
 
 //row: 11
-     d2[11]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[250],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[294],c2[234])))))))))))))))))))))))))))))))))))));
+     d2[11]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[250],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[294],c2[234])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[12]=_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[130],_mm256_xor_si256(c2[262],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[126],c2[174]))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[130],simde_mm256_xor_si256(c2[262],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[126],c2[174]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[13]=_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[82],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[378],c2[366])))))))))))))))))))))))))))))))))))));
+     d2[13]=simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[82],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[378],c2[366])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[14]=_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[130],_mm256_xor_si256(c2[190],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[42],c2[102])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[14]=simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[130],simde_mm256_xor_si256(c2[190],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256
(c2[54],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[42],c2[102])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[15]=_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[142],_mm256_xor_si256(c2[214],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[186],c2[258]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[15]=simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[142],simde_mm256_xor_si256(c2[214],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[186],c2[258]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[16]=_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[190],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[190],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[222],c2[186])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[16]=simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[190],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[190],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[
270],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[222],c2[186])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[17]=_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[222],c2[390])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[17]=simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[2
20],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[222],c2[390])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[18]=_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[36],c2[230]));
+     d2[18]=simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[36],c2[230]));
 
 //row: 19
-     d2[19]=_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[262],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[258],c2[306]))))))))))))))))))))))))))));
+     d2[19]=simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[262],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[258],c2[306]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[20]=_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[58],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[306],c2[354]))))))))))))))))))))))))))))))))));
+     d2[20]=simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[58],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[306],c2[354]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[21]=_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[178],_mm256_xor_si256(c2[322],_mm256_xor_si256(c2[310],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[234],c2[222]))))))))))))))))))))))))))))))))))));
+     d2[21]=simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[178],simde_mm256_xor_si256(c2[322],simde_mm256_xor_si256(c2[310],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[234],c2[222]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[22]=_mm256_xor_si256(c2[362],c2[376]);
+     d2[22]=simde_mm256_xor_si256(c2[362],c2[376]);
 
 //row: 23
-     d2[23]=_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[138],c2[178]));
+     d2[23]=simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[138],c2[178]));
 
 //row: 24
-     d2[24]=_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[136],c2[114]));
+     d2[24]=simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[136],c2[114]));
 
 //row: 25
-     d2[25]=_mm256_xor_si256(c2[48],c2[346]);
+     d2[25]=simde_mm256_xor_si256(c2[48],c2[346]);
 
 //row: 26
-     d2[26]=_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[304],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[22],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[390],c2[318])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
));
+     d2[26]=simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[304],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[22],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[1
60],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[390],c2[318])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[27]=_mm256_xor_si256(c2[96],c2[288]);
+     d2[27]=simde_mm256_xor_si256(c2[96],c2[288]);
 
 //row: 28
-     d2[28]=_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[64],c2[94]));
+     d2[28]=simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[64],c2[94]));
 
 //row: 29
-     d2[29]=_mm256_xor_si256(c2[216],c2[344]);
+     d2[29]=simde_mm256_xor_si256(c2[216],c2[344]);
 
 //row: 30
-     d2[30]=_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[202],_mm256_xor_si256(c2[122],c2[258])));
+     d2[30]=simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[202],simde_mm256_xor_si256(c2[122],c2[258])));
 
 //row: 31
-     d2[31]=_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[118],_mm256_xor_si256(c2[106],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[30],c2[18])))))))))))))))))))))))))))))))))));
+     d2[31]=simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[118],simde_mm256_xor_si256(c2[106],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[30],c2[18])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[32]=_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[46],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[38],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))))))))))));
+     d2[32]=simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[46],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[38],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[33]=_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[90],c2[138]))))))))))))))))))))))))))));
+     d2[33]=simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[90],c2[138]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[34]=_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[214],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[154],_mm256_xor_si256(c2[346],_mm256_xor_si256(c2[142],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[258],c2[54]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[34]=simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[214],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[154],simde_mm256_xor_si256(c2[346],simde_mm256_xor_si256(c2[142],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2
[88],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[258],c2[54]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[35]=_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[334],_mm256_xor_si256(c2[82],_mm256_xor_si256(c2[106],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[330],c2[378]))))))))))))))))))))))))))))))))));
+     d2[35]=simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[334],simde_mm256_xor_si256(c2[82],simde_mm256_xor_si256(c2[106],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[330],c2[378]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[36]=_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[76],c2[326]));
+     d2[36]=simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[76],c2[326]));
 
 //row: 37
-     d2[37]=_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[78],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[286],_mm256_xor_si256(c2[238],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[206],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[330],c2[282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[37]=simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[78],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[286],simde_mm256_xor_si256(c2[238],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[206],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[330],c2[282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[38]=_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[166],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))));
+     d2[38]=simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[166],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[30],c2[78]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[39]=_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[162],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[370],_mm256_xor_si256(c2[118],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[366],c2[30]))))))))))))))))))))))))))))))))))))))))));
+     d2[39]=simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[162],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[370],simde_mm256_xor_si256(c2[118],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[366],c2[30]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[40]=_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[354],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[250],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[382],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[182],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[246],_mm256_xor_si256(c2[306],_mm256_xor_si256(c2[138],c2[294]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[354],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[250],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[382],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[182],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[246],simde_mm256_xor_si256(c2[306],simde_mm256_xor_si256(c2[138],c2[294]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[41]=_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[134],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[350],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[16],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[358],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[146],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[222],c2[270]))))))))))))))))))))))))))))))))));
+     d2[41]=simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[134],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[350],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[16],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[358],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[146],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[222],c2[270]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc352_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc352_byte.c
index df6d7e428a7413919ccbde3467f37ecec387970e..7141d91aa9566b9b0ec75cf2597030db11633b30 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc352_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc352_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc352_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[223],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[6689],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[6713],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[3652],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[5436],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[5921],_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[223],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[6689],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[6713],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[3652],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[5436],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[5921],simde_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))));
 
 //row: 1
-     d2[11]=_mm256_xor_si256(c2[6164],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[223],_mm256_xor_si256(c2[3326],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[6689],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[6713],_mm256_xor_si256(c2[1893],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[3652],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[5436],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[6141],_mm256_xor_si256(c2[5921],_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))))))))));
+     d2[11]=simde_mm256_xor_si256(c2[6164],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[223],simde_mm256_xor_si256(c2[3326],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[6689],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[6713],simde_mm256_xor_si256(c2[1893],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[3652],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[5436],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[6141],simde_mm256_xor_si256(c2[5921],simde_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[22]=_mm256_xor_si256(c2[6164],_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[3740],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[223],_mm256_xor_si256(c2[3326],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[3124],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[6909],_mm256_xor_si256(c2[6689],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[330],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[6713],_mm256_xor_si256(c2[1893],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[5193],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[3652],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[5436],_mm256_xor_si256(c2[2601],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[6141],_mm256_xor_si256(c2[5921],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))))))))))))))))));
+     d2[22]=simde_mm256_xor_si256(c2[6164],simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[3740],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[223],simde_mm256_xor_si256(c2[3326],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[3124],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[6909],simde_mm256_xor_si256(c2[6689],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[330],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[6713],simde_mm256_xor_si256(c2[1893],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[5193],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[3652],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[5436],simde_mm256_xor_si256(c2[2601],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[6141],simde_mm256_xor_si256(c2[5921],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1082],c2[1961]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[33]=_mm256_xor_si256(c2[5944],_mm256_xor_si256(c2[3520],_mm256_xor_si256(c2[223],_mm256_xor_si256(c2[3106],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[4643],_mm256_xor_si256(c2[4423],_mm256_xor_si256(c2[2904],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[6689],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[3173],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[6933],_mm256_xor_si256(c2[6713],_mm256_xor_si256(c2[1673],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[3872],_mm256_xor_si256(c2[3652],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[5656],_mm256_xor_si256(c2[5436],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3040],_mm256_xor_si256(c2[3916],_mm256_xor_si256(c2[3696],_mm256_xor_si256(c2[5921],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[2181],c2[1961]))))))))))))))))))))))))))))))))));
+     d2[33]=simde_mm256_xor_si256(c2[5944],simde_mm256_xor_si256(c2[3520],simde_mm256_xor_si256(c2[223],simde_mm256_xor_si256(c2[3106],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[4643],simde_mm256_xor_si256(c2[4423],simde_mm256_xor_si256(c2[2904],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[6689],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[3173],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[6933],simde_mm256_xor_si256(c2[6713],simde_mm256_xor_si256(c2[1673],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[3872],simde_mm256_xor_si256(c2[3652],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[5656],simde_mm256_xor_si256(c2[5436],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3040],simde_mm256_xor_si256(c2[3916],simde_mm256_xor_si256(c2[3696],simde_mm256_xor_si256(c2[5921],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[2181],c2[1961]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[44]=_mm256_xor_si256(c2[3307],_mm256_xor_si256(c2[3087],_mm256_xor_si256(c2[663],_mm256_xor_si256(c2[4405],_mm256_xor_si256(c2[4842],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[5749],_mm256_xor_si256(c2[1566],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[47],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[4908],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[4250],_mm256_xor_si256(c2[6451],_mm256_xor_si256(c2[3832],_mm256_xor_si256(c2[973],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[4292],_mm256_xor_si256(c2[3856],_mm256_xor_si256(c2[6075],_mm256_xor_si256(c2[5855],_mm256_xor_si256(c2[2116],_mm256_xor_si256(c2[795],_mm256_xor_si256(c2[5217],_mm256_xor_si256(c2[2579],_mm256_xor_si256(c2[6563],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[839],_mm256_xor_si256(c2[3284],_mm256_xor_si256(c2[3064],_mm256_xor_si256(c2[5264],c2[6143]))))))))))))))))))))))))))))))))));
+     d2[44]=simde_mm256_xor_si256(c2[3307],simde_mm256_xor_si256(c2[3087],simde_mm256_xor_si256(c2[663],simde_mm256_xor_si256(c2[4405],simde_mm256_xor_si256(c2[4842],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[5749],simde_mm256_xor_si256(c2[1566],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[47],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[4908],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[4250],simde_mm256_xor_si256(c2[6451],simde_mm256_xor_si256(c2[3832],simde_mm256_xor_si256(c2[973],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[4292],simde_mm256_xor_si256(c2[3856],simde_mm256_xor_si256(c2[6075],simde_mm256_xor_si256(c2[5855],simde_mm256_xor_si256(c2[2116],simde_mm256_xor_si256(c2[795],simde_mm256_xor_si256(c2[5217],simde_mm256_xor_si256(c2[2579],simde_mm256_xor_si256(c2[6563],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[839],simde_mm256_xor_si256(c2[3284],simde_mm256_xor_si256(c2[3064],simde_mm256_xor_si256(c2[5264],c2[6143]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[55]=_mm256_xor_si256(c2[5286],_mm256_xor_si256(c2[5066],_mm256_xor_si256(c2[2642],_mm256_xor_si256(c2[6384],_mm256_xor_si256(c2[3302],_mm256_xor_si256(c2[2448],_mm256_xor_si256(c2[2228],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[3545],_mm256_xor_si256(c2[4424],_mm256_xor_si256(c2[2246],_mm256_xor_si256(c2[2026],_mm256_xor_si256(c2[2031],_mm256_xor_si256(c2[6887],_mm256_xor_si256(c2[6667],_mm256_xor_si256(c2[6229],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[5811],_mm256_xor_si256(c2[2952],_mm256_xor_si256(c2[2075],_mm256_xor_si256(c2[6271],_mm256_xor_si256(c2[5835],_mm256_xor_si256(c2[771],_mm256_xor_si256(c2[1015],_mm256_xor_si256(c2[795],_mm256_xor_si256(c2[4095],_mm256_xor_si256(c2[2774],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[4558],_mm256_xor_si256(c2[1697],_mm256_xor_si256(c2[1503],_mm256_xor_si256(c2[2162],_mm256_xor_si256(c2[2818],_mm256_xor_si256(c2[5263],_mm256_xor_si256(c2[5043],_mm256_xor_si256(c2[204],c2[1083]))))))))))))))))))))))))))))))))))));
+     d2[55]=simde_mm256_xor_si256(c2[5286],simde_mm256_xor_si256(c2[5066],simde_mm256_xor_si256(c2[2642],simde_mm256_xor_si256(c2[6384],simde_mm256_xor_si256(c2[3302],simde_mm256_xor_si256(c2[2448],simde_mm256_xor_si256(c2[2228],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[3545],simde_mm256_xor_si256(c2[4424],simde_mm256_xor_si256(c2[2246],simde_mm256_xor_si256(c2[2026],simde_mm256_xor_si256(c2[2031],simde_mm256_xor_si256(c2[6887],simde_mm256_xor_si256(c2[6667],simde_mm256_xor_si256(c2[6229],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[5811],simde_mm256_xor_si256(c2[2952],simde_mm256_xor_si256(c2[2075],simde_mm256_xor_si256(c2[6271],simde_mm256_xor_si256(c2[5835],simde_mm256_xor_si256(c2[771],simde_mm256_xor_si256(c2[1015],simde_mm256_xor_si256(c2[795],simde_mm256_xor_si256(c2[4095],simde_mm256_xor_si256(c2[2774],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[4558],simde_mm256_xor_si256(c2[1697],simde_mm256_xor_si256(c2[1503],simde_mm256_xor_si256(c2[2162],simde_mm256_xor_si256(c2[2818],simde_mm256_xor_si256(c2[5263],simde_mm256_xor_si256(c2[5043],simde_mm256_xor_si256(c2[204],c2[1083]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[66]=_mm256_xor_si256(c2[5505],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[6603],_mm256_xor_si256(c2[3301],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[2447],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[3764],_mm256_xor_si256(c2[2465],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[67],_mm256_xor_si256(c2[6886],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[1610],_mm256_xor_si256(c2[6030],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[2294],_mm256_xor_si256(c2[6490],_mm256_xor_si256(c2[6054],_mm256_xor_si256(c2[5834],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[2993],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[4777],_mm256_xor_si256(c2[2354],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3037],_mm256_xor_si256(c2[5482],_mm256_xor_si256(c2[5262],_mm256_xor_si256(c2[423],_mm256_xor_si256(c2[1302],c2[6142]))))))))))))))))))))))))))))))))))));
+     d2[66]=simde_mm256_xor_si256(c2[5505],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[6603],simde_mm256_xor_si256(c2[3301],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[2447],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[3764],simde_mm256_xor_si256(c2[2465],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[67],simde_mm256_xor_si256(c2[6886],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[1610],simde_mm256_xor_si256(c2[6030],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[2294],simde_mm256_xor_si256(c2[6490],simde_mm256_xor_si256(c2[6054],simde_mm256_xor_si256(c2[5834],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[4314],simde_mm256_xor_si256(c2[2993],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[4777],simde_mm256_xor_si256(c2[2354],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3037],simde_mm256_xor_si256(c2[5482],simde_mm256_xor_si256(c2[5262],simde_mm256_xor_si256(c2[423],simde_mm256_xor_si256(c2[1302],c2[6142]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[77]=_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[2206],_mm256_xor_si256(c2[3966],_mm256_xor_si256(c2[6821],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[3524],_mm256_xor_si256(c2[5284],_mm256_xor_si256(c2[6627],_mm256_xor_si256(c2[6407],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[685],_mm256_xor_si256(c2[2665],_mm256_xor_si256(c2[2445],_mm256_xor_si256(c2[3543],_mm256_xor_si256(c2[6425],_mm256_xor_si256(c2[6205],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[6210],_mm256_xor_si256(c2[1151],_mm256_xor_si256(c2[931],_mm256_xor_si256(c2[4027],_mm256_xor_si256(c2[3807],_mm256_xor_si256(c2[5567],_mm256_xor_si256(c2[3369],_mm256_xor_si256(c2[5129],_mm256_xor_si256(c2[5570],_mm256_xor_si256(c2[291],_mm256_xor_si256(c2[2951],_mm256_xor_si256(c2[4711],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[1852],_mm256_xor_si256(c2[6254],_mm256_xor_si256(c2[1195],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[3411],_mm256_xor_si256(c2[5171],_mm256_xor_si256(c2[2975],_mm256_xor_si256(c2[4955],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[6493],_mm256_xor_si256(c2[5194],_mm256_xor_si256(c2[4974],_mm256_xor_si256(c2[6734],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[2995],_mm256_xor_si256(c2[6953],_mm256_xor_si256(c2[1894],_mm256_xor_si256(c2[1674],_mm256_xor_si256(c2[4336],_mm256_xor_si256(c2[6096],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[3678],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[5434],_mm256_xor_si256(c2[5682],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[6341],_mm256_xor_si256(c2[1062],_mm256_xor_si256(c2[6997],_mm256_xor_si256(c2[1938],_mm256_xor_si256(c2[1718],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[2183],_mm256_xor_si256(c2[3943],_mm256_xor_si256(c2[4383],_mm256_xor_si256(c2[6143],_mm256_xor_si256(c2[5262],_mm256_xor_si256(c2[203],c2[7022]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[77]=simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[2206],simde_mm256_xor_si256(c2[3966],simde_mm256_xor_si256(c2[6821],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[3524],simde_mm256_xor_si256(c2[5284],simde_mm256_xor_si256(c2[6627],simde_mm256_xor_si256(c2[6407],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[685],simde_mm256_xor_si256(c2[2665],simde_mm256_xor_si256(c2[2445],simde_mm256_xor_si256(c2[3543],simde_mm256_xor_si256(c2[6425],simde_mm256_xor_si256(c2[6205],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[6210],simde_mm256_xor_si256(c2[1151],simde_mm256_xor_si256(c2[931],simde_mm256_xor_si256(c2[4027],simde_mm256_xor_si256(c2[3807],simde_mm256_xor_si256(c2[5567],simde_mm256_xor_si256(c2[3369],simde_mm256_xor_si256(c2[5129],simde_mm256_xor_si256(c2[5570],simde_mm256_xor_si256(c2[291],simde_mm256_xor_si256(c2[2951],simde_mm256_xor_si256(c2[4711],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[1852],simde_mm256_xor_si256(c2[6254],simde_mm256_xor_si256(c2[1195],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[3411],simde_mm256_xor_si256(c2[5171],simde_mm256_xor_si256(c2[2975],simde_mm256_xor_si256(c2[4955],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[6493],simde_mm256_xor_si256(c2[5194],simde_mm256_xor_si256(c2[4974],simde_mm256_xor_si256(c2[6734],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[2995],simde_mm256_xor_si256(c2[6953],simde_mm256_xor_si256(c2[1894],simde_mm256_xor_si256(c2[1674],simde_mm256_xor_si256(c2[4336],simde_mm256_xor_si256(c2[6096],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[3678],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[5434],simde_mm256_xor_si256(c2[5682],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[6341],simde_mm256_xor_si256(c2[1062],simde_mm256_xor_si256(c2[6997],simde_mm256_xor_si256(c2[1938],simde_mm256_xor_si256(c2[1718],simde_mm256_xor_si256(c2[2403],simde_mm256
_xor_si256(c2[2183],simde_mm256_xor_si256(c2[3943],simde_mm256_xor_si256(c2[4383],simde_mm256_xor_si256(c2[6143],simde_mm256_xor_si256(c2[5262],simde_mm256_xor_si256(c2[203],c2[7022]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[88]=_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[6169],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[4621],_mm256_xor_si256(c2[3551],_mm256_xor_si256(c2[3331],_mm256_xor_si256(c2[1792],_mm256_xor_si256(c2[4648],_mm256_xor_si256(c2[6842],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[3129],_mm256_xor_si256(c2[3134],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[731],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[2494],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[4055],_mm256_xor_si256(c2[3178],_mm256_xor_si256(c2[555],_mm256_xor_si256(c2[335],_mm256_xor_si256(c2[6938],_mm256_xor_si256(c2[2118],_mm256_xor_si256(c2[1898],_mm256_xor_si256(c2[5418],_mm256_xor_si256(c2[5198],_mm256_xor_si256(c2[3877],_mm256_xor_si256(c2[1480],_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[5661],_mm256_xor_si256(c2[2826],_mm256_xor_si256(c2[2606],_mm256_xor_si256(c2[3265],_mm256_xor_si256(c2[3921],_mm256_xor_si256(c2[6366],_mm256_xor_si256(c2[6146],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[1307],c2[2186]))))))))))))))))))))))))))))))))))))))))));
+     d2[88]=simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[6169],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[4621],simde_mm256_xor_si256(c2[3551],simde_mm256_xor_si256(c2[3331],simde_mm256_xor_si256(c2[1792],simde_mm256_xor_si256(c2[4648],simde_mm256_xor_si256(c2[6842],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[3129],simde_mm256_xor_si256(c2[3134],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[731],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[2494],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[4055],simde_mm256_xor_si256(c2[3178],simde_mm256_xor_si256(c2[555],simde_mm256_xor_si256(c2[335],simde_mm256_xor_si256(c2[6938],simde_mm256_xor_si256(c2[2118],simde_mm256_xor_si256(c2[1898],simde_mm256_xor_si256(c2[5418],simde_mm256_xor_si256(c2[5198],simde_mm256_xor_si256(c2[3877],simde_mm256_xor_si256(c2[1480],simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[5661],simde_mm256_xor_si256(c2[2826],simde_mm256_xor_si256(c2[2606],simde_mm256_xor_si256(c2[3265],simde_mm256_xor_si256(c2[3921],simde_mm256_xor_si256(c2[6366],simde_mm256_xor_si256(c2[6146],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[1307],c2[2186]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[99]=_mm256_xor_si256(c2[6607],_mm256_xor_si256(c2[6166],_mm256_xor_si256(c2[5946],_mm256_xor_si256(c2[4183],_mm256_xor_si256(c2[3522],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[3769],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[3108],_mm256_xor_si256(c2[2230],_mm256_xor_si256(c2[1569],_mm256_xor_si256(c2[5086],_mm256_xor_si256(c2[4425],_mm256_xor_si256(c2[1785],_mm256_xor_si256(c2[3567],_mm256_xor_si256(c2[3126],_mm256_xor_si256(c2[2906],_mm256_xor_si256(c2[3572],_mm256_xor_si256(c2[2911],_mm256_xor_si256(c2[1169],_mm256_xor_si256(c2[728],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[731],_mm256_xor_si256(c2[70],_mm256_xor_si256(c2[2932],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[6691],_mm256_xor_si256(c2[4493],_mm256_xor_si256(c2[3832],_mm256_xor_si256(c2[3616],_mm256_xor_si256(c2[2955],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[6715],_mm256_xor_si256(c2[2336],_mm256_xor_si256(c2[1895],_mm256_xor_si256(c2[1675],_mm256_xor_si256(c2[5636],_mm256_xor_si256(c2[4975],_mm256_xor_si256(c2[4315],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[1037],_mm256_xor_si256(c2[6099],_mm256_xor_si256(c2[5438],_mm256_xor_si256(c2[3044],_mm256_xor_si256(c2[2383],_mm256_xor_si256(c2[3703],_mm256_xor_si256(c2[3042],_mm256_xor_si256(c2[4359],_mm256_xor_si256(c2[3698],_mm256_xor_si256(c2[3260],_mm256_xor_si256(c2[6584],_mm256_xor_si256(c2[6143],_mm256_xor_si256(c2[5923],_mm256_xor_si256(c2[1745],_mm256_xor_si256(c2[1084],_mm256_xor_si256(c2[2624],c2[1963])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[99]=simde_mm256_xor_si256(c2[6607],simde_mm256_xor_si256(c2[6166],simde_mm256_xor_si256(c2[5946],simde_mm256_xor_si256(c2[4183],simde_mm256_xor_si256(c2[3522],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[3769],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[3108],simde_mm256_xor_si256(c2[2230],simde_mm256_xor_si256(c2[1569],simde_mm256_xor_si256(c2[5086],simde_mm256_xor_si256(c2[4425],simde_mm256_xor_si256(c2[1785],simde_mm256_xor_si256(c2[3567],simde_mm256_xor_si256(c2[3126],simde_mm256_xor_si256(c2[2906],simde_mm256_xor_si256(c2[3572],simde_mm256_xor_si256(c2[2911],simde_mm256_xor_si256(c2[1169],simde_mm256_xor_si256(c2[728],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[731],simde_mm256_xor_si256(c2[70],simde_mm256_xor_si256(c2[2932],simde_mm256_xor_si256(c2[2271],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[6691],simde_mm256_xor_si256(c2[4493],simde_mm256_xor_si256(c2[3832],simde_mm256_xor_si256(c2[3616],simde_mm256_xor_si256(c2[2955],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[6715],simde_mm256_xor_si256(c2[2336],simde_mm256_xor_si256(c2[1895],simde_mm256_xor_si256(c2[1675],simde_mm256_xor_si256(c2[5636],simde_mm256_xor_si256(c2[4975],simde_mm256_xor_si256(c2[4315],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[1037],simde_mm256_xor_si256(c2[6099],simde_mm256_xor_si256(c2[5438],simde_mm256_xor_si256(c2[3044],simde_mm256_xor_si256(c2[2383],simde_mm256_xor_si256(c2[3703],simde_mm256_xor_si256(c2[3042],simde_mm256_xor_si256(c2[4359],simde_mm256_xor_si256(c2[3698],simde_mm256_xor_si256(c2[3260],simde_mm256_xor_si256(c2[6584],simde_mm256_xor_si256(c2[6143],simde_mm256_xor_si256(c2[5923],simde_mm256_xor_si256(c2[1745],simde_mm256_xor_si256(c2[1084],simde_mm256_xor_si256(c2[2624],c2[1963])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[110]=_mm256_xor_si256(c2[3303],_mm256_xor_si256(c2[5522],_mm256_xor_si256(c2[3217],c2[5214])));
+     d2[110]=simde_mm256_xor_si256(c2[3303],simde_mm256_xor_si256(c2[5522],simde_mm256_xor_si256(c2[3217],c2[5214])));
 
 //row: 11
-     d2[121]=_mm256_xor_si256(c2[1769],_mm256_xor_si256(c2[6384],_mm256_xor_si256(c2[3087],_mm256_xor_si256(c2[5942],_mm256_xor_si256(c2[5970],_mm256_xor_si256(c2[4431],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[5768],_mm256_xor_si256(c2[5993],_mm256_xor_si256(c2[5773],_mm256_xor_si256(c2[3370],_mm256_xor_si256(c2[2932],_mm256_xor_si256(c2[5133],_mm256_xor_si256(c2[2514],_mm256_xor_si256(c2[6694],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[5817],_mm256_xor_si256(c2[2974],_mm256_xor_si256(c2[2758],_mm256_xor_si256(c2[2538],_mm256_xor_si256(c2[4537],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[6736],_mm256_xor_si256(c2[6516],_mm256_xor_si256(c2[3899],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[3459],_mm256_xor_si256(c2[5245],_mm256_xor_si256(c2[5904],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[6560],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[3946],_mm256_xor_si256(c2[5045],_mm256_xor_si256(c2[4825],c2[5478])))))))))))))))))))))))))))))))))))));
+     d2[121]=simde_mm256_xor_si256(c2[1769],simde_mm256_xor_si256(c2[6384],simde_mm256_xor_si256(c2[3087],simde_mm256_xor_si256(c2[5942],simde_mm256_xor_si256(c2[5970],simde_mm256_xor_si256(c2[4431],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[5768],simde_mm256_xor_si256(c2[5993],simde_mm256_xor_si256(c2[5773],simde_mm256_xor_si256(c2[3370],simde_mm256_xor_si256(c2[2932],simde_mm256_xor_si256(c2[5133],simde_mm256_xor_si256(c2[2514],simde_mm256_xor_si256(c2[6694],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[5817],simde_mm256_xor_si256(c2[2974],simde_mm256_xor_si256(c2[2758],simde_mm256_xor_si256(c2[2538],simde_mm256_xor_si256(c2[4537],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[6736],simde_mm256_xor_si256(c2[6516],simde_mm256_xor_si256(c2[3899],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[3459],simde_mm256_xor_si256(c2[5245],simde_mm256_xor_si256(c2[5904],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[6560],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[3946],simde_mm256_xor_si256(c2[5045],simde_mm256_xor_si256(c2[4825],c2[5478])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[132]=_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[6824],_mm256_xor_si256(c2[4400],_mm256_xor_si256(c2[1103],_mm256_xor_si256(c2[4206],_mm256_xor_si256(c2[3986],_mm256_xor_si256(c2[2447],_mm256_xor_si256(c2[5303],_mm256_xor_si256(c2[5745],_mm256_xor_si256(c2[4004],_mm256_xor_si256(c2[3784],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[1606],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[948],_mm256_xor_si256(c2[3149],_mm256_xor_si256(c2[2486],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[4710],_mm256_xor_si256(c2[3833],_mm256_xor_si256(c2[990],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[2773],_mm256_xor_si256(c2[2553],_mm256_xor_si256(c2[5853],_mm256_xor_si256(c2[4532],_mm256_xor_si256(c2[1915],_mm256_xor_si256(c2[6316],_mm256_xor_si256(c2[3261],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[4576],_mm256_xor_si256(c2[7021],_mm256_xor_si256(c2[6801],_mm256_xor_si256(c2[1962],c2[2841]))))))))))))))))))))))))))))))))));
+     d2[132]=simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[6824],simde_mm256_xor_si256(c2[4400],simde_mm256_xor_si256(c2[1103],simde_mm256_xor_si256(c2[4206],simde_mm256_xor_si256(c2[3986],simde_mm256_xor_si256(c2[2447],simde_mm256_xor_si256(c2[5303],simde_mm256_xor_si256(c2[5745],simde_mm256_xor_si256(c2[4004],simde_mm256_xor_si256(c2[3784],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256(c2[1606],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[948],simde_mm256_xor_si256(c2[3149],simde_mm256_xor_si256(c2[2486],simde_mm256_xor_si256(c2[530],simde_mm256_xor_si256(c2[4710],simde_mm256_xor_si256(c2[3833],simde_mm256_xor_si256(c2[990],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[2773],simde_mm256_xor_si256(c2[2553],simde_mm256_xor_si256(c2[5853],simde_mm256_xor_si256(c2[4532],simde_mm256_xor_si256(c2[1915],simde_mm256_xor_si256(c2[6316],simde_mm256_xor_si256(c2[3261],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[4576],simde_mm256_xor_si256(c2[7021],simde_mm256_xor_si256(c2[6801],simde_mm256_xor_si256(c2[1962],c2[2841]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[143]=_mm256_xor_si256(c2[5728],_mm256_xor_si256(c2[3304],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[6380],_mm256_xor_si256(c2[2890],_mm256_xor_si256(c2[1351],_mm256_xor_si256(c2[4427],_mm256_xor_si256(c2[4207],_mm256_xor_si256(c2[5964],_mm256_xor_si256(c2[2688],_mm256_xor_si256(c2[2913],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[290],_mm256_xor_si256(c2[6891],_mm256_xor_si256(c2[2053],_mm256_xor_si256(c2[6473],_mm256_xor_si256(c2[3614],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[2737],_mm256_xor_si256(c2[6933],_mm256_xor_si256(c2[6717],_mm256_xor_si256(c2[6497],_mm256_xor_si256(c2[1457],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[3656],_mm256_xor_si256(c2[3436],_mm256_xor_si256(c2[819],_mm256_xor_si256(c2[5440],_mm256_xor_si256(c2[5220],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[2824],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[3480],_mm256_xor_si256(c2[6116],_mm256_xor_si256(c2[5705],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[1965],c2[1745])))))))))))))))))))))))))))))))))))));
+     d2[143]=simde_mm256_xor_si256(c2[5728],simde_mm256_xor_si256(c2[3304],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[6380],simde_mm256_xor_si256(c2[2890],simde_mm256_xor_si256(c2[1351],simde_mm256_xor_si256(c2[4427],simde_mm256_xor_si256(c2[4207],simde_mm256_xor_si256(c2[5964],simde_mm256_xor_si256(c2[2688],simde_mm256_xor_si256(c2[2913],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[290],simde_mm256_xor_si256(c2[6891],simde_mm256_xor_si256(c2[2053],simde_mm256_xor_si256(c2[6473],simde_mm256_xor_si256(c2[3614],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[2737],simde_mm256_xor_si256(c2[6933],simde_mm256_xor_si256(c2[6717],simde_mm256_xor_si256(c2[6497],simde_mm256_xor_si256(c2[1457],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[3436],simde_mm256_xor_si256(c2[819],simde_mm256_xor_si256(c2[5440],simde_mm256_xor_si256(c2[5220],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[2824],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[3480],simde_mm256_xor_si256(c2[6116],simde_mm256_xor_si256(c2[5705],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[1965],c2[1745])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[154]=_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[1548],_mm256_xor_si256(c2[5086],_mm256_xor_si256(c2[4866],_mm256_xor_si256(c2[4431],_mm256_xor_si256(c2[3327],_mm256_xor_si256(c2[2892],_mm256_xor_si256(c2[6183],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[5748],_mm256_xor_si256(c2[2442],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[4669],_mm256_xor_si256(c2[4454],_mm256_xor_si256(c2[4234],_mm256_xor_si256(c2[2486],_mm256_xor_si256(c2[2266],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[4029],_mm256_xor_si256(c2[3594],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[5590],_mm256_xor_si256(c2[5155],_mm256_xor_si256(c2[4713],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[4278],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[1435],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[999],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[3433],_mm256_xor_si256(c2[2998],_mm256_xor_si256(c2[6733],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[5412],_mm256_xor_si256(c2[5197],_mm256_xor_si256(c2[4977],_mm256_xor_si256(c2[3876],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[6981],_mm256_xor_si256(c2[6761],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[3706],_mm256_xor_si256(c2[4800],_mm256_xor_si256(c2[4365],_mm256_xor_si256(c2[5456],_mm256_xor_si256(c2[5241],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[862],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[207],_mm256_xor_si256(c2[2842],_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[3721],_mm256_xor_si256(c2[3506],c2[3286])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[154]=simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[1548],simde_mm256_xor_si256(c2[5086],simde_mm256_xor_si256(c2[4866],simde_mm256_xor_si256(c2[4431],simde_mm256_xor_si256(c2[3327],simde_mm256_xor_si256(c2[2892],simde_mm256_xor_si256(c2[6183],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[5748],simde_mm256_xor_si256(c2[2442],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[4669],simde_mm256_xor_si256(c2[4454],simde_mm256_xor_si256(c2[4234],simde_mm256_xor_si256(c2[2486],simde_mm256_xor_si256(c2[2266],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[4029],simde_mm256_xor_si256(c2[3594],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[5590],simde_mm256_xor_si256(c2[5155],simde_mm256_xor_si256(c2[4713],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[4278],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[1435],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[999],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[3433],simde_mm256_xor_si256(c2[2998],simde_mm256_xor_si256(c2[6733],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[5412],simde_mm256_xor_si256(c2[5197],simde_mm256_xor_si256(c2[4977],simde_mm256_xor_si256(c2[3876],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[6981],simde_mm256_xor_si256(c2[6761],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[3706],simde_mm256_xor_si256(c2[4800],simde_mm256_xor_si256(c2[4365],simde_mm256_xor_si256(c2[5456],simde_mm256_xor_si256(c2[5241],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[862],simde_mm256_xor_si256(c2[642],simde_mm25
6_xor_si256(c2[207],simde_mm256_xor_si256(c2[2842],simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[3721],simde_mm256_xor_si256(c2[3506],c2[3286])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[165]=_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[5063],_mm256_xor_si256(c2[5281],_mm256_xor_si256(c2[1766],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[4649],_mm256_xor_si256(c2[5087],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[3110],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[5966],_mm256_xor_si256(c2[6184],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[4885],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[4452],_mm256_xor_si256(c2[4670],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[2487],_mm256_xor_si256(c2[2267],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[3812],_mm256_xor_si256(c2[4030],_mm256_xor_si256(c2[1193],_mm256_xor_si256(c2[1411],_mm256_xor_si256(c2[5373],_mm256_xor_si256(c2[5591],_mm256_xor_si256(c2[4496],_mm256_xor_si256(c2[4714],_mm256_xor_si256(c2[1653],_mm256_xor_si256(c2[1871],_mm256_xor_si256(c2[1217],_mm256_xor_si256(c2[1435],_mm256_xor_si256(c2[3216],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[6516],_mm256_xor_si256(c2[6734],_mm256_xor_si256(c2[5195],_mm256_xor_si256(c2[5413],_mm256_xor_si256(c2[2578],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[6979],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[3924],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[4583],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[5239],_mm256_xor_si256(c2[5457],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[863],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[2625],_mm256_xor_si256(c2[2843],_mm256_xor_si256(c2[3504],c2[3722]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[165]=simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[5063],simde_mm256_xor_si256(c2[5281],simde_mm256_xor_si256(c2[1766],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[4649],simde_mm256_xor_si256(c2[5087],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[3110],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[5966],simde_mm256_xor_si256(c2[6184],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[4885],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[4452],simde_mm256_xor_si256(c2[4670],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[2487],simde_mm256_xor_si256(c2[2267],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[3812],simde_mm256_xor_si256(c2[4030],simde_mm256_xor_si256(c2[1193],simde_mm256_xor_si256(c2[1411],simde_mm256_xor_si256(c2[5373],simde_mm256_xor_si256(c2[5591],simde_mm256_xor_si256(c2[4496],simde_mm256_xor_si256(c2[4714],simde_mm256_xor_si256(c2[1653],simde_mm256_xor_si256(c2[1871],simde_mm256_xor_si256(c2[1217],simde_mm256_xor_si256(c2[1435],simde_mm256_xor_si256(c2[3216],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[6516],simde_mm256_xor_si256(c2[6734],simde_mm256_xor_si256(c2[5195],simde_mm256_xor_si256(c2[5413],simde_mm256_xor_si256(c2[2578],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[6979],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[3924],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[4583],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[5239],simde_mm256_xor_si256(c2[5457],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[863],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[2625],simde_mm256_xor_si256(c2[2843],simde_mm256_xor_si256(c2[3504],c2[3722]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[176]=_mm256_xor_si256(c2[2865],_mm256_xor_si256(c2[2645],_mm256_xor_si256(c2[230],_mm256_xor_si256(c2[10],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[4845],_mm256_xor_si256(c2[4625],_mm256_xor_si256(c2[3963],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[6846],_mm256_xor_si256(c2[4431],_mm256_xor_si256(c2[4211],_mm256_xor_si256(c2[5307],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[5528],_mm256_xor_si256(c2[3104],_mm256_xor_si256(c2[6864],_mm256_xor_si256(c2[6644],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[4009],_mm256_xor_si256(c2[6649],_mm256_xor_si256(c2[4014],_mm256_xor_si256(c2[4466],_mm256_xor_si256(c2[4246],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[1611],_mm256_xor_si256(c2[3808],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[1173],_mm256_xor_si256(c2[6009],_mm256_xor_si256(c2[3374],_mm256_xor_si256(c2[3390],_mm256_xor_si256(c2[975],_mm256_xor_si256(c2[755],_mm256_xor_si256(c2[531],_mm256_xor_si256(c2[4935],_mm256_xor_si256(c2[6693],_mm256_xor_si256(c2[4058],_mm256_xor_si256(c2[3850],_mm256_xor_si256(c2[1435],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[3414],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[5633],_mm256_xor_si256(c2[5413],_mm256_xor_si256(c2[2998],_mm256_xor_si256(c2[2778],_mm256_xor_si256(c2[1674],_mm256_xor_si256(c2[6298],_mm256_xor_si256(c2[6078],_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[4775],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[2140],_mm256_xor_si256(c2[2137],_mm256_xor_si256(c2[6541],_mm256_xor_si256(c2[6121],_mm256_xor_si256(c2[3706],_mm256_xor_si256(c2[3486],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[4145],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[2842],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[207],_mm256_xor_si256(c2[7026],_mm256_xor_si256(c2[4822],_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[2187],_mm256_xor_si256(c2[5701],_mm256_xor_si256(c2[3066],c2[6802])))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))))))))));
+     d2[176]=simde_mm256_xor_si256(c2[2865],simde_mm256_xor_si256(c2[2645],simde_mm256_xor_si256(c2[230],simde_mm256_xor_si256(c2[10],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[4845],simde_mm256_xor_si256(c2[4625],simde_mm256_xor_si256(c2[3963],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[6846],simde_mm256_xor_si256(c2[4431],simde_mm256_xor_si256(c2[4211],simde_mm256_xor_si256(c2[5307],simde_mm256_xor_si256(c2[2672],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[5528],simde_mm256_xor_si256(c2[3104],simde_mm256_xor_si256(c2[6864],simde_mm256_xor_si256(c2[6644],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[4009],simde_mm256_xor_si256(c2[6649],simde_mm256_xor_si256(c2[4014],simde_mm256_xor_si256(c2[4466],simde_mm256_xor_si256(c2[4246],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[1611],simde_mm256_xor_si256(c2[3808],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[1173],simde_mm256_xor_si256(c2[6009],simde_mm256_xor_si256(c2[3374],simde_mm256_xor_si256(c2[3390],simde_mm256_xor_si256(c2[975],simde_mm256_xor_si256(c2[755],simde_mm256_xor_si256(c2[531],simde_mm256_xor_si256(c2[4935],simde_mm256_xor_si256(c2[6693],simde_mm256_xor_si256(c2[4058],simde_mm256_xor_si256(c2[3850],simde_mm256_xor_si256(c2[1435],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[3414],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[5633],simde_mm256_xor_si256(c2[5413],simde_mm256_xor_si256(c2[2998],simde_mm256_xor_si256(c2[2778],simde_mm256_xor_si256(c2[1674],simde_mm256_xor_si256(c2[6298],simde_mm256_xor_si256(c2[6078],simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[4775],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[2140],simde_mm256_xor_si256(c2[2137],simde_mm256_xor_si256(c2[6541],simde_mm256_xor_si256(c2[6121],simde_mm256_xor_si256(c2[3706],simde_mm256_xor_si256(c2[3486],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[4145],simde_mm256_x
or_si256(c2[397],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[2842],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[207],simde_mm256_xor_si256(c2[7026],simde_mm256_xor_si256(c2[4822],simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[2187],simde_mm256_xor_si256(c2[5701],simde_mm256_xor_si256(c2[3066],c2[6802])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[187]=_mm256_xor_si256(c2[5726],_mm256_xor_si256(c2[5506],_mm256_xor_si256(c2[6829],_mm256_xor_si256(c2[6609],_mm256_xor_si256(c2[3082],_mm256_xor_si256(c2[4405],_mm256_xor_si256(c2[4185],_mm256_xor_si256(c2[6824],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[2888],_mm256_xor_si256(c2[2668],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[3771],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[2232],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[5088],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[2686],_mm256_xor_si256(c2[2466],_mm256_xor_si256(c2[3789],_mm256_xor_si256(c2[3569],_mm256_xor_si256(c2[2471],_mm256_xor_si256(c2[3574],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[1391],_mm256_xor_si256(c2[1171],_mm256_xor_si256(c2[6669],_mm256_xor_si256(c2[953],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[2934],_mm256_xor_si256(c2[6251],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[315],_mm256_xor_si256(c2[3392],_mm256_xor_si256(c2[4495],_mm256_xor_si256(c2[2515],_mm256_xor_si256(c2[3618],_mm256_xor_si256(c2[6711],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[6275],_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[5170],_mm256_xor_si256(c2[1455],_mm256_xor_si256(c2[1235],_mm256_xor_si256(c2[2558],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[4535],_mm256_xor_si256(c2[5858],_mm256_xor_si256(c2[5638],_mm256_xor_si256(c2[3214],_mm256_xor_si256(c2[4317],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[1920],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[4998],_mm256_xor_si256(c2[6101],_mm256_xor_si256(c2[1943],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[3046],_mm256_xor_si256(c2[2602],_mm256_xor_si256(c2[3705],_mm256_xor_si256(c2[3258],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[5703],_mm256_xor_si256(c2[5483],_mm256_xor_si256(c2[6806],_mm256_xor_si256(c2[6586],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[1967],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[1523],c2[2626])))))))))))))))))))))))))))))))))))))))))
))))))))))))))))))))))))))))))))));
+     d2[187]=simde_mm256_xor_si256(c2[5726],simde_mm256_xor_si256(c2[5506],simde_mm256_xor_si256(c2[6829],simde_mm256_xor_si256(c2[6609],simde_mm256_xor_si256(c2[3082],simde_mm256_xor_si256(c2[4405],simde_mm256_xor_si256(c2[4185],simde_mm256_xor_si256(c2[6824],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[2888],simde_mm256_xor_si256(c2[2668],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[3771],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[2232],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[5088],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[2686],simde_mm256_xor_si256(c2[2466],simde_mm256_xor_si256(c2[3789],simde_mm256_xor_si256(c2[3569],simde_mm256_xor_si256(c2[2471],simde_mm256_xor_si256(c2[3574],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[1391],simde_mm256_xor_si256(c2[1171],simde_mm256_xor_si256(c2[6669],simde_mm256_xor_si256(c2[953],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[2934],simde_mm256_xor_si256(c2[6251],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[315],simde_mm256_xor_si256(c2[3392],simde_mm256_xor_si256(c2[4495],simde_mm256_xor_si256(c2[2515],simde_mm256_xor_si256(c2[3618],simde_mm256_xor_si256(c2[6711],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[6275],simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256(c2[5170],simde_mm256_xor_si256(c2[1455],simde_mm256_xor_si256(c2[1235],simde_mm256_xor_si256(c2[2558],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[4535],simde_mm256_xor_si256(c2[5858],simde_mm256_xor_si256(c2[5638],simde_mm256_xor_si256(c2[3214],simde_mm256_xor_si256(c2[4317],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[1920],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[4998],simde_mm256_xor_si256(c2[6101],simde_mm256_xor_si256(c2[1943],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[3046],simde_mm256_xor_si256(c2[2602],simde_mm256_xo
r_si256(c2[3705],simde_mm256_xor_si256(c2[3258],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[5703],simde_mm256_xor_si256(c2[5483],simde_mm256_xor_si256(c2[6806],simde_mm256_xor_si256(c2[6586],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[1967],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[1523],c2[2626])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[198]=_mm256_xor_si256(c2[2865],_mm256_xor_si256(c2[6952],c2[4994]));
+     d2[198]=simde_mm256_xor_si256(c2[2865],simde_mm256_xor_si256(c2[6952],c2[4994]));
 
 //row: 19
-     d2[209]=_mm256_xor_si256(c2[2649],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[3967],_mm256_xor_si256(c2[2860],_mm256_xor_si256(c2[6850],_mm256_xor_si256(c2[5311],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[1566],_mm256_xor_si256(c2[6648],_mm256_xor_si256(c2[6653],_mm256_xor_si256(c2[4250],_mm256_xor_si256(c2[3812],_mm256_xor_si256(c2[6013],_mm256_xor_si256(c2[3394],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[6697],_mm256_xor_si256(c2[3854],_mm256_xor_si256(c2[3418],_mm256_xor_si256(c2[5417],_mm256_xor_si256(c2[1678],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[4779],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[6125],_mm256_xor_si256(c2[6784],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[2626],_mm256_xor_si256(c2[4826],c2[5705]))))))))))))))))))))))))))));
+     d2[209]=simde_mm256_xor_si256(c2[2649],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[3967],simde_mm256_xor_si256(c2[2860],simde_mm256_xor_si256(c2[6850],simde_mm256_xor_si256(c2[5311],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[1566],simde_mm256_xor_si256(c2[6648],simde_mm256_xor_si256(c2[6653],simde_mm256_xor_si256(c2[4250],simde_mm256_xor_si256(c2[3812],simde_mm256_xor_si256(c2[6013],simde_mm256_xor_si256(c2[3394],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[6697],simde_mm256_xor_si256(c2[3854],simde_mm256_xor_si256(c2[3418],simde_mm256_xor_si256(c2[5417],simde_mm256_xor_si256(c2[1678],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[4779],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2[6125],simde_mm256_xor_si256(c2[6784],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[2626],simde_mm256_xor_si256(c2[4826],c2[5705]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[220]=_mm256_xor_si256(c2[1990],_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[6385],_mm256_xor_si256(c2[3088],_mm256_xor_si256(c2[6191],_mm256_xor_si256(c2[5971],_mm256_xor_si256(c2[4432],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[5989],_mm256_xor_si256(c2[5769],_mm256_xor_si256(c2[5774],_mm256_xor_si256(c2[3591],_mm256_xor_si256(c2[3371],_mm256_xor_si256(c2[2933],_mm256_xor_si256(c2[5134],_mm256_xor_si256(c2[2515],_mm256_xor_si256(c2[6695],_mm256_xor_si256(c2[5818],_mm256_xor_si256(c2[4489],_mm256_xor_si256(c2[2975],_mm256_xor_si256(c2[2539],_mm256_xor_si256(c2[4758],_mm256_xor_si256(c2[4538],_mm256_xor_si256(c2[799],_mm256_xor_si256(c2[6517],_mm256_xor_si256(c2[3900],_mm256_xor_si256(c2[1262],_mm256_xor_si256(c2[5246],_mm256_xor_si256(c2[5905],_mm256_xor_si256(c2[6561],_mm256_xor_si256(c2[1967],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[3947],c2[4826]))))))))))))))))))))))))))))))))));
+     d2[220]=simde_mm256_xor_si256(c2[1990],simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[6385],simde_mm256_xor_si256(c2[3088],simde_mm256_xor_si256(c2[6191],simde_mm256_xor_si256(c2[5971],simde_mm256_xor_si256(c2[4432],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[5989],simde_mm256_xor_si256(c2[5769],simde_mm256_xor_si256(c2[5774],simde_mm256_xor_si256(c2[3591],simde_mm256_xor_si256(c2[3371],simde_mm256_xor_si256(c2[2933],simde_mm256_xor_si256(c2[5134],simde_mm256_xor_si256(c2[2515],simde_mm256_xor_si256(c2[6695],simde_mm256_xor_si256(c2[5818],simde_mm256_xor_si256(c2[4489],simde_mm256_xor_si256(c2[2975],simde_mm256_xor_si256(c2[2539],simde_mm256_xor_si256(c2[4758],simde_mm256_xor_si256(c2[4538],simde_mm256_xor_si256(c2[799],simde_mm256_xor_si256(c2[6517],simde_mm256_xor_si256(c2[3900],simde_mm256_xor_si256(c2[1262],simde_mm256_xor_si256(c2[5246],simde_mm256_xor_si256(c2[5905],simde_mm256_xor_si256(c2[6561],simde_mm256_xor_si256(c2[1967],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[3947],c2[4826]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[231]=_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[5281],_mm256_xor_si256(c2[1984],_mm256_xor_si256(c2[6164],_mm256_xor_si256(c2[4867],_mm256_xor_si256(c2[3328],_mm256_xor_si256(c2[6404],_mm256_xor_si256(c2[6184],_mm256_xor_si256(c2[4665],_mm256_xor_si256(c2[4890],_mm256_xor_si256(c2[4670],_mm256_xor_si256(c2[2267],_mm256_xor_si256(c2[1829],_mm256_xor_si256(c2[4030],_mm256_xor_si256(c2[1411],_mm256_xor_si256(c2[5591],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[4714],_mm256_xor_si256(c2[1871],_mm256_xor_si256(c2[1655],_mm256_xor_si256(c2[1435],_mm256_xor_si256(c2[3434],_mm256_xor_si256(c2[6734],_mm256_xor_si256(c2[5633],_mm256_xor_si256(c2[5413],_mm256_xor_si256(c2[2796],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[158],_mm256_xor_si256(c2[4142],_mm256_xor_si256(c2[4801],_mm256_xor_si256(c2[5677],_mm256_xor_si256(c2[5457],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[2843],_mm256_xor_si256(c2[3942],c2[3722]))))))))))))))))))))))))))))))))))));
+     d2[231]=simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[5281],simde_mm256_xor_si256(c2[1984],simde_mm256_xor_si256(c2[6164],simde_mm256_xor_si256(c2[4867],simde_mm256_xor_si256(c2[3328],simde_mm256_xor_si256(c2[6404],simde_mm256_xor_si256(c2[6184],simde_mm256_xor_si256(c2[4665],simde_mm256_xor_si256(c2[4890],simde_mm256_xor_si256(c2[4670],simde_mm256_xor_si256(c2[2267],simde_mm256_xor_si256(c2[1829],simde_mm256_xor_si256(c2[4030],simde_mm256_xor_si256(c2[1411],simde_mm256_xor_si256(c2[5591],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[4714],simde_mm256_xor_si256(c2[1871],simde_mm256_xor_si256(c2[1655],simde_mm256_xor_si256(c2[1435],simde_mm256_xor_si256(c2[3434],simde_mm256_xor_si256(c2[6734],simde_mm256_xor_si256(c2[5633],simde_mm256_xor_si256(c2[5413],simde_mm256_xor_si256(c2[2796],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[158],simde_mm256_xor_si256(c2[4142],simde_mm256_xor_si256(c2[4801],simde_mm256_xor_si256(c2[5677],simde_mm256_xor_si256(c2[5457],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[2843],simde_mm256_xor_si256(c2[3942],c2[3722]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[242]=_mm256_xor_si256(c2[3982],c2[709]);
+     d2[242]=simde_mm256_xor_si256(c2[3982],c2[709]);
 
 //row: 23
-     d2[253]=_mm256_xor_si256(c2[3083],_mm256_xor_si256(c2[950],c2[4954]));
+     d2[253]=simde_mm256_xor_si256(c2[3083],simde_mm256_xor_si256(c2[950],c2[4954]));
 
 //row: 24
-     d2[264]=_mm256_xor_si256(c2[3765],_mm256_xor_si256(c2[2687],c2[6579]));
+     d2[264]=simde_mm256_xor_si256(c2[3765],simde_mm256_xor_si256(c2[2687],c2[6579]));
 
 //row: 25
-     d2[275]=_mm256_xor_si256(c2[1762],c2[1874]);
+     d2[275]=simde_mm256_xor_si256(c2[1762],c2[1874]);
 
 //row: 26
-     d2[286]=_mm256_xor_si256(c2[3746],_mm256_xor_si256(c2[3526],_mm256_xor_si256(c2[2649],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[1102],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[4844],_mm256_xor_si256(c2[3967],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[6850],_mm256_xor_si256(c2[6188],_mm256_xor_si256(c2[5311],_mm256_xor_si256(c2[2005],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[1128],_mm256_xor_si256(c2[706],_mm256_xor_si256(c2[486],_mm256_xor_si256(c2[6648],_mm256_xor_si256(c2[491],_mm256_xor_si256(c2[6873],_mm256_xor_si256(c2[6653],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[5347],_mm256_xor_si256(c2[5127],_mm256_xor_si256(c2[4250],_mm256_xor_si256(c2[4909],_mm256_xor_si256(c2[4689],_mm256_xor_si256(c2[3812],_mm256_xor_si256(c2[6890],_mm256_xor_si256(c2[6013],_mm256_xor_si256(c2[4491],_mm256_xor_si256(c2[4271],_mm256_xor_si256(c2[3394],_mm256_xor_si256(c2[1412],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[6917],_mm256_xor_si256(c2[6697],_mm256_xor_si256(c2[4951],_mm256_xor_si256(c2[4731],_mm256_xor_si256(c2[3854],_mm256_xor_si256(c2[4295],_mm256_xor_si256(c2[3638],_mm256_xor_si256(c2[3418],_mm256_xor_si256(c2[6514],_mm256_xor_si256(c2[6294],_mm256_xor_si256(c2[5417],_mm256_xor_si256(c2[2775],_mm256_xor_si256(c2[2555],_mm256_xor_si256(c2[1678],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[5876],_mm256_xor_si256(c2[5656],_mm256_xor_si256(c2[4779],_mm256_xor_si256(c2[3018],_mm256_xor_si256(c2[2361],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[1475],_mm256_xor_si256(c2[183],_mm256_xor_si256(c2[7002],_mm256_xor_si256(c2[6125],_mm256_xor_si256(c2[622],_mm256_xor_si256(c2[6784],_mm256_xor_si256(c2[1278],_mm256_xor_si256(c2[621],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[3723],_mm256_xor_si256(c2[3503],_mm256_xor_si256(c2[2626],_mm256_xor_si256(c2[5923],_mm256_xor_si256(c2[5703],_mm256_xor_si256(c2[4826],_mm256_xor_si256(c2[6582],_mm256_xor_si256(c2[5925]
,c2[5705])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[286]=simde_mm256_xor_si256(c2[3746],simde_mm256_xor_si256(c2[3526],simde_mm256_xor_si256(c2[2649],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[1102],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[4844],simde_mm256_xor_si256(c2[3967],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[6850],simde_mm256_xor_si256(c2[6188],simde_mm256_xor_si256(c2[5311],simde_mm256_xor_si256(c2[2005],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[1128],simde_mm256_xor_si256(c2[706],simde_mm256_xor_si256(c2[486],simde_mm256_xor_si256(c2[6648],simde_mm256_xor_si256(c2[491],simde_mm256_xor_si256(c2[6873],simde_mm256_xor_si256(c2[6653],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[5347],simde_mm256_xor_si256(c2[5127],simde_mm256_xor_si256(c2[4250],simde_mm256_xor_si256(c2[4909],simde_mm256_xor_si256(c2[4689],simde_mm256_xor_si256(c2[3812],simde_mm256_xor_si256(c2[6890],simde_mm256_xor_si256(c2[6013],simde_mm256_xor_si256(c2[4491],simde_mm256_xor_si256(c2[4271],simde_mm256_xor_si256(c2[3394],simde_mm256_xor_si256(c2[1412],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[6917],simde_mm256_xor_si256(c2[6697],simde_mm256_xor_si256(c2[4951],simde_mm256_xor_si256(c2[4731],simde_mm256_xor_si256(c2[3854],simde_mm256_xor_si256(c2[4295],simde_mm256_xor_si256(c2[3638],simde_mm256_xor_si256(c2[3418],simde_mm256_xor_si256(c2[6514],simde_mm256_xor_si256(c2[6294],simde_mm256_xor_si256(c2[5417],simde_mm256_xor_si256(c2[2775],simde_mm256_xor_si256(c2[2555],simde_mm256_xor_si256(c2[1678],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[5876],simde_mm256_xor_si256(c2[5656],simde_mm256_xor_si256(c2[4779],simde_mm256_xor_si256(c2[3018],simde_mm256_xor_si256(c2[2361],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2[1475],simde_mm256_xor_si256(c2[183],simde_mm256_xor_si256(c2[7002],simde_mm256_xor_si256(c2[6125],simde_mm256_xo
r_si256(c2[622],simde_mm256_xor_si256(c2[6784],simde_mm256_xor_si256(c2[1278],simde_mm256_xor_si256(c2[621],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[3723],simde_mm256_xor_si256(c2[3503],simde_mm256_xor_si256(c2[2626],simde_mm256_xor_si256(c2[5923],simde_mm256_xor_si256(c2[5703],simde_mm256_xor_si256(c2[4826],simde_mm256_xor_si256(c2[6582],simde_mm256_xor_si256(c2[5925],c2[5705])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[297]=_mm256_xor_si256(c2[2201],c2[1895]);
+     d2[297]=simde_mm256_xor_si256(c2[2201],c2[1895]);
 
 //row: 28
-     d2[308]=_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[5324],c2[4734]));
+     d2[308]=simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[5324],c2[4734]));
 
 //row: 29
-     d2[319]=_mm256_xor_si256(c2[2424],c2[313]);
+     d2[319]=simde_mm256_xor_si256(c2[2424],c2[313]);
 
 //row: 30
-     d2[330]=_mm256_xor_si256(c2[4446],_mm256_xor_si256(c2[2975],_mm256_xor_si256(c2[6536],c2[6578])));
+     d2[330]=simde_mm256_xor_si256(c2[4446],simde_mm256_xor_si256(c2[2975],simde_mm256_xor_si256(c2[6536],c2[6578])));
 
 //row: 31
-     d2[341]=_mm256_xor_si256(c2[3309],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[4627],_mm256_xor_si256(c2[471],_mm256_xor_si256(c2[5971],_mm256_xor_si256(c2[2008],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[4645],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[494],_mm256_xor_si256(c2[274],_mm256_xor_si256(c2[4910],_mm256_xor_si256(c2[4472],_mm256_xor_si256(c2[6673],_mm256_xor_si256(c2[4054],_mm256_xor_si256(c2[1195],_mm256_xor_si256(c2[538],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[4514],_mm256_xor_si256(c2[4298],_mm256_xor_si256(c2[4078],_mm256_xor_si256(c2[6077],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[1237],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[5439],_mm256_xor_si256(c2[3021],_mm256_xor_si256(c2[2801],_mm256_xor_si256(c2[6785],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[1281],_mm256_xor_si256(c2[1061],_mm256_xor_si256(c2[3286],_mm256_xor_si256(c2[5486],_mm256_xor_si256(c2[6585],c2[6365])))))))))))))))))))))))))))))))))));
+     d2[341]=simde_mm256_xor_si256(c2[3309],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[4627],simde_mm256_xor_si256(c2[471],simde_mm256_xor_si256(c2[5971],simde_mm256_xor_si256(c2[2008],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[4645],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[494],simde_mm256_xor_si256(c2[274],simde_mm256_xor_si256(c2[4910],simde_mm256_xor_si256(c2[4472],simde_mm256_xor_si256(c2[6673],simde_mm256_xor_si256(c2[4054],simde_mm256_xor_si256(c2[1195],simde_mm256_xor_si256(c2[538],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[4514],simde_mm256_xor_si256(c2[4298],simde_mm256_xor_si256(c2[4078],simde_mm256_xor_si256(c2[6077],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[1237],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[5439],simde_mm256_xor_si256(c2[3021],simde_mm256_xor_si256(c2[2801],simde_mm256_xor_si256(c2[6785],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[1281],simde_mm256_xor_si256(c2[1061],simde_mm256_xor_si256(c2[3286],simde_mm256_xor_si256(c2[5486],simde_mm256_xor_si256(c2[6585],c2[6365])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[352]=_mm256_xor_si256(c2[2209],_mm256_xor_si256(c2[1989],_mm256_xor_si256(c2[6824],_mm256_xor_si256(c2[6604],_mm256_xor_si256(c2[3307],_mm256_xor_si256(c2[4403],_mm256_xor_si256(c2[6410],_mm256_xor_si256(c2[6190],_mm256_xor_si256(c2[4651],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[6208],_mm256_xor_si256(c2[5988],_mm256_xor_si256(c2[5993],_mm256_xor_si256(c2[3810],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[3372],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[5353],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[2734],_mm256_xor_si256(c2[6914],_mm256_xor_si256(c2[6037],_mm256_xor_si256(c2[3414],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2758],_mm256_xor_si256(c2[2092],_mm256_xor_si256(c2[4977],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[1238],_mm256_xor_si256(c2[1018],_mm256_xor_si256(c2[6736],_mm256_xor_si256(c2[4339],_mm256_xor_si256(c2[4119],_mm256_xor_si256(c2[1481],_mm256_xor_si256(c2[5685],_mm256_xor_si256(c2[5465],_mm256_xor_si256(c2[6124],_mm256_xor_si256(c2[6780],_mm256_xor_si256(c2[2186],_mm256_xor_si256(c2[1966],_mm256_xor_si256(c2[4386],_mm256_xor_si256(c2[4166],c2[5045]))))))))))))))))))))))))))))))))))))))))));
+     d2[352]=simde_mm256_xor_si256(c2[2209],simde_mm256_xor_si256(c2[1989],simde_mm256_xor_si256(c2[6824],simde_mm256_xor_si256(c2[6604],simde_mm256_xor_si256(c2[3307],simde_mm256_xor_si256(c2[4403],simde_mm256_xor_si256(c2[6410],simde_mm256_xor_si256(c2[6190],simde_mm256_xor_si256(c2[4651],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[6208],simde_mm256_xor_si256(c2[5988],simde_mm256_xor_si256(c2[5993],simde_mm256_xor_si256(c2[3810],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[3372],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[5353],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[2734],simde_mm256_xor_si256(c2[6914],simde_mm256_xor_si256(c2[6037],simde_mm256_xor_si256(c2[3414],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2758],simde_mm256_xor_si256(c2[2092],simde_mm256_xor_si256(c2[4977],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[1238],simde_mm256_xor_si256(c2[1018],simde_mm256_xor_si256(c2[6736],simde_mm256_xor_si256(c2[4339],simde_mm256_xor_si256(c2[4119],simde_mm256_xor_si256(c2[1481],simde_mm256_xor_si256(c2[5685],simde_mm256_xor_si256(c2[5465],simde_mm256_xor_si256(c2[6124],simde_mm256_xor_si256(c2[6780],simde_mm256_xor_si256(c2[2186],simde_mm256_xor_si256(c2[1966],simde_mm256_xor_si256(c2[4386],simde_mm256_xor_si256(c2[4166],c2[5045]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[363]=_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[6603],_mm256_xor_si256(c2[2447],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[3764],_mm256_xor_si256(c2[2245],_mm256_xor_si256(c2[2250],_mm256_xor_si256(c2[2027],_mm256_xor_si256(c2[6886],_mm256_xor_si256(c2[6448],_mm256_xor_si256(c2[1610],_mm256_xor_si256(c2[6030],_mm256_xor_si256(c2[3171],_mm256_xor_si256(c2[2294],_mm256_xor_si256(c2[6490],_mm256_xor_si256(c2[6054],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[4314],_mm256_xor_si256(c2[2993],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[4777],_mm256_xor_si256(c2[2138],_mm256_xor_si256(c2[1722],_mm256_xor_si256(c2[2381],_mm256_xor_si256(c2[3037],_mm256_xor_si256(c2[5262],_mm256_xor_si256(c2[423],c2[1302]))))))))))))))))))))))))))));
+     d2[363]=simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[6603],simde_mm256_xor_si256(c2[2447],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[3764],simde_mm256_xor_si256(c2[2245],simde_mm256_xor_si256(c2[2250],simde_mm256_xor_si256(c2[2027],simde_mm256_xor_si256(c2[6886],simde_mm256_xor_si256(c2[6448],simde_mm256_xor_si256(c2[1610],simde_mm256_xor_si256(c2[6030],simde_mm256_xor_si256(c2[3171],simde_mm256_xor_si256(c2[2294],simde_mm256_xor_si256(c2[6490],simde_mm256_xor_si256(c2[6054],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[4314],simde_mm256_xor_si256(c2[2993],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[4777],simde_mm256_xor_si256(c2[2138],simde_mm256_xor_si256(c2[1722],simde_mm256_xor_si256(c2[2381],simde_mm256_xor_si256(c2[3037],simde_mm256_xor_si256(c2[5262],simde_mm256_xor_si256(c2[423],c2[1302]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[374]=_mm256_xor_si256(c2[1106],_mm256_xor_si256(c2[886],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[5721],_mm256_xor_si256(c2[5501],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[2204],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[2420],_mm256_xor_si256(c2[5307],_mm256_xor_si256(c2[5087],_mm256_xor_si256(c2[3551],_mm256_xor_si256(c2[3548],_mm256_xor_si256(c2[2012],_mm256_xor_si256(c2[6404],_mm256_xor_si256(c2[5088],_mm256_xor_si256(c2[4868],_mm256_xor_si256(c2[5105],_mm256_xor_si256(c2[4885],_mm256_xor_si256(c2[3349],_mm256_xor_si256(c2[4890],_mm256_xor_si256(c2[3574],_mm256_xor_si256(c2[3354],_mm256_xor_si256(c2[2707],_mm256_xor_si256(c2[2487],_mm256_xor_si256(c2[951],_mm256_xor_si256(c2[2269],_mm256_xor_si256(c2[2049],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[4250],_mm256_xor_si256(c2[2714],_mm256_xor_si256(c2[1851],_mm256_xor_si256(c2[1631],_mm256_xor_si256(c2[95],_mm256_xor_si256(c2[5811],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[4934],_mm256_xor_si256(c2[3618],_mm256_xor_si256(c2[3398],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[2091],_mm256_xor_si256(c2[555],_mm256_xor_si256(c2[1655],_mm256_xor_si256(c2[339],_mm256_xor_si256(c2[119],_mm256_xor_si256(c2[3874],_mm256_xor_si256(c2[3654],_mm256_xor_si256(c2[2118],_mm256_xor_si256(c2[135],_mm256_xor_si256(c2[6954],_mm256_xor_si256(c2[5418],_mm256_xor_si256(c2[5633],_mm256_xor_si256(c2[4317],_mm256_xor_si256(c2[4097],_mm256_xor_si256(c2[3236],_mm256_xor_si256(c2[3016],_mm256_xor_si256(c2[1480],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[6101],_mm256_xor_si256(c2[5881],_mm256_xor_si256(c2[4582],_mm256_xor_si256(c2[4362],_mm256_xor_si256(c2[2826],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[3485],_mm256_xor_si256(c2[5677],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[863],_mm256_xor_si256(c2[6366],_mm256_xor_si256(c2[3283],_mm256_xor_si256(c2[3063],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[3942],_mm256_xor_si256(c2[2626],c2[2406]))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[374]=simde_mm256_xor_si256(c2[1106],simde_mm256_xor_si256(c2[886],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[5721],simde_mm256_xor_si256(c2[5501],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[2204],simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[2420],simde_mm256_xor_si256(c2[5307],simde_mm256_xor_si256(c2[5087],simde_mm256_xor_si256(c2[3551],simde_mm256_xor_si256(c2[3548],simde_mm256_xor_si256(c2[2012],simde_mm256_xor_si256(c2[6404],simde_mm256_xor_si256(c2[5088],simde_mm256_xor_si256(c2[4868],simde_mm256_xor_si256(c2[5105],simde_mm256_xor_si256(c2[4885],simde_mm256_xor_si256(c2[3349],simde_mm256_xor_si256(c2[4890],simde_mm256_xor_si256(c2[3574],simde_mm256_xor_si256(c2[3354],simde_mm256_xor_si256(c2[2707],simde_mm256_xor_si256(c2[2487],simde_mm256_xor_si256(c2[951],simde_mm256_xor_si256(c2[2269],simde_mm256_xor_si256(c2[2049],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[4250],simde_mm256_xor_si256(c2[2714],simde_mm256_xor_si256(c2[1851],simde_mm256_xor_si256(c2[1631],simde_mm256_xor_si256(c2[95],simde_mm256_xor_si256(c2[5811],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[4934],simde_mm256_xor_si256(c2[3618],simde_mm256_xor_si256(c2[3398],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[2091],simde_mm256_xor_si256(c2[555],simde_mm256_xor_si256(c2[1655],simde_mm256_xor_si256(c2[339],simde_mm256_xor_si256(c2[119],simde_mm256_xor_si256(c2[3874],simde_mm256_xor_si256(c2[3654],simde_mm256_xor_si256(c2[2118],simde_mm256_xor_si256(c2[135],simde_mm256_xor_si256(c2[6954],simde_mm256_xor_si256(c2[5418],simde_mm256_xor_si256(c2[5633],simde_mm256_xor_si256(c2[4317],simde_mm256_xor_si256(c2[4097],simde_mm256_xor_si256(c2[3236],simde_mm256_xor_si256(c2[3016],simde_mm256_xor_si256(c2[1480],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[6101],simde_mm256_xor_si256(c2[5881],simde_mm256_xor_si256(c2[4582],simde_mm256_xor_si256(c2[4362],simde_mm256_xor_si256(c2[2826],simde_mm256_xor_si256(c2[5021],simde_mm256_x
or_si256(c2[3485],simde_mm256_xor_si256(c2[5677],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[863],simde_mm256_xor_si256(c2[6366],simde_mm256_xor_si256(c2[3283],simde_mm256_xor_si256(c2[3063],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[3942],simde_mm256_xor_si256(c2[2626],c2[2406]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[385]=_mm256_xor_si256(c2[1770],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[6165],_mm256_xor_si256(c2[2868],_mm256_xor_si256(c2[5971],_mm256_xor_si256(c2[5751],_mm256_xor_si256(c2[4212],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[6625],_mm256_xor_si256(c2[5769],_mm256_xor_si256(c2[5549],_mm256_xor_si256(c2[5554],_mm256_xor_si256(c2[3371],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[2713],_mm256_xor_si256(c2[4914],_mm256_xor_si256(c2[2295],_mm256_xor_si256(c2[6475],_mm256_xor_si256(c2[5598],_mm256_xor_si256(c2[2755],_mm256_xor_si256(c2[2319],_mm256_xor_si256(c2[5394],_mm256_xor_si256(c2[4538],_mm256_xor_si256(c2[4318],_mm256_xor_si256(c2[579],_mm256_xor_si256(c2[6297],_mm256_xor_si256(c2[3680],_mm256_xor_si256(c2[1042],_mm256_xor_si256(c2[5026],_mm256_xor_si256(c2[5685],_mm256_xor_si256(c2[6341],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[1527],_mm256_xor_si256(c2[3727],c2[4606]))))))))))))))))))))))))))))))))));
+     d2[385]=simde_mm256_xor_si256(c2[1770],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[6165],simde_mm256_xor_si256(c2[2868],simde_mm256_xor_si256(c2[5971],simde_mm256_xor_si256(c2[5751],simde_mm256_xor_si256(c2[4212],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[6625],simde_mm256_xor_si256(c2[5769],simde_mm256_xor_si256(c2[5549],simde_mm256_xor_si256(c2[5554],simde_mm256_xor_si256(c2[3371],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[2713],simde_mm256_xor_si256(c2[4914],simde_mm256_xor_si256(c2[2295],simde_mm256_xor_si256(c2[6475],simde_mm256_xor_si256(c2[5598],simde_mm256_xor_si256(c2[2755],simde_mm256_xor_si256(c2[2319],simde_mm256_xor_si256(c2[5394],simde_mm256_xor_si256(c2[4538],simde_mm256_xor_si256(c2[4318],simde_mm256_xor_si256(c2[579],simde_mm256_xor_si256(c2[6297],simde_mm256_xor_si256(c2[3680],simde_mm256_xor_si256(c2[1042],simde_mm256_xor_si256(c2[5026],simde_mm256_xor_si256(c2[5685],simde_mm256_xor_si256(c2[6341],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[1527],simde_mm256_xor_si256(c2[3727],c2[4606]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[396]=_mm256_xor_si256(c2[1982],_mm256_xor_si256(c2[5768],c2[378]));
+     d2[396]=simde_mm256_xor_si256(c2[1982],simde_mm256_xor_si256(c2[5768],c2[378]));
 
 //row: 37
-     d2[407]=_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[5065],_mm256_xor_si256(c2[4841],_mm256_xor_si256(c2[1768],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[4651],_mm256_xor_si256(c2[4427],_mm256_xor_si256(c2[3112],_mm256_xor_si256(c2[2888],_mm256_xor_si256(c2[5968],_mm256_xor_si256(c2[5964],_mm256_xor_si256(c2[5744],_mm256_xor_si256(c2[4449],_mm256_xor_si256(c2[4225],_mm256_xor_si256(c2[4454],_mm256_xor_si256(c2[4450],_mm256_xor_si256(c2[4230],_mm256_xor_si256(c2[2051],_mm256_xor_si256(c2[1827],_mm256_xor_si256(c2[1613],_mm256_xor_si256(c2[1389],_mm256_xor_si256(c2[3814],_mm256_xor_si256(c2[3590],_mm256_xor_si256(c2[1195],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[5375],_mm256_xor_si256(c2[5151],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[4494],_mm256_xor_si256(c2[4274],_mm256_xor_si256(c2[1655],_mm256_xor_si256(c2[1431],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[1215],_mm256_xor_si256(c2[995],_mm256_xor_si256(c2[3218],_mm256_xor_si256(c2[2994],_mm256_xor_si256(c2[6518],_mm256_xor_si256(c2[6294],_mm256_xor_si256(c2[5197],_mm256_xor_si256(c2[5193],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[2580],_mm256_xor_si256(c2[2356],_mm256_xor_si256(c2[6981],_mm256_xor_si256(c2[6977],_mm256_xor_si256(c2[6757],_mm256_xor_si256(c2[3926],_mm256_xor_si256(c2[3702],_mm256_xor_si256(c2[4585],_mm256_xor_si256(c2[4361],_mm256_xor_si256(c2[5241],_mm256_xor_si256(c2[5237],_mm256_xor_si256(c2[5017],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[203],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[3506],_mm256_xor_si256(c2[3502],c2[3282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[407]=simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[5065],simde_mm256_xor_si256(c2[4841],simde_mm256_xor_si256(c2[1768],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[4651],simde_mm256_xor_si256(c2[4427],simde_mm256_xor_si256(c2[3112],simde_mm256_xor_si256(c2[2888],simde_mm256_xor_si256(c2[5968],simde_mm256_xor_si256(c2[5964],simde_mm256_xor_si256(c2[5744],simde_mm256_xor_si256(c2[4449],simde_mm256_xor_si256(c2[4225],simde_mm256_xor_si256(c2[4454],simde_mm256_xor_si256(c2[4450],simde_mm256_xor_si256(c2[4230],simde_mm256_xor_si256(c2[2051],simde_mm256_xor_si256(c2[1827],simde_mm256_xor_si256(c2[1613],simde_mm256_xor_si256(c2[1389],simde_mm256_xor_si256(c2[3814],simde_mm256_xor_si256(c2[3590],simde_mm256_xor_si256(c2[1195],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[5375],simde_mm256_xor_si256(c2[5151],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[4494],simde_mm256_xor_si256(c2[4274],simde_mm256_xor_si256(c2[1655],simde_mm256_xor_si256(c2[1431],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[1215],simde_mm256_xor_si256(c2[995],simde_mm256_xor_si256(c2[3218],simde_mm256_xor_si256(c2[2994],simde_mm256_xor_si256(c2[6518],simde_mm256_xor_si256(c2[6294],simde_mm256_xor_si256(c2[5197],simde_mm256_xor_si256(c2[5193],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[2580],simde_mm256_xor_si256(c2[2356],simde_mm256_xor_si256(c2[6981],simde_mm256_xor_si256(c2[6977],simde_mm256_xor_si256(c2[6757],simde_mm256_xor_si256(c2[3926],simde_mm256_xor_si256(c2[3702],simde_mm256_xor_si256(c2[4585],simde_mm256_xor_si256(c2[4361],simde_mm256_xor_si256(c2[5241],simde_mm256_xor_si256(c2[5237],simde_mm256_xor_si256(c2[5017],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[203],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[3506],simde_mm256_xor_si256(c2[3502],c2[3282])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[418]=_mm256_xor_si256(c2[3305],_mm256_xor_si256(c2[3085],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[4403],_mm256_xor_si256(c2[467],_mm256_xor_si256(c2[247],_mm256_xor_si256(c2[5747],_mm256_xor_si256(c2[1564],_mm256_xor_si256(c2[3545],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[45],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[4906],_mm256_xor_si256(c2[4686],_mm256_xor_si256(c2[4248],_mm256_xor_si256(c2[6449],_mm256_xor_si256(c2[3830],_mm256_xor_si256(c2[971],_mm256_xor_si256(c2[94],_mm256_xor_si256(c2[4290],_mm256_xor_si256(c2[3854],_mm256_xor_si256(c2[1650],_mm256_xor_si256(c2[6073],_mm256_xor_si256(c2[5853],_mm256_xor_si256(c2[2114],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[5215],_mm256_xor_si256(c2[2577],_mm256_xor_si256(c2[6561],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[837],_mm256_xor_si256(c2[3282],_mm256_xor_si256(c2[3062],_mm256_xor_si256(c2[5262],c2[6141]))))))))))))))))))))))))))))))))));
+     d2[418]=simde_mm256_xor_si256(c2[3305],simde_mm256_xor_si256(c2[3085],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[4403],simde_mm256_xor_si256(c2[467],simde_mm256_xor_si256(c2[247],simde_mm256_xor_si256(c2[5747],simde_mm256_xor_si256(c2[1564],simde_mm256_xor_si256(c2[3545],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[45],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[4906],simde_mm256_xor_si256(c2[4686],simde_mm256_xor_si256(c2[4248],simde_mm256_xor_si256(c2[6449],simde_mm256_xor_si256(c2[3830],simde_mm256_xor_si256(c2[971],simde_mm256_xor_si256(c2[94],simde_mm256_xor_si256(c2[4290],simde_mm256_xor_si256(c2[3854],simde_mm256_xor_si256(c2[1650],simde_mm256_xor_si256(c2[6073],simde_mm256_xor_si256(c2[5853],simde_mm256_xor_si256(c2[2114],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[5215],simde_mm256_xor_si256(c2[2577],simde_mm256_xor_si256(c2[6561],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[837],simde_mm256_xor_si256(c2[3282],simde_mm256_xor_si256(c2[3062],simde_mm256_xor_si256(c2[5262],c2[6141]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[429]=_mm256_xor_si256(c2[1108],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[5723],_mm256_xor_si256(c2[5503],_mm256_xor_si256(c2[2206],_mm256_xor_si256(c2[2863],_mm256_xor_si256(c2[5309],_mm256_xor_si256(c2[5089],_mm256_xor_si256(c2[3550],_mm256_xor_si256(c2[6406],_mm256_xor_si256(c2[5107],_mm256_xor_si256(c2[4887],_mm256_xor_si256(c2[4892],_mm256_xor_si256(c2[2709],_mm256_xor_si256(c2[2489],_mm256_xor_si256(c2[2271],_mm256_xor_si256(c2[2051],_mm256_xor_si256(c2[4252],_mm256_xor_si256(c2[1853],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[5813],_mm256_xor_si256(c2[4936],_mm256_xor_si256(c2[2313],_mm256_xor_si256(c2[2093],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[3876],_mm256_xor_si256(c2[3656],_mm256_xor_si256(c2[137],_mm256_xor_si256(c2[6956],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[3238],_mm256_xor_si256(c2[3018],_mm256_xor_si256(c2[380],_mm256_xor_si256(c2[1474],_mm256_xor_si256(c2[4584],_mm256_xor_si256(c2[4364],_mm256_xor_si256(c2[5023],_mm256_xor_si256(c2[5679],_mm256_xor_si256(c2[1085],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[3285],_mm256_xor_si256(c2[3065],c2[3944]))))))))))))))))))))))))))))))))))))))))));
+     d2[429]=simde_mm256_xor_si256(c2[1108],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[5723],simde_mm256_xor_si256(c2[5503],simde_mm256_xor_si256(c2[2206],simde_mm256_xor_si256(c2[2863],simde_mm256_xor_si256(c2[5309],simde_mm256_xor_si256(c2[5089],simde_mm256_xor_si256(c2[3550],simde_mm256_xor_si256(c2[6406],simde_mm256_xor_si256(c2[5107],simde_mm256_xor_si256(c2[4887],simde_mm256_xor_si256(c2[4892],simde_mm256_xor_si256(c2[2709],simde_mm256_xor_si256(c2[2489],simde_mm256_xor_si256(c2[2271],simde_mm256_xor_si256(c2[2051],simde_mm256_xor_si256(c2[4252],simde_mm256_xor_si256(c2[1853],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[5813],simde_mm256_xor_si256(c2[4936],simde_mm256_xor_si256(c2[2313],simde_mm256_xor_si256(c2[2093],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[3876],simde_mm256_xor_si256(c2[3656],simde_mm256_xor_si256(c2[137],simde_mm256_xor_si256(c2[6956],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[3238],simde_mm256_xor_si256(c2[3018],simde_mm256_xor_si256(c2[380],simde_mm256_xor_si256(c2[1474],simde_mm256_xor_si256(c2[4584],simde_mm256_xor_si256(c2[4364],simde_mm256_xor_si256(c2[5023],simde_mm256_xor_si256(c2[5679],simde_mm256_xor_si256(c2[1085],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[3285],simde_mm256_xor_si256(c2[3065],c2[3944]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[440]=_mm256_xor_si256(c2[5069],_mm256_xor_si256(c2[2867],_mm256_xor_si256(c2[2645],_mm256_xor_si256(c2[443],_mm256_xor_si256(c2[6387],_mm256_xor_si256(c2[4185],_mm256_xor_si256(c2[2231],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[692],_mm256_xor_si256(c2[5529],_mm256_xor_si256(c2[3548],_mm256_xor_si256(c2[1566],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[2029],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[2034],_mm256_xor_si256(c2[52],_mm256_xor_si256(c2[6871],_mm256_xor_si256(c2[49],_mm256_xor_si256(c2[6670],_mm256_xor_si256(c2[4468],_mm256_xor_si256(c2[6232],_mm256_xor_si256(c2[4030],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[6231],_mm256_xor_si256(c2[5814],_mm256_xor_si256(c2[3612],_mm256_xor_si256(c2[2955],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[2078],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[6915],_mm256_xor_si256(c2[6274],_mm256_xor_si256(c2[4072],_mm256_xor_si256(c2[5838],_mm256_xor_si256(c2[3856],_mm256_xor_si256(c2[3636],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[5635],_mm256_xor_si256(c2[4098],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[2777],_mm256_xor_si256(c2[795],_mm256_xor_si256(c2[575],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[4997],_mm256_xor_si256(c2[4561],_mm256_xor_si256(c2[2579],_mm256_xor_si256(c2[2359],_mm256_xor_si256(c2[1506],_mm256_xor_si256(c2[6343],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[7002],_mm256_xor_si256(c2[2821],_mm256_xor_si256(c2[839],_mm256_xor_si256(c2[619],_mm256_xor_si256(c2[5046],_mm256_xor_si256(c2[2844],_mm256_xor_si256(c2[207],_mm256_xor_si256(c2[5044],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[6143],c2[5923]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[440]=simde_mm256_xor_si256(c2[5069],simde_mm256_xor_si256(c2[2867],simde_mm256_xor_si256(c2[2645],simde_mm256_xor_si256(c2[443],simde_mm256_xor_si256(c2[6387],simde_mm256_xor_si256(c2[4185],simde_mm256_xor_si256(c2[2231],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[692],simde_mm256_xor_si256(c2[5529],simde_mm256_xor_si256(c2[3548],simde_mm256_xor_si256(c2[1566],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[2029],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[2034],simde_mm256_xor_si256(c2[52],simde_mm256_xor_si256(c2[6871],simde_mm256_xor_si256(c2[49],simde_mm256_xor_si256(c2[6670],simde_mm256_xor_si256(c2[4468],simde_mm256_xor_si256(c2[6232],simde_mm256_xor_si256(c2[4030],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[6231],simde_mm256_xor_si256(c2[5814],simde_mm256_xor_si256(c2[3612],simde_mm256_xor_si256(c2[2955],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[2078],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[6915],simde_mm256_xor_si256(c2[6274],simde_mm256_xor_si256(c2[4072],simde_mm256_xor_si256(c2[5838],simde_mm256_xor_si256(c2[3856],simde_mm256_xor_si256(c2[3636],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[5635],simde_mm256_xor_si256(c2[4098],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[2777],simde_mm256_xor_si256(c2[795],simde_mm256_xor_si256(c2[575],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[4997],simde_mm256_xor_si256(c2[4561],simde_mm256_xor_si256(c2[2579],simde_mm256_xor_si256(c2[2359],simde_mm256_xor_si256(c2[1506],simde_mm256_xor_si256(c2[6343],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[7002],simde_mm256_xor_si256(c2[2821],simde_mm256_xor_si256(c2[839],simde_mm256_xor_si256(c2[619],simde_mm256_xor_si256(c2[5046],simde_mm256_xor_si256(c2[2844],simde_mm256_xor_si256(c2[207],simde_mm256_xor_si256(c2[5044],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[6143],c2[5923]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[451]=_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[5280],_mm256_xor_si256(c2[1983],_mm256_xor_si256(c2[5086],_mm256_xor_si256(c2[4866],_mm256_xor_si256(c2[3327],_mm256_xor_si256(c2[6183],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[4884],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[4669],_mm256_xor_si256(c2[2486],_mm256_xor_si256(c2[2266],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[4029],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[5590],_mm256_xor_si256(c2[4713],_mm256_xor_si256(c2[1870],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[3433],_mm256_xor_si256(c2[6733],_mm256_xor_si256(c2[5412],_mm256_xor_si256(c2[2795],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[4141],_mm256_xor_si256(c2[4800],_mm256_xor_si256(c2[5456],_mm256_xor_si256(c2[862],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[2842],c2[3721]))))))))))))))))))))))))))))))))));
+     d2[451]=simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[5280],simde_mm256_xor_si256(c2[1983],simde_mm256_xor_si256(c2[5086],simde_mm256_xor_si256(c2[4866],simde_mm256_xor_si256(c2[3327],simde_mm256_xor_si256(c2[6183],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[4884],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[4669],simde_mm256_xor_si256(c2[2486],simde_mm256_xor_si256(c2[2266],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[4029],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[5590],simde_mm256_xor_si256(c2[4713],simde_mm256_xor_si256(c2[1870],simde_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[3433],simde_mm256_xor_si256(c2[6733],simde_mm256_xor_si256(c2[5412],simde_mm256_xor_si256(c2[2795],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[4141],simde_mm256_xor_si256(c2[4800],simde_mm256_xor_si256(c2[5456],simde_mm256_xor_si256(c2[862],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[2842],c2[3721]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc384_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc384_byte.c
index 690278f4b3d888a5fbcb5e94028e7b7d58c89bab..b61f7500a8fc2ff3d925d6ed4dc9e9d35096e567 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc384_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc384_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc384_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[6240],_mm256_xor_si256(c2[5760],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[6602],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[6387],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[5211],_mm256_xor_si256(c2[5957],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[6240],simde_mm256_xor_si256(c2[5760],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[6602],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[6387],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[5211],simde_mm256_xor_si256(c2[5957],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))));
 
 //row: 1
-     d2[12]=_mm256_xor_si256(c2[3365],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[6240],_mm256_xor_si256(c2[5760],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[1493],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[6602],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[6387],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[5211],_mm256_xor_si256(c2[5957],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[3365],simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[6240],simde_mm256_xor_si256(c2[5760],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[1493],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[6602],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[6387],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[5211],simde_mm256_xor_si256(c2[5957],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[24]=_mm256_xor_si256(c2[3365],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[6480],_mm256_xor_si256(c2[6240],_mm256_xor_si256(c2[5760],_mm256_xor_si256(c2[267],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[1493],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[3937],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[6842],_mm256_xor_si256(c2[6602],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[7584],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[6387],_mm256_xor_si256(c2[6653],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[5211],_mm256_xor_si256(c2[6197],_mm256_xor_si256(c2[5957],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[3365],simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[6480],simde_mm256_xor_si256(c2[6240],simde_mm256_xor_si256(c2[5760],simde_mm256_xor_si256(c2[267],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[1493],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[3937],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[6842],simde_mm256_xor_si256(c2[6602],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[7584],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[6387],simde_mm256_xor_si256(c2[6653],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[5211],simde_mm256_xor_si256(c2[6197],simde_mm256_xor_si256(c2[5957],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[696],c2[6221]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[36]=_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[6240],_mm256_xor_si256(c2[5760],_mm256_xor_si256(c2[27],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[1948],_mm256_xor_si256(c2[1708],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[3697],_mm256_xor_si256(c2[3219],_mm256_xor_si256(c2[4179],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[6602],_mm256_xor_si256(c2[2042],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[7344],_mm256_xor_si256(c2[6627],_mm256_xor_si256(c2[6387],_mm256_xor_si256(c2[6413],_mm256_xor_si256(c2[5451],_mm256_xor_si256(c2[5211],_mm256_xor_si256(c2[5957],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[6912],_mm256_xor_si256(c2[6672],_mm256_xor_si256(c2[2861],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[6461],c2[6221]))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[6240],simde_mm256_xor_si256(c2[5760],simde_mm256_xor_si256(c2[27],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[1948],simde_mm256_xor_si256(c2[1708],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[3697],simde_mm256_xor_si256(c2[3219],simde_mm256_xor_si256(c2[4179],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[6602],simde_mm256_xor_si256(c2[2042],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[7344],simde_mm256_xor_si256(c2[6627],simde_mm256_xor_si256(c2[6387],simde_mm256_xor_si256(c2[6413],simde_mm256_xor_si256(c2[5451],simde_mm256_xor_si256(c2[5211],simde_mm256_xor_si256(c2[5957],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[6912],simde_mm256_xor_si256(c2[6672],simde_mm256_xor_si256(c2[2861],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[6461],c2[6221]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[48]=_mm256_xor_si256(c2[2646],_mm256_xor_si256(c2[2406],_mm256_xor_si256(c2[5521],_mm256_xor_si256(c2[5041],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[7227],_mm256_xor_si256(c2[6987],_mm256_xor_si256(c2[3388],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[2426],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[2694],_mm256_xor_si256(c2[7514],_mm256_xor_si256(c2[7274],_mm256_xor_si256(c2[74],_mm256_xor_si256(c2[4156],_mm256_xor_si256(c2[2978],_mm256_xor_si256(c2[2500],_mm256_xor_si256(c2[3220],_mm256_xor_si256(c2[5883],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[1107],_mm256_xor_si256(c2[867],_mm256_xor_si256(c2[6625],_mm256_xor_si256(c2[5668],_mm256_xor_si256(c2[5694],_mm256_xor_si256(c2[4492],_mm256_xor_si256(c2[5238],_mm256_xor_si256(c2[3556],_mm256_xor_si256(c2[5953],_mm256_xor_si256(c2[2382],_mm256_xor_si256(c2[2142],_mm256_xor_si256(c2[7656],c2[5502]))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[2646],simde_mm256_xor_si256(c2[2406],simde_mm256_xor_si256(c2[5521],simde_mm256_xor_si256(c2[5041],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[7227],simde_mm256_xor_si256(c2[6987],simde_mm256_xor_si256(c2[3388],simde_mm256_xor_si256(c2[989],simde_mm256_xor_si256(c2[2426],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[2694],simde_mm256_xor_si256(c2[7514],simde_mm256_xor_si256(c2[7274],simde_mm256_xor_si256(c2[74],simde_mm256_xor_si256(c2[4156],simde_mm256_xor_si256(c2[2978],simde_mm256_xor_si256(c2[2500],simde_mm256_xor_si256(c2[3220],simde_mm256_xor_si256(c2[5883],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[1107],simde_mm256_xor_si256(c2[867],simde_mm256_xor_si256(c2[6625],simde_mm256_xor_si256(c2[5668],simde_mm256_xor_si256(c2[5694],simde_mm256_xor_si256(c2[4492],simde_mm256_xor_si256(c2[5238],simde_mm256_xor_si256(c2[3556],simde_mm256_xor_si256(c2[5953],simde_mm256_xor_si256(c2[2382],simde_mm256_xor_si256(c2[2142],simde_mm256_xor_si256(c2[7656],c2[5502]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[60]=_mm256_xor_si256(c2[7206],_mm256_xor_si256(c2[6966],_mm256_xor_si256(c2[2402],_mm256_xor_si256(c2[1922],_mm256_xor_si256(c2[2400],_mm256_xor_si256(c2[4108],_mm256_xor_si256(c2[3868],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[5549],_mm256_xor_si256(c2[2905],_mm256_xor_si256(c2[5334],_mm256_xor_si256(c2[5094],_mm256_xor_si256(c2[7254],_mm256_xor_si256(c2[4395],_mm256_xor_si256(c2[4155],_mm256_xor_si256(c2[4634],_mm256_xor_si256(c2[1037],_mm256_xor_si256(c2[7538],_mm256_xor_si256(c2[7060],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[5643],_mm256_xor_si256(c2[6123],_mm256_xor_si256(c2[5667],_mm256_xor_si256(c2[5427],_mm256_xor_si256(c2[3506],_mm256_xor_si256(c2[2549],_mm256_xor_si256(c2[2575],_mm256_xor_si256(c2[1373],_mm256_xor_si256(c2[4010],_mm256_xor_si256(c2[2119],_mm256_xor_si256(c2[437],_mm256_xor_si256(c2[2834],_mm256_xor_si256(c2[6942],_mm256_xor_si256(c2[6702],_mm256_xor_si256(c2[4537],c2[2383]))))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[7206],simde_mm256_xor_si256(c2[6966],simde_mm256_xor_si256(c2[2402],simde_mm256_xor_si256(c2[1922],simde_mm256_xor_si256(c2[2400],simde_mm256_xor_si256(c2[4108],simde_mm256_xor_si256(c2[3868],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[5549],simde_mm256_xor_si256(c2[2905],simde_mm256_xor_si256(c2[5334],simde_mm256_xor_si256(c2[5094],simde_mm256_xor_si256(c2[7254],simde_mm256_xor_si256(c2[4395],simde_mm256_xor_si256(c2[4155],simde_mm256_xor_si256(c2[4634],simde_mm256_xor_si256(c2[1037],simde_mm256_xor_si256(c2[7538],simde_mm256_xor_si256(c2[7060],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[5643],simde_mm256_xor_si256(c2[6123],simde_mm256_xor_si256(c2[5667],simde_mm256_xor_si256(c2[5427],simde_mm256_xor_si256(c2[3506],simde_mm256_xor_si256(c2[2549],simde_mm256_xor_si256(c2[2575],simde_mm256_xor_si256(c2[1373],simde_mm256_xor_si256(c2[4010],simde_mm256_xor_si256(c2[2119],simde_mm256_xor_si256(c2[437],simde_mm256_xor_si256(c2[2834],simde_mm256_xor_si256(c2[6942],simde_mm256_xor_si256(c2[6702],simde_mm256_xor_si256(c2[4537],c2[2383]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[72]=_mm256_xor_si256(c2[1451],_mm256_xor_si256(c2[1211],_mm256_xor_si256(c2[4326],_mm256_xor_si256(c2[3846],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[6032],_mm256_xor_si256(c2[5792],_mm256_xor_si256(c2[2193],_mm256_xor_si256(c2[7473],_mm256_xor_si256(c2[7258],_mm256_xor_si256(c2[7018],_mm256_xor_si256(c2[1499],_mm256_xor_si256(c2[6319],_mm256_xor_si256(c2[6079],_mm256_xor_si256(c2[6558],_mm256_xor_si256(c2[2961],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[1305],_mm256_xor_si256(c2[2025],_mm256_xor_si256(c2[4688],_mm256_xor_si256(c2[7567],_mm256_xor_si256(c2[6842],_mm256_xor_si256(c2[7591],_mm256_xor_si256(c2[7351],_mm256_xor_si256(c2[5430],_mm256_xor_si256(c2[4473],_mm256_xor_si256(c2[4499],_mm256_xor_si256(c2[3297],_mm256_xor_si256(c2[1131],_mm256_xor_si256(c2[4043],_mm256_xor_si256(c2[2361],_mm256_xor_si256(c2[4758],_mm256_xor_si256(c2[1187],_mm256_xor_si256(c2[947],_mm256_xor_si256(c2[6461],_mm256_xor_si256(c2[4307],c2[4297]))))))))))))))))))))))))))))))))))));
+     d2[72]=simde_mm256_xor_si256(c2[1451],simde_mm256_xor_si256(c2[1211],simde_mm256_xor_si256(c2[4326],simde_mm256_xor_si256(c2[3846],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[6032],simde_mm256_xor_si256(c2[5792],simde_mm256_xor_si256(c2[2193],simde_mm256_xor_si256(c2[7473],simde_mm256_xor_si256(c2[7258],simde_mm256_xor_si256(c2[7018],simde_mm256_xor_si256(c2[1499],simde_mm256_xor_si256(c2[6319],simde_mm256_xor_si256(c2[6079],simde_mm256_xor_si256(c2[6558],simde_mm256_xor_si256(c2[2961],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[1305],simde_mm256_xor_si256(c2[2025],simde_mm256_xor_si256(c2[4688],simde_mm256_xor_si256(c2[7567],simde_mm256_xor_si256(c2[6842],simde_mm256_xor_si256(c2[7591],simde_mm256_xor_si256(c2[7351],simde_mm256_xor_si256(c2[5430],simde_mm256_xor_si256(c2[4473],simde_mm256_xor_si256(c2[4499],simde_mm256_xor_si256(c2[3297],simde_mm256_xor_si256(c2[1131],simde_mm256_xor_si256(c2[4043],simde_mm256_xor_si256(c2[2361],simde_mm256_xor_si256(c2[4758],simde_mm256_xor_si256(c2[1187],simde_mm256_xor_si256(c2[947],simde_mm256_xor_si256(c2[6461],simde_mm256_xor_si256(c2[4307],c2[4297]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[84]=_mm256_xor_si256(c2[4808],_mm256_xor_si256(c2[4568],_mm256_xor_si256(c2[6729],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[2165],_mm256_xor_si256(c2[7203],_mm256_xor_si256(c2[1685],_mm256_xor_si256(c2[1710],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[3631],_mm256_xor_si256(c2[5550],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[5552],_mm256_xor_si256(c2[5312],_mm256_xor_si256(c2[3866],_mm256_xor_si256(c2[2936],_mm256_xor_si256(c2[2696],_mm256_xor_si256(c2[4857],_mm256_xor_si256(c2[4856],_mm256_xor_si256(c2[7257],_mm256_xor_si256(c2[7017],_mm256_xor_si256(c2[1997],_mm256_xor_si256(c2[1757],_mm256_xor_si256(c2[3918],_mm256_xor_si256(c2[2236],_mm256_xor_si256(c2[4397],_mm256_xor_si256(c2[6318],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[7301],_mm256_xor_si256(c2[4662],_mm256_xor_si256(c2[6823],_mm256_xor_si256(c2[5382],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[7543],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[2527],_mm256_xor_si256(c2[3245],_mm256_xor_si256(c2[5646],_mm256_xor_si256(c2[5406],_mm256_xor_si256(c2[6365],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[3029],_mm256_xor_si256(c2[5190],_mm256_xor_si256(c2[1108],_mm256_xor_si256(c2[3269],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[2552],_mm256_xor_si256(c2[2312],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[2338],_mm256_xor_si256(c2[6654],_mm256_xor_si256(c2[1376],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[4008],_mm256_xor_si256(c2[7400],_mm256_xor_si256(c2[1882],_mm256_xor_si256(c2[5718],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[2837],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[4544],_mm256_xor_si256(c2[4304],_mm256_xor_si256(c2[6465],_mm256_xor_si256(c2[2139],_mm256_xor_si256(c2[4300],_mm256_xor_si256(c2[7664],_mm256_xor_si256(c2[2386],c2[2146]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[84]=simde_mm256_xor_si256(c2[4808],simde_mm256_xor_si256(c2[4568],simde_mm256_xor_si256(c2[6729],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[2165],simde_mm256_xor_si256(c2[7203],simde_mm256_xor_si256(c2[1685],simde_mm256_xor_si256(c2[1710],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[3631],simde_mm256_xor_si256(c2[5550],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[5552],simde_mm256_xor_si256(c2[5312],simde_mm256_xor_si256(c2[3866],simde_mm256_xor_si256(c2[2936],simde_mm256_xor_si256(c2[2696],simde_mm256_xor_si256(c2[4857],simde_mm256_xor_si256(c2[4856],simde_mm256_xor_si256(c2[7257],simde_mm256_xor_si256(c2[7017],simde_mm256_xor_si256(c2[1997],simde_mm256_xor_si256(c2[1757],simde_mm256_xor_si256(c2[3918],simde_mm256_xor_si256(c2[2236],simde_mm256_xor_si256(c2[4397],simde_mm256_xor_si256(c2[6318],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[7301],simde_mm256_xor_si256(c2[4662],simde_mm256_xor_si256(c2[6823],simde_mm256_xor_si256(c2[5382],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[7543],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[2527],simde_mm256_xor_si256(c2[3245],simde_mm256_xor_si256(c2[5646],simde_mm256_xor_si256(c2[5406],simde_mm256_xor_si256(c2[6365],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[3029],simde_mm256_xor_si256(c2[5190],simde_mm256_xor_si256(c2[1108],simde_mm256_xor_si256(c2[3269],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[2552],simde_mm256_xor_si256(c2[2312],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[2338],simde_mm256_xor_si256(c2[6654],simde_mm256_xor_si256(c2[1376],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[4008],simde_mm256_xor_si256(c2[7400],simde_mm256_xor_si256(c2[1882],simde_mm256_xor_si256(c2[5718],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[2837],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[4544],simde_mm256_xor
_si256(c2[4304],simde_mm256_xor_si256(c2[6465],simde_mm256_xor_si256(c2[2139],simde_mm256_xor_si256(c2[4300],simde_mm256_xor_si256(c2[7664],simde_mm256_xor_si256(c2[2386],c2[2146]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[96]=_mm256_xor_si256(c2[1450],_mm256_xor_si256(c2[1210],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[4325],_mm256_xor_si256(c2[3845],_mm256_xor_si256(c2[5283],_mm256_xor_si256(c2[6031],_mm256_xor_si256(c2[5791],_mm256_xor_si256(c2[2192],_mm256_xor_si256(c2[7472],_mm256_xor_si256(c2[1466],_mm256_xor_si256(c2[7257],_mm256_xor_si256(c2[7017],_mm256_xor_si256(c2[1498],_mm256_xor_si256(c2[6318],_mm256_xor_si256(c2[6078],_mm256_xor_si256(c2[6797],_mm256_xor_si256(c2[6557],_mm256_xor_si256(c2[2960],_mm256_xor_si256(c2[2022],_mm256_xor_si256(c2[1782],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[2024],_mm256_xor_si256(c2[4927],_mm256_xor_si256(c2[4687],_mm256_xor_si256(c2[7566],_mm256_xor_si256(c2[7590],_mm256_xor_si256(c2[7350],_mm256_xor_si256(c2[5669],_mm256_xor_si256(c2[5429],_mm256_xor_si256(c2[4472],_mm256_xor_si256(c2[4738],_mm256_xor_si256(c2[4498],_mm256_xor_si256(c2[3296],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[4042],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[4757],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[946],_mm256_xor_si256(c2[6700],_mm256_xor_si256(c2[6460],c2[4306]))))))))))))))))))))))))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[1450],simde_mm256_xor_si256(c2[1210],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[4325],simde_mm256_xor_si256(c2[3845],simde_mm256_xor_si256(c2[5283],simde_mm256_xor_si256(c2[6031],simde_mm256_xor_si256(c2[5791],simde_mm256_xor_si256(c2[2192],simde_mm256_xor_si256(c2[7472],simde_mm256_xor_si256(c2[1466],simde_mm256_xor_si256(c2[7257],simde_mm256_xor_si256(c2[7017],simde_mm256_xor_si256(c2[1498],simde_mm256_xor_si256(c2[6318],simde_mm256_xor_si256(c2[6078],simde_mm256_xor_si256(c2[6797],simde_mm256_xor_si256(c2[6557],simde_mm256_xor_si256(c2[2960],simde_mm256_xor_si256(c2[2022],simde_mm256_xor_si256(c2[1782],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[2024],simde_mm256_xor_si256(c2[4927],simde_mm256_xor_si256(c2[4687],simde_mm256_xor_si256(c2[7566],simde_mm256_xor_si256(c2[7590],simde_mm256_xor_si256(c2[7350],simde_mm256_xor_si256(c2[5669],simde_mm256_xor_si256(c2[5429],simde_mm256_xor_si256(c2[4472],simde_mm256_xor_si256(c2[4738],simde_mm256_xor_si256(c2[4498],simde_mm256_xor_si256(c2[3296],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[4042],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[4757],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[946],simde_mm256_xor_si256(c2[6700],simde_mm256_xor_si256(c2[6460],c2[4306]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[108]=_mm256_xor_si256(c2[1451],_mm256_xor_si256(c2[11],_mm256_xor_si256(c2[7450],_mm256_xor_si256(c2[4566],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[4086],_mm256_xor_si256(c2[2406],_mm256_xor_si256(c2[6032],_mm256_xor_si256(c2[4592],_mm256_xor_si256(c2[4352],_mm256_xor_si256(c2[2433],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[34],_mm256_xor_si256(c2[6033],_mm256_xor_si256(c2[6744],_mm256_xor_si256(c2[7258],_mm256_xor_si256(c2[5818],_mm256_xor_si256(c2[5578],_mm256_xor_si256(c2[1739],_mm256_xor_si256(c2[59],_mm256_xor_si256(c2[6319],_mm256_xor_si256(c2[4879],_mm256_xor_si256(c2[4639],_mm256_xor_si256(c2[6798],_mm256_xor_si256(c2[5118],_mm256_xor_si256(c2[3201],_mm256_xor_si256(c2[1521],_mm256_xor_si256(c2[2023],_mm256_xor_si256(c2[343],_mm256_xor_si256(c2[1545],_mm256_xor_si256(c2[7544],_mm256_xor_si256(c2[2265],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[4928],_mm256_xor_si256(c2[3248],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[6127],_mm256_xor_si256(c2[7591],_mm256_xor_si256(c2[6151],_mm256_xor_si256(c2[5911],_mm256_xor_si256(c2[5670],_mm256_xor_si256(c2[3990],_mm256_xor_si256(c2[4713],_mm256_xor_si256(c2[3033],_mm256_xor_si256(c2[4739],_mm256_xor_si256(c2[3059],_mm256_xor_si256(c2[3537],_mm256_xor_si256(c2[1857],_mm256_xor_si256(c2[4283],_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[2601],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[4998],_mm256_xor_si256(c2[3318],_mm256_xor_si256(c2[1156],_mm256_xor_si256(c2[1187],_mm256_xor_si256(c2[7426],_mm256_xor_si256(c2[7186],_mm256_xor_si256(c2[6701],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[4547],c2[2867])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[108]=simde_mm256_xor_si256(c2[1451],simde_mm256_xor_si256(c2[11],simde_mm256_xor_si256(c2[7450],simde_mm256_xor_si256(c2[4566],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[4086],simde_mm256_xor_si256(c2[2406],simde_mm256_xor_si256(c2[6032],simde_mm256_xor_si256(c2[4592],simde_mm256_xor_si256(c2[4352],simde_mm256_xor_si256(c2[2433],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[34],simde_mm256_xor_si256(c2[6033],simde_mm256_xor_si256(c2[6744],simde_mm256_xor_si256(c2[7258],simde_mm256_xor_si256(c2[5818],simde_mm256_xor_si256(c2[5578],simde_mm256_xor_si256(c2[1739],simde_mm256_xor_si256(c2[59],simde_mm256_xor_si256(c2[6319],simde_mm256_xor_si256(c2[4879],simde_mm256_xor_si256(c2[4639],simde_mm256_xor_si256(c2[6798],simde_mm256_xor_si256(c2[5118],simde_mm256_xor_si256(c2[3201],simde_mm256_xor_si256(c2[1521],simde_mm256_xor_si256(c2[2023],simde_mm256_xor_si256(c2[343],simde_mm256_xor_si256(c2[1545],simde_mm256_xor_si256(c2[7544],simde_mm256_xor_si256(c2[2265],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[4928],simde_mm256_xor_si256(c2[3248],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[6127],simde_mm256_xor_si256(c2[7591],simde_mm256_xor_si256(c2[6151],simde_mm256_xor_si256(c2[5911],simde_mm256_xor_si256(c2[5670],simde_mm256_xor_si256(c2[3990],simde_mm256_xor_si256(c2[4713],simde_mm256_xor_si256(c2[3033],simde_mm256_xor_si256(c2[4739],simde_mm256_xor_si256(c2[3059],simde_mm256_xor_si256(c2[3537],simde_mm256_xor_si256(c2[1857],simde_mm256_xor_si256(c2[4283],simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[2601],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[4998],simde_mm256_xor_si256(c2[3318],simde_mm256_xor_si256(c2[1156],simde_mm256_xor_si256(c2[1187],simde_mm256_xor_si256(c2[7426],simde_mm256_xor_si256(c2[7186],simde_mm256_xor_si256(c2[6701],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[4547],c2[2867])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[120]=_mm256_xor_si256(c2[6481],_mm256_xor_si256(c2[1947],_mm256_xor_si256(c2[5424],c2[4969])));
+     d2[120]=simde_mm256_xor_si256(c2[6481],simde_mm256_xor_si256(c2[1947],simde_mm256_xor_si256(c2[5424],c2[4969])));
 
 //row: 11
-     d2[132]=_mm256_xor_si256(c2[1690],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[4325],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[6271],_mm256_xor_si256(c2[2672],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[273],_mm256_xor_si256(c2[7497],_mm256_xor_si256(c2[2218],_mm256_xor_si256(c2[1978],_mm256_xor_si256(c2[6558],_mm256_xor_si256(c2[7037],_mm256_xor_si256(c2[3440],_mm256_xor_si256(c2[2262],_mm256_xor_si256(c2[1784],_mm256_xor_si256(c2[2744],_mm256_xor_si256(c2[2504],_mm256_xor_si256(c2[5167],_mm256_xor_si256(c2[607],_mm256_xor_si256(c2[367],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[5909],_mm256_xor_si256(c2[5192],_mm256_xor_si256(c2[4952],_mm256_xor_si256(c2[4978],_mm256_xor_si256(c2[4016],_mm256_xor_si256(c2[3776],_mm256_xor_si256(c2[6890],_mm256_xor_si256(c2[4522],_mm256_xor_si256(c2[2840],_mm256_xor_si256(c2[5477],_mm256_xor_si256(c2[5237],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[6940],_mm256_xor_si256(c2[5026],_mm256_xor_si256(c2[4786],c2[3581])))))))))))))))))))))))))))))))))))));
+     d2[132]=simde_mm256_xor_si256(c2[1690],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[4325],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[6271],simde_mm256_xor_si256(c2[2672],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[273],simde_mm256_xor_si256(c2[7497],simde_mm256_xor_si256(c2[2218],simde_mm256_xor_si256(c2[1978],simde_mm256_xor_si256(c2[6558],simde_mm256_xor_si256(c2[7037],simde_mm256_xor_si256(c2[3440],simde_mm256_xor_si256(c2[2262],simde_mm256_xor_si256(c2[1784],simde_mm256_xor_si256(c2[2744],simde_mm256_xor_si256(c2[2504],simde_mm256_xor_si256(c2[5167],simde_mm256_xor_si256(c2[607],simde_mm256_xor_si256(c2[367],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[5909],simde_mm256_xor_si256(c2[5192],simde_mm256_xor_si256(c2[4952],simde_mm256_xor_si256(c2[4978],simde_mm256_xor_si256(c2[4016],simde_mm256_xor_si256(c2[3776],simde_mm256_xor_si256(c2[6890],simde_mm256_xor_si256(c2[4522],simde_mm256_xor_si256(c2[2840],simde_mm256_xor_si256(c2[5477],simde_mm256_xor_si256(c2[5237],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[6940],simde_mm256_xor_si256(c2[5026],simde_mm256_xor_si256(c2[4786],c2[3581])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[144]=_mm256_xor_si256(c2[6005],_mm256_xor_si256(c2[5765],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[2907],_mm256_xor_si256(c2[2667],_mm256_xor_si256(c2[6747],_mm256_xor_si256(c2[4348],_mm256_xor_si256(c2[1705],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[3893],_mm256_xor_si256(c2[6053],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[2954],_mm256_xor_si256(c2[3433],_mm256_xor_si256(c2[7515],_mm256_xor_si256(c2[7034],_mm256_xor_si256(c2[6337],_mm256_xor_si256(c2[5859],_mm256_xor_si256(c2[6579],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[4442],_mm256_xor_si256(c2[4466],_mm256_xor_si256(c2[4226],_mm256_xor_si256(c2[2305],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[1374],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[918],_mm256_xor_si256(c2[6915],_mm256_xor_si256(c2[1633],_mm256_xor_si256(c2[5741],_mm256_xor_si256(c2[5501],_mm256_xor_si256(c2[3336],c2[1182]))))))))))))))))))))))))))))))))));
+     d2[144]=simde_mm256_xor_si256(c2[6005],simde_mm256_xor_si256(c2[5765],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[2907],simde_mm256_xor_si256(c2[2667],simde_mm256_xor_si256(c2[6747],simde_mm256_xor_si256(c2[4348],simde_mm256_xor_si256(c2[1705],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[3893],simde_mm256_xor_si256(c2[6053],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[2954],simde_mm256_xor_si256(c2[3433],simde_mm256_xor_si256(c2[7515],simde_mm256_xor_si256(c2[7034],simde_mm256_xor_si256(c2[6337],simde_mm256_xor_si256(c2[5859],simde_mm256_xor_si256(c2[6579],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[4442],simde_mm256_xor_si256(c2[4466],simde_mm256_xor_si256(c2[4226],simde_mm256_xor_si256(c2[2305],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[1374],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[918],simde_mm256_xor_si256(c2[6915],simde_mm256_xor_si256(c2[1633],simde_mm256_xor_si256(c2[5741],simde_mm256_xor_si256(c2[5501],simde_mm256_xor_si256(c2[3336],c2[1182]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[156]=_mm256_xor_si256(c2[4570],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[7205],_mm256_xor_si256(c2[4081],_mm256_xor_si256(c2[1472],_mm256_xor_si256(c2[5552],_mm256_xor_si256(c2[3393],_mm256_xor_si256(c2[3153],_mm256_xor_si256(c2[6987],_mm256_xor_si256(c2[2698],_mm256_xor_si256(c2[5098],_mm256_xor_si256(c2[4858],_mm256_xor_si256(c2[1759],_mm256_xor_si256(c2[2238],_mm256_xor_si256(c2[6320],_mm256_xor_si256(c2[5142],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[5624],_mm256_xor_si256(c2[5384],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[3487],_mm256_xor_si256(c2[3247],_mm256_xor_si256(c2[3031],_mm256_xor_si256(c2[1110],_mm256_xor_si256(c2[393],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[179],_mm256_xor_si256(c2[6896],_mm256_xor_si256(c2[6656],_mm256_xor_si256(c2[7402],_mm256_xor_si256(c2[5720],_mm256_xor_si256(c2[678],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[4306],_mm256_xor_si256(c2[2141],_mm256_xor_si256(c2[227],c2[7666])))))))))))))))))))))))))))))))))))));
+     d2[156]=simde_mm256_xor_si256(c2[4570],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[7205],simde_mm256_xor_si256(c2[4081],simde_mm256_xor_si256(c2[1472],simde_mm256_xor_si256(c2[5552],simde_mm256_xor_si256(c2[3393],simde_mm256_xor_si256(c2[3153],simde_mm256_xor_si256(c2[6987],simde_mm256_xor_si256(c2[2698],simde_mm256_xor_si256(c2[5098],simde_mm256_xor_si256(c2[4858],simde_mm256_xor_si256(c2[1759],simde_mm256_xor_si256(c2[2238],simde_mm256_xor_si256(c2[6320],simde_mm256_xor_si256(c2[5142],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[5624],simde_mm256_xor_si256(c2[5384],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[3487],simde_mm256_xor_si256(c2[3247],simde_mm256_xor_si256(c2[3031],simde_mm256_xor_si256(c2[1110],simde_mm256_xor_si256(c2[393],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[179],simde_mm256_xor_si256(c2[6896],simde_mm256_xor_si256(c2[6656],simde_mm256_xor_si256(c2[7402],simde_mm256_xor_si256(c2[5720],simde_mm256_xor_si256(c2[678],simde_mm256_xor_si256(c2[438],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[4306],simde_mm256_xor_si256(c2[2141],simde_mm256_xor_si256(c2[227],c2[7666])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[168]=_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[486],_mm256_xor_si256(c2[3850],_mm256_xor_si256(c2[3601],_mm256_xor_si256(c2[6965],_mm256_xor_si256(c2[3121],_mm256_xor_si256(c2[6485],_mm256_xor_si256(c2[5307],_mm256_xor_si256(c2[5067],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[1468],_mm256_xor_si256(c2[4832],_mm256_xor_si256(c2[6748],_mm256_xor_si256(c2[2673],_mm256_xor_si256(c2[2433],_mm256_xor_si256(c2[4584],_mm256_xor_si256(c2[6533],_mm256_xor_si256(c2[6293],_mm256_xor_si256(c2[1978],_mm256_xor_si256(c2[774],_mm256_xor_si256(c2[4378],_mm256_xor_si256(c2[4138],_mm256_xor_si256(c2[5594],_mm256_xor_si256(c2[5354],_mm256_xor_si256(c2[1039],_mm256_xor_si256(c2[5833],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[2236],_mm256_xor_si256(c2[5600],_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[4422],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[3944],_mm256_xor_si256(c2[1300],_mm256_xor_si256(c2[4904],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[3963],_mm256_xor_si256(c2[7327],_mm256_xor_si256(c2[6842],_mm256_xor_si256(c2[2767],_mm256_xor_si256(c2[2527],_mm256_xor_si256(c2[6866],_mm256_xor_si256(c2[6626],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[4705],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[3748],_mm256_xor_si256(c2[7352],_mm256_xor_si256(c2[7112],_mm256_xor_si256(c2[5427],_mm256_xor_si256(c2[3774],_mm256_xor_si256(c2[7138],_mm256_xor_si256(c2[2572],_mm256_xor_si256(c2[6176],_mm256_xor_si256(c2[5936],_mm256_xor_si256(c2[3318],_mm256_xor_si256(c2[6682],_mm256_xor_si256(c2[1636],_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[4033],_mm256_xor_si256(c2[7637],_mm256_xor_si256(c2[7397],_mm256_xor_si256(c2[462],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[3586],_mm256_xor_si256(c2[5736],_mm256_xor_si256(c2[1421],_mm256_xor_si256(c2[3582],_mm256_xor_si256(c2[7186],c2[6946])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[168]=simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[486],simde_mm256_xor_si256(c2[3850],simde_mm256_xor_si256(c2[3601],simde_mm256_xor_si256(c2[6965],simde_mm256_xor_si256(c2[3121],simde_mm256_xor_si256(c2[6485],simde_mm256_xor_si256(c2[5307],simde_mm256_xor_si256(c2[5067],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[1468],simde_mm256_xor_si256(c2[4832],simde_mm256_xor_si256(c2[6748],simde_mm256_xor_si256(c2[2673],simde_mm256_xor_si256(c2[2433],simde_mm256_xor_si256(c2[4584],simde_mm256_xor_si256(c2[6533],simde_mm256_xor_si256(c2[6293],simde_mm256_xor_si256(c2[1978],simde_mm256_xor_si256(c2[774],simde_mm256_xor_si256(c2[4378],simde_mm256_xor_si256(c2[4138],simde_mm256_xor_si256(c2[5594],simde_mm256_xor_si256(c2[5354],simde_mm256_xor_si256(c2[1039],simde_mm256_xor_si256(c2[5833],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[2236],simde_mm256_xor_si256(c2[5600],simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[4422],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[3944],simde_mm256_xor_si256(c2[1300],simde_mm256_xor_si256(c2[4904],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[3963],simde_mm256_xor_si256(c2[7327],simde_mm256_xor_si256(c2[6842],simde_mm256_xor_si256(c2[2767],simde_mm256_xor_si256(c2[2527],simde_mm256_xor_si256(c2[6866],simde_mm256_xor_si256(c2[6626],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[4705],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[3748],simde_mm256_xor_si256(c2[7352],simde_mm256_xor_si256(c2[7112],simde_mm256_xor_si256(c2[5427],simde_mm256_xor_si256(c2[3774],simde_mm256_xor_si256(c2[7138],simde_mm256_xor_si256(c2[2572],simde_mm256_xor_si256(c2[6176],simde_mm256_xor_si256(c2[5936],simde_mm256_xor_si256(c2[3318],simde_mm256_xor_si256(c2[6682],simde_mm256_xor_si256(c2[1636],simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[4033],simde_mm256_xor_si256(c2[7637],simde_mm256_xor_si256(c2[7397],simde_mm256_xor_si256(c2[462],simde_mm256_xor_si256(c2[222],simde_mm25
6_xor_si256(c2[3586],simde_mm256_xor_si256(c2[5736],simde_mm256_xor_si256(c2[1421],simde_mm256_xor_si256(c2[3582],simde_mm256_xor_si256(c2[7186],c2[6946])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[180]=_mm256_xor_si256(c2[2887],_mm256_xor_si256(c2[7447],_mm256_xor_si256(c2[7207],_mm256_xor_si256(c2[6002],_mm256_xor_si256(c2[2643],_mm256_xor_si256(c2[5522],_mm256_xor_si256(c2[2163],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[7468],_mm256_xor_si256(c2[4349],_mm256_xor_si256(c2[4109],_mm256_xor_si256(c2[3869],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[5790],_mm256_xor_si256(c2[1015],_mm256_xor_si256(c2[5575],_mm256_xor_si256(c2[5335],_mm256_xor_si256(c2[3175],_mm256_xor_si256(c2[7495],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[4636],_mm256_xor_si256(c2[4396],_mm256_xor_si256(c2[555],_mm256_xor_si256(c2[4875],_mm256_xor_si256(c2[4637],_mm256_xor_si256(c2[1278],_mm256_xor_si256(c2[3459],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[7301],_mm256_xor_si256(c2[3701],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[6364],_mm256_xor_si256(c2[3005],_mm256_xor_si256(c2[1564],_mm256_xor_si256(c2[5884],_mm256_xor_si256(c2[1348],_mm256_xor_si256(c2[5908],_mm256_xor_si256(c2[5668],_mm256_xor_si256(c2[7106],_mm256_xor_si256(c2[3747],_mm256_xor_si256(c2[6149],_mm256_xor_si256(c2[2790],_mm256_xor_si256(c2[6175],_mm256_xor_si256(c2[2816],_mm256_xor_si256(c2[4973],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[5719],_mm256_xor_si256(c2[2360],_mm256_xor_si256(c2[4037],_mm256_xor_si256(c2[678],_mm256_xor_si256(c2[6434],_mm256_xor_si256(c2[3075],_mm256_xor_si256(c2[2623],_mm256_xor_si256(c2[7183],_mm256_xor_si256(c2[6943],_mm256_xor_si256(c2[458],_mm256_xor_si256(c2[4778],_mm256_xor_si256(c2[5983],c2[2624]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[180]=simde_mm256_xor_si256(c2[2887],simde_mm256_xor_si256(c2[7447],simde_mm256_xor_si256(c2[7207],simde_mm256_xor_si256(c2[6002],simde_mm256_xor_si256(c2[2643],simde_mm256_xor_si256(c2[5522],simde_mm256_xor_si256(c2[2163],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[7468],simde_mm256_xor_si256(c2[4349],simde_mm256_xor_si256(c2[4109],simde_mm256_xor_si256(c2[3869],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[5790],simde_mm256_xor_si256(c2[1015],simde_mm256_xor_si256(c2[5575],simde_mm256_xor_si256(c2[5335],simde_mm256_xor_si256(c2[3175],simde_mm256_xor_si256(c2[7495],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[4636],simde_mm256_xor_si256(c2[4396],simde_mm256_xor_si256(c2[555],simde_mm256_xor_si256(c2[4875],simde_mm256_xor_si256(c2[4637],simde_mm256_xor_si256(c2[1278],simde_mm256_xor_si256(c2[3459],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[7301],simde_mm256_xor_si256(c2[3701],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[6364],simde_mm256_xor_si256(c2[3005],simde_mm256_xor_si256(c2[1564],simde_mm256_xor_si256(c2[5884],simde_mm256_xor_si256(c2[1348],simde_mm256_xor_si256(c2[5908],simde_mm256_xor_si256(c2[5668],simde_mm256_xor_si256(c2[7106],simde_mm256_xor_si256(c2[3747],simde_mm256_xor_si256(c2[6149],simde_mm256_xor_si256(c2[2790],simde_mm256_xor_si256(c2[6175],simde_mm256_xor_si256(c2[2816],simde_mm256_xor_si256(c2[4973],simde_mm256_xor_si256(c2[1614],simde_mm256_xor_si256(c2[5719],simde_mm256_xor_si256(c2[2360],simde_mm256_xor_si256(c2[4037],simde_mm256_xor_si256(c2[678],simde_mm256_xor_si256(c2[6434],simde_mm256_xor_si256(c2[3075],simde_mm256_xor_si256(c2[2623],simde_mm256_xor_si256(c2[7183],simde_mm256_xor_si256(c2[6943],simde_mm256_xor_si256(c2[458],simde_mm256_xor_si256(c2[4778],simde_mm256_xor_si256(c2[5983],c2[2624]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[192]=_mm256_xor_si256(c2[5049],_mm256_xor_si256(c2[4809],_mm256_xor_si256(c2[3367],_mm256_xor_si256(c2[3127],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[6482],_mm256_xor_si256(c2[6242],_mm256_xor_si256(c2[7444],_mm256_xor_si256(c2[5762],_mm256_xor_si256(c2[1951],_mm256_xor_si256(c2[1711],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[5791],_mm256_xor_si256(c2[4109],_mm256_xor_si256(c2[3392],_mm256_xor_si256(c2[1710],_mm256_xor_si256(c2[5546],_mm256_xor_si256(c2[3177],_mm256_xor_si256(c2[2937],_mm256_xor_si256(c2[1495],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[5097],_mm256_xor_si256(c2[3415],_mm256_xor_si256(c2[2238],_mm256_xor_si256(c2[1998],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[316],_mm256_xor_si256(c2[2477],_mm256_xor_si256(c2[1035],_mm256_xor_si256(c2[795],_mm256_xor_si256(c2[6559],_mm256_xor_si256(c2[4877],_mm256_xor_si256(c2[5381],_mm256_xor_si256(c2[3939],_mm256_xor_si256(c2[3699],_mm256_xor_si256(c2[4903],_mm256_xor_si256(c2[3221],_mm256_xor_si256(c2[5623],_mm256_xor_si256(c2[3941],_mm256_xor_si256(c2[607],_mm256_xor_si256(c2[6844],_mm256_xor_si256(c2[6604],_mm256_xor_si256(c2[3486],_mm256_xor_si256(c2[1804],_mm256_xor_si256(c2[3510],_mm256_xor_si256(c2[3270],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[1588],_mm256_xor_si256(c2[1349],_mm256_xor_si256(c2[7586],_mm256_xor_si256(c2[7346],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[418],_mm256_xor_si256(c2[6655],_mm256_xor_si256(c2[6415],_mm256_xor_si256(c2[6895],_mm256_xor_si256(c2[5213],_mm256_xor_si256(c2[7641],_mm256_xor_si256(c2[6199],_mm256_xor_si256(c2[5959],_mm256_xor_si256(c2[5959],_mm256_xor_si256(c2[4277],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[6674],_mm256_xor_si256(c2[4785],_mm256_xor_si256(c2[4545],_mm256_xor_si256(c2[3103],_mm256_xor_si256(c2[2863],_mm256_xor_si256(c2[2380],_mm256_xor_si256(c2[938],_mm256_xor_si256(c2[698],_mm256_xor_si256(c2[226],_mm256_xor_si256(c2[6223],c2[4301]))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))))));
+     d2[192]=simde_mm256_xor_si256(c2[5049],simde_mm256_xor_si256(c2[4809],simde_mm256_xor_si256(c2[3367],simde_mm256_xor_si256(c2[3127],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[6482],simde_mm256_xor_si256(c2[6242],simde_mm256_xor_si256(c2[7444],simde_mm256_xor_si256(c2[5762],simde_mm256_xor_si256(c2[1951],simde_mm256_xor_si256(c2[1711],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[5791],simde_mm256_xor_si256(c2[4109],simde_mm256_xor_si256(c2[3392],simde_mm256_xor_si256(c2[1710],simde_mm256_xor_si256(c2[5546],simde_mm256_xor_si256(c2[3177],simde_mm256_xor_si256(c2[2937],simde_mm256_xor_si256(c2[1495],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[5097],simde_mm256_xor_si256(c2[3415],simde_mm256_xor_si256(c2[2238],simde_mm256_xor_si256(c2[1998],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[316],simde_mm256_xor_si256(c2[2477],simde_mm256_xor_si256(c2[1035],simde_mm256_xor_si256(c2[795],simde_mm256_xor_si256(c2[6559],simde_mm256_xor_si256(c2[4877],simde_mm256_xor_si256(c2[5381],simde_mm256_xor_si256(c2[3939],simde_mm256_xor_si256(c2[3699],simde_mm256_xor_si256(c2[4903],simde_mm256_xor_si256(c2[3221],simde_mm256_xor_si256(c2[5623],simde_mm256_xor_si256(c2[3941],simde_mm256_xor_si256(c2[607],simde_mm256_xor_si256(c2[6844],simde_mm256_xor_si256(c2[6604],simde_mm256_xor_si256(c2[3486],simde_mm256_xor_si256(c2[1804],simde_mm256_xor_si256(c2[3510],simde_mm256_xor_si256(c2[3270],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[1588],simde_mm256_xor_si256(c2[1349],simde_mm256_xor_si256(c2[7586],simde_mm256_xor_si256(c2[7346],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[418],simde_mm256_xor_si256(c2[6655],simde_mm256_xor_si256(c2[6415],simde_mm256_xor_si256(c2[6895],simde_mm256_xor_si256(c2[5213],simde_mm256_xor_si256(c2[7641],simde_mm256_xor_si256(c2[6199],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[5959],simde_mm256_xor_si256(c2[4277],simde_mm256_
xor_si256(c2[677],simde_mm256_xor_si256(c2[6674],simde_mm256_xor_si256(c2[4785],simde_mm256_xor_si256(c2[4545],simde_mm256_xor_si256(c2[3103],simde_mm256_xor_si256(c2[2863],simde_mm256_xor_si256(c2[2380],simde_mm256_xor_si256(c2[938],simde_mm256_xor_si256(c2[698],simde_mm256_xor_si256(c2[226],simde_mm256_xor_si256(c2[6223],c2[4301])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[204]=_mm256_xor_si256(c2[5525],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[7200],_mm256_xor_si256(c2[2427],_mm256_xor_si256(c2[2187],_mm256_xor_si256(c2[1707],_mm256_xor_si256(c2[1467],_mm256_xor_si256(c2[6267],_mm256_xor_si256(c2[5547],_mm256_xor_si256(c2[3868],_mm256_xor_si256(c2[3148],_mm256_xor_si256(c2[7228],_mm256_xor_si256(c2[3653],_mm256_xor_si256(c2[3413],_mm256_xor_si256(c2[2933],_mm256_xor_si256(c2[2693],_mm256_xor_si256(c2[5573],_mm256_xor_si256(c2[4853],_mm256_xor_si256(c2[2714],_mm256_xor_si256(c2[2474],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[1754],_mm256_xor_si256(c2[2953],_mm256_xor_si256(c2[2473],_mm256_xor_si256(c2[2233],_mm256_xor_si256(c2[7035],_mm256_xor_si256(c2[6315],_mm256_xor_si256(c2[5857],_mm256_xor_si256(c2[5377],_mm256_xor_si256(c2[5137],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[4659],_mm256_xor_si256(c2[6099],_mm256_xor_si256(c2[5379],_mm256_xor_si256(c2[1083],_mm256_xor_si256(c2[603],_mm256_xor_si256(c2[363],_mm256_xor_si256(c2[3962],_mm256_xor_si256(c2[3242],_mm256_xor_si256(c2[5640],_mm256_xor_si256(c2[3986],_mm256_xor_si256(c2[3746],_mm256_xor_si256(c2[3266],_mm256_xor_si256(c2[3026],_mm256_xor_si256(c2[1825],_mm256_xor_si256(c2[1345],_mm256_xor_si256(c2[1105],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[894],_mm256_xor_si256(c2[414],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[7371],_mm256_xor_si256(c2[6651],_mm256_xor_si256(c2[438],_mm256_xor_si256(c2[7637],_mm256_xor_si256(c2[7397],_mm256_xor_si256(c2[6435],_mm256_xor_si256(c2[5715],_mm256_xor_si256(c2[1153],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[5261],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[4541],_mm256_xor_si256(c2[4301],_mm256_xor_si256(c2[2856],_mm256_xor_si256(c2[2376],_mm256_xor_si256(c2[2136],_mm256_xor_si256(c2[702],c2[7661]))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))))))))))))))))))));
+     d2[204]=simde_mm256_xor_si256(c2[5525],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[7200],simde_mm256_xor_si256(c2[2427],simde_mm256_xor_si256(c2[2187],simde_mm256_xor_si256(c2[1707],simde_mm256_xor_si256(c2[1467],simde_mm256_xor_si256(c2[6267],simde_mm256_xor_si256(c2[5547],simde_mm256_xor_si256(c2[3868],simde_mm256_xor_si256(c2[3148],simde_mm256_xor_si256(c2[7228],simde_mm256_xor_si256(c2[3653],simde_mm256_xor_si256(c2[3413],simde_mm256_xor_si256(c2[2933],simde_mm256_xor_si256(c2[2693],simde_mm256_xor_si256(c2[5573],simde_mm256_xor_si256(c2[4853],simde_mm256_xor_si256(c2[2714],simde_mm256_xor_si256(c2[2474],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[1754],simde_mm256_xor_si256(c2[2953],simde_mm256_xor_si256(c2[2473],simde_mm256_xor_si256(c2[2233],simde_mm256_xor_si256(c2[7035],simde_mm256_xor_si256(c2[6315],simde_mm256_xor_si256(c2[5857],simde_mm256_xor_si256(c2[5377],simde_mm256_xor_si256(c2[5137],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[4659],simde_mm256_xor_si256(c2[6099],simde_mm256_xor_si256(c2[5379],simde_mm256_xor_si256(c2[1083],simde_mm256_xor_si256(c2[603],simde_mm256_xor_si256(c2[363],simde_mm256_xor_si256(c2[3962],simde_mm256_xor_si256(c2[3242],simde_mm256_xor_si256(c2[5640],simde_mm256_xor_si256(c2[3986],simde_mm256_xor_si256(c2[3746],simde_mm256_xor_si256(c2[3266],simde_mm256_xor_si256(c2[3026],simde_mm256_xor_si256(c2[1825],simde_mm256_xor_si256(c2[1345],simde_mm256_xor_si256(c2[1105],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[894],simde_mm256_xor_si256(c2[414],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[7371],simde_mm256_xor_si256(c2[6651],simde_mm256_xor_si256(c2[438],simde_mm256_xor_si256(c2[7637],simde_mm256_xor_si256(c2[7397],simde_mm256_xor_si256(c2[6435],simde_mm256_xor_
si256(c2[5715],simde_mm256_xor_si256(c2[1153],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[5261],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[4541],simde_mm256_xor_si256(c2[4301],simde_mm256_xor_si256(c2[2856],simde_mm256_xor_si256(c2[2376],simde_mm256_xor_si256(c2[2136],simde_mm256_xor_si256(c2[702],c2[7661])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[216]=_mm256_xor_si256(c2[6245],_mm256_xor_si256(c2[1584],c2[3529]));
+     d2[216]=simde_mm256_xor_si256(c2[6245],simde_mm256_xor_si256(c2[1584],c2[3529]));
 
 //row: 19
-     d2[228]=_mm256_xor_si256(c2[2170],_mm256_xor_si256(c2[5285],_mm256_xor_si256(c2[4805],_mm256_xor_si256(c2[6241],_mm256_xor_si256(c2[6751],_mm256_xor_si256(c2[3152],_mm256_xor_si256(c2[753],_mm256_xor_si256(c2[2425],_mm256_xor_si256(c2[298],_mm256_xor_si256(c2[2458],_mm256_xor_si256(c2[7038],_mm256_xor_si256(c2[7517],_mm256_xor_si256(c2[3920],_mm256_xor_si256(c2[2742],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[2984],_mm256_xor_si256(c2[5647],_mm256_xor_si256(c2[847],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[6389],_mm256_xor_si256(c2[5432],_mm256_xor_si256(c2[5458],_mm256_xor_si256(c2[4256],_mm256_xor_si256(c2[5002],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[5717],_mm256_xor_si256(c2[1906],_mm256_xor_si256(c2[7420],c2[5266]))))))))))))))))))))))))))));
+     d2[228]=simde_mm256_xor_si256(c2[2170],simde_mm256_xor_si256(c2[5285],simde_mm256_xor_si256(c2[4805],simde_mm256_xor_si256(c2[6241],simde_mm256_xor_si256(c2[6751],simde_mm256_xor_si256(c2[3152],simde_mm256_xor_si256(c2[753],simde_mm256_xor_si256(c2[2425],simde_mm256_xor_si256(c2[298],simde_mm256_xor_si256(c2[2458],simde_mm256_xor_si256(c2[7038],simde_mm256_xor_si256(c2[7517],simde_mm256_xor_si256(c2[3920],simde_mm256_xor_si256(c2[2742],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[2984],simde_mm256_xor_si256(c2[5647],simde_mm256_xor_si256(c2[847],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[6389],simde_mm256_xor_si256(c2[5432],simde_mm256_xor_si256(c2[5458],simde_mm256_xor_si256(c2[4256],simde_mm256_xor_si256(c2[5002],simde_mm256_xor_si256(c2[3320],simde_mm256_xor_si256(c2[5717],simde_mm256_xor_si256(c2[1906],simde_mm256_xor_si256(c2[7420],c2[5266]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[240]=_mm256_xor_si256(c2[1690],_mm256_xor_si256(c2[1450],_mm256_xor_si256(c2[4565],_mm256_xor_si256(c2[4085],_mm256_xor_si256(c2[6271],_mm256_xor_si256(c2[6031],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[2906],_mm256_xor_si256(c2[7497],_mm256_xor_si256(c2[7257],_mm256_xor_si256(c2[1738],_mm256_xor_si256(c2[6558],_mm256_xor_si256(c2[6318],_mm256_xor_si256(c2[6797],_mm256_xor_si256(c2[3200],_mm256_xor_si256(c2[2022],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[2264],_mm256_xor_si256(c2[7057],_mm256_xor_si256(c2[4927],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[7590],_mm256_xor_si256(c2[5669],_mm256_xor_si256(c2[4712],_mm256_xor_si256(c2[4738],_mm256_xor_si256(c2[3536],_mm256_xor_si256(c2[4282],_mm256_xor_si256(c2[2600],_mm256_xor_si256(c2[4997],_mm256_xor_si256(c2[1426],_mm256_xor_si256(c2[1186],_mm256_xor_si256(c2[6700],c2[4546]))))))))))))))))))))))))))))))))));
+     d2[240]=simde_mm256_xor_si256(c2[1690],simde_mm256_xor_si256(c2[1450],simde_mm256_xor_si256(c2[4565],simde_mm256_xor_si256(c2[4085],simde_mm256_xor_si256(c2[6271],simde_mm256_xor_si256(c2[6031],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[2906],simde_mm256_xor_si256(c2[7497],simde_mm256_xor_si256(c2[7257],simde_mm256_xor_si256(c2[1738],simde_mm256_xor_si256(c2[6558],simde_mm256_xor_si256(c2[6318],simde_mm256_xor_si256(c2[6797],simde_mm256_xor_si256(c2[3200],simde_mm256_xor_si256(c2[2022],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[2264],simde_mm256_xor_si256(c2[7057],simde_mm256_xor_si256(c2[4927],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[7590],simde_mm256_xor_si256(c2[5669],simde_mm256_xor_si256(c2[4712],simde_mm256_xor_si256(c2[4738],simde_mm256_xor_si256(c2[3536],simde_mm256_xor_si256(c2[4282],simde_mm256_xor_si256(c2[2600],simde_mm256_xor_si256(c2[4997],simde_mm256_xor_si256(c2[1426],simde_mm256_xor_si256(c2[1186],simde_mm256_xor_si256(c2[6700],c2[4546]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[252]=_mm256_xor_si256(c2[3847],_mm256_xor_si256(c2[6962],_mm256_xor_si256(c2[6482],_mm256_xor_si256(c2[6964],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[4829],_mm256_xor_si256(c2[2670],_mm256_xor_si256(c2[2430],_mm256_xor_si256(c2[1975],_mm256_xor_si256(c2[4375],_mm256_xor_si256(c2[4135],_mm256_xor_si256(c2[1036],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[5597],_mm256_xor_si256(c2[4419],_mm256_xor_si256(c2[3941],_mm256_xor_si256(c2[4901],_mm256_xor_si256(c2[4661],_mm256_xor_si256(c2[7324],_mm256_xor_si256(c2[2764],_mm256_xor_si256(c2[2524],_mm256_xor_si256(c2[2308],_mm256_xor_si256(c2[387],_mm256_xor_si256(c2[7349],_mm256_xor_si256(c2[7109],_mm256_xor_si256(c2[7135],_mm256_xor_si256(c2[6173],_mm256_xor_si256(c2[5933],_mm256_xor_si256(c2[6679],_mm256_xor_si256(c2[4997],_mm256_xor_si256(c2[7634],_mm256_xor_si256(c2[7394],_mm256_xor_si256(c2[3797],_mm256_xor_si256(c2[3583],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[7183],c2[6943]))))))))))))))))))))))))))))))))))));
+     d2[252]=simde_mm256_xor_si256(c2[3847],simde_mm256_xor_si256(c2[6962],simde_mm256_xor_si256(c2[6482],simde_mm256_xor_si256(c2[6964],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[4829],simde_mm256_xor_si256(c2[2670],simde_mm256_xor_si256(c2[2430],simde_mm256_xor_si256(c2[1975],simde_mm256_xor_si256(c2[4375],simde_mm256_xor_si256(c2[4135],simde_mm256_xor_si256(c2[1036],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[5597],simde_mm256_xor_si256(c2[4419],simde_mm256_xor_si256(c2[3941],simde_mm256_xor_si256(c2[4901],simde_mm256_xor_si256(c2[4661],simde_mm256_xor_si256(c2[7324],simde_mm256_xor_si256(c2[2764],simde_mm256_xor_si256(c2[2524],simde_mm256_xor_si256(c2[2308],simde_mm256_xor_si256(c2[387],simde_mm256_xor_si256(c2[7349],simde_mm256_xor_si256(c2[7109],simde_mm256_xor_si256(c2[7135],simde_mm256_xor_si256(c2[6173],simde_mm256_xor_si256(c2[5933],simde_mm256_xor_si256(c2[6679],simde_mm256_xor_si256(c2[4997],simde_mm256_xor_si256(c2[7634],simde_mm256_xor_si256(c2[7394],simde_mm256_xor_si256(c2[3797],simde_mm256_xor_si256(c2[3583],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[7183],c2[6943]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[264]=_mm256_xor_si256(c2[4824],c2[4849]);
+     d2[264]=simde_mm256_xor_si256(c2[4824],c2[4849]);
 
 //row: 23
-     d2[276]=_mm256_xor_si256(c2[2403],_mm256_xor_si256(c2[5354],c2[7562]));
+     d2[276]=simde_mm256_xor_si256(c2[2403],simde_mm256_xor_si256(c2[5354],c2[7562]));
 
 //row: 24
-     d2[288]=_mm256_xor_si256(c2[5309],_mm256_xor_si256(c2[6052],c2[218]));
+     d2[288]=simde_mm256_xor_si256(c2[5309],simde_mm256_xor_si256(c2[6052],c2[218]));
 
 //row: 25
-     d2[300]=_mm256_xor_si256(c2[3121],c2[5160]);
+     d2[300]=simde_mm256_xor_si256(c2[3121],c2[5160]);
 
 //row: 26
-     d2[312]=_mm256_xor_si256(c2[1207],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[488],_mm256_xor_si256(c2[4322],_mm256_xor_si256(c2[4082],_mm256_xor_si256(c2[3603],_mm256_xor_si256(c2[3602],_mm256_xor_si256(c2[3123],_mm256_xor_si256(c2[5788],_mm256_xor_si256(c2[5548],_mm256_xor_si256(c2[5069],_mm256_xor_si256(c2[1949],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[7229],_mm256_xor_si256(c2[6990],_mm256_xor_si256(c2[6750],_mm256_xor_si256(c2[7014],_mm256_xor_si256(c2[6774],_mm256_xor_si256(c2[6295],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[1016],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[770],_mm256_xor_si256(c2[6075],_mm256_xor_si256(c2[5835],_mm256_xor_si256(c2[5356],_mm256_xor_si256(c2[6554],_mm256_xor_si256(c2[6314],_mm256_xor_si256(c2[5835],_mm256_xor_si256(c2[2717],_mm256_xor_si256(c2[2238],_mm256_xor_si256(c2[1779],_mm256_xor_si256(c2[1539],_mm256_xor_si256(c2[1060],_mm256_xor_si256(c2[1061],_mm256_xor_si256(c2[582],_mm256_xor_si256(c2[1781],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[4684],_mm256_xor_si256(c2[4444],_mm256_xor_si256(c2[3965],_mm256_xor_si256(c2[7323],_mm256_xor_si256(c2[7084],_mm256_xor_si256(c2[6844],_mm256_xor_si256(c2[7347],_mm256_xor_si256(c2[7107],_mm256_xor_si256(c2[6628],_mm256_xor_si256(c2[5426],_mm256_xor_si256(c2[5186],_mm256_xor_si256(c2[4707],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[3990],_mm256_xor_si256(c2[3750],_mm256_xor_si256(c2[4495],_mm256_xor_si256(c2[4255],_mm256_xor_si256(c2[3776],_mm256_xor_si256(c2[3053],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[2574],_mm256_xor_si256(c2[2332],_mm256_xor_si256(c2[4039],_mm256_xor_si256(c2[3799],_mm256_xor_si256(c2[3320],_mm256_xor_si256(c2[2117],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[4514],_mm256_xor_si256(c2[4275],_mm256_xor_si256(c2[4035],_mm256_xor_si256(c2[943],_mm256_xor_si256(c2[703],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[6457],_mm256_xor_si256(c2[6217],_mm256_xor_si256(c2[5738],_mm256_xor_si256(c2[4063],_mm256_xor_si256(c
2[3824],c2[3584])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[312]=simde_mm256_xor_si256(c2[1207],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[488],simde_mm256_xor_si256(c2[4322],simde_mm256_xor_si256(c2[4082],simde_mm256_xor_si256(c2[3603],simde_mm256_xor_si256(c2[3602],simde_mm256_xor_si256(c2[3123],simde_mm256_xor_si256(c2[5788],simde_mm256_xor_si256(c2[5548],simde_mm256_xor_si256(c2[5069],simde_mm256_xor_si256(c2[1949],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[7229],simde_mm256_xor_si256(c2[6990],simde_mm256_xor_si256(c2[6750],simde_mm256_xor_si256(c2[7014],simde_mm256_xor_si256(c2[6774],simde_mm256_xor_si256(c2[6295],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[1016],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[770],simde_mm256_xor_si256(c2[6075],simde_mm256_xor_si256(c2[5835],simde_mm256_xor_si256(c2[5356],simde_mm256_xor_si256(c2[6554],simde_mm256_xor_si256(c2[6314],simde_mm256_xor_si256(c2[5835],simde_mm256_xor_si256(c2[2717],simde_mm256_xor_si256(c2[2238],simde_mm256_xor_si256(c2[1779],simde_mm256_xor_si256(c2[1539],simde_mm256_xor_si256(c2[1060],simde_mm256_xor_si256(c2[1061],simde_mm256_xor_si256(c2[582],simde_mm256_xor_si256(c2[1781],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[4684],simde_mm256_xor_si256(c2[4444],simde_mm256_xor_si256(c2[3965],simde_mm256_xor_si256(c2[7323],simde_mm256_xor_si256(c2[7084],simde_mm256_xor_si256(c2[6844],simde_mm256_xor_si256(c2[7347],simde_mm256_xor_si256(c2[7107],simde_mm256_xor_si256(c2[6628],simde_mm256_xor_si256(c2[5426],simde_mm256_xor_si256(c2[5186],simde_mm256_xor_si256(c2[4707],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[3990],simde_mm256_xor_si256(c2[3750],simde_mm256_xor_si256(c2[4495],simde_mm256_xor_si256(c2[4255],simde_mm256_xor_si256(c2[3776],simde_mm256_xor_si256(c2[3053],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[2574],simde_mm256_xor_si256(c2[2332],simde_mm256_xor_si256(c2[4039],simde_mm256_xor_si256(c2[3799],simde_mm256_xor_si256(c2[3320],simde_m
m256_xor_si256(c2[2117],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[4514],simde_mm256_xor_si256(c2[4275],simde_mm256_xor_si256(c2[4035],simde_mm256_xor_si256(c2[943],simde_mm256_xor_si256(c2[703],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[6457],simde_mm256_xor_si256(c2[6217],simde_mm256_xor_si256(c2[5738],simde_mm256_xor_si256(c2[4063],simde_mm256_xor_si256(c2[3824],c2[3584])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[324]=_mm256_xor_si256(c2[1683],c2[4465]);
+     d2[324]=simde_mm256_xor_si256(c2[1683],c2[4465]);
 
 //row: 28
-     d2[336]=_mm256_xor_si256(c2[1466],_mm256_xor_si256(c2[3651],c2[2045]));
+     d2[336]=simde_mm256_xor_si256(c2[1466],simde_mm256_xor_si256(c2[3651],c2[2045]));
 
 //row: 29
-     d2[348]=_mm256_xor_si256(c2[3363],c2[4176]);
+     d2[348]=simde_mm256_xor_si256(c2[3363],c2[4176]);
 
 //row: 30
-     d2[360]=_mm256_xor_si256(c2[5811],_mm256_xor_si256(c2[6364],_mm256_xor_si256(c2[4969],c2[5977])));
+     d2[360]=simde_mm256_xor_si256(c2[5811],simde_mm256_xor_si256(c2[6364],simde_mm256_xor_si256(c2[4969],c2[5977])));
 
 //row: 31
-     d2[372]=_mm256_xor_si256(c2[5530],_mm256_xor_si256(c2[966],_mm256_xor_si256(c2[486],_mm256_xor_si256(c2[2432],_mm256_xor_si256(c2[6512],_mm256_xor_si256(c2[4353],_mm256_xor_si256(c2[4113],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[3658],_mm256_xor_si256(c2[6058],_mm256_xor_si256(c2[5818],_mm256_xor_si256(c2[2719],_mm256_xor_si256(c2[3198],_mm256_xor_si256(c2[7280],_mm256_xor_si256(c2[6102],_mm256_xor_si256(c2[5624],_mm256_xor_si256(c2[6584],_mm256_xor_si256(c2[6344],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[4447],_mm256_xor_si256(c2[4207],_mm256_xor_si256(c2[3991],_mm256_xor_si256(c2[2070],_mm256_xor_si256(c2[1353],_mm256_xor_si256(c2[1113],_mm256_xor_si256(c2[1139],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[7616],_mm256_xor_si256(c2[683],_mm256_xor_si256(c2[6680],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[1398],_mm256_xor_si256(c2[5266],_mm256_xor_si256(c2[3101],_mm256_xor_si256(c2[1187],c2[947])))))))))))))))))))))))))))))))))));
+     d2[372]=simde_mm256_xor_si256(c2[5530],simde_mm256_xor_si256(c2[966],simde_mm256_xor_si256(c2[486],simde_mm256_xor_si256(c2[2432],simde_mm256_xor_si256(c2[6512],simde_mm256_xor_si256(c2[4353],simde_mm256_xor_si256(c2[4113],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[3658],simde_mm256_xor_si256(c2[6058],simde_mm256_xor_si256(c2[5818],simde_mm256_xor_si256(c2[2719],simde_mm256_xor_si256(c2[3198],simde_mm256_xor_si256(c2[7280],simde_mm256_xor_si256(c2[6102],simde_mm256_xor_si256(c2[5624],simde_mm256_xor_si256(c2[6584],simde_mm256_xor_si256(c2[6344],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[4447],simde_mm256_xor_si256(c2[4207],simde_mm256_xor_si256(c2[3991],simde_mm256_xor_si256(c2[2070],simde_mm256_xor_si256(c2[1353],simde_mm256_xor_si256(c2[1113],simde_mm256_xor_si256(c2[1139],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[7616],simde_mm256_xor_si256(c2[683],simde_mm256_xor_si256(c2[6680],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[1398],simde_mm256_xor_si256(c2[5266],simde_mm256_xor_si256(c2[3101],simde_mm256_xor_si256(c2[1187],c2[947])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[384]=_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[7205],_mm256_xor_si256(c2[2881],_mm256_xor_si256(c2[2641],_mm256_xor_si256(c2[2161],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[4347],_mm256_xor_si256(c2[4107],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[5788],_mm256_xor_si256(c2[5573],_mm256_xor_si256(c2[5333],_mm256_xor_si256(c2[7493],_mm256_xor_si256(c2[4634],_mm256_xor_si256(c2[4394],_mm256_xor_si256(c2[5113],_mm256_xor_si256(c2[4873],_mm256_xor_si256(c2[1276],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[7299],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[3243],_mm256_xor_si256(c2[3003],_mm256_xor_si256(c2[5882],_mm256_xor_si256(c2[2040],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[5666],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[3745],_mm256_xor_si256(c2[2788],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[2814],_mm256_xor_si256(c2[1612],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[2358],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[3073],_mm256_xor_si256(c2[7181],_mm256_xor_si256(c2[6941],_mm256_xor_si256(c2[5016],_mm256_xor_si256(c2[4776],c2[2622]))))))))))))))))))))))))))))))))))))))))));
+     d2[384]=simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[7205],simde_mm256_xor_si256(c2[2881],simde_mm256_xor_si256(c2[2641],simde_mm256_xor_si256(c2[2161],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[4347],simde_mm256_xor_si256(c2[4107],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[5788],simde_mm256_xor_si256(c2[5573],simde_mm256_xor_si256(c2[5333],simde_mm256_xor_si256(c2[7493],simde_mm256_xor_si256(c2[4634],simde_mm256_xor_si256(c2[4394],simde_mm256_xor_si256(c2[5113],simde_mm256_xor_si256(c2[4873],simde_mm256_xor_si256(c2[1276],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[7299],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[3243],simde_mm256_xor_si256(c2[3003],simde_mm256_xor_si256(c2[5882],simde_mm256_xor_si256(c2[2040],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[5666],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[3745],simde_mm256_xor_si256(c2[2788],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[2814],simde_mm256_xor_si256(c2[1612],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[2358],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[3073],simde_mm256_xor_si256(c2[7181],simde_mm256_xor_si256(c2[6941],simde_mm256_xor_si256(c2[5016],simde_mm256_xor_si256(c2[4776],c2[2622]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[396]=_mm256_xor_si256(c2[2169],_mm256_xor_si256(c2[5284],_mm256_xor_si256(c2[4804],_mm256_xor_si256(c2[6750],_mm256_xor_si256(c2[3151],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[2457],_mm256_xor_si256(c2[1253],_mm256_xor_si256(c2[7037],_mm256_xor_si256(c2[7516],_mm256_xor_si256(c2[3919],_mm256_xor_si256(c2[2741],_mm256_xor_si256(c2[2263],_mm256_xor_si256(c2[2983],_mm256_xor_si256(c2[5646],_mm256_xor_si256(c2[846],_mm256_xor_si256(c2[630],_mm256_xor_si256(c2[6388],_mm256_xor_si256(c2[5431],_mm256_xor_si256(c2[5457],_mm256_xor_si256(c2[4255],_mm256_xor_si256(c2[4733],_mm256_xor_si256(c2[5001],_mm256_xor_si256(c2[3319],_mm256_xor_si256(c2[5716],_mm256_xor_si256(c2[1905],_mm256_xor_si256(c2[7419],c2[5265]))))))))))))))))))))))))))));
+     d2[396]=simde_mm256_xor_si256(c2[2169],simde_mm256_xor_si256(c2[5284],simde_mm256_xor_si256(c2[4804],simde_mm256_xor_si256(c2[6750],simde_mm256_xor_si256(c2[3151],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[2457],simde_mm256_xor_si256(c2[1253],simde_mm256_xor_si256(c2[7037],simde_mm256_xor_si256(c2[7516],simde_mm256_xor_si256(c2[3919],simde_mm256_xor_si256(c2[2741],simde_mm256_xor_si256(c2[2263],simde_mm256_xor_si256(c2[2983],simde_mm256_xor_si256(c2[5646],simde_mm256_xor_si256(c2[846],simde_mm256_xor_si256(c2[630],simde_mm256_xor_si256(c2[6388],simde_mm256_xor_si256(c2[5431],simde_mm256_xor_si256(c2[5457],simde_mm256_xor_si256(c2[4255],simde_mm256_xor_si256(c2[4733],simde_mm256_xor_si256(c2[5001],simde_mm256_xor_si256(c2[3319],simde_mm256_xor_si256(c2[5716],simde_mm256_xor_si256(c2[1905],simde_mm256_xor_si256(c2[7419],c2[5265]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[408]=_mm256_xor_si256(c2[7450],_mm256_xor_si256(c2[7210],_mm256_xor_si256(c2[6005],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[2646],_mm256_xor_si256(c2[1441],_mm256_xor_si256(c2[2166],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[3125],_mm256_xor_si256(c2[4352],_mm256_xor_si256(c2[4112],_mm256_xor_si256(c2[2907],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[6987],_mm256_xor_si256(c2[5793],_mm256_xor_si256(c2[4828],_mm256_xor_si256(c2[4588],_mm256_xor_si256(c2[5578],_mm256_xor_si256(c2[5338],_mm256_xor_si256(c2[4133],_mm256_xor_si256(c2[7498],_mm256_xor_si256(c2[6533],_mm256_xor_si256(c2[6293],_mm256_xor_si256(c2[4639],_mm256_xor_si256(c2[4399],_mm256_xor_si256(c2[3194],_mm256_xor_si256(c2[5118],_mm256_xor_si256(c2[4878],_mm256_xor_si256(c2[3673],_mm256_xor_si256(c2[1281],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[343],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[6577],_mm256_xor_si256(c2[7304],_mm256_xor_si256(c2[6099],_mm256_xor_si256(c2[345],_mm256_xor_si256(c2[7059],_mm256_xor_si256(c2[6819],_mm256_xor_si256(c2[3248],_mm256_xor_si256(c2[3008],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[5887],_mm256_xor_si256(c2[4922],_mm256_xor_si256(c2[4682],_mm256_xor_si256(c2[5911],_mm256_xor_si256(c2[5671],_mm256_xor_si256(c2[4466],_mm256_xor_si256(c2[3990],_mm256_xor_si256(c2[3750],_mm256_xor_si256(c2[2545],_mm256_xor_si256(c2[2793],_mm256_xor_si256(c2[1828],_mm256_xor_si256(c2[1588],_mm256_xor_si256(c2[3059],_mm256_xor_si256(c2[2819],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[1617],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[412],_mm256_xor_si256(c2[2603],_mm256_xor_si256(c2[2363],_mm256_xor_si256(c2[1158],_mm256_xor_si256(c2[681],_mm256_xor_si256(c2[7155],_mm256_xor_si256(c2[3078],_mm256_xor_si256(c2[2113],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[7186],_mm256_xor_si256(c2[6946],_mm256_xor_si256(c2[5741],_mm256_xor_si256(c2[5021],_mm256_xor_si256(c2[4781],_mm256_xor_si256(c2[3576],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[1662],c2[1422]))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[408]=simde_mm256_xor_si256(c2[7450],simde_mm256_xor_si256(c2[7210],simde_mm256_xor_si256(c2[6005],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[2646],simde_mm256_xor_si256(c2[1441],simde_mm256_xor_si256(c2[2166],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[3125],simde_mm256_xor_si256(c2[4352],simde_mm256_xor_si256(c2[4112],simde_mm256_xor_si256(c2[2907],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[6987],simde_mm256_xor_si256(c2[5793],simde_mm256_xor_si256(c2[4828],simde_mm256_xor_si256(c2[4588],simde_mm256_xor_si256(c2[5578],simde_mm256_xor_si256(c2[5338],simde_mm256_xor_si256(c2[4133],simde_mm256_xor_si256(c2[7498],simde_mm256_xor_si256(c2[6533],simde_mm256_xor_si256(c2[6293],simde_mm256_xor_si256(c2[4639],simde_mm256_xor_si256(c2[4399],simde_mm256_xor_si256(c2[3194],simde_mm256_xor_si256(c2[5118],simde_mm256_xor_si256(c2[4878],simde_mm256_xor_si256(c2[3673],simde_mm256_xor_si256(c2[1281],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[343],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[6577],simde_mm256_xor_si256(c2[7304],simde_mm256_xor_si256(c2[6099],simde_mm256_xor_si256(c2[345],simde_mm256_xor_si256(c2[7059],simde_mm256_xor_si256(c2[6819],simde_mm256_xor_si256(c2[3248],simde_mm256_xor_si256(c2[3008],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[5887],simde_mm256_xor_si256(c2[4922],simde_mm256_xor_si256(c2[4682],simde_mm256_xor_si256(c2[5911],simde_mm256_xor_si256(c2[5671],simde_mm256_xor_si256(c2[4466],simde_mm256_xor_si256(c2[3990],simde_mm256_xor_si256(c2[3750],simde_mm256_xor_si256(c2[2545],simde_mm256_xor_si256(c2[2793],simde_mm256_xor_si256(c2[1828],simde_mm256_xor_si256(c2[1588],simde_mm256_xor_si256(c2[3059],simde_mm256_xor_si256(c2[2819],simde_mm256_xor_si256(c2[1614],simde_mm256_xor_si256(c2[1617],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[412],simde_mm256_xor_si256(c2[2603],simde_mm256_xor_si256(c2[2363],simde_mm256_xor_si256(c2[1158],simde_mm256_xor_si256(c2[681],simde_mm256_
xor_si256(c2[7155],simde_mm256_xor_si256(c2[3078],simde_mm256_xor_si256(c2[2113],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[7186],simde_mm256_xor_si256(c2[6946],simde_mm256_xor_si256(c2[5741],simde_mm256_xor_si256(c2[5021],simde_mm256_xor_si256(c2[4781],simde_mm256_xor_si256(c2[3576],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[1662],c2[1422]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[420]=_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[7445],_mm256_xor_si256(c2[2881],_mm256_xor_si256(c2[2401],_mm256_xor_si256(c2[4587],_mm256_xor_si256(c2[4347],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[6028],_mm256_xor_si256(c2[3146],_mm256_xor_si256(c2[5813],_mm256_xor_si256(c2[5573],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[4874],_mm256_xor_si256(c2[4634],_mm256_xor_si256(c2[5113],_mm256_xor_si256(c2[1516],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[7539],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[3243],_mm256_xor_si256(c2[6122],_mm256_xor_si256(c2[5885],_mm256_xor_si256(c2[6146],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[3985],_mm256_xor_si256(c2[3028],_mm256_xor_si256(c2[3054],_mm256_xor_si256(c2[1852],_mm256_xor_si256(c2[2598],_mm256_xor_si256(c2[916],_mm256_xor_si256(c2[3313],_mm256_xor_si256(c2[7421],_mm256_xor_si256(c2[7181],_mm256_xor_si256(c2[5016],c2[2862]))))))))))))))))))))))))))))))))));
+     d2[420]=simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[7445],simde_mm256_xor_si256(c2[2881],simde_mm256_xor_si256(c2[2401],simde_mm256_xor_si256(c2[4587],simde_mm256_xor_si256(c2[4347],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[6028],simde_mm256_xor_si256(c2[3146],simde_mm256_xor_si256(c2[5813],simde_mm256_xor_si256(c2[5573],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[4874],simde_mm256_xor_si256(c2[4634],simde_mm256_xor_si256(c2[5113],simde_mm256_xor_si256(c2[1516],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[7539],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[3243],simde_mm256_xor_si256(c2[6122],simde_mm256_xor_si256(c2[5885],simde_mm256_xor_si256(c2[6146],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[3985],simde_mm256_xor_si256(c2[3028],simde_mm256_xor_si256(c2[3054],simde_mm256_xor_si256(c2[1852],simde_mm256_xor_si256(c2[2598],simde_mm256_xor_si256(c2[916],simde_mm256_xor_si256(c2[3313],simde_mm256_xor_si256(c2[7421],simde_mm256_xor_si256(c2[7181],simde_mm256_xor_si256(c2[5016],c2[2862]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[432]=_mm256_xor_si256(c2[6000],_mm256_xor_si256(c2[5572],c2[2573]));
+     d2[432]=simde_mm256_xor_si256(c2[6000],simde_mm256_xor_si256(c2[5572],c2[2573]));
 
 //row: 37
-     d2[444]=_mm256_xor_si256(c2[4326],_mm256_xor_si256(c2[2886],_mm256_xor_si256(c2[7441],_mm256_xor_si256(c2[6001],_mm256_xor_si256(c2[6961],_mm256_xor_si256(c2[5521],_mm256_xor_si256(c2[1228],_mm256_xor_si256(c2[7467],_mm256_xor_si256(c2[5308],_mm256_xor_si256(c2[3868],_mm256_xor_si256(c2[2909],_mm256_xor_si256(c2[1709],_mm256_xor_si256(c2[1469],_mm256_xor_si256(c2[2454],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[4614],_mm256_xor_si256(c2[3414],_mm256_xor_si256(c2[3174],_mm256_xor_si256(c2[1515],_mm256_xor_si256(c2[75],_mm256_xor_si256(c2[1994],_mm256_xor_si256(c2[554],_mm256_xor_si256(c2[6076],_mm256_xor_si256(c2[4636],_mm256_xor_si256(c2[4898],_mm256_xor_si256(c2[3458],_mm256_xor_si256(c2[4420],_mm256_xor_si256(c2[2980],_mm256_xor_si256(c2[5140],_mm256_xor_si256(c2[3940],_mm256_xor_si256(c2[3700],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[6363],_mm256_xor_si256(c2[3003],_mm256_xor_si256(c2[1803],_mm256_xor_si256(c2[1563],_mm256_xor_si256(c2[2787],_mm256_xor_si256(c2[1347],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[7105],_mm256_xor_si256(c2[7588],_mm256_xor_si256(c2[6388],_mm256_xor_si256(c2[6148],_mm256_xor_si256(c2[7614],_mm256_xor_si256(c2[6174],_mm256_xor_si256(c2[6412],_mm256_xor_si256(c2[5212],_mm256_xor_si256(c2[4972],_mm256_xor_si256(c2[7158],_mm256_xor_si256(c2[5718],_mm256_xor_si256(c2[5476],_mm256_xor_si256(c2[4036],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[6673],_mm256_xor_si256(c2[6433],_mm256_xor_si256(c2[4062],_mm256_xor_si256(c2[2622],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[7422],_mm256_xor_si256(c2[6222],c2[5982])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[444]=simde_mm256_xor_si256(c2[4326],simde_mm256_xor_si256(c2[2886],simde_mm256_xor_si256(c2[7441],simde_mm256_xor_si256(c2[6001],simde_mm256_xor_si256(c2[6961],simde_mm256_xor_si256(c2[5521],simde_mm256_xor_si256(c2[1228],simde_mm256_xor_si256(c2[7467],simde_mm256_xor_si256(c2[5308],simde_mm256_xor_si256(c2[3868],simde_mm256_xor_si256(c2[2909],simde_mm256_xor_si256(c2[1709],simde_mm256_xor_si256(c2[1469],simde_mm256_xor_si256(c2[2454],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[4614],simde_mm256_xor_si256(c2[3414],simde_mm256_xor_si256(c2[3174],simde_mm256_xor_si256(c2[1515],simde_mm256_xor_si256(c2[75],simde_mm256_xor_si256(c2[1994],simde_mm256_xor_si256(c2[554],simde_mm256_xor_si256(c2[6076],simde_mm256_xor_si256(c2[4636],simde_mm256_xor_si256(c2[4898],simde_mm256_xor_si256(c2[3458],simde_mm256_xor_si256(c2[4420],simde_mm256_xor_si256(c2[2980],simde_mm256_xor_si256(c2[5140],simde_mm256_xor_si256(c2[3940],simde_mm256_xor_si256(c2[3700],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[6363],simde_mm256_xor_si256(c2[3003],simde_mm256_xor_si256(c2[1803],simde_mm256_xor_si256(c2[1563],simde_mm256_xor_si256(c2[2787],simde_mm256_xor_si256(c2[1347],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[7105],simde_mm256_xor_si256(c2[7588],simde_mm256_xor_si256(c2[6388],simde_mm256_xor_si256(c2[6148],simde_mm256_xor_si256(c2[7614],simde_mm256_xor_si256(c2[6174],simde_mm256_xor_si256(c2[6412],simde_mm256_xor_si256(c2[5212],simde_mm256_xor_si256(c2[4972],simde_mm256_xor_si256(c2[7158],simde_mm256_xor_si256(c2[5718],simde_mm256_xor_si256(c2[5476],simde_mm256_xor_si256(c2[4036],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[6673],simde_mm256_xor_si256(c2[6433],simde_mm256_xor_si256(c2[4062],simde_mm256_xor_si256(c2[2622],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[7422],simde_mm256_xor_si256(c2[6222],c2[5982])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[456]=_mm256_xor_si256(c2[2891],_mm256_xor_si256(c2[2651],_mm256_xor_si256(c2[5766],_mm256_xor_si256(c2[5286],_mm256_xor_si256(c2[7472],_mm256_xor_si256(c2[7232],_mm256_xor_si256(c2[3633],_mm256_xor_si256(c2[1234],_mm256_xor_si256(c2[4826],_mm256_xor_si256(c2[1019],_mm256_xor_si256(c2[779],_mm256_xor_si256(c2[2939],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[7519],_mm256_xor_si256(c2[319],_mm256_xor_si256(c2[4401],_mm256_xor_si256(c2[3223],_mm256_xor_si256(c2[2745],_mm256_xor_si256(c2[3465],_mm256_xor_si256(c2[6128],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[5644],_mm256_xor_si256(c2[1352],_mm256_xor_si256(c2[1112],_mm256_xor_si256(c2[6870],_mm256_xor_si256(c2[5913],_mm256_xor_si256(c2[5939],_mm256_xor_si256(c2[4737],_mm256_xor_si256(c2[5483],_mm256_xor_si256(c2[3801],_mm256_xor_si256(c2[6198],_mm256_xor_si256(c2[2627],_mm256_xor_si256(c2[2387],_mm256_xor_si256(c2[222],c2[5747]))))))))))))))))))))))))))))))))));
+     d2[456]=simde_mm256_xor_si256(c2[2891],simde_mm256_xor_si256(c2[2651],simde_mm256_xor_si256(c2[5766],simde_mm256_xor_si256(c2[5286],simde_mm256_xor_si256(c2[7472],simde_mm256_xor_si256(c2[7232],simde_mm256_xor_si256(c2[3633],simde_mm256_xor_si256(c2[1234],simde_mm256_xor_si256(c2[4826],simde_mm256_xor_si256(c2[1019],simde_mm256_xor_si256(c2[779],simde_mm256_xor_si256(c2[2939],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[7519],simde_mm256_xor_si256(c2[319],simde_mm256_xor_si256(c2[4401],simde_mm256_xor_si256(c2[3223],simde_mm256_xor_si256(c2[2745],simde_mm256_xor_si256(c2[3465],simde_mm256_xor_si256(c2[6128],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[5644],simde_mm256_xor_si256(c2[1352],simde_mm256_xor_si256(c2[1112],simde_mm256_xor_si256(c2[6870],simde_mm256_xor_si256(c2[5913],simde_mm256_xor_si256(c2[5939],simde_mm256_xor_si256(c2[4737],simde_mm256_xor_si256(c2[5483],simde_mm256_xor_si256(c2[3801],simde_mm256_xor_si256(c2[6198],simde_mm256_xor_si256(c2[2627],simde_mm256_xor_si256(c2[2387],simde_mm256_xor_si256(c2[222],c2[5747]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[468]=_mm256_xor_si256(c2[1687],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[4562],_mm256_xor_si256(c2[4082],_mm256_xor_si256(c2[6962],_mm256_xor_si256(c2[6268],_mm256_xor_si256(c2[6028],_mm256_xor_si256(c2[2429],_mm256_xor_si256(c2[30],_mm256_xor_si256(c2[7494],_mm256_xor_si256(c2[7254],_mm256_xor_si256(c2[1735],_mm256_xor_si256(c2[6555],_mm256_xor_si256(c2[6315],_mm256_xor_si256(c2[7034],_mm256_xor_si256(c2[6794],_mm256_xor_si256(c2[3197],_mm256_xor_si256(c2[2259],_mm256_xor_si256(c2[2019],_mm256_xor_si256(c2[1541],_mm256_xor_si256(c2[2261],_mm256_xor_si256(c2[5164],_mm256_xor_si256(c2[4924],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[7587],_mm256_xor_si256(c2[5906],_mm256_xor_si256(c2[5666],_mm256_xor_si256(c2[4709],_mm256_xor_si256(c2[4975],_mm256_xor_si256(c2[4735],_mm256_xor_si256(c2[3533],_mm256_xor_si256(c2[1132],_mm256_xor_si256(c2[4519],_mm256_xor_si256(c2[4279],_mm256_xor_si256(c2[2597],_mm256_xor_si256(c2[4994],_mm256_xor_si256(c2[1423],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[6937],_mm256_xor_si256(c2[6697],c2[4543]))))))))))))))))))))))))))))))))))))))))));
+     d2[468]=simde_mm256_xor_si256(c2[1687],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[4562],simde_mm256_xor_si256(c2[4082],simde_mm256_xor_si256(c2[6962],simde_mm256_xor_si256(c2[6268],simde_mm256_xor_si256(c2[6028],simde_mm256_xor_si256(c2[2429],simde_mm256_xor_si256(c2[30],simde_mm256_xor_si256(c2[7494],simde_mm256_xor_si256(c2[7254],simde_mm256_xor_si256(c2[1735],simde_mm256_xor_si256(c2[6555],simde_mm256_xor_si256(c2[6315],simde_mm256_xor_si256(c2[7034],simde_mm256_xor_si256(c2[6794],simde_mm256_xor_si256(c2[3197],simde_mm256_xor_si256(c2[2259],simde_mm256_xor_si256(c2[2019],simde_mm256_xor_si256(c2[1541],simde_mm256_xor_si256(c2[2261],simde_mm256_xor_si256(c2[5164],simde_mm256_xor_si256(c2[4924],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[7587],simde_mm256_xor_si256(c2[5906],simde_mm256_xor_si256(c2[5666],simde_mm256_xor_si256(c2[4709],simde_mm256_xor_si256(c2[4975],simde_mm256_xor_si256(c2[4735],simde_mm256_xor_si256(c2[3533],simde_mm256_xor_si256(c2[1132],simde_mm256_xor_si256(c2[4519],simde_mm256_xor_si256(c2[4279],simde_mm256_xor_si256(c2[2597],simde_mm256_xor_si256(c2[4994],simde_mm256_xor_si256(c2[1423],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[6937],simde_mm256_xor_si256(c2[6697],c2[4543]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[480]=_mm256_xor_si256(c2[5768],_mm256_xor_si256(c2[3850],_mm256_xor_si256(c2[1204],_mm256_xor_si256(c2[6965],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[6485],_mm256_xor_si256(c2[2670],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[6750],_mm256_xor_si256(c2[4832],_mm256_xor_si256(c2[4351],_mm256_xor_si256(c2[2673],_mm256_xor_si256(c2[2433],_mm256_xor_si256(c2[3896],_mm256_xor_si256(c2[1978],_mm256_xor_si256(c2[6056],_mm256_xor_si256(c2[4378],_mm256_xor_si256(c2[4138],_mm256_xor_si256(c2[1731],_mm256_xor_si256(c2[2957],_mm256_xor_si256(c2[1039],_mm256_xor_si256(c2[3436],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[7518],_mm256_xor_si256(c2[5600],_mm256_xor_si256(c2[6340],_mm256_xor_si256(c2[4422],_mm256_xor_si256(c2[5862],_mm256_xor_si256(c2[3944],_mm256_xor_si256(c2[6582],_mm256_xor_si256(c2[4904],_mm256_xor_si256(c2[4664],_mm256_xor_si256(c2[1566],_mm256_xor_si256(c2[7327],_mm256_xor_si256(c2[4445],_mm256_xor_si256(c2[2767],_mm256_xor_si256(c2[2527],_mm256_xor_si256(c2[4229],_mm256_xor_si256(c2[2311],_mm256_xor_si256(c2[2308],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[1351],_mm256_xor_si256(c2[7352],_mm256_xor_si256(c2[7112],_mm256_xor_si256(c2[1377],_mm256_xor_si256(c2[7138],_mm256_xor_si256(c2[175],_mm256_xor_si256(c2[6176],_mm256_xor_si256(c2[5936],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[6682],_mm256_xor_si256(c2[6918],_mm256_xor_si256(c2[5000],_mm256_xor_si256(c2[1636],_mm256_xor_si256(c2[7637],_mm256_xor_si256(c2[7397],_mm256_xor_si256(c2[5504],_mm256_xor_si256(c2[3586],_mm256_xor_si256(c2[3339],_mm256_xor_si256(c2[1421],_mm256_xor_si256(c2[1185],_mm256_xor_si256(c2[7186],c2[6946]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[480]=simde_mm256_xor_si256(c2[5768],simde_mm256_xor_si256(c2[3850],simde_mm256_xor_si256(c2[1204],simde_mm256_xor_si256(c2[6965],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[6485],simde_mm256_xor_si256(c2[2670],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[6750],simde_mm256_xor_si256(c2[4832],simde_mm256_xor_si256(c2[4351],simde_mm256_xor_si256(c2[2673],simde_mm256_xor_si256(c2[2433],simde_mm256_xor_si256(c2[3896],simde_mm256_xor_si256(c2[1978],simde_mm256_xor_si256(c2[6056],simde_mm256_xor_si256(c2[4378],simde_mm256_xor_si256(c2[4138],simde_mm256_xor_si256(c2[1731],simde_mm256_xor_si256(c2[2957],simde_mm256_xor_si256(c2[1039],simde_mm256_xor_si256(c2[3436],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[7518],simde_mm256_xor_si256(c2[5600],simde_mm256_xor_si256(c2[6340],simde_mm256_xor_si256(c2[4422],simde_mm256_xor_si256(c2[5862],simde_mm256_xor_si256(c2[3944],simde_mm256_xor_si256(c2[6582],simde_mm256_xor_si256(c2[4904],simde_mm256_xor_si256(c2[4664],simde_mm256_xor_si256(c2[1566],simde_mm256_xor_si256(c2[7327],simde_mm256_xor_si256(c2[4445],simde_mm256_xor_si256(c2[2767],simde_mm256_xor_si256(c2[2527],simde_mm256_xor_si256(c2[4229],simde_mm256_xor_si256(c2[2311],simde_mm256_xor_si256(c2[2308],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[1351],simde_mm256_xor_si256(c2[7352],simde_mm256_xor_si256(c2[7112],simde_mm256_xor_si256(c2[1377],simde_mm256_xor_si256(c2[7138],simde_mm256_xor_si256(c2[175],simde_mm256_xor_si256(c2[6176],simde_mm256_xor_si256(c2[5936],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[6682],simde_mm256_xor_si256(c2[6918],simde_mm256_xor_si256(c2[5000],simde_mm256_xor_si256(c2[1636],simde_mm256_xor_si256(c2[7637],simde_mm256_xor_si256(c2[7397],simde_mm256_xor_si256(c2[5504],simde_mm256_xor_si256(c2[3586],simde_mm256_xor_si256(c2[3339],simde_mm256_xor_si256(c2[1421],simde_mm256_xor_si256(c2[1185],simde_mm256_xor_si256(c2[7186],c2[6946])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
);
 
 //row: 41
-     d2[492]=_mm256_xor_si256(c2[2407],_mm256_xor_si256(c2[2167],_mm256_xor_si256(c2[5282],_mm256_xor_si256(c2[4802],_mm256_xor_si256(c2[6988],_mm256_xor_si256(c2[6748],_mm256_xor_si256(c2[3149],_mm256_xor_si256(c2[750],_mm256_xor_si256(c2[4588],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[2455],_mm256_xor_si256(c2[7275],_mm256_xor_si256(c2[7035],_mm256_xor_si256(c2[7514],_mm256_xor_si256(c2[3917],_mm256_xor_si256(c2[2739],_mm256_xor_si256(c2[2261],_mm256_xor_si256(c2[2981],_mm256_xor_si256(c2[5644],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[628],_mm256_xor_si256(c2[6386],_mm256_xor_si256(c2[5429],_mm256_xor_si256(c2[5455],_mm256_xor_si256(c2[4253],_mm256_xor_si256(c2[4999],_mm256_xor_si256(c2[3317],_mm256_xor_si256(c2[5714],_mm256_xor_si256(c2[2143],_mm256_xor_si256(c2[1903],_mm256_xor_si256(c2[7417],c2[5263]))))))))))))))))))))))))))))))))));
+     d2[492]=simde_mm256_xor_si256(c2[2407],simde_mm256_xor_si256(c2[2167],simde_mm256_xor_si256(c2[5282],simde_mm256_xor_si256(c2[4802],simde_mm256_xor_si256(c2[6988],simde_mm256_xor_si256(c2[6748],simde_mm256_xor_si256(c2[3149],simde_mm256_xor_si256(c2[750],simde_mm256_xor_si256(c2[4588],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[2455],simde_mm256_xor_si256(c2[7275],simde_mm256_xor_si256(c2[7035],simde_mm256_xor_si256(c2[7514],simde_mm256_xor_si256(c2[3917],simde_mm256_xor_si256(c2[2739],simde_mm256_xor_si256(c2[2261],simde_mm256_xor_si256(c2[2981],simde_mm256_xor_si256(c2[5644],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[628],simde_mm256_xor_si256(c2[6386],simde_mm256_xor_si256(c2[5429],simde_mm256_xor_si256(c2[5455],simde_mm256_xor_si256(c2[4253],simde_mm256_xor_si256(c2[4999],simde_mm256_xor_si256(c2[3317],simde_mm256_xor_si256(c2[5714],simde_mm256_xor_si256(c2[2143],simde_mm256_xor_si256(c2[1903],simde_mm256_xor_si256(c2[7417],c2[5263]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc64_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc64_byte.c
index 8ffc540cd436c9151e138fcaf55648082fda9257..5ab2a77edbc6e3d046cb40bb000bb4dee9ae4ef6 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc64_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc64_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc64_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))));
 
 //row: 1
-     d2[2]=_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[953],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))))))))));
+     d2[2]=simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[953],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[4]=_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[205],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[945],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[953],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))))))))))))))))));
+     d2[4]=simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[205],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[945],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[953],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[901],c2[1029]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[6]=_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[277],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[36],c2[1029]))))))))))))))))))))))))))))))))));
+     d2[6]=simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[277],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[36],c2[1029]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[8]=_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[708],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[584],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[45],_mm256_xor_si256(c2[13],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[589],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[849],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[93],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[100],c2[228]))))))))))))))))))))))))))))))))));
+     d2[8]=simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[708],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[584],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[45],simde_mm256_xor_si256(c2[13],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[589],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[849],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[93],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[100],c2[228]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[10]=_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[9],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[588],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[117],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[1020],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[801],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[101],c2[229]))))))))))))))))))))))))))))))))))));
+     d2[10]=simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[416],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[9],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[588],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[117],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[1020],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[801],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[101],c2[229]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[12]=_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[329],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[781],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[149],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[964],c2[932]))))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[329],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[781],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[149],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[964],c2[932]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[14]=_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[897],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[741],_mm256_xor_si256(c2[261],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[488],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[1001],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[428],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[461],_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[916],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[884],_mm256_xor_si256(c2[628],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[537],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[349],_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[317],_mm256_xor_si256(c2[412],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[516],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[100],c2[645]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[14]=simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[897],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[741],simde_mm256_xor_si256(c2[261],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[488],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[1001],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[428],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[461],simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[916],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[884],simde_mm256_xor_si256(c2[628],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[537],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[349],simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[317],simde_mm256_xor_si256(c2[412],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[516],simde_mm256_xor_si256(c2[36],simde_m
m256_xor_si256(c2[997],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[100],c2[645]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[16]=_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[964],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[1005],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[117],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))))))))))));
+     d2[16]=simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[416],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[964],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[1005],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[117],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[18]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[993],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[261],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[264],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[713],_mm256_xor_si256(c2[521],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[524],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[817],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[821],_mm256_xor_si256(c2[629],_mm256_xor_si256(c2[149],_mm256_xor_si256(c2[980],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[825],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[440],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[796],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[413],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[933],c2[741])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[993],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[416],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[261],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[264],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[713],simde_mm256_xor_si256(c2[521],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[524],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[817],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[821],simde_mm256_xor_si256(c2[629],simde_mm256_xor_si256(c2[149],simde_mm256_xor_si256(c2[980],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[825],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[440],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[796],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[413],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[933],c2[741])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[20]=_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[24],c2[701])));
+     d2[20]=simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[24],c2[701])));
 
 //row: 11
-     d2[22]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[657],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[477],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[413],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[773],c2[612])))))))))))))))))))))))))))))))))))));
+     d2[22]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[657],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[477],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[413],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[773],c2[612])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[24]=_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[840],_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[237],_mm256_xor_si256(c2[621],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[340],_mm256_xor_si256(c2[693],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[317],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[324],c2[452]))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[840],simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[237],simde_mm256_xor_si256(c2[621],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[340],simde_mm256_xor_si256(c2[693],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[317],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[324],c2[452]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[26]=_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[744],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[212],_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[996],c2[964])))))))))))))))))))))))))))))))))))));
+     d2[26]=simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[744],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[212],simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[996],c2[964])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[28]=_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[448],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[741],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[485],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[488],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[905],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[908],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[269],_mm256_xor_si256(c2[428],_mm256_xor_si256(c2[461],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[880],_mm256_xor_si256(c2[208],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[149],_mm256_xor_si256(c2[532],_mm256_xor_si256(c2[341],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[24],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[153],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[825],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[124],_mm256_xor_si256(c2[988],_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[956],_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[516],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[100],c2[261])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[28]=simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[448],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[741],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[485],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[488],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[905],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[908],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[269],simde_mm256_xor_si256(c2[428],simde_mm256_xor_si256(c2[461],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[880],simde_mm256_xor_si256(c2[208],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[149],simde_mm256_xor_si256(c2[532],simde_mm256_xor_si256(c2[341],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[24],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[153],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[825],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[124],simde_mm256_xor_si256(c2[988],simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[956],simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[516],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[997],simde_mm25
6_xor_si256(c2[133],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[100],c2[261])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[30]=_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[993],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[5],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[709],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[9],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[461],_mm256_xor_si256(c2[652],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[400],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[373],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[724],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[377],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[1017],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[485],c2[676]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[30]=simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[993],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[5],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[709],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[9],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[461],simde_mm256_xor_si256(c2[652],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[400],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[373],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[724],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[377],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[1017],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[485],c2[676]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[32]=_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[969],_mm256_xor_si256(c2[616],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[77],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[45],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[780],_mm256_xor_si256(c2[429],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[621],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[497],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[501],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[501],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[664],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[476],_mm256_xor_si256(c2[125],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[957],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[708],_mm256_xor_si256(c2[1029],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[485],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[581],c2[484])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[32]=simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[969],simde_mm256_xor_si256(c2[616],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[77],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[45],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[780],simde_mm256_xor_si256(c2[429],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[621],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[497],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[501],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[501],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[664],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[476],simde_mm256_xor_si256(c2[125],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[957],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[128],simde_mm256_x
or_si256(c2[708],simde_mm256_xor_si256(c2[1029],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[485],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[581],c2[484])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[34]=_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[833],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[197],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[965],_mm256_xor_si256(c2[969],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[809],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[205],_mm256_xor_si256(c2[748],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[945],_mm256_xor_si256(c2[465],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[177],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[496],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[953],_mm256_xor_si256(c2[473],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[57],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[701],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[1024],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[1029],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[581],c2[1029])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[34]=simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[833],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[197],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[965],simde_mm256_xor_si256(c2[969],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[809],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[205],simde_mm256_xor_si256(c2[748],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[945],simde_mm256_xor_si256(c2[465],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[177],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[496],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[953],simde_mm256_xor_si256(c2[473],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[57],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[701],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[1024],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[128],simde_mm256_
xor_si256(c2[576],simde_mm256_xor_si256(c2[1029],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[581],c2[1029])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[36]=_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[88],c2[605]));
+     d2[36]=simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[88],c2[605]));
 
 //row: 19
-     d2[38]=_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[588],_mm256_xor_si256(c2[972],_mm256_xor_si256(c2[141],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[401],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[693],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[697],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[477],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[677],c2[805]))))))))))))))))))))))))))));
+     d2[38]=simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[588],simde_mm256_xor_si256(c2[972],simde_mm256_xor_si256(c2[141],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[401],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[693],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[697],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[477],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[677],c2[805]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[40]=_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[297],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[717],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[528],_mm256_xor_si256(c2[849],_mm256_xor_si256(c2[305],_mm256_xor_si256(c2[820],_mm256_xor_si256(c2[148],_mm256_xor_si256(c2[856],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[441],_mm256_xor_si256(c2[797],_mm256_xor_si256(c2[604],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[804],c2[932]))))))))))))))))))))))))))))))))));
+     d2[40]=simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[297],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[717],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[528],simde_mm256_xor_si256(c2[849],simde_mm256_xor_si256(c2[305],simde_mm256_xor_si256(c2[820],simde_mm256_xor_si256(c2[148],simde_mm256_xor_si256(c2[856],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[441],simde_mm256_xor_si256(c2[797],simde_mm256_xor_si256(c2[604],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[416],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[804],c2[932]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[42]=_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[65],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[365],_mm256_xor_si256(c2[749],_mm256_xor_si256(c2[941],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[497],_mm256_xor_si256(c2[468],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[821],_mm256_xor_si256(c2[472],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[129],_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[612],c2[580]))))))))))))))))))))))))))))))))))));
+     d2[42]=simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[65],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[365],simde_mm256_xor_si256(c2[749],simde_mm256_xor_si256(c2[941],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[497],simde_mm256_xor_si256(c2[468],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[821],simde_mm256_xor_si256(c2[472],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[129],simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[612],c2[580]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[44]=_mm256_xor_si256(c2[964],c2[1001]);
+     d2[44]=simde_mm256_xor_si256(c2[964],c2[1001]);
 
 //row: 23
-     d2[46]=_mm256_xor_si256(c2[736],_mm256_xor_si256(c2[365],c2[469]));
+     d2[46]=simde_mm256_xor_si256(c2[736],simde_mm256_xor_si256(c2[365],c2[469]));
 
 //row: 24
-     d2[48]=_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[360],c2[292]));
+     d2[48]=simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[360],c2[292]));
 
 //row: 25
-     d2[50]=_mm256_xor_si256(c2[129],c2[916]);
+     d2[50]=simde_mm256_xor_si256(c2[129],c2[916]);
 
 //row: 26
-     d2[52]=_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[513],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[644],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[393],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[649],_mm256_xor_si256(c2[808],_mm256_xor_si256(c2[617],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[813],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[172],_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[173],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[912],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[784],_mm256_xor_si256(c2[945],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[948],_mm256_xor_si256(c2[916],_mm256_xor_si256(c2[725],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[53],_mm256_xor_si256(c2[952],_mm256_xor_si256(c2[920],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[88],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[376],_mm256_xor_si256(c2[537],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[893],_mm256_xor_si256(c2[700],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[700],_mm256_xor_si256(c2[509],_mm256_xor_si256(c2[508],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[1025],_mm256_xor_si256(c2[832],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[453],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[932],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[709],_mm256_xor_si256(c2[869],_mm256_xor_si256(c2[1028],c2[837]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
)))))))))))))));
+     d2[52]=simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[513],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[644],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[393],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[649],simde_mm256_xor_si256(c2[808],simde_mm256_xor_si256(c2[617],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[813],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[172],simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[173],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[912],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[784],simde_mm256_xor_si256(c2[945],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[948],simde_mm256_xor_si256(c2[916],simde_mm256_xor_si256(c2[725],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[53],simde_mm256_xor_si256(c2[952],simde_mm256_xor_si256(c2[920],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[88],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[376],simde_mm256_xor_si256(c2[537],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[893],simde_mm256_xor_si256(c2[700],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[700],simde_mm256_xor_si256(c2[509],simde_mm256_xor_si256(c2[508],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[1025],simde_mm256_xor_si256(c2[832],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[321],simde_mm256_xor
_si256(c2[416],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[453],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[932],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[709],simde_mm256_xor_si256(c2[869],simde_mm256_xor_si256(c2[1028],c2[837])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[54]=_mm256_xor_si256(c2[256],c2[760]);
+     d2[54]=simde_mm256_xor_si256(c2[256],c2[760]);
 
 //row: 28
-     d2[56]=_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[169],c2[244]));
+     d2[56]=simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[169],c2[244]));
 
 //row: 29
-     d2[58]=_mm256_xor_si256(c2[576],c2[912]);
+     d2[58]=simde_mm256_xor_si256(c2[576],c2[912]);
 
 //row: 30
-     d2[60]=_mm256_xor_si256(c2[232],_mm256_xor_si256(c2[533],_mm256_xor_si256(c2[316],c2[676])));
+     d2[60]=simde_mm256_xor_si256(c2[232],simde_mm256_xor_si256(c2[533],simde_mm256_xor_si256(c2[316],c2[676])));
 
 //row: 31
-     d2[62]=_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[677],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[392],_mm256_xor_si256(c2[873],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[844],_mm256_xor_si256(c2[205],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[945],_mm256_xor_si256(c2[657],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[309],_mm256_xor_si256(c2[277],_mm256_xor_si256(c2[953],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[765],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[33],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[68],c2[36])))))))))))))))))))))))))))))))))));
+     d2[62]=simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[677],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[392],simde_mm256_xor_si256(c2[873],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[844],simde_mm256_xor_si256(c2[205],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[945],simde_mm256_xor_si256(c2[657],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[309],simde_mm256_xor_si256(c2[277],simde_mm256_xor_si256(c2[953],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[765],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[33],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[68],c2[36])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[64]=_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[416],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[1005],_mm256_xor_si256(c2[396],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[113],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[117],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[405],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))))))))))));
+     d2[64]=simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[416],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[1005],simde_mm256_xor_si256(c2[396],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[113],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[117],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[405],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[66]=_mm256_xor_si256(c2[609],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[581],_mm256_xor_si256(c2[713],_mm256_xor_si256(c2[137],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[524],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[245],_mm256_xor_si256(c2[596],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[220],_mm256_xor_si256(c2[29],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[773],_mm256_xor_si256(c2[229],c2[357]))))))))))))))))))))))))))));
+     d2[66]=simde_mm256_xor_si256(c2[609],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[581],simde_mm256_xor_si256(c2[713],simde_mm256_xor_si256(c2[137],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[524],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[245],simde_mm256_xor_si256(c2[596],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[220],simde_mm256_xor_si256(c2[29],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[773],simde_mm256_xor_si256(c2[229],c2[357]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[68]=_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[385],_mm256_xor_si256(c2[897],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[292],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[676],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[41],_mm256_xor_si256(c2[9],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[968],_mm256_xor_si256(c2[456],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[461],_mm256_xor_si256(c2[941],_mm256_xor_si256(c2[877],_mm256_xor_si256(c2[845],_mm256_xor_si256(c2[300],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[592],_mm256_xor_si256(c2[560],_mm256_xor_si256(c2[17],_mm256_xor_si256(c2[272],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[48],_mm256_xor_si256(c2[596],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[404],_mm256_xor_si256(c2[917],_mm256_xor_si256(c2[372],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[568],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[761],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[184],_mm256_xor_si256(c2[697],_mm256_xor_si256(c2[185],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[573],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[828],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[128],_mm256_xor_si256(c2[160],_mm256_xor_si256(c2[640],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[101],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[549],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[165],_mm256_xor_si256(c2[676],c2[133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[68]=simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[385],simde_mm256_xor_si256(c2[897],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[292],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[676],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[41],simde_mm256_xor_si256(c2[9],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[968],simde_mm256_xor_si256(c2[456],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[461],simde_mm256_xor_si256(c2[941],simde_mm256_xor_si256(c2[877],simde_mm256_xor_si256(c2[845],simde_mm256_xor_si256(c2[300],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[592],simde_mm256_xor_si256(c2[560],simde_mm256_xor_si256(c2[17],simde_mm256_xor_si256(c2[272],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[48],simde_mm256_xor_si256(c2[596],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[404],simde_mm256_xor_si256(c2[917],simde_mm256_xor_si256(c2[372],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[568],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[761],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[184],simde_mm256_xor_si256(c2[697],simde_mm256_xor_si256(c2[185],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[573],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[828],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[128],simde_mm256_xor_si256(c2[160],simde_mm256_xor_si256(c2[640],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[101],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[549],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[165],simde_mm256_xor_si256(c2[676],c2[133]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[70]=_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[161],_mm256_xor_si256(c2[480],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[196],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[360],_mm256_xor_si256(c2[328],_mm256_xor_si256(c2[777],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[780],_mm256_xor_si256(c2[141],_mm256_xor_si256(c2[333],_mm256_xor_si256(c2[881],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[912],_mm256_xor_si256(c2[885],_mm256_xor_si256(c2[213],_mm256_xor_si256(c2[277],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[669],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[544],_mm256_xor_si256(c2[420],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[869],c2[997]))))))))))))))))))))))))))))))))));
+     d2[70]=simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[161],simde_mm256_xor_si256(c2[480],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[196],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[360],simde_mm256_xor_si256(c2[328],simde_mm256_xor_si256(c2[777],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[780],simde_mm256_xor_si256(c2[141],simde_mm256_xor_si256(c2[333],simde_mm256_xor_si256(c2[881],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[912],simde_mm256_xor_si256(c2[885],simde_mm256_xor_si256(c2[213],simde_mm256_xor_si256(c2[277],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[669],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[544],simde_mm256_xor_si256(c2[420],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[869],c2[997]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[72]=_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[201],c2[860]));
+     d2[72]=simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[201],c2[860]));
 
 //row: 37
-     d2[74]=_mm256_xor_si256(c2[97],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[929],_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[225],_mm256_xor_si256(c2[484],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[388],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[69],_mm256_xor_si256(c2[964],_mm256_xor_si256(c2[201],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[552],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[520],_mm256_xor_si256(c2[653],_mm256_xor_si256(c2[525],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[909],_mm256_xor_si256(c2[204],_mm256_xor_si256(c2[76],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[464],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[689],_mm256_xor_si256(c2[785],_mm256_xor_si256(c2[657],_mm256_xor_si256(c2[756],_mm256_xor_si256(c2[628],_mm256_xor_si256(c2[1013],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[981],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[632],_mm256_xor_si256(c2[921],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[281],_mm256_xor_si256(c2[377],_mm256_xor_si256(c2[249],_mm256_xor_si256(c2[733],_mm256_xor_si256(c2[605],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[412],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[737],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[224],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[417],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[261],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[868],c2[740])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[74]=simde_mm256_xor_si256(c2[97],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[929],simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[225],simde_mm256_xor_si256(c2[484],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[388],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[69],simde_mm256_xor_si256(c2[964],simde_mm256_xor_si256(c2[201],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[552],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[520],simde_mm256_xor_si256(c2[653],simde_mm256_xor_si256(c2[525],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[909],simde_mm256_xor_si256(c2[204],simde_mm256_xor_si256(c2[76],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[464],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[689],simde_mm256_xor_si256(c2[785],simde_mm256_xor_si256(c2[657],simde_mm256_xor_si256(c2[756],simde_mm256_xor_si256(c2[628],simde_mm256_xor_si256(c2[1013],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[981],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[632],simde_mm256_xor_si256(c2[921],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[281],simde_mm256_xor_si256(c2[377],simde_mm256_xor_si256(c2[249],simde_mm256_xor_si256(c2[733],simde_mm256_xor_si256(c2[605],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[412],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[737],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[224],simde_mm256_xor_si256(c2[321],simde_mm256_xor_si256(c2[417],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[261],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[868],c2[740])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[76]=_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[449],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[705],_mm256_xor_si256(c2[868],_mm256_xor_si256(c2[836],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[1000],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[1005],_mm256_xor_si256(c2[364],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[81],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[112],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[436],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[121],_mm256_xor_si256(c2[89],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[729],_mm256_xor_si256(c2[60],_mm256_xor_si256(c2[892],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))));
+     d2[76]=simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[449],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[705],simde_mm256_xor_si256(c2[868],simde_mm256_xor_si256(c2[836],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[1000],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[1005],simde_mm256_xor_si256(c2[364],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[81],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[112],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[436],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[121],simde_mm256_xor_si256(c2[89],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[729],simde_mm256_xor_si256(c2[60],simde_mm256_xor_si256(c2[892],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[69],c2[197]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[78]=_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[321],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[481],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[708],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[425],_mm256_xor_si256(c2[872],_mm256_xor_si256(c2[909],_mm256_xor_si256(c2[877],_mm256_xor_si256(c2[268],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[428],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[976],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[980],_mm256_xor_si256(c2[308],_mm256_xor_si256(c2[1016],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[152],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[989],_mm256_xor_si256(c2[957],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[413],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[64],_mm256_xor_si256(c2[576],_mm256_xor_si256(c2[641],_mm256_xor_si256(c2[517],_mm256_xor_si256(c2[485],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[964],c2[69]))))))))))))))))))))))))))))))))))))))))));
+     d2[78]=simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[321],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[481],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[708],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[425],simde_mm256_xor_si256(c2[872],simde_mm256_xor_si256(c2[909],simde_mm256_xor_si256(c2[877],simde_mm256_xor_si256(c2[268],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[428],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[976],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[980],simde_mm256_xor_si256(c2[308],simde_mm256_xor_si256(c2[1016],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[152],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[989],simde_mm256_xor_si256(c2[957],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[413],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[64],simde_mm256_xor_si256(c2[576],simde_mm256_xor_si256(c2[641],simde_mm256_xor_si256(c2[517],simde_mm256_xor_si256(c2[485],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[964],c2[69]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[80]=_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[545],_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[389],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[293],_mm256_xor_si256(c2[4],_mm256_xor_si256(c2[580],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[712],_mm256_xor_si256(c2[104],_mm256_xor_si256(c2[585],_mm256_xor_si256(c2[136],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[141],_mm256_xor_si256(c2[556],_mm256_xor_si256(c2[525],_mm256_xor_si256(c2[940],_mm256_xor_si256(c2[717],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[657],_mm256_xor_si256(c2[977],_mm256_xor_si256(c2[369],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[273],_mm256_xor_si256(c2[688],_mm256_xor_si256(c2[244],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[21],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[1012],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[665],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[312],_mm256_xor_si256(c2[888],_mm256_xor_si256(c2[280],_mm256_xor_si256(c2[221],_mm256_xor_si256(c2[636],_mm256_xor_si256(c2[477],_mm256_xor_si256(c2[28],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[353],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[865],_mm256_xor_si256(c2[257],_mm256_xor_si256(c2[352],_mm256_xor_si256(c2[928],_mm256_xor_si256(c2[320],_mm256_xor_si256(c2[772],_mm256_xor_si256(c2[164],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[645],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[356],c2[773]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[80]=simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[545],simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[389],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[293],simde_mm256_xor_si256(c2[4],simde_mm256_xor_si256(c2[580],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[712],simde_mm256_xor_si256(c2[104],simde_mm256_xor_si256(c2[585],simde_mm256_xor_si256(c2[136],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[141],simde_mm256_xor_si256(c2[556],simde_mm256_xor_si256(c2[525],simde_mm256_xor_si256(c2[940],simde_mm256_xor_si256(c2[717],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[657],simde_mm256_xor_si256(c2[977],simde_mm256_xor_si256(c2[369],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[273],simde_mm256_xor_si256(c2[688],simde_mm256_xor_si256(c2[244],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[21],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[1012],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[665],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[312],simde_mm256_xor_si256(c2[888],simde_mm256_xor_si256(c2[280],simde_mm256_xor_si256(c2[221],simde_mm256_xor_si256(c2[636],simde_mm256_xor_si256(c2[477],simde_mm256_xor_si256(c2[28],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[353],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[865],simde_mm256_xor_si256(c2[257],simde_mm256_xor_si256(c2[352],simde_mm256_xor_si256(c2[928],simde_mm256_xor_si256(c2[320],simde_mm256_xor_si256(c2[772],simde_mm256_xor_si256(c2[164],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[645],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[356],c2[773]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[82]=_mm256_xor_si256(c2[993],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[896],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[357],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[933],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[40],_mm256_xor_si256(c2[489],_mm256_xor_si256(c2[524],_mm256_xor_si256(c2[492],_mm256_xor_si256(c2[876],_mm256_xor_si256(c2[45],_mm256_xor_si256(c2[593],_mm256_xor_si256(c2[305],_mm256_xor_si256(c2[624],_mm256_xor_si256(c2[597],_mm256_xor_si256(c2[948],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[633],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[760],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[572],_mm256_xor_si256(c2[381],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[193],_mm256_xor_si256(c2[256],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[100],_mm256_xor_si256(c2[581],c2[709]))))))))))))))))))))))))))))))))));
+     d2[82]=simde_mm256_xor_si256(c2[993],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[896],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[357],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[933],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[40],simde_mm256_xor_si256(c2[489],simde_mm256_xor_si256(c2[524],simde_mm256_xor_si256(c2[492],simde_mm256_xor_si256(c2[876],simde_mm256_xor_si256(c2[45],simde_mm256_xor_si256(c2[593],simde_mm256_xor_si256(c2[305],simde_mm256_xor_si256(c2[624],simde_mm256_xor_si256(c2[597],simde_mm256_xor_si256(c2[948],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[633],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[760],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[572],simde_mm256_xor_si256(c2[381],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[193],simde_mm256_xor_si256(c2[256],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[100],simde_mm256_xor_si256(c2[581],c2[709]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc96_byte.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc96_byte.c
index f5446775ba5c9253be899b52a279bc407a1a37c4..a4d5a47ef416700522364383edc98e3a167c2d8f 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc96_byte.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_BG2_Zc96_byte.c
@@ -11,129 +11,129 @@ static inline void ldpc_BG2_Zc96_byte(uint8_t *c,uint8_t *d) {
      d2=&dsimd[i2];
 
 //row: 0
-     d2[0]=_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1652],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))));
+     d2[0]=simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1652],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))));
 
 //row: 1
-     d2[3]=_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1652],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[458],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))))))))));
+     d2[3]=simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1652],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[458],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))))))))));
 
 //row: 2
-     d2[6]=_mm256_xor_si256(c2[842],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1620],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[259],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[1652],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[458],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1550],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[234],_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))))))))))))))))));
+     d2[6]=simde_mm256_xor_si256(c2[842],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1620],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[259],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[1652],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[458],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1550],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[234],simde_mm256_xor_si256(c2[174],c2[1556]))))))))))))))))))))))))))))))))))))))));
 
 //row: 3
-     d2[9]=_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1560],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[80],_mm256_xor_si256(c2[199],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[804],_mm256_xor_si256(c2[1044],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[1652],_mm256_xor_si256(c2[512],_mm256_xor_si256(c2[452],_mm256_xor_si256(c2[398],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1656],_mm256_xor_si256(c2[1596],_mm256_xor_si256(c2[1604],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[1302],_mm256_xor_si256(c2[1490],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1728],_mm256_xor_si256(c2[1668],_mm256_xor_si256(c2[716],_mm256_xor_si256(c2[174],_mm256_xor_si256(c2[1616],c2[1556]))))))))))))))))))))))))))))))))));
+     d2[9]=simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1560],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[80],simde_mm256_xor_si256(c2[199],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[804],simde_mm256_xor_si256(c2[1044],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[1652],simde_mm256_xor_si256(c2[512],simde_mm256_xor_si256(c2[452],simde_mm256_xor_si256(c2[398],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1656],simde_mm256_xor_si256(c2[1596],simde_mm256_xor_si256(c2[1604],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[1302],simde_mm256_xor_si256(c2[1490],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1728],simde_mm256_xor_si256(c2[1668],simde_mm256_xor_si256(c2[716],simde_mm256_xor_si256(c2[174],simde_mm256_xor_si256(c2[1616],c2[1556]))))))))))))))))))))))))))))))))));
 
 //row: 4
-     d2[12]=_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[1261],_mm256_xor_si256(c2[482],_mm256_xor_si256(c2[1806],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[847],_mm256_xor_si256(c2[248],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[132],_mm256_xor_si256(c2[672],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1039],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[625],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[1470],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[216],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[1417],_mm256_xor_si256(c2[1422],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[1308],_mm256_xor_si256(c2[889],_mm256_xor_si256(c2[1489],_mm256_xor_si256(c2[594],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[1914],c2[1374]))))))))))))))))))))))))))))))))));
+     d2[12]=simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[1261],simde_mm256_xor_si256(c2[482],simde_mm256_xor_si256(c2[1806],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[847],simde_mm256_xor_si256(c2[248],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[132],simde_mm256_xor_si256(c2[672],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1039],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[625],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[1470],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[216],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[1417],simde_mm256_xor_si256(c2[1422],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[1308],simde_mm256_xor_si256(c2[889],simde_mm256_xor_si256(c2[1489],simde_mm256_xor_si256(c2[594],simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[1914],c2[1374]))))))))))))))))))))))))))))))))));
 
 //row: 5
-     d2[15]=_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[1740],_mm256_xor_si256(c2[602],_mm256_xor_si256(c2[482],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[1027],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[1388],_mm256_xor_si256(c2[727],_mm256_xor_si256(c2[1332],_mm256_xor_si256(c2[1272],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[1098],_mm256_xor_si256(c2[1038],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[260],_mm256_xor_si256(c2[1886],_mm256_xor_si256(c2[1765],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[1530],_mm256_xor_si256(c2[1416],_mm256_xor_si256(c2[1356],_mm256_xor_si256(c2[878],_mm256_xor_si256(c2[638],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[110],_mm256_xor_si256(c2[710],_mm256_xor_si256(c2[1734],_mm256_xor_si256(c2[1674],_mm256_xor_si256(c2[1135],c2[595]))))))))))))))))))))))))))))))))))));
+     d2[15]=simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[1740],simde_mm256_xor_si256(c2[602],simde_mm256_xor_si256(c2[482],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[1027],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[1388],simde_mm256_xor_si256(c2[727],simde_mm256_xor_si256(c2[1332],simde_mm256_xor_si256(c2[1272],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[1098],simde_mm256_xor_si256(c2[1038],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[260],simde_mm256_xor_si256(c2[1886],simde_mm256_xor_si256(c2[1765],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[1530],simde_mm256_xor_si256(c2[1416],simde_mm256_xor_si256(c2[1356],simde_mm256_xor_si256(c2[878],simde_mm256_xor_si256(c2[638],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[110],simde_mm256_xor_si256(c2[710],simde_mm256_xor_si256(c2[1734],simde_mm256_xor_si256(c2[1674],simde_mm256_xor_si256(c2[1135],c2[595]))))))))))))))))))))))))))))))))))));
 
 //row: 6
-     d2[18]=_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[302],_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[960],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[1508],_mm256_xor_si256(c2[1448],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[1866],_mm256_xor_si256(c2[1813],_mm256_xor_si256(c2[1753],_mm256_xor_si256(c2[374],_mm256_xor_si256(c2[1579],_mm256_xor_si256(c2[1519],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[738],_mm256_xor_si256(c2[445],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[1891],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[1837],_mm256_xor_si256(c2[1356],_mm256_xor_si256(c2[1116],_mm256_xor_si256(c2[1124],_mm256_xor_si256(c2[822],_mm256_xor_si256(c2[282],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[588],_mm256_xor_si256(c2[1188],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[1616],_mm256_xor_si256(c2[1076],c2[1075]))))))))))))))))))))))))))))))))))));
+     d2[18]=simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[302],simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[960],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[1508],simde_mm256_xor_si256(c2[1448],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[1866],simde_mm256_xor_si256(c2[1813],simde_mm256_xor_si256(c2[1753],simde_mm256_xor_si256(c2[374],simde_mm256_xor_si256(c2[1579],simde_mm256_xor_si256(c2[1519],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[738],simde_mm256_xor_si256(c2[445],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[1891],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[1837],simde_mm256_xor_si256(c2[1356],simde_mm256_xor_si256(c2[1116],simde_mm256_xor_si256(c2[1124],simde_mm256_xor_si256(c2[822],simde_mm256_xor_si256(c2[282],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[588],simde_mm256_xor_si256(c2[1188],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[1616],simde_mm256_xor_si256(c2[1076],c2[1075]))))))))))))))))))))))))))))))))))));
 
 //row: 7
-     d2[21]=_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[1680],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[542],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[422],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[907],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[787],_mm256_xor_si256(c2[1388],_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[968],_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[1212],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[440],_mm256_xor_si256(c2[978],_mm256_xor_si256(c2[559],_mm256_xor_si256(c2[1100],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[200],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1164],_mm256_xor_si256(c2[1705],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[1885],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[1350],_mm256_xor_si256(c2[1592],_mm256_xor_si256(c2[818],_mm256_xor_si256(c2[758],_mm256_xor_si256(c2[1296],_mm256_xor_si256(c2[277],_mm256_xor_si256(c2[818],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[638],_mm256_xor_si256(c2[578],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[1662],_mm256_xor_si256(c2[344],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[1002],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[710],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1076],_mm256_xor_si256(c2[1614],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[1075],_mm256_xor_si256(c2[1916],_mm256_xor_si256(c2[595],c2[535]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[21]=simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[1680],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[542],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[422],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[907],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[787],simde_mm256_xor_si256(c2[1388],simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[968],simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[1212],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[440],simde_mm256_xor_si256(c2[978],simde_mm256_xor_si256(c2[559],simde_mm256_xor_si256(c2[1100],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[200],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1164],simde_mm256_xor_si256(c2[1705],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[1885],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[1350],simde_mm256_xor_si256(c2[1592],simde_mm256_xor_si256(c2[818],simde_mm256_xor_si256(c2[758],simde_mm256_xor_si256(c2[1296],simde_mm256_xor_si256(c2[277],simde_mm256_xor_si256(c2[818],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[638],simde_mm256_xor_si256(c2[578],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[1662],simde_mm256_xor_si256(c2[344],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[1002],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[710],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1076],simde_mm256_xor_s
i256(c2[1614],simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[1075],simde_mm256_xor_si256(c2[1916],simde_mm256_xor_si256(c2[595],c2[535]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 8
-     d2[24]=_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[962],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[548],_mm256_xor_si256(c2[1868],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[373],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[1640],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[326],_mm256_xor_si256(c2[506],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[1171],_mm256_xor_si256(c2[1890],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[824],_mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[590],_mm256_xor_si256(c2[1190],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[235],_mm256_xor_si256(c2[1675],_mm256_xor_si256(c2[1615],c2[1075]))))))))))))))))))))))))))))))))))))))))));
+     d2[24]=simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[962],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[548],simde_mm256_xor_si256(c2[1868],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[373],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[1640],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[326],simde_mm256_xor_si256(c2[506],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[1171],simde_mm256_xor_si256(c2[1890],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[824],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[590],simde_mm256_xor_si256(c2[1190],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[235],simde_mm256_xor_si256(c2[1675],simde_mm256_xor_si256(c2[1615],c2[1075]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 9
-     d2[27]=_mm256_xor_si256(c2[362],_mm256_xor_si256(c2[2],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1140],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[1020],_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[1508],_mm256_xor_si256(c2[1148],_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[7],_mm256_xor_si256(c2[1506],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1813],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[434],_mm256_xor_si256(c2[14],_mm256_xor_si256(c2[1579],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[1159],_mm256_xor_si256(c2[1698],_mm256_xor_si256(c2[1278],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[505],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[1886],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[32],_mm256_xor_si256(c2[1531],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[1477],_mm256_xor_si256(c2[1416],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[756],_mm256_xor_si256(c2[1184],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[882],_mm256_xor_si256(c2[462],_mm256_xor_si256(c2[1070],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[828],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[296],_mm256_xor_si256(c2[1855],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[1676],_mm256_xor_si256(c2[1256],_mm256_xor_si256(c2[1136],c2[716])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[27]=simde_mm256_xor_si256(c2[362],simde_mm256_xor_si256(c2[2],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1140],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[1020],simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[1508],simde_mm256_xor_si256(c2[1148],simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[7],simde_mm256_xor_si256(c2[1506],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1813],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[434],simde_mm256_xor_si256(c2[14],simde_mm256_xor_si256(c2[1579],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[1159],simde_mm256_xor_si256(c2[1698],simde_mm256_xor_si256(c2[1278],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[505],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[1886],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[32],simde_mm256_xor_si256(c2[1531],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[1477],simde_mm256_xor_si256(c2[1416],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[756],simde_mm256_xor_si256(c2[1184],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[882],simde_mm256_xor_si256(c2[462],simde_mm256_xor_si256(c2[1070],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[828],simde_mm256_xor_si256(c2[289],simde_mm256_xor_si256(c2[296],simde_mm256_xor_si256(c2[1855],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[1676],simde_mm256_xor_si256(c2[1256],simde_mm256_xor_si256(c2[1136],c2[716])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 10
-     d2[30]=_mm256_xor_si256(c2[1621],_mm256_xor_si256(c2[486],_mm256_xor_si256(c2[1356],c2[1243])));
+     d2[30]=simde_mm256_xor_si256(c2[1621],simde_mm256_xor_si256(c2[486],simde_mm256_xor_si256(c2[1356],c2[1243])));
 
 //row: 11
-     d2[33]=_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[668],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[66],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[553],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[860],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[446],_mm256_xor_si256(c2[686],_mm256_xor_si256(c2[626],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[91],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1298],_mm256_xor_si256(c2[1238],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[1004],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[1724],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[710],_mm256_xor_si256(c2[1370],_mm256_xor_si256(c2[1310],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[1735],_mm256_xor_si256(c2[1255],_mm256_xor_si256(c2[1195],c2[896])))))))))))))))))))))))))))))))))))));
+     d2[33]=simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[668],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[66],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[553],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[860],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[446],simde_mm256_xor_si256(c2[686],simde_mm256_xor_si256(c2[626],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[91],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1298],simde_mm256_xor_si256(c2[1238],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[1004],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[1724],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[710],simde_mm256_xor_si256(c2[1370],simde_mm256_xor_si256(c2[1310],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[1735],simde_mm256_xor_si256(c2[1255],simde_mm256_xor_si256(c2[1195],c2[896])))))))))))))))))))))))))))))))))))));
 
 //row: 12
-     d2[36]=_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[1442],_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1087],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[974],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[859],_mm256_xor_si256(c2[1878],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[1112],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[1728],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[1436],_mm256_xor_si256(c2[1376],_mm256_xor_si256(c2[834],c2[294]))))))))))))))))))))))))))))))))));
+     d2[36]=simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[1442],simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1087],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[974],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[859],simde_mm256_xor_si256(c2[1878],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[1112],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[1728],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[1436],simde_mm256_xor_si256(c2[1376],simde_mm256_xor_si256(c2[834],c2[294]))))))))))))))))))))))))))))))))));
 
 //row: 13
-     d2[39]=_mm256_xor_si256(c2[1141],_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[1021],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[1388],_mm256_xor_si256(c2[846],_mm256_xor_si256(c2[786],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[673],_mm256_xor_si256(c2[1273],_mm256_xor_si256(c2[1213],_mm256_xor_si256(c2[439],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[1580],_mm256_xor_si256(c2[1284],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[1406],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[92],_mm256_xor_si256(c2[871],_mm256_xor_si256(c2[811],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[36],_mm256_xor_si256(c2[44],_mm256_xor_si256(c2[1724],_mm256_xor_si256(c2[1664],_mm256_xor_si256(c2[1849],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[1075],_mm256_xor_si256(c2[536],_mm256_xor_si256(c2[56],c2[1915])))))))))))))))))))))))))))))))))))));
+     d2[39]=simde_mm256_xor_si256(c2[1141],simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[1021],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[1388],simde_mm256_xor_si256(c2[846],simde_mm256_xor_si256(c2[786],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[673],simde_mm256_xor_si256(c2[1273],simde_mm256_xor_si256(c2[1213],simde_mm256_xor_si256(c2[439],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[1580],simde_mm256_xor_si256(c2[1284],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[1406],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[92],simde_mm256_xor_si256(c2[871],simde_mm256_xor_si256(c2[811],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[36],simde_mm256_xor_si256(c2[44],simde_mm256_xor_si256(c2[1724],simde_mm256_xor_si256(c2[1664],simde_mm256_xor_si256(c2[1849],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[1075],simde_mm256_xor_si256(c2[536],simde_mm256_xor_si256(c2[56],c2[1915])))))))))))))))))))))))))))))))))))));
 
 //row: 14
-     d2[42]=_mm256_xor_si256(c2[180],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[901],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[781],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[1326],_mm256_xor_si256(c2[1266],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[367],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[1687],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[1146],_mm256_xor_si256(c2[1634],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[192],_mm256_xor_si256(c2[1093],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[1340],_mm256_xor_si256(c2[259],_mm256_xor_si256(c2[1459],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[559],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[325],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[990],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[1712],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[1718],_mm256_xor_si256(c2[1658],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[1177],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[1838],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[1356],_mm256_xor_si256(c2[942],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[643],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[828],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[114],_mm256_xor_si256(c2[54],_mm256_xor_si256(c2[895],_mm256_xor_si256(c2[1434],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[894],_mm256_xor_si256(c2[1795],c2[1735])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[42]=simde_mm256_xor_si256(c2[180],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[901],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[781],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[1326],simde_mm256_xor_si256(c2[1266],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[367],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[1687],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[1146],simde_mm256_xor_si256(c2[1634],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[192],simde_mm256_xor_si256(c2[1093],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[1340],simde_mm256_xor_si256(c2[259],simde_mm256_xor_si256(c2[1459],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[559],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[325],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[990],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[1712],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[1718],simde_mm256_xor_si256(c2[1658],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[1177],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[1838],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[1356],simde_mm256_xor_si256(c2[942],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[643],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[828],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[114],simde_mm256_xor_si256(c2[54],simde_mm256_xor_si256(c2[895],simde
_mm256_xor_si256(c2[1434],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[894],simde_mm256_xor_si256(c2[1795],c2[1735])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 15
-     d2[45]=_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[1382],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[242],_mm256_xor_si256(c2[1867],_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[968],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[1446],_mm256_xor_si256(c2[253],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[793],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[19],_mm256_xor_si256(c2[1159],_mm256_xor_si256(c2[1099],_mm256_xor_si256(c2[138],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[926],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[1591],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[391],_mm256_xor_si256(c2[1471],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[1477],_mm256_xor_si256(c2[1417],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[1538],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[1543],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[1244],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[1429],_mm256_xor_si256(c2[590],_mm256_xor_si256(c2[1010],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[1610],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[655],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[1735],_mm256_xor_si256(c2[116],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[1495],c2[656]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[45]=simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[1382],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[242],simde_mm256_xor_si256(c2[1867],simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[968],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[1446],simde_mm256_xor_si256(c2[253],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[793],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[19],simde_mm256_xor_si256(c2[1159],simde_mm256_xor_si256(c2[1099],simde_mm256_xor_si256(c2[138],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[926],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[1591],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[391],simde_mm256_xor_si256(c2[1471],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[1477],simde_mm256_xor_si256(c2[1417],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[1538],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[1543],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[1244],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[1429],simde_mm256_xor_si256(c2[590],simde_mm256_xor_si256(c2[1010],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[1610],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[655],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[1735],simde_mm256_xor_si256(c2[116],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[1495],c2[656]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 16
-     d2[48]=_mm256_xor_si256(c2[1260],_mm256_xor_si256(c2[1200],_mm256_xor_si256(c2[841],_mm256_xor_si256(c2[781],_mm256_xor_si256(c2[62],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[1562],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1442],_mm256_xor_si256(c2[487],_mm256_xor_si256(c2[427],_mm256_xor_si256(c2[68],_mm256_xor_si256(c2[8],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[848],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[1388],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[732],_mm256_xor_si256(c2[373],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[1272],_mm256_xor_si256(c2[853],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[498],_mm256_xor_si256(c2[139],_mm256_xor_si256(c2[79],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[258],_mm256_xor_si256(c2[198],_mm256_xor_si256(c2[1639],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[1346],_mm256_xor_si256(c2[984],_mm256_xor_si256(c2[924],_mm256_xor_si256(c2[1225],_mm256_xor_si256(c2[806],_mm256_xor_si256(c2[1405],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[151],_mm256_xor_si256(c2[1711],_mm256_xor_si256(c2[1651],_mm256_xor_si256(c2[870],_mm256_xor_si256(c2[451],_mm256_xor_si256(c2[876],_mm256_xor_si256(c2[816],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[1898],_mm256_xor_si256(c2[1838],_mm256_xor_si256(c2[98],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[1663],_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[1723],_mm256_xor_si256(c2[1304],_mm256_xor_si256(c2[1908],_mm256_xor_si256(c2[1549],_mm256_xor_si256(c2[1489],_mm256_xor_si256(c2[1489],_mm256_xor_si256(c2[1070],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[1670],_mm256_xor_si256(c2[1194],_mm256_xor_si256(c2[1134],_mm256_xor_si256(c2[775],_mm256_xor_si256(c2[715],_mm256_xor_si256(c2[595],_mm256_xor_si256(c2[236],_mm256_xor_si256(c2[176],_mm256_xor_si256(c2[55],_mm256_xor_si256(c2[1555],c2[1076])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[48]=simde_mm256_xor_si256(c2[1260],simde_mm256_xor_si256(c2[1200],simde_mm256_xor_si256(c2[841],simde_mm256_xor_si256(c2[781],simde_mm256_xor_si256(c2[62],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[1562],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1442],simde_mm256_xor_si256(c2[487],simde_mm256_xor_si256(c2[427],simde_mm256_xor_si256(c2[68],simde_mm256_xor_si256(c2[8],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[848],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[1388],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[732],simde_mm256_xor_si256(c2[373],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[1272],simde_mm256_xor_si256(c2[853],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[498],simde_mm256_xor_si256(c2[139],simde_mm256_xor_si256(c2[79],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[258],simde_mm256_xor_si256(c2[198],simde_mm256_xor_si256(c2[1639],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[1346],simde_mm256_xor_si256(c2[984],simde_mm256_xor_si256(c2[924],simde_mm256_xor_si256(c2[1225],simde_mm256_xor_si256(c2[806],simde_mm256_xor_si256(c2[1405],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[151],simde_mm256_xor_si256(c2[1711],simde_mm256_xor_si256(c2[1651],simde_mm256_xor_si256(c2[870],simde_mm256_xor_si256(c2[451],simde_mm256_xor_si256(c2[876],simde_mm256_xor_si256(c2[816],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[1898],simde_mm256_xor_si256(c2[1838],simde_mm256_xor_si256(c2[98],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[1663],simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[1723],simde_mm256_xor_si256(c2[1304],simde_mm256_xor_si256(c2[1908],simde_mm256_xor_si256(c2[1549],simde_mm256_xor_si256(c2[1489],simde_mm256_xor_si256(c2[1489],simde_mm256_xor_si256(c2[1070],simde_mm256_xor_si256(c2[170],simde_mm256_xo
r_si256(c2[1670],simde_mm256_xor_si256(c2[1194],simde_mm256_xor_si256(c2[1134],simde_mm256_xor_si256(c2[775],simde_mm256_xor_si256(c2[715],simde_mm256_xor_si256(c2[595],simde_mm256_xor_si256(c2[236],simde_mm256_xor_si256(c2[176],simde_mm256_xor_si256(c2[55],simde_mm256_xor_si256(c2[1555],c2[1076])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 17
-     d2[51]=_mm256_xor_si256(c2[1382],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[1],_mm256_xor_si256(c2[61],_mm256_xor_si256(c2[1800],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[546],_mm256_xor_si256(c2[426],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[1566],_mm256_xor_si256(c2[1386],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[787],_mm256_xor_si256(c2[1807],_mm256_xor_si256(c2[914],_mm256_xor_si256(c2[854],_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[674],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[1214],_mm256_xor_si256(c2[680],_mm256_xor_si256(c2[620],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[440],_mm256_xor_si256(c2[739],_mm256_xor_si256(c2[619],_mm256_xor_si256(c2[559],_mm256_xor_si256(c2[1758],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1465],_mm256_xor_si256(c2[1345],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[1164],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1344],_mm256_xor_si256(c2[270],_mm256_xor_si256(c2[150],_mm256_xor_si256(c2[90],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[998],_mm256_xor_si256(c2[938],_mm256_xor_si256(c2[818],_mm256_xor_si256(c2[758],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[277],_mm256_xor_si256(c2[217],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[222],_mm256_xor_si256(c2[102],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[1842],_mm256_xor_si256(c2[1662],_mm256_xor_si256(c2[108],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[1608],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[289],_mm256_xor_si256(c2[109],_mm256_xor_si256(c2[1316],_mm256_xor_si256(c2[1256],_mm256_xor_si256(c2[1136],_mm256_xor_si256(c2[1076],_mm256_xor_si256(c2[714],_mm256_xor_si256(c2[594],_mm256_xor_si256(c2[534],_mm256_xor_si256(c2[174],c2[1916])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[51]=simde_mm256_xor_si256(c2[1382],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[1],simde_mm256_xor_si256(c2[61],simde_mm256_xor_si256(c2[1800],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[546],simde_mm256_xor_si256(c2[426],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[1566],simde_mm256_xor_si256(c2[1386],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[787],simde_mm256_xor_si256(c2[1807],simde_mm256_xor_si256(c2[914],simde_mm256_xor_si256(c2[854],simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[674],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[1214],simde_mm256_xor_si256(c2[680],simde_mm256_xor_si256(c2[620],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[440],simde_mm256_xor_si256(c2[739],simde_mm256_xor_si256(c2[619],simde_mm256_xor_si256(c2[559],simde_mm256_xor_si256(c2[1758],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1465],simde_mm256_xor_si256(c2[1345],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[1164],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1344],simde_mm256_xor_si256(c2[270],simde_mm256_xor_si256(c2[150],simde_mm256_xor_si256(c2[90],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[998],simde_mm256_xor_si256(c2[938],simde_mm256_xor_si256(c2[818],simde_mm256_xor_si256(c2[758],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[277],simde_mm256_xor_si256(c2[217],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[222],simde_mm256_xor_si256(c2[102],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[1842],simde_mm256_xor_si256(c2[1662],simde_mm256_xor_si256(c2[108],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[1608],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si
256(c2[289],simde_mm256_xor_si256(c2[109],simde_mm256_xor_si256(c2[1316],simde_mm256_xor_si256(c2[1256],simde_mm256_xor_si256(c2[1136],simde_mm256_xor_si256(c2[1076],simde_mm256_xor_si256(c2[714],simde_mm256_xor_si256(c2[594],simde_mm256_xor_si256(c2[534],simde_mm256_xor_si256(c2[174],c2[1916])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 18
-     d2[54]=_mm256_xor_si256(c2[1562],_mm256_xor_si256(c2[396],c2[883]));
+     d2[54]=simde_mm256_xor_si256(c2[1562],simde_mm256_xor_si256(c2[396],c2[883]));
 
 //row: 19
-     d2[57]=_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1561],_mm256_xor_si256(c2[1687],_mm256_xor_si256(c2[788],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[607],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[1758],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[980],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[1411],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[475],_mm256_xor_si256(c2[1855],c2[1315]))))))))))))))))))))))))))));
+     d2[57]=simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1561],simde_mm256_xor_si256(c2[1687],simde_mm256_xor_si256(c2[788],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[607],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[1758],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[980],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[1411],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[475],simde_mm256_xor_si256(c2[1855],c2[1315]))))))))))))))))))))))))))));
 
 //row: 20
-     d2[60]=_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[1022],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[728],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[1765],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[31],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[1178],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[884],_mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[1675],c2[1135]))))))))))))))))))))))))))))))))));
+     d2[60]=simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[1022],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[728],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[1765],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[31],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[1178],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[884],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[1675],c2[1135]))))))))))))))))))))))))))))))))));
 
 //row: 21
-     d2[63]=_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[1741],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[1093],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[259],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[1838],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[950],_mm256_xor_si256(c2[895],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[1795],c2[1735]))))))))))))))))))))))))))))))))))));
+     d2[63]=simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[1741],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[1093],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[259],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[1838],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[950],simde_mm256_xor_si256(c2[895],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[1795],c2[1735]))))))))))))))))))))))))))))))))))));
 
 //row: 22
-     d2[66]=_mm256_xor_si256(c2[1206],c2[1213]);
+     d2[66]=simde_mm256_xor_si256(c2[1206],c2[1213]);
 
 //row: 23
-     d2[69]=_mm256_xor_si256(c2[600],_mm256_xor_si256(c2[1340],c2[1892]));
+     d2[69]=simde_mm256_xor_si256(c2[600],simde_mm256_xor_si256(c2[1340],c2[1892]));
 
 //row: 24
-     d2[72]=_mm256_xor_si256(c2[1328],_mm256_xor_si256(c2[1513],c2[56]));
+     d2[72]=simde_mm256_xor_si256(c2[1328],simde_mm256_xor_si256(c2[1513],c2[56]));
 
 //row: 25
-     d2[75]=_mm256_xor_si256(c2[781],c2[1290]);
+     d2[75]=simde_mm256_xor_si256(c2[781],c2[1290]);
 
 //row: 26
-     d2[78]=_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[122],_mm256_xor_si256(c2[1082],_mm256_xor_si256(c2[1022],_mm256_xor_si256(c2[900],_mm256_xor_si256(c2[902],_mm256_xor_si256(c2[780],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[1387],_mm256_xor_si256(c2[1268],_mm256_xor_si256(c2[488],_mm256_xor_si256(c2[366],_mm256_xor_si256(c2[1808],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1752],_mm256_xor_si256(c2[1692],_mm256_xor_si256(c2[1573],_mm256_xor_si256(c2[313],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[1518],_mm256_xor_si256(c2[1458],_mm256_xor_si256(c2[1339],_mm256_xor_si256(c2[1640],_mm256_xor_si256(c2[1580],_mm256_xor_si256(c2[1458],_mm256_xor_si256(c2[680],_mm256_xor_si256(c2[558],_mm256_xor_si256(c2[444],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[265],_mm256_xor_si256(c2[266],_mm256_xor_si256(c2[144],_mm256_xor_si256(c2[446],_mm256_xor_si256(c2[384],_mm256_xor_si256(c2[324],_mm256_xor_si256(c2[1171],_mm256_xor_si256(c2[1111],_mm256_xor_si256(c2[992],_mm256_xor_si256(c2[1830],_mm256_xor_si256(c2[1771],_mm256_xor_si256(c2[1711],_mm256_xor_si256(c2[1836],_mm256_xor_si256(c2[1776],_mm256_xor_si256(c2[1657],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[1298],_mm256_xor_si256(c2[1176],_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[1123],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[944],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[702],_mm256_xor_si256(c2[642],_mm256_xor_si256(c2[583],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[949],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[530],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[1130],_mm256_xor_si256(c2[1068],_mm256_xor_si256(c2[1008],_mm256_xor_si256(c2[235],_mm256_xor_si256(c2[175],_mm256_xor_si256(c2[56],_mm256_xor_si256(c2[1615],_mm256_xor_si256(c2[1555],_mm256_xor_si256(c2[1436],_mm256_xor_si256(c2[1015],_mm256_xor_si256(c2[956],c2[896])))))))))))))))))
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[78]=simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[122],simde_mm256_xor_si256(c2[1082],simde_mm256_xor_si256(c2[1022],simde_mm256_xor_si256(c2[900],simde_mm256_xor_si256(c2[902],simde_mm256_xor_si256(c2[780],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[1387],simde_mm256_xor_si256(c2[1268],simde_mm256_xor_si256(c2[488],simde_mm256_xor_si256(c2[366],simde_mm256_xor_si256(c2[1808],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1752],simde_mm256_xor_si256(c2[1692],simde_mm256_xor_si256(c2[1573],simde_mm256_xor_si256(c2[313],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[1518],simde_mm256_xor_si256(c2[1458],simde_mm256_xor_si256(c2[1339],simde_mm256_xor_si256(c2[1640],simde_mm256_xor_si256(c2[1580],simde_mm256_xor_si256(c2[1458],simde_mm256_xor_si256(c2[680],simde_mm256_xor_si256(c2[558],simde_mm256_xor_si256(c2[444],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[265],simde_mm256_xor_si256(c2[266],simde_mm256_xor_si256(c2[144],simde_mm256_xor_si256(c2[446],simde_mm256_xor_si256(c2[384],simde_mm256_xor_si256(c2[324],simde_mm256_xor_si256(c2[1171],simde_mm256_xor_si256(c2[1111],simde_mm256_xor_si256(c2[992],simde_mm256_xor_si256(c2[1830],simde_mm256_xor_si256(c2[1771],simde_mm256_xor_si256(c2[1711],simde_mm256_xor_si256(c2[1836],simde_mm256_xor_si256(c2[1776],simde_mm256_xor_si256(c2[1657],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[1298],simde_mm256_xor_si256(c2[1176],simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[1123],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[944],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[702],simde_mm256_xor_si256(c2[642],simde_mm256_xor_si256(c2[583],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[949],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[530],simde
_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[1130],simde_mm256_xor_si256(c2[1068],simde_mm256_xor_si256(c2[1008],simde_mm256_xor_si256(c2[235],simde_mm256_xor_si256(c2[175],simde_mm256_xor_si256(c2[56],simde_mm256_xor_si256(c2[1615],simde_mm256_xor_si256(c2[1555],simde_mm256_xor_si256(c2[1436],simde_mm256_xor_si256(c2[1015],simde_mm256_xor_si256(c2[956],c2[896])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 27
-     d2[81]=_mm256_xor_si256(c2[420],c2[1117]);
+     d2[81]=simde_mm256_xor_si256(c2[420],c2[1117]);
 
 //row: 28
-     d2[84]=_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[912],c2[512]));
+     d2[84]=simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[912],c2[512]));
 
 //row: 29
-     d2[87]=_mm256_xor_si256(c2[840],c2[1044]);
+     d2[87]=simde_mm256_xor_si256(c2[840],c2[1044]);
 
 //row: 30
-     d2[90]=_mm256_xor_si256(c2[1452],_mm256_xor_si256(c2[1591],_mm256_xor_si256(c2[1243],c2[1495])));
+     d2[90]=simde_mm256_xor_si256(c2[1452],simde_mm256_xor_si256(c2[1591],simde_mm256_xor_si256(c2[1243],c2[1495])));
 
 //row: 31
-     d2[93]=_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[240],_mm256_xor_si256(c2[120],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[1628],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[913],_mm256_xor_si256(c2[1513],_mm256_xor_si256(c2[1453],_mm256_xor_si256(c2[679],_mm256_xor_si256(c2[798],_mm256_xor_si256(c2[1820],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[1406],_mm256_xor_si256(c2[1646],_mm256_xor_si256(c2[1586],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[1111],_mm256_xor_si256(c2[1051],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[516],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[276],_mm256_xor_si256(c2[284],_mm256_xor_si256(c2[42],_mm256_xor_si256(c2[1904],_mm256_xor_si256(c2[170],_mm256_xor_si256(c2[1670],_mm256_xor_si256(c2[408],_mm256_xor_si256(c2[348],_mm256_xor_si256(c2[1315],_mm256_xor_si256(c2[776],_mm256_xor_si256(c2[296],c2[236])))))))))))))))))))))))))))))))))));
+     d2[93]=simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[240],simde_mm256_xor_si256(c2[120],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[1628],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[913],simde_mm256_xor_si256(c2[1513],simde_mm256_xor_si256(c2[1453],simde_mm256_xor_si256(c2[679],simde_mm256_xor_si256(c2[798],simde_mm256_xor_si256(c2[1820],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[1406],simde_mm256_xor_si256(c2[1646],simde_mm256_xor_si256(c2[1586],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[1111],simde_mm256_xor_si256(c2[1051],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[516],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[276],simde_mm256_xor_si256(c2[284],simde_mm256_xor_si256(c2[42],simde_mm256_xor_si256(c2[1904],simde_mm256_xor_si256(c2[170],simde_mm256_xor_si256(c2[1670],simde_mm256_xor_si256(c2[408],simde_mm256_xor_si256(c2[348],simde_mm256_xor_si256(c2[1315],simde_mm256_xor_si256(c2[776],simde_mm256_xor_si256(c2[296],c2[236])))))))))))))))))))))))))))))))))));
 
 //row: 32
-     d2[96]=_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[1802],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[661],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[1026],_mm256_xor_si256(c2[127],_mm256_xor_si256(c2[1447],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[1334],_mm256_xor_si256(c2[1874],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[1100],_mm256_xor_si256(c2[1279],_mm256_xor_si256(c2[1219],_mm256_xor_si256(c2[319],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[26],_mm256_xor_si256(c2[1824],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[750],_mm256_xor_si256(c2[1472],_mm256_xor_si256(c2[510],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[937],_mm256_xor_si256(c2[697],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[702],_mm256_xor_si256(c2[403],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[588],_mm256_xor_si256(c2[169],_mm256_xor_si256(c2[769],_mm256_xor_si256(c2[1796],_mm256_xor_si256(c2[1736],_mm256_xor_si256(c2[1254],_mm256_xor_si256(c2[1194],c2[654]))))))))))))))))))))))))))))))))))))))))));
+     d2[96]=simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[1802],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[661],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[1026],simde_mm256_xor_si256(c2[127],simde_mm256_xor_si256(c2[1447],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[1334],simde_mm256_xor_si256(c2[1874],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[1100],simde_mm256_xor_si256(c2[1279],simde_mm256_xor_si256(c2[1219],simde_mm256_xor_si256(c2[319],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[26],simde_mm256_xor_si256(c2[1824],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[750],simde_mm256_xor_si256(c2[1472],simde_mm256_xor_si256(c2[510],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[937],simde_mm256_xor_si256(c2[697],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[702],simde_mm256_xor_si256(c2[403],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[588],simde_mm256_xor_si256(c2[169],simde_mm256_xor_si256(c2[769],simde_mm256_xor_si256(c2[1796],simde_mm256_xor_si256(c2[1736],simde_mm256_xor_si256(c2[1254],simde_mm256_xor_si256(c2[1194],c2[654]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 33
-     d2[99]=_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[1321],_mm256_xor_si256(c2[1201],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[787],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[72],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[314],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[979],_mm256_xor_si256(c2[686],_mm256_xor_si256(c2[565],_mm256_xor_si256(c2[745],_mm256_xor_si256(c2[1410],_mm256_xor_si256(c2[210],_mm256_xor_si256(c2[156],_mm256_xor_si256(c2[1597],_mm256_xor_si256(c2[1357],_mm256_xor_si256(c2[1362],_mm256_xor_si256(c2[1063],_mm256_xor_si256(c2[1184],_mm256_xor_si256(c2[1248],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[1429],_mm256_xor_si256(c2[474],_mm256_xor_si256(c2[1854],c2[1314]))))))))))))))))))))))))))));
+     d2[99]=simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[1321],simde_mm256_xor_si256(c2[1201],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[787],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[72],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[314],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[979],simde_mm256_xor_si256(c2[686],simde_mm256_xor_si256(c2[565],simde_mm256_xor_si256(c2[745],simde_mm256_xor_si256(c2[1410],simde_mm256_xor_si256(c2[210],simde_mm256_xor_si256(c2[156],simde_mm256_xor_si256(c2[1597],simde_mm256_xor_si256(c2[1357],simde_mm256_xor_si256(c2[1362],simde_mm256_xor_si256(c2[1063],simde_mm256_xor_si256(c2[1184],simde_mm256_xor_si256(c2[1248],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[1429],simde_mm256_xor_si256(c2[474],simde_mm256_xor_si256(c2[1854],c2[1314]))))))))))))))))))))))))))));
 
 //row: 34
-     d2[102]=_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1801],_mm256_xor_si256(c2[1502],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[660],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[540],_mm256_xor_si256(c2[241],_mm256_xor_si256(c2[782],_mm256_xor_si256(c2[1088],_mm256_xor_si256(c2[1028],_mm256_xor_si256(c2[726],_mm256_xor_si256(c2[126],_mm256_xor_si256(c2[1746],_mm256_xor_si256(c2[1446],_mm256_xor_si256(c2[1207],_mm256_xor_si256(c2[1147],_mm256_xor_si256(c2[1393],_mm256_xor_si256(c2[1333],_mm256_xor_si256(c2[1034],_mm256_xor_si256(c2[1873],_mm256_xor_si256(c2[1634],_mm256_xor_si256(c2[1574],_mm256_xor_si256(c2[1159],_mm256_xor_si256(c2[1099],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[1278],_mm256_xor_si256(c2[1218],_mm256_xor_si256(c2[919],_mm256_xor_si256(c2[318],_mm256_xor_si256(c2[19],_mm256_xor_si256(c2[85],_mm256_xor_si256(c2[25],_mm256_xor_si256(c2[1645],_mm256_xor_si256(c2[1826],_mm256_xor_si256(c2[1524],_mm256_xor_si256(c2[84],_mm256_xor_si256(c2[1764],_mm256_xor_si256(c2[1704],_mm256_xor_si256(c2[812],_mm256_xor_si256(c2[752],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[1471],_mm256_xor_si256(c2[1232],_mm256_xor_si256(c2[1172],_mm256_xor_si256(c2[1477],_mm256_xor_si256(c2[1417],_mm256_xor_si256(c2[1118],_mm256_xor_si256(c2[996],_mm256_xor_si256(c2[936],_mm256_xor_si256(c2[637],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[457],_mm256_xor_si256(c2[397],_mm256_xor_si256(c2[764],_mm256_xor_si256(c2[704],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[402],_mm256_xor_si256(c2[163],_mm256_xor_si256(c2[103],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[590],_mm256_xor_si256(c2[288],_mm256_xor_si256(c2[168],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[768],_mm256_xor_si256(c2[529],_mm256_xor_si256(c2[469],_mm256_xor_si256(c2[1795],_mm256_xor_si256(c2[1735],_mm256_xor_si256(c2[1436],_mm256_xor_si256(c2[1256],_mm256_xor_si256(c2[1196],_mm256_xor_si256(c2[894],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[414],c2[354]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[102]=simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1801],simde_mm256_xor_si256(c2[1502],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[660],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[540],simde_mm256_xor_si256(c2[241],simde_mm256_xor_si256(c2[782],simde_mm256_xor_si256(c2[1088],simde_mm256_xor_si256(c2[1028],simde_mm256_xor_si256(c2[726],simde_mm256_xor_si256(c2[126],simde_mm256_xor_si256(c2[1746],simde_mm256_xor_si256(c2[1446],simde_mm256_xor_si256(c2[1207],simde_mm256_xor_si256(c2[1147],simde_mm256_xor_si256(c2[1393],simde_mm256_xor_si256(c2[1333],simde_mm256_xor_si256(c2[1034],simde_mm256_xor_si256(c2[1873],simde_mm256_xor_si256(c2[1634],simde_mm256_xor_si256(c2[1574],simde_mm256_xor_si256(c2[1159],simde_mm256_xor_si256(c2[1099],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[1278],simde_mm256_xor_si256(c2[1218],simde_mm256_xor_si256(c2[919],simde_mm256_xor_si256(c2[318],simde_mm256_xor_si256(c2[19],simde_mm256_xor_si256(c2[85],simde_mm256_xor_si256(c2[25],simde_mm256_xor_si256(c2[1645],simde_mm256_xor_si256(c2[1826],simde_mm256_xor_si256(c2[1524],simde_mm256_xor_si256(c2[84],simde_mm256_xor_si256(c2[1764],simde_mm256_xor_si256(c2[1704],simde_mm256_xor_si256(c2[812],simde_mm256_xor_si256(c2[752],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[1471],simde_mm256_xor_si256(c2[1232],simde_mm256_xor_si256(c2[1172],simde_mm256_xor_si256(c2[1477],simde_mm256_xor_si256(c2[1417],simde_mm256_xor_si256(c2[1118],simde_mm256_xor_si256(c2[996],simde_mm256_xor_si256(c2[936],simde_mm256_xor_si256(c2[637],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[457],simde_mm256_xor_si256(c2[397],simde_mm256_xor_si256(c2[764],simde_mm256_xor_si256(c2[704],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[402],simde_mm256_xor_si256(c2[163],simde_mm256_xor_si256(c2[103],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[590],simde_mm256_xor_si256(c2[288],simde_mm256_xor_si256(c2[168],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[768],simde_mm256_xor_si256(c2[529],simde_mm256_xor_si256(c2[469],simde_mm256_xor_si256(c2[1795],simde_mm256_xor_si256(c2[1735],simde_mm256_xor_si256(c2[1436],simde_mm256_xor_si256(c2[1256],simde_mm256_xor_si256(c2[1196],simde_mm256_xor_si256(c2[894],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[414],c2[354]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 35
-     d2[105]=_mm256_xor_si256(c2[0],_mm256_xor_si256(c2[1862],_mm256_xor_si256(c2[721],_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[1146],_mm256_xor_si256(c2[1086],_mm256_xor_si256(c2[187],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[788],_mm256_xor_si256(c2[1454],_mm256_xor_si256(c2[1394],_mm256_xor_si256(c2[12],_mm256_xor_si256(c2[1220],_mm256_xor_si256(c2[1160],_mm256_xor_si256(c2[1279],_mm256_xor_si256(c2[379],_mm256_xor_si256(c2[86],_mm256_xor_si256(c2[1884],_mm256_xor_si256(c2[145],_mm256_xor_si256(c2[810],_mm256_xor_si256(c2[1532],_mm256_xor_si256(c2[1472],_mm256_xor_si256(c2[1538],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[997],_mm256_xor_si256(c2[757],_mm256_xor_si256(c2[762],_mm256_xor_si256(c2[463],_mm256_xor_si256(c2[648],_mm256_xor_si256(c2[229],_mm256_xor_si256(c2[829],_mm256_xor_si256(c2[1856],_mm256_xor_si256(c2[1796],_mm256_xor_si256(c2[1254],c2[714]))))))))))))))))))))))))))))))))));
+     d2[105]=simde_mm256_xor_si256(c2[0],simde_mm256_xor_si256(c2[1862],simde_mm256_xor_si256(c2[721],simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[1146],simde_mm256_xor_si256(c2[1086],simde_mm256_xor_si256(c2[187],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[788],simde_mm256_xor_si256(c2[1454],simde_mm256_xor_si256(c2[1394],simde_mm256_xor_si256(c2[12],simde_mm256_xor_si256(c2[1220],simde_mm256_xor_si256(c2[1160],simde_mm256_xor_si256(c2[1279],simde_mm256_xor_si256(c2[379],simde_mm256_xor_si256(c2[86],simde_mm256_xor_si256(c2[1884],simde_mm256_xor_si256(c2[145],simde_mm256_xor_si256(c2[810],simde_mm256_xor_si256(c2[1532],simde_mm256_xor_si256(c2[1472],simde_mm256_xor_si256(c2[1538],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[997],simde_mm256_xor_si256(c2[757],simde_mm256_xor_si256(c2[762],simde_mm256_xor_si256(c2[463],simde_mm256_xor_si256(c2[648],simde_mm256_xor_si256(c2[229],simde_mm256_xor_si256(c2[829],simde_mm256_xor_si256(c2[1856],simde_mm256_xor_si256(c2[1796],simde_mm256_xor_si256(c2[1254],c2[714]))))))))))))))))))))))))))))))))));
 
 //row: 36
-     d2[108]=_mm256_xor_si256(c2[1500],_mm256_xor_si256(c2[1393],c2[644]));
+     d2[108]=simde_mm256_xor_si256(c2[1500],simde_mm256_xor_si256(c2[1393],c2[644]));
 
 //row: 37
-     d2[111]=_mm256_xor_si256(c2[1080],_mm256_xor_si256(c2[720],_mm256_xor_si256(c2[1861],_mm256_xor_si256(c2[1501],_mm256_xor_si256(c2[1741],_mm256_xor_si256(c2[1381],_mm256_xor_si256(c2[307],_mm256_xor_si256(c2[1866],_mm256_xor_si256(c2[1327],_mm256_xor_si256(c2[967],_mm256_xor_si256(c2[728],_mm256_xor_si256(c2[428],_mm256_xor_si256(c2[368],_mm256_xor_si256(c2[612],_mm256_xor_si256(c2[252],_mm256_xor_si256(c2[1152],_mm256_xor_si256(c2[852],_mm256_xor_si256(c2[792],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[18],_mm256_xor_si256(c2[500],_mm256_xor_si256(c2[140],_mm256_xor_si256(c2[1519],_mm256_xor_si256(c2[1159],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[866],_mm256_xor_si256(c2[1105],_mm256_xor_si256(c2[745],_mm256_xor_si256(c2[1285],_mm256_xor_si256(c2[985],_mm256_xor_si256(c2[925],_mm256_xor_si256(c2[31],_mm256_xor_si256(c2[1590],_mm256_xor_si256(c2[750],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[696],_mm256_xor_si256(c2[336],_mm256_xor_si256(c2[218],_mm256_xor_si256(c2[1777],_mm256_xor_si256(c2[1897],_mm256_xor_si256(c2[1597],_mm256_xor_si256(c2[1537],_mm256_xor_si256(c2[1902],_mm256_xor_si256(c2[1542],_mm256_xor_si256(c2[1603],_mm256_xor_si256(c2[1303],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[1788],_mm256_xor_si256(c2[1428],_mm256_xor_si256(c2[1369],_mm256_xor_si256(c2[1009],_mm256_xor_si256(c2[50],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[1609],_mm256_xor_si256(c2[1014],_mm256_xor_si256(c2[654],_mm256_xor_si256(c2[475],_mm256_xor_si256(c2[115],_mm256_xor_si256(c2[1854],_mm256_xor_si256(c2[1554],c2[1494])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[111]=simde_mm256_xor_si256(c2[1080],simde_mm256_xor_si256(c2[720],simde_mm256_xor_si256(c2[1861],simde_mm256_xor_si256(c2[1501],simde_mm256_xor_si256(c2[1741],simde_mm256_xor_si256(c2[1381],simde_mm256_xor_si256(c2[307],simde_mm256_xor_si256(c2[1866],simde_mm256_xor_si256(c2[1327],simde_mm256_xor_si256(c2[967],simde_mm256_xor_si256(c2[728],simde_mm256_xor_si256(c2[428],simde_mm256_xor_si256(c2[368],simde_mm256_xor_si256(c2[612],simde_mm256_xor_si256(c2[252],simde_mm256_xor_si256(c2[1152],simde_mm256_xor_si256(c2[852],simde_mm256_xor_si256(c2[792],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[18],simde_mm256_xor_si256(c2[500],simde_mm256_xor_si256(c2[140],simde_mm256_xor_si256(c2[1519],simde_mm256_xor_si256(c2[1159],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[866],simde_mm256_xor_si256(c2[1105],simde_mm256_xor_si256(c2[745],simde_mm256_xor_si256(c2[1285],simde_mm256_xor_si256(c2[985],simde_mm256_xor_si256(c2[925],simde_mm256_xor_si256(c2[31],simde_mm256_xor_si256(c2[1590],simde_mm256_xor_si256(c2[750],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[696],simde_mm256_xor_si256(c2[336],simde_mm256_xor_si256(c2[218],simde_mm256_xor_si256(c2[1777],simde_mm256_xor_si256(c2[1897],simde_mm256_xor_si256(c2[1597],simde_mm256_xor_si256(c2[1537],simde_mm256_xor_si256(c2[1902],simde_mm256_xor_si256(c2[1542],simde_mm256_xor_si256(c2[1603],simde_mm256_xor_si256(c2[1303],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[1788],simde_mm256_xor_si256(c2[1428],simde_mm256_xor_si256(c2[1369],simde_mm256_xor_si256(c2[1009],simde_mm256_xor_si256(c2[50],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[1609],simde_mm256_xor_si256(c2[1014],simde_mm256_xor_si256(c2[654],simde_mm256_xor_si256(c2[475],simde_mm256_xor_si256(c2[115],simde_mm256_xor_si256(c2[1854],simde_mm256_xor_si256(c2[1554],c2[1494])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 38
-     d2[114]=_mm256_xor_si256(c2[722],_mm256_xor_si256(c2[662],_mm256_xor_si256(c2[1440],_mm256_xor_si256(c2[1320],_mm256_xor_si256(c2[1868],_mm256_xor_si256(c2[1808],_mm256_xor_si256(c2[906],_mm256_xor_si256(c2[307],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[254],_mm256_xor_si256(c2[194],_mm256_xor_si256(c2[734],_mm256_xor_si256(c2[20],_mm256_xor_si256(c2[1879],_mm256_xor_si256(c2[79],_mm256_xor_si256(c2[1098],_mm256_xor_si256(c2[805],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[864],_mm256_xor_si256(c2[1532],_mm256_xor_si256(c2[332],_mm256_xor_si256(c2[1411],_mm256_xor_si256(c2[338],_mm256_xor_si256(c2[278],_mm256_xor_si256(c2[1716],_mm256_xor_si256(c2[1476],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[1182],_mm256_xor_si256(c2[1370],_mm256_xor_si256(c2[948],_mm256_xor_si256(c2[1548],_mm256_xor_si256(c2[656],_mm256_xor_si256(c2[596],_mm256_xor_si256(c2[54],c2[1436]))))))))))))))))))))))))))))))))));
+     d2[114]=simde_mm256_xor_si256(c2[722],simde_mm256_xor_si256(c2[662],simde_mm256_xor_si256(c2[1440],simde_mm256_xor_si256(c2[1320],simde_mm256_xor_si256(c2[1868],simde_mm256_xor_si256(c2[1808],simde_mm256_xor_si256(c2[906],simde_mm256_xor_si256(c2[307],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[254],simde_mm256_xor_si256(c2[194],simde_mm256_xor_si256(c2[734],simde_mm256_xor_si256(c2[20],simde_mm256_xor_si256(c2[1879],simde_mm256_xor_si256(c2[79],simde_mm256_xor_si256(c2[1098],simde_mm256_xor_si256(c2[805],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[864],simde_mm256_xor_si256(c2[1532],simde_mm256_xor_si256(c2[332],simde_mm256_xor_si256(c2[1411],simde_mm256_xor_si256(c2[338],simde_mm256_xor_si256(c2[278],simde_mm256_xor_si256(c2[1716],simde_mm256_xor_si256(c2[1476],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[1182],simde_mm256_xor_si256(c2[1370],simde_mm256_xor_si256(c2[948],simde_mm256_xor_si256(c2[1548],simde_mm256_xor_si256(c2[656],simde_mm256_xor_si256(c2[596],simde_mm256_xor_si256(c2[54],c2[1436]))))))))))))))))))))))))))))))))));
 
 //row: 39
-     d2[117]=_mm256_xor_si256(c2[421],_mm256_xor_si256(c2[361],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1142],_mm256_xor_si256(c2[1022],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[1567],_mm256_xor_si256(c2[1507],_mm256_xor_si256(c2[608],_mm256_xor_si256(c2[6],_mm256_xor_si256(c2[1872],_mm256_xor_si256(c2[1812],_mm256_xor_si256(c2[433],_mm256_xor_si256(c2[1638],_mm256_xor_si256(c2[1578],_mm256_xor_si256(c2[1760],_mm256_xor_si256(c2[1700],_mm256_xor_si256(c2[800],_mm256_xor_si256(c2[564],_mm256_xor_si256(c2[504],_mm256_xor_si256(c2[386],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[1291],_mm256_xor_si256(c2[1231],_mm256_xor_si256(c2[31],_mm256_xor_si256(c2[37],_mm256_xor_si256(c2[1896],_mm256_xor_si256(c2[1478],_mm256_xor_si256(c2[1418],_mm256_xor_si256(c2[1178],_mm256_xor_si256(c2[1243],_mm256_xor_si256(c2[1183],_mm256_xor_si256(c2[884],_mm256_xor_si256(c2[283],_mm256_xor_si256(c2[1129],_mm256_xor_si256(c2[1069],_mm256_xor_si256(c2[650],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[355],_mm256_xor_si256(c2[295],_mm256_xor_si256(c2[1735],_mm256_xor_si256(c2[1675],c2[1135]))))))))))))))))))))))))))))))))))))))))));
+     d2[117]=simde_mm256_xor_si256(c2[421],simde_mm256_xor_si256(c2[361],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1142],simde_mm256_xor_si256(c2[1022],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[1567],simde_mm256_xor_si256(c2[1507],simde_mm256_xor_si256(c2[608],simde_mm256_xor_si256(c2[6],simde_mm256_xor_si256(c2[1872],simde_mm256_xor_si256(c2[1812],simde_mm256_xor_si256(c2[433],simde_mm256_xor_si256(c2[1638],simde_mm256_xor_si256(c2[1578],simde_mm256_xor_si256(c2[1760],simde_mm256_xor_si256(c2[1700],simde_mm256_xor_si256(c2[800],simde_mm256_xor_si256(c2[564],simde_mm256_xor_si256(c2[504],simde_mm256_xor_si256(c2[386],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[1291],simde_mm256_xor_si256(c2[1231],simde_mm256_xor_si256(c2[31],simde_mm256_xor_si256(c2[37],simde_mm256_xor_si256(c2[1896],simde_mm256_xor_si256(c2[1478],simde_mm256_xor_si256(c2[1418],simde_mm256_xor_si256(c2[1178],simde_mm256_xor_si256(c2[1243],simde_mm256_xor_si256(c2[1183],simde_mm256_xor_si256(c2[884],simde_mm256_xor_si256(c2[283],simde_mm256_xor_si256(c2[1129],simde_mm256_xor_si256(c2[1069],simde_mm256_xor_si256(c2[650],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[355],simde_mm256_xor_si256(c2[295],simde_mm256_xor_si256(c2[1735],simde_mm256_xor_si256(c2[1675],c2[1135]))))))))))))))))))))))))))))))))))))))))));
 
 //row: 40
-     d2[120]=_mm256_xor_si256(c2[1442],_mm256_xor_si256(c2[961],_mm256_xor_si256(c2[301],_mm256_xor_si256(c2[1742],_mm256_xor_si256(c2[181],_mm256_xor_si256(c2[1622],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[188],_mm256_xor_si256(c2[1686],_mm256_xor_si256(c2[1208],_mm256_xor_si256(c2[1087],_mm256_xor_si256(c2[666],_mm256_xor_si256(c2[606],_mm256_xor_si256(c2[974],_mm256_xor_si256(c2[493],_mm256_xor_si256(c2[1514],_mm256_xor_si256(c2[1093],_mm256_xor_si256(c2[1033],_mm256_xor_si256(c2[432],_mm256_xor_si256(c2[740],_mm256_xor_si256(c2[259],_mm256_xor_si256(c2[859],_mm256_xor_si256(c2[378],_mm256_xor_si256(c2[1878],_mm256_xor_si256(c2[1400],_mm256_xor_si256(c2[1585],_mm256_xor_si256(c2[1104],_mm256_xor_si256(c2[1464],_mm256_xor_si256(c2[986],_mm256_xor_si256(c2[1644],_mm256_xor_si256(c2[1226],_mm256_xor_si256(c2[1166],_mm256_xor_si256(c2[390],_mm256_xor_si256(c2[1831],_mm256_xor_si256(c2[1112],_mm256_xor_si256(c2[691],_mm256_xor_si256(c2[631],_mm256_xor_si256(c2[1058],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[577],_mm256_xor_si256(c2[96],_mm256_xor_si256(c2[337],_mm256_xor_si256(c2[1838],_mm256_xor_si256(c2[1778],_mm256_xor_si256(c2[342],_mm256_xor_si256(c2[1783],_mm256_xor_si256(c2[43],_mm256_xor_si256(c2[1544],_mm256_xor_si256(c2[1484],_mm256_xor_si256(c2[228],_mm256_xor_si256(c2[1669],_mm256_xor_si256(c2[1728],_mm256_xor_si256(c2[1250],_mm256_xor_si256(c2[409],_mm256_xor_si256(c2[1910],_mm256_xor_si256(c2[1850],_mm256_xor_si256(c2[1376],_mm256_xor_si256(c2[895],_mm256_xor_si256(c2[834],_mm256_xor_si256(c2[356],_mm256_xor_si256(c2[294],_mm256_xor_si256(c2[1795],c2[1735]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
+     d2[120]=simde_mm256_xor_si256(c2[1442],simde_mm256_xor_si256(c2[961],simde_mm256_xor_si256(c2[301],simde_mm256_xor_si256(c2[1742],simde_mm256_xor_si256(c2[181],simde_mm256_xor_si256(c2[1622],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[188],simde_mm256_xor_si256(c2[1686],simde_mm256_xor_si256(c2[1208],simde_mm256_xor_si256(c2[1087],simde_mm256_xor_si256(c2[666],simde_mm256_xor_si256(c2[606],simde_mm256_xor_si256(c2[974],simde_mm256_xor_si256(c2[493],simde_mm256_xor_si256(c2[1514],simde_mm256_xor_si256(c2[1093],simde_mm256_xor_si256(c2[1033],simde_mm256_xor_si256(c2[432],simde_mm256_xor_si256(c2[740],simde_mm256_xor_si256(c2[259],simde_mm256_xor_si256(c2[859],simde_mm256_xor_si256(c2[378],simde_mm256_xor_si256(c2[1878],simde_mm256_xor_si256(c2[1400],simde_mm256_xor_si256(c2[1585],simde_mm256_xor_si256(c2[1104],simde_mm256_xor_si256(c2[1464],simde_mm256_xor_si256(c2[986],simde_mm256_xor_si256(c2[1644],simde_mm256_xor_si256(c2[1226],simde_mm256_xor_si256(c2[1166],simde_mm256_xor_si256(c2[390],simde_mm256_xor_si256(c2[1831],simde_mm256_xor_si256(c2[1112],simde_mm256_xor_si256(c2[691],simde_mm256_xor_si256(c2[631],simde_mm256_xor_si256(c2[1058],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[577],simde_mm256_xor_si256(c2[96],simde_mm256_xor_si256(c2[337],simde_mm256_xor_si256(c2[1838],simde_mm256_xor_si256(c2[1778],simde_mm256_xor_si256(c2[342],simde_mm256_xor_si256(c2[1783],simde_mm256_xor_si256(c2[43],simde_mm256_xor_si256(c2[1544],simde_mm256_xor_si256(c2[1484],simde_mm256_xor_si256(c2[228],simde_mm256_xor_si256(c2[1669],simde_mm256_xor_si256(c2[1728],simde_mm256_xor_si256(c2[1250],simde_mm256_xor_si256(c2[409],simde_mm256_xor_si256(c2[1910],simde_mm256_xor_si256(c2[1850],simde_mm256_xor_si256(c2[1376],simde_mm256_xor_si256(c2[895],simde_mm256_xor_si256(c2[834],simde_mm256_xor_si256(c2[356],simde_mm256_xor_si256(c2[294],simde_mm256_xor_si256(c2[1795],c2[1735]))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))));
 
 //row: 41
-     d2[123]=_mm256_xor_si256(c2[601],_mm256_xor_si256(c2[541],_mm256_xor_si256(c2[1322],_mm256_xor_si256(c2[1202],_mm256_xor_si256(c2[1747],_mm256_xor_si256(c2[1687],_mm256_xor_si256(c2[788],_mm256_xor_si256(c2[186],_mm256_xor_si256(c2[1147],_mm256_xor_si256(c2[133],_mm256_xor_si256(c2[73],_mm256_xor_si256(c2[613],_mm256_xor_si256(c2[1818],_mm256_xor_si256(c2[1758],_mm256_xor_si256(c2[1880],_mm256_xor_si256(c2[980],_mm256_xor_si256(c2[684],_mm256_xor_si256(c2[566],_mm256_xor_si256(c2[746],_mm256_xor_si256(c2[1411],_mm256_xor_si256(c2[211],_mm256_xor_si256(c2[450],_mm256_xor_si256(c2[217],_mm256_xor_si256(c2[157],_mm256_xor_si256(c2[1598],_mm256_xor_si256(c2[1358],_mm256_xor_si256(c2[1363],_mm256_xor_si256(c2[1064],_mm256_xor_si256(c2[1249],_mm256_xor_si256(c2[830],_mm256_xor_si256(c2[1430],_mm256_xor_si256(c2[535],_mm256_xor_si256(c2[475],_mm256_xor_si256(c2[1855],c2[1315]))))))))))))))))))))))))))))))))));
+     d2[123]=simde_mm256_xor_si256(c2[601],simde_mm256_xor_si256(c2[541],simde_mm256_xor_si256(c2[1322],simde_mm256_xor_si256(c2[1202],simde_mm256_xor_si256(c2[1747],simde_mm256_xor_si256(c2[1687],simde_mm256_xor_si256(c2[788],simde_mm256_xor_si256(c2[186],simde_mm256_xor_si256(c2[1147],simde_mm256_xor_si256(c2[133],simde_mm256_xor_si256(c2[73],simde_mm256_xor_si256(c2[613],simde_mm256_xor_si256(c2[1818],simde_mm256_xor_si256(c2[1758],simde_mm256_xor_si256(c2[1880],simde_mm256_xor_si256(c2[980],simde_mm256_xor_si256(c2[684],simde_mm256_xor_si256(c2[566],simde_mm256_xor_si256(c2[746],simde_mm256_xor_si256(c2[1411],simde_mm256_xor_si256(c2[211],simde_mm256_xor_si256(c2[450],simde_mm256_xor_si256(c2[217],simde_mm256_xor_si256(c2[157],simde_mm256_xor_si256(c2[1598],simde_mm256_xor_si256(c2[1358],simde_mm256_xor_si256(c2[1363],simde_mm256_xor_si256(c2[1064],simde_mm256_xor_si256(c2[1249],simde_mm256_xor_si256(c2[830],simde_mm256_xor_si256(c2[1430],simde_mm256_xor_si256(c2[535],simde_mm256_xor_si256(c2[475],simde_mm256_xor_si256(c2[1855],c2[1315]))))))))))))))))))))))))))))))))));
   }
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
index d5669fbbabe0fd41effaad17138cdca12dda1b03..90475831ff490280f40a5e3456d9827fed3bd04f 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder.c
@@ -140,7 +140,7 @@ int ldpc_encoder_orig(unsigned char *test_input,unsigned char *channel_input,int
       shift=5; // AVX2 - 256-bit SIMD
       mask=31;
       strcpy(data_type,"__m256i");
-      strcpy(xor_command,"_mm256_xor_si256");
+      strcpy(xor_command,"simde_mm256_xor_si256");
     }
     else if ((Zc&15)==0) {
       shift=4; // SSE4 - 128-bit SIMD
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder2.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder2.c
index 73c165a2c80cad74c3e90e2b4fc0c4435140733b..28fcc7f04fca57d785e32ac618ec7ffeb4ff5263 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder2.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder2.c
@@ -38,6 +38,7 @@
 #include "common/utils/LOG/log.h"
 #include "time_meas.h"
 #include "defs.h"
+#include "PHY/sse_intrin.h"
 
 #include "ldpc384_byte.c"
 #include "ldpc352_byte.c"
@@ -303,21 +304,19 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
   char temp;
   int simd_size;
 
-#ifdef __AVX2__
-  __m256i shufmask = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
-  __m256i andmask  = _mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
-  __m256i zero256   = _mm256_setzero_si256();
+  __m256i shufmask = simde_mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
+  __m256i andmask  = simde_mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
+  __m256i zero256   = simde_mm256_setzero_si256();
   __m256i masks[8];
   register __m256i c256;
-  masks[0] = _mm256_set1_epi8(0x1);
-  masks[1] = _mm256_set1_epi8(0x2);
-  masks[2] = _mm256_set1_epi8(0x4);
-  masks[3] = _mm256_set1_epi8(0x8);
-  masks[4] = _mm256_set1_epi8(0x10);
-  masks[5] = _mm256_set1_epi8(0x20);
-  masks[6] = _mm256_set1_epi8(0x40);
-  masks[7] = _mm256_set1_epi8(0x80);
-#endif
+  masks[0] = simde_mm256_set1_epi8(0x1);
+  masks[1] = simde_mm256_set1_epi8(0x2);
+  masks[2] = simde_mm256_set1_epi8(0x4);
+  masks[3] = simde_mm256_set1_epi8(0x8);
+  masks[4] = simde_mm256_set1_epi8(0x10);
+  masks[5] = simde_mm256_set1_epi8(0x20);
+  masks[6] = simde_mm256_set1_epi8(0x40);
+  masks[7] = simde_mm256_set1_epi8(0x80);
 
   AssertFatal(n_segments>0&&n_segments<=8,"0 < n_segments %d <= 8\n",n_segments);
 
@@ -374,11 +373,10 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
     }
   }
 #else
-#ifdef __AVX2__
   for (i=0; i<block_length>>5; i++) {
-    c256 = _mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[0])[i]), shufmask),andmask),zero256),masks[0]);
+    c256 = simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[0])[i]), shufmask),andmask),zero256),masks[0]);
     for (j=1; j<n_segments; j++) {
-      c256 = _mm256_or_si256(_mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j]),c256);
+      c256 = simde_mm256_or_si256(simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j]),c256);
     }
     ((__m256i *)c)[i] = c256;
   }
@@ -391,9 +389,6 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
       c[i] |= (temp << j);
     }
   }
-#else
-  AssertFatal(1==0,"Need AVX2 for this\n");
-#endif
 #endif
 
   if(tinput != NULL) stop_meas(tinput);
@@ -433,7 +428,6 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
   memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
   memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
   */
-#ifdef __AVX2__
   if ((((2*Zc)&31) == 0) && (((block_length-(2*Zc))&31) == 0)) {
     //AssertFatal(((2*Zc)&31) == 0,"2*Zc needs to be a multiple of 32 for now\n");
     //AssertFatal(((block_length-(2*Zc))&31) == 0,"block_length-(2*Zc) needs to be a multiple of 32 for now\n");
@@ -444,12 +438,12 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
     //  if (((block_length-(2*Zc))&31)>0) l1++;
     
     for (i=0;i<l1;i++)
-      for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j),masks[0]);
+      for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j),masks[0]);
     
     //  if ((((nrows-no_punctured_columns) * Zc-removed_bit)&31)>0) l2++;
     
     for (i1=0;i1<l2;i1++,i++)
-      for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j),masks[0]);
+      for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j),masks[0]);
   }
   else {
 #ifdef DEBUG_LDPC
@@ -464,10 +458,6 @@ int ldpc_encoder_optim_8seg(unsigned char **test_input,unsigned char **channel_i
 	channel_input[j][block_length-2*Zc+i] = (d[i]>>j)&1;
     }
 
-#else
-    AssertFatal(1==0,"Need AVX2 for now\n");
-#endif
-
   if(toutput != NULL) stop_meas(toutput);
   return 0;
 }
@@ -490,23 +480,19 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
   //printf("macro_segment: %d\n", macro_segment);
   //printf("macro_segment_end: %d\n", macro_segment_end );
 
-#ifdef __AVX2__
-  __m256i shufmask = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
-  __m256i andmask  = _mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
-  __m256i zero256   = _mm256_setzero_si256();
+  __m256i shufmask = simde_mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
+  __m256i andmask  = simde_mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
+  __m256i zero256   = simde_mm256_setzero_si256();
   __m256i masks[8];
   register __m256i c256;
-  masks[0] = _mm256_set1_epi8(0x1);
-  masks[1] = _mm256_set1_epi8(0x2);
-  masks[2] = _mm256_set1_epi8(0x4);
-  masks[3] = _mm256_set1_epi8(0x8);
-  masks[4] = _mm256_set1_epi8(0x10);
-  masks[5] = _mm256_set1_epi8(0x20);
-  masks[6] = _mm256_set1_epi8(0x40);
-  masks[7] = _mm256_set1_epi8(0x80);
-#endif
-
-
+  masks[0] = simde_mm256_set1_epi8(0x1);
+  masks[1] = simde_mm256_set1_epi8(0x2);
+  masks[2] = simde_mm256_set1_epi8(0x4);
+  masks[3] = simde_mm256_set1_epi8(0x8);
+  masks[4] = simde_mm256_set1_epi8(0x10);
+  masks[5] = simde_mm256_set1_epi8(0x20);
+  masks[6] = simde_mm256_set1_epi8(0x40);
+  masks[7] = simde_mm256_set1_epi8(0x80);
 
   //determine number of bits in codeword
   if (BG==1)
@@ -558,12 +544,11 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
     }
   }
 #else
-#ifdef __AVX2__
   for (i=0; i<block_length>>5; i++) {
-    c256 = _mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[macro_segment])[i]), shufmask),andmask),zero256),masks[0]);
+    c256 = simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[macro_segment])[i]), shufmask),andmask),zero256),masks[0]);
     //for (j=1; j<n_segments; j++) {
     for (j=macro_segment+1; j < macro_segment_end; j++) {
-      c256 = _mm256_or_si256(_mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j-macro_segment]),c256);
+      c256 = simde_mm256_or_si256(simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j-macro_segment]),c256);
     }
     ((__m256i *)c)[i] = c256;
   }
@@ -577,9 +562,6 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
       c[i] |= (temp << (j-macro_segment));
     }
   }
-#else
-  AssertFatal(1==0,"Need AVX2 for this\n");
-#endif
 #endif
 
   if(tinput != NULL) stop_meas(tinput);
@@ -619,7 +601,6 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
   memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
   memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
   */
-#ifdef __AVX2__
   if ((((2*Zc)&31) == 0) && (((block_length-(2*Zc))&31) == 0)) {
     //AssertFatal(((2*Zc)&31) == 0,"2*Zc needs to be a multiple of 32 for now\n");
     //AssertFatal(((block_length-(2*Zc))&31) == 0,"block_length-(2*Zc) needs to be a multiple of 32 for now\n");
@@ -630,15 +611,15 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
     //  if (((block_length-(2*Zc))&31)>0) l1++;
 
     for (i=0;i<l1;i++)
-      //for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j),masks[0]);
-    	for (j=macro_segment; j < macro_segment_end; j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j-macro_segment),masks[0]);
+      //for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j),masks[0]);
+    	for (j=macro_segment; j < macro_segment_end; j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j-macro_segment),masks[0]);
 
 
     //  if ((((nrows-no_punctured_columns) * Zc-removed_bit)&31)>0) l2++;
 
     for (i1=0;i1<l2;i1++,i++)
-      //for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j),masks[0]);
-    	for (j=macro_segment; j < macro_segment_end; j++)  ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j-macro_segment),masks[0]);
+      //for (j=0;j<n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j),masks[0]);
+    	for (j=macro_segment; j < macro_segment_end; j++)  ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j-macro_segment),masks[0]);
   }
   else {
 #ifdef DEBUG_LDPC
@@ -655,10 +636,6 @@ int ldpc_encoder_optim_8seg_multi(unsigned char **test_input,unsigned char **cha
 	channel_input[j][block_length-2*Zc+i] = (d[i]>>(j-macro_segment))&1;
     }
 
-#else
-    AssertFatal(1==0,"Need AVX2 for now\n");
-#endif
-
   if(toutput != NULL) stop_meas(toutput);
   return 0;
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
index 958ef5aca7eabe81b920e5e10aa4080560e2acc8..ad71674349c1adabd4c6b48a3d4886aa85ba1621 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8seg.c
@@ -39,7 +39,7 @@
 #include "openair1/PHY/CODING/nrLDPC_defs.h"
 #include "ldpc_encode_parity_check.c" 
 #include "ldpc_generate_coefficient.c"
-
+#include "PHY/sse_intrin.h"
 
 
 int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc,int Kb,short block_length, short BG, encoder_implemparams_t *impp)
@@ -51,21 +51,19 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
   char temp;
   int simd_size;
 
-#ifdef __AVX2__
-  __m256i shufmask = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
-  __m256i andmask  = _mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
-  __m256i zero256   = _mm256_setzero_si256();
+  __m256i shufmask = simde_mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
+  __m256i andmask  = simde_mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
+  __m256i zero256   = simde_mm256_setzero_si256();
   __m256i masks[8];
   register __m256i c256;
-  masks[0] = _mm256_set1_epi8(0x1);
-  masks[1] = _mm256_set1_epi8(0x2);
-  masks[2] = _mm256_set1_epi8(0x4);
-  masks[3] = _mm256_set1_epi8(0x8);
-  masks[4] = _mm256_set1_epi8(0x10);
-  masks[5] = _mm256_set1_epi8(0x20);
-  masks[6] = _mm256_set1_epi8(0x40);
-  masks[7] = _mm256_set1_epi8(0x80);
-#endif
+  masks[0] = simde_mm256_set1_epi8(0x1);
+  masks[1] = simde_mm256_set1_epi8(0x2);
+  masks[2] = simde_mm256_set1_epi8(0x4);
+  masks[3] = simde_mm256_set1_epi8(0x8);
+  masks[4] = simde_mm256_set1_epi8(0x10);
+  masks[5] = simde_mm256_set1_epi8(0x20);
+  masks[6] = simde_mm256_set1_epi8(0x40);
+  masks[7] = simde_mm256_set1_epi8(0x80);
 
   AssertFatal((impp->n_segments>0&&impp->n_segments<=8),"0 < n_segments %d <= 8\n",impp->n_segments);
 
@@ -121,11 +119,10 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
     }
   }
 #else
-#ifdef __AVX2__
   for (i=0; i<block_length>>5; i++) {
-    c256 = _mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[0])[i]), shufmask),andmask),zero256),masks[0]);
+    c256 = simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[0])[i]), shufmask),andmask),zero256),masks[0]);
     for (j=1; j<impp->n_segments; j++) {
-      c256 = _mm256_or_si256(_mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j]),c256);
+      c256 = simde_mm256_or_si256(simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)test_input[j])[i]), shufmask),andmask),zero256),masks[j]),c256);
     }
     ((__m256i *)c)[i] = c256;
   }
@@ -138,9 +135,6 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
       c[i] |= (temp << j);
     }
   }
-#else
-  AssertFatal(1==0,"Need AVX2 for this\n");
-#endif
 #endif
 
   if(impp->tinput != NULL) stop_meas(impp->tinput);
@@ -166,7 +160,6 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
   memcpy(&channel_input[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
   memcpy(&channel_input[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
   */
-#ifdef __AVX2__
   if ((((2*Zc)&31) == 0) && (((block_length-(2*Zc))&31) == 0)) {
     //AssertFatal(((2*Zc)&31) == 0,"2*Zc needs to be a multiple of 32 for now\n");
     //AssertFatal(((block_length-(2*Zc))&31) == 0,"block_length-(2*Zc) needs to be a multiple of 32 for now\n");
@@ -177,12 +170,12 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
     //  if (((block_length-(2*Zc))&31)>0) l1++;
     
     for (i=0;i<l1;i++)
-      for (j=0;j<impp->n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j),masks[0]);
+      for (j=0;j<impp->n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j),masks[0]);
     
     //  if ((((nrows-no_punctured_columns) * Zc-removed_bit)&31)>0) l2++;
     
     for (i1=0;i1<l2;i1++,i++)
-      for (j=0;j<impp->n_segments;j++) ((__m256i *)channel_input[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j),masks[0]);
+      for (j=0;j<impp->n_segments;j++) ((__m256i *)channel_input[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j),masks[0]);
   }
   else {
 #ifdef DEBUG_LDPC
@@ -197,10 +190,6 @@ int nrLDPC_encod(unsigned char **test_input,unsigned char **channel_input,int Zc
 	channel_input[j][block_length-2*Zc+i] = (d[i]>>j)&1;
     }
 
-#else
-    AssertFatal(1==0,"Need AVX2 for now\n");
-#endif
-
   if(impp->toutput != NULL) stop_meas(impp->toutput);
   return 0;
 }
diff --git a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
index 9ef8cbfe658dc07234fd86f4e095cc9fc4ca6ba9..5df688bc5853021b7cae7176fa62bf09f5663146 100644
--- a/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
+++ b/openair1/PHY/CODING/nrLDPC_encoder/ldpc_encoder_optim8segmulti.c
@@ -37,6 +37,7 @@
 #include "common/utils/LOG/log.h"
 #include "time_meas.h"
 #include "openair1/PHY/CODING/nrLDPC_defs.h"
+#include "PHY/sse_intrin.h"
 
 #include "ldpc_encode_parity_check.c"
 #include "ldpc_generate_coefficient.c"
@@ -61,21 +62,19 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
   ///printf("macro_segment: %d\n", macro_segment);
   ///printf("macro_segment_end: %d\n", macro_segment_end );
 
-#ifdef __AVX2__
-  __m256i shufmask = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
-  __m256i andmask  = _mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
-  __m256i zero256   = _mm256_setzero_si256();
+  __m256i shufmask = simde_mm256_set_epi64x(0x0303030303030303, 0x0202020202020202,0x0101010101010101, 0x0000000000000000);
+  __m256i andmask  = simde_mm256_set1_epi64x(0x0102040810204080);  // every 8 bits -> 8 bytes, pattern repeats.
+  __m256i zero256   = simde_mm256_setzero_si256();
   __m256i masks[8];
   register __m256i c256;
-  masks[0] = _mm256_set1_epi8(0x1);
-  masks[1] = _mm256_set1_epi8(0x2);
-  masks[2] = _mm256_set1_epi8(0x4);
-  masks[3] = _mm256_set1_epi8(0x8);
-  masks[4] = _mm256_set1_epi8(0x10);
-  masks[5] = _mm256_set1_epi8(0x20);
-  masks[6] = _mm256_set1_epi8(0x40);
-  masks[7] = _mm256_set1_epi8(0x80);
-#endif
+  masks[0] = simde_mm256_set1_epi8(0x1);
+  masks[1] = simde_mm256_set1_epi8(0x2);
+  masks[2] = simde_mm256_set1_epi8(0x4);
+  masks[3] = simde_mm256_set1_epi8(0x8);
+  masks[4] = simde_mm256_set1_epi8(0x10);
+  masks[5] = simde_mm256_set1_epi8(0x20);
+  masks[6] = simde_mm256_set1_epi8(0x40);
+  masks[7] = simde_mm256_set1_epi8(0x80);
 
 
 
@@ -127,12 +126,11 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
     }
   }
 #else
-#ifdef __AVX2__
   for (int i=0; i<block_length>>5; i++) {
-    c256 = _mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)input[macro_segment])[i]), shufmask),andmask),zero256),masks[0]);
+    c256 = simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)input[macro_segment])[i]), shufmask),andmask),zero256),masks[0]);
     //for (j=1; j<n_segments; j++) {
     for (int j=macro_segment+1; j < macro_segment_end; j++) {    
-      c256 = _mm256_or_si256(_mm256_and_si256(_mm256_cmpeq_epi8(_mm256_andnot_si256(_mm256_shuffle_epi8(_mm256_set1_epi32(((uint32_t*)input[j])[i]), shufmask),andmask),zero256),masks[j-macro_segment]),c256);
+      c256 = simde_mm256_or_si256(simde_mm256_and_si256(simde_mm256_cmpeq_epi8(simde_mm256_andnot_si256(simde_mm256_shuffle_epi8(simde_mm256_set1_epi32(((uint32_t*)input[j])[i]), shufmask),andmask),zero256),masks[j-macro_segment]),c256);
     }
     ((__m256i *)cc)[i] = c256;
   }
@@ -146,9 +144,6 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
       cc[i] |= (temp << (j-macro_segment));
     }
   }
-#else
-  AssertFatal(1==0,"Need AVX2 for this\n");
-#endif
 #endif
 
   if(impp->tinput != NULL) stop_meas(impp->tinput);
@@ -174,7 +169,6 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
   memcpy(&output[0], &c[2*Zc], (block_length-2*Zc)*sizeof(unsigned char));
   memcpy(&output[block_length-2*Zc], &d[0], ((nrows-no_punctured_columns) * Zc-removed_bit)*sizeof(unsigned char));
   */
-#ifdef __AVX2__
   if ((((2*Zc)&31) == 0) && (((block_length-(2*Zc))&31) == 0)) {
     //AssertFatal(((2*Zc)&31) == 0,"2*Zc needs to be a multiple of 32 for now\n");
     //AssertFatal(((block_length-(2*Zc))&31) == 0,"block_length-(2*Zc) needs to be a multiple of 32 for now\n");
@@ -185,15 +179,15 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
     //  if (((block_length-(2*Zc))&31)>0) l1++;
 
     for (int i=0;i<l1;i++)
-      //for (j=0;j<n_segments;j++) ((__m256i *)output[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j),masks[0]);
-    	for (int j=macro_segment; j < macro_segment_end; j++) ((__m256i *)output[j])[i] = _mm256_and_si256(_mm256_srai_epi16(c256p[i],j-macro_segment),masks[0]);
+      //for (j=0;j<n_segments;j++) ((__m256i *)output[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j),masks[0]);
+    	for (int j=macro_segment; j < macro_segment_end; j++) ((__m256i *)output[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(c256p[i],j-macro_segment),masks[0]);
 
 
     //  if ((((nrows-no_punctured_columns) * Zc-removed_bit)&31)>0) l2++;
 
     for (int i1=0, i=l1;i1<l2;i1++,i++)
-      //for (j=0;j<n_segments;j++) ((__m256i *)output[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j),masks[0]);
-    	for (int j=macro_segment; j < macro_segment_end; j++)  ((__m256i *)output[j])[i] = _mm256_and_si256(_mm256_srai_epi16(d256p[i1],j-macro_segment),masks[0]);
+      //for (j=0;j<n_segments;j++) ((__m256i *)output[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j),masks[0]);
+    	for (int j=macro_segment; j < macro_segment_end; j++)  ((__m256i *)output[j])[i] = simde_mm256_and_si256(simde_mm256_srai_epi16(d256p[i1],j-macro_segment),masks[0]);
   }
   else {
 #ifdef DEBUG_LDPC
@@ -210,10 +204,6 @@ int nrLDPC_encod(unsigned char **input,unsigned char **output,int Zc,int Kb,shor
 	output[j][block_length-2*Zc+i] = (dd[i]>>(j-macro_segment))&1;
     }
 
-#else
-    AssertFatal(1==0,"Need AVX2 for now\n");
-#endif
-
   if(impp->toutput != NULL) stop_meas(impp->toutput);
   return 0;
 }
diff --git a/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c b/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
index b201db2ba62211209075bcccae40e0e166454659..662111fcb608fe8620b2ec96bb2f13ff2d52f1ff 100644
--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_decoding_tools.c
@@ -36,7 +36,6 @@
 
 //#define DEBUG_NEW_IMPL 1
 
-
 static inline void updateBit(uint8_t listSize,
 			     uint16_t row,
 			     uint16_t col,
@@ -262,7 +261,6 @@ void applyFtoleft(const t_nrPolar_params *pp, decoder_node_t *node) {
  
 
   if (node->left->all_frozen == 0) {
-#if defined(__AVX2__)
     int avx2mod = (node->Nv/2)&15;
     if (avx2mod == 0) {
       __m256i a256,b256,absa256,absb256,minabs256;
@@ -272,10 +270,10 @@ void applyFtoleft(const t_nrPolar_params *pp, decoder_node_t *node) {
       for (int i=0;i<avx2len;i++) {
 	a256       =((__m256i*)alpha_v)[i];
 	b256       =((__m256i*)alpha_v)[i+avx2len];
-	absa256    =_mm256_abs_epi16(a256);
-	absb256    =_mm256_abs_epi16(b256);
-	minabs256  =_mm256_min_epi16(absa256,absb256);
-	((__m256i*)alpha_l)[i] =_mm256_sign_epi16(minabs256,_mm256_sign_epi16(a256,b256));
+	absa256    =simde_mm256_abs_epi16(a256);
+	absb256    =simde_mm256_abs_epi16(b256);
+	minabs256  =simde_mm256_min_epi16(absa256,absb256);
+	((__m256i*)alpha_l)[i] =simde_mm256_sign_epi16(minabs256,simde_mm256_sign_epi16(a256,b256));
       }
     }
     else if (avx2mod == 8) {
@@ -297,52 +295,6 @@ void applyFtoleft(const t_nrPolar_params *pp, decoder_node_t *node) {
       *((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_pi16(a64,b64));
     }
     else
-#else
-    int sse4mod = (node->Nv/2)&7;
-    int sse4len = node->Nv/2/8;
-#if defined(__arm__) || defined(__aarch64__)
-    int16x8_t signatimesb,comp1,comp2,negminabs128;
-    int16x8_t zero=vdupq_n_s16(0);
-#endif
-
-    if (sse4mod == 0) {
-      for (int i=0;i<sse4len;i++) {
-	__m128i a128,b128,absa128,absb128,minabs128;
-	int sse4len = node->Nv/2/8;
-	
-	a128       =*((__m128i*)alpha_v);
-	b128       =((__m128i*)alpha_v)[1];
-	absa128    =_mm_abs_epi16(a128);
-	absb128    =_mm_abs_epi16(b128);
-	minabs128  =_mm_min_epi16(absa128,absb128);
-#if defined(__arm__) || defined(__aarch64__)
-	// unfortunately no direct equivalent to _mm_sign_epi16
-	signatimesb=vxorrq_s16(a128,b128);
-	comp1=vcltq_s16(signatimesb,zero);
-	comp2=vcgeq_s16(signatimesb,zero);
-	negminabs128=vnegq_s16(minabs128);
-	*((__m128i*)alpha_l) =vorrq_s16(vandq_s16(minabs128,comp0),vandq_s16(negminabs128,comp1));
-#else
-	*((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
-#endif
-      }
-    }
-    else if (sse4mod == 4) {
-      __m64 a64,b64,absa64,absb64,minabs64;
-      a64       =*((__m64*)alpha_v);
-      b64       =((__m64*)alpha_v)[1];
-      absa64    =_mm_abs_pi16(a64);
-      absb64    =_mm_abs_pi16(b64);
-      minabs64  =_mm_min_pi16(absa64,absb64);
-#if defined(__arm__) || defined(__aarch64__)
-	AssertFatal(1==0,"Need to do this still for ARM\n");
-#else
-      *((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_epi16(a64,b64));
-#endif
-    }
-
-    else
-#endif
     { // equivalent scalar code to above, activated only on non x86/ARM architectures
       for (int i=0;i<node->Nv/2;i++) {
     	  a=alpha_v[i];
@@ -381,15 +333,14 @@ void applyGtoright(const t_nrPolar_params *pp,decoder_node_t *node) {
 #endif
   
   if (node->right->all_frozen == 0) {  
-#if defined(__AVX2__) 
     int avx2mod = (node->Nv/2)&15;
     if (avx2mod == 0) {
       int avx2len = node->Nv/2/16;
       
       for (int i=0;i<avx2len;i++) {
 	((__m256i *)alpha_r)[i] = 
-	  _mm256_subs_epi16(((__m256i *)alpha_v)[i+avx2len],
-			    _mm256_sign_epi16(((__m256i *)alpha_v)[i],
+	  simde_mm256_subs_epi16(((__m256i *)alpha_v)[i+avx2len],
+			    simde_mm256_sign_epi16(((__m256i *)alpha_v)[i],
 					      ((__m256i *)betal)[i]));	
       }
     }
@@ -400,29 +351,6 @@ void applyGtoright(const t_nrPolar_params *pp,decoder_node_t *node) {
       ((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__m64 *)alpha_v)[0],((__m64 *)betal)[0]));	
     }
     else
-#else
-    int sse4mod = (node->Nv/2)&7;
-
-    if (sse4mod == 0) {
-      int sse4len = node->Nv/2/8;
-      
-      for (int i=0;i<sse4len;i++) {
-#if defined(__arm__) || defined(__aarch64__)
-	((int16x8_t *)alpha_r)[0] = vsubq_s16(((int16x8_t *)alpha_v)[1],vmulq_epi16(((int16x8_t *)alpha_v)[0],((int16x8_t *)betal)[0]));
-#else
-	((__m128i *)alpha_r)[0] = _mm_subs_epi16(((__m128i *)alpha_v)[1],_mm_sign_epi16(((__m128i *)alpha_v)[0],((__m128i *)betal)[0]));
-#endif	
-      }
-    }
-    else if (sse4mod == 4) {
-#if defined(__arm__) || defined(__aarch64__)
-      ((int16x4_t *)alpha_r)[0] = vsub_s16(((int16x4_t *)alpha_v)[1],vmul_epi16(((int16x4_t *)alpha_v)[0],((int16x4_t *)betal)[0]));
-#else
-      ((__m64 *)alpha_r)[0] = _mm_subs_pi16(((__m64 *)alpha_v)[1],_mm_sign_pi16(((__64 *)alpha_v)[0],((__m64 *)betal)[0]));	
-#endif
-    }
-    else 
-#endif
       {// equivalent scalar code to above, activated only on non x86/ARM architectures or Nv=1,2
 	for (int i=0;i<node->Nv/2;i++) {
 	  alpha_r[i] = alpha_v[i+(node->Nv/2)] - (betal[i]*alpha_v[i]);
@@ -449,13 +377,12 @@ void computeBeta(const t_nrPolar_params *pp,decoder_node_t *node) {
   printf("Computing beta @ level %d first_leaf_index %d (all_frozen %d)\n",node->level,node->first_leaf_index,node->left->all_frozen);
 #endif
   if (node->left->all_frozen==0) { // if left node is not aggregation of frozen bits
-#if defined(__AVX2__) 
     int avx2mod = (node->Nv/2)&15;
     register __m256i allones=*((__m256i*)all1);
     if (avx2mod == 0) {
       int avx2len = node->Nv/2/16;
       for (int i=0;i<avx2len;i++) {
-	((__m256i*)betav)[i] = _mm256_or_si256(_mm256_cmpeq_epi16(((__m256i*)betar)[i],
+	((__m256i*)betav)[i] = simde_mm256_or_si256(simde_mm256_cmpeq_epi16(((__m256i*)betar)[i],
 								  ((__m256i*)betal)[i]),allones);
       }
     }
@@ -468,21 +395,6 @@ void computeBeta(const t_nrPolar_params *pp,decoder_node_t *node) {
 						      ((__m64*)betal)[0]),*((__m64*)all1));
     }
     else
-#else
-    int avx2mod = (node->Nv/2)&15;
-
-    if (ssr4mod == 0) {
-      int ssr4len = node->Nv/2/8;
-      register __m128i allones=*((__m128i*)all1);
-      for (int i=0;i<sse4len;i++) {
-      ((__m256i*)betav)[i] = _mm_or_si128(_mm_cmpeq_epi16(((__m128i*)betar)[i], ((__m128i*)betal)[i]),allones);
-      }
-    }
-    else if (sse4mod == 4) {
-      ((__m64*)betav)[0] = _mm_or_si64(_mm_cmpeq_pi16(((__m64*)betar)[0], ((__m64*)betal)[0]),*((__m64*)all1));
-    }
-    else
-#endif
       {
 	for (int i=0;i<node->Nv/2;i++) {
 		betav[i] = (betal[i] != betar[i]) ? 1 : -1;
diff --git a/openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c b/openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
index 5985130eee2a982a628daf693c0fe83140f719be..c826a523df792308c2f821cb4dc0de733d8c40c7 100644
--- a/openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
+++ b/openair1/PHY/CODING/nrPolar_tools/nr_polar_kernal_operation.c
@@ -3,7 +3,7 @@
 #include <math.h>
 #include <stdint.h>
 
-#include <immintrin.h>
+#include "PHY/sse_intrin.h"
 
 void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
 {
@@ -11,8 +11,6 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
 	
 	uint32_t i,j;
 
-	#ifdef __AVX2__
-
 	__m256i A,B,C,D,E,U,zerosOnly, OUT;
 	__m256i inc;
 	uint32_t dTest[8];
@@ -23,33 +21,33 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
 	//initialisation
 	for(k=0; k<8; k++)
 		incArray[k]=k;
-	inc=_mm256_loadu_si256((__m256i const*)incArray); // 0, 1, ..., 7 to increase
+	inc=simde_mm256_loadu_si256((__m256i const*)incArray); // 0, 1, ..., 7 to increase
 	
-	zerosOnly=_mm256_setzero_si256(); // for comparison
+	zerosOnly=simde_mm256_setzero_si256(); // for comparison
 
 	for(i=0; i<N; i+=8)
         {
-		B=_mm256_set1_epi32((int)i); // i, ..., i
-		B=_mm256_add_epi32(B, inc); // i, i+1, ..., i+7
+		B=simde_mm256_set1_epi32((int)i); // i, ..., i
+		B=simde_mm256_add_epi32(B, inc); // i, i+1, ..., i+7
 		
-		OUT=_mm256_setzero_si256(); // it will contain the result of all the XORs for the d(i)s
+		OUT=simde_mm256_setzero_si256(); // it will contain the result of all the XORs for the d(i)s
                 
 		for(j=0; j<N; j++)
 		{
-			A=_mm256_set1_epi32((int)(j)); //j, j,  ..., j
-			A=_mm256_sub_epi32(A, B); //(j-i), (j-(i+1)), ... (j-(i+7))  
+			A=simde_mm256_set1_epi32((int)(j)); //j, j,  ..., j
+			A=simde_mm256_sub_epi32(A, B); //(j-i), (j-(i+1)), ... (j-(i+7))  
 			
-			U=_mm256_set1_epi32((int)u[j]);
-			_mm256_storeu_si256((__m256i*)uArray, U); //u(j) ... u(j) for the maskload
+			U=simde_mm256_set1_epi32((int)u[j]);
+			simde_mm256_storeu_si256((__m256i*)uArray, U); //u(j) ... u(j) for the maskload
 
-			C=_mm256_and_si256(A, B); //(j-i)&i -> If zero, then XOR with the u(j)
-			D=_mm256_cmpeq_epi32(C, zerosOnly); // compare with zero and use the result as mask
+			C=simde_mm256_and_si256(A, B); //(j-i)&i -> If zero, then XOR with the u(j)
+			D=simde_mm256_cmpeq_epi32(C, zerosOnly); // compare with zero and use the result as mask
 			
-			E=_mm256_maskload_epi32((int const*)uArray, D); // load only some u(j)s for the XOR
-			OUT=_mm256_xor_si256(OUT, E); //32 bit x 8
+			E=simde_mm256_maskload_epi32((int const*)uArray, D); // load only some u(j)s for the XOR
+			OUT=simde_mm256_xor_si256(OUT, E); //32 bit x 8
 
 		}
-		_mm256_storeu_si256((__m256i*)dTest, OUT);
+		simde_mm256_storeu_si256((__m256i*)dTest, OUT);
 
 		for(k=0; k<8; k++) // Conversion from 32 bits to 8 bits
                 {	
@@ -58,18 +56,4 @@ void nr_polar_kernal_operation(uint8_t *u, uint8_t *d, uint16_t N)
 
 	}
 
-	#else
-
-        for(i=0; i<N; i++) // Create the elements of d=u*G_N ...
-        {
-                d[i]=0;
-                for(j=0; j<N; j++) // ... looking at all the elements of u
-                {
-                        d[i]=d[i] ^ (!( (j-i)& i ))*u[j];
-                        // it's like ((j-i)&i)==0
-                }
-        }
-	
-	#endif
-
 }
diff --git a/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c b/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c
index 3dd570cc18212b685da17181ad565822e554cb3e..53b45894bd510b80e8978d2d821e456a8b8b9d78 100644
--- a/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c
+++ b/openair1/PHY/CODING/nrSmallBlock/decodeSmallBlock.c
@@ -53,19 +53,11 @@ uint16_t decodeSmallBlock(int8_t *in, uint8_t len){
 			for (int k = 0; k < NR_SMALL_BLOCK_CODED_BITS; ++k)
 				Rhat[j] += in[k] * hadamard32InterleavedTransposed[j][k];
 
-#if defined(__AVX2__)
 		for (int i = 0; i < NR_SMALL_BLOCK_CODED_BITS; i += 16) {
-			__m256i a15_a0 = _mm256_loadu_si256((__m256i*)&Rhat[i]);
-			a15_a0 = _mm256_abs_epi16(a15_a0);
-			_mm256_storeu_si256((__m256i*)(&Rhatabs[i]), a15_a0);
+			__m256i a15_a0 = simde_mm256_loadu_si256((__m256i*)&Rhat[i]);
+			a15_a0 = simde_mm256_abs_epi16(a15_a0);
+			simde_mm256_storeu_si256((__m256i*)(&Rhatabs[i]), a15_a0);
 		}
-#else
-		for (int i = 0; i < NR_SMALL_BLOCK_CODED_BITS; i += 8) {
-			__m128i a7_a0 = _mm_loadu_si128((__m128i*)&Rhat[i]);
-			a7_a0 = _mm_abs_epi16(a7_a0);
-			_mm_storeu_si128((__m128i*)(&Rhatabs[i]), a7_a0);
-		}
-#endif
 		maxVal = Rhatabs[0];
 		for (int k = 1; k < jmax; ++k){
 			if (Rhatabs[k] > maxVal){
@@ -85,27 +77,28 @@ uint16_t decodeSmallBlock(int8_t *in, uint8_t len){
 	} else {
 		uint8_t maxRow = 0, maxCol = 0;
 
-#if defined(__AVX2__)
         int16_t maxVal = 0;
 		int DmatrixElementVal = 0;
+#if !defined(__AVX512F__)
 		int8_t DmatrixElement[NR_SMALL_BLOCK_CODED_BITS] = {0};
-		__m256i _in_256 = _mm256_loadu_si256 ((__m256i*)&in[0]);
+#endif		
+		__m256i _in_256 = simde_mm256_loadu_si256 ((__m256i*)&in[0]);
 		__m256i _maskD_256, _Dmatrixj_256, _maskH_256, _DmatrixElement_256;
 		for (int j = 0; j < ( 1<<(len-6) ); ++j) {
-			_maskD_256 = _mm256_loadu_si256 ((__m256i*)(&maskD[j][0]));
-			_Dmatrixj_256 = _mm256_sign_epi8 (_in_256, _maskD_256);
+			_maskD_256 = simde_mm256_loadu_si256 ((__m256i*)(&maskD[j][0]));
+			_Dmatrixj_256 = simde_mm256_sign_epi8 (_in_256, _maskD_256);
 			for (int k = 0; k < NR_SMALL_BLOCK_CODED_BITS; ++k) {
-				_maskH_256 = _mm256_loadu_si256 ((__m256i*)(&hadamard32InterleavedTransposed[k][0]));
-				_DmatrixElement_256 = _mm256_sign_epi8 (_Dmatrixj_256, _maskH_256);
+				_maskH_256 = simde_mm256_loadu_si256 ((__m256i*)(&hadamard32InterleavedTransposed[k][0]));
+				_DmatrixElement_256 = simde_mm256_sign_epi8 (_Dmatrixj_256, _maskH_256);
 #if defined(__AVX512F__)
 			    DmatrixElementVal = _mm512_reduce_add_epi32 (
 			    		            _mm512_add_epi32(
-			    				    _mm512_cvtepi8_epi32 (_mm256_extracti128_si256 (_DmatrixElement_256, 1)),
-								    _mm512_cvtepi8_epi32 (_mm256_castsi256_si128 (_DmatrixElement_256))
+			    				    _mm512_cvtepi8_epi32 (simde_mm256_extracti128_si256 (_DmatrixElement_256, 1)),
+								    _mm512_cvtepi8_epi32 (simde_mm256_castsi256_si128 (_DmatrixElement_256))
 			    		            				)
 															);
 #else
-				_mm256_storeu_si256((__m256i*)(&DmatrixElement[0]), _DmatrixElement_256);
+				simde_mm256_storeu_si256((__m256i*)(&DmatrixElement[0]), _DmatrixElement_256);
 				for (int i = 0; i < NR_SMALL_BLOCK_CODED_BITS; ++i)
 					DmatrixElementVal += DmatrixElement[i];
 #endif
@@ -118,33 +111,6 @@ uint16_t decodeSmallBlock(int8_t *in, uint8_t len){
 			}
 		}
 		out = properOrderedBasisExtended[maxRow] | properOrderedBasis[maxCol] | ( (maxVal > 0) ? (uint16_t)0 : (uint16_t)1 );
-#else
-		int8_t Dmatrix[NR_SMALL_BLOCK_CODED_BITS][NR_SMALL_BLOCK_CODED_BITS] = {0};
-		int16_t DmatrixFHT[NR_SMALL_BLOCK_CODED_BITS][NR_SMALL_BLOCK_CODED_BITS] = {0};
-		uint16_t maxVal;
-		uint8_t rowLimit = 1<<(len-6);
-
-		for (int j = 0; j < ( rowLimit ); ++j)
-			for (int k = 0; k < NR_SMALL_BLOCK_CODED_BITS; ++k)
-				Dmatrix[j][k] = in[k] * maskD[j][k];
-
-		for (int i = 0; i < ( rowLimit ); ++i)
-			for (int j = 0; j < NR_SMALL_BLOCK_CODED_BITS; ++j)
-				for (int k = 0; k < NR_SMALL_BLOCK_CODED_BITS; ++k)
-					DmatrixFHT[i][j] += Dmatrix[i][k] * hadamard32InterleavedTransposed[j][k];
-
-		maxVal = abs(DmatrixFHT[0][0]);
-		for (int i = 0; i < ( rowLimit ); ++i)
-			for (int j = 0; j < NR_SMALL_BLOCK_CODED_BITS; ++j)
-				if (abs(DmatrixFHT[i][j]) > maxVal){
-					maxVal = abs(DmatrixFHT[i][j]);
-					maxRow = i;
-					maxCol = j;
-				}
-
-		out = properOrderedBasisExtended[maxRow] | properOrderedBasis[maxCol] | ( (DmatrixFHT[maxRow][maxCol] > 0) ? (uint16_t)0 : (uint16_t)1 );
-#endif
-
 #ifdef DEBUG_DECODESMALLBLOCK
 		for (int k = 0; k < NR_SMALL_BLOCK_CODED_BITS; ++k)
 					printf("[decodeSmallBlock]maxRow = %d maxCol = %d out[%d]=%d\n", maxRow, maxCol, k, ((uint32_t)out>>k)&1);
diff --git a/openair1/PHY/CODING/nr_rate_matching.c b/openair1/PHY/CODING/nr_rate_matching.c
index c99bd02fb27fdd9a5578c8a3ea48906b7f56b1d8..7d1a3ae05ffd9e6752547ff7c195cbfec40894f6 100644
--- a/openair1/PHY/CODING/nr_rate_matching.c
+++ b/openair1/PHY/CODING/nr_rate_matching.c
@@ -27,6 +27,7 @@
 #include "PHY/defs_gNB.h"
 #include "PHY/defs_nr_UE.h"
 #include "PHY/CODING/coding_defs.h"
+#include "PHY/sse_intrin.h"
 
 //#define RM_DEBUG 1
 
@@ -40,7 +41,7 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
   memset(f,0,E*sizeof(uint8_t));
   uint8_t *e0,*e1,*e2,*e3,*e4,*e5,*e6,*e7;
   uint8_t *fp;
-#if 0 //def __AVX2__
+#if 0 //def __WASAVX2__
   __m256i tmp0,tmp1,tmp2,tmp0b,tmp1b,tmp3,tmp4,tmp5;
   __m256i *e0_256,*e1_256,*e2_256,*e3_256,*e4_256,*e5_256,*e6_256,*e7_256;
 
@@ -54,8 +55,8 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
     e0_256=(__m256i *)e0;
     e1_256=(__m256i *)e1;
     for (int k=0,j=0;j<EQm>>5;j++,k+=2) {
-      f_256[k]   = _mm256_unpacklo_epi8(e0_256[j],e1_256[j]);
-      f_256[k+1] = _mm256_unpackhi_epi8(e0_256[j],e1_256[j]); 
+      f_256[k]   = simde_mm256_unpacklo_epi8(e0_256[j],e1_256[j]);
+      f_256[k+1] = simde_mm256_unpackhi_epi8(e0_256[j],e1_256[j]); 
     }
     break;
   case 4:
@@ -68,14 +69,14 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
     e2_256=(__m256i *)e2;
     e3_256=(__m256i *)e3;
     for (int k=0,j=0;j<EQm>>5;j++,k+=4) {
-      tmp0   = _mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
-      tmp1   = _mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
-      f_256[k]   = _mm256_unpacklo_epi8(tmp0,tmp1);   // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
-      f_256[k+1] = _mm256_unpackhi_epi8(tmp0,tmp1);   // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
-      tmp0   = _mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
-      tmp1   = _mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
-      f_256[k+2] = _mm256_unpacklo_epi8(tmp0,tmp1);
-      f_256[k+3] = _mm256_unpackhi_epi8(tmp0,tmp1); 
+      tmp0   = simde_mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
+      tmp1   = simde_mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
+      f_256[k]   = simde_mm256_unpacklo_epi8(tmp0,tmp1);   // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
+      f_256[k+1] = simde_mm256_unpackhi_epi8(tmp0,tmp1);   // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
+      tmp0   = simde_mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
+      tmp1   = simde_mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
+      f_256[k+2] = simde_mm256_unpacklo_epi8(tmp0,tmp1);
+      f_256[k+3] = simde_mm256_unpackhi_epi8(tmp0,tmp1); 
     }
     break;
   case 6:
@@ -96,83 +97,83 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
       fp  = f+k;
       fp2 = fp+96;
 
-      tmp0   = _mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
-      tmp1   = _mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
-      tmp0b  = _mm256_unpacklo_epi16(tmp0,tmp1); // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
-      tmp1b  = _mm256_unpackhi_epi16(tmp0,tmp1); // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
-      tmp0   = _mm256_unpacklo_epi8(e4_256[j],e5_256[j]); // e4(i) e5(i) e4(i+1) e5(i+1) .... e4(i+15) e5(i+15)
-      *((uint32_t*)fp)      = _mm256_extract_epi32(tmp0b,0);
-      *((uint16_t*)(fp+4))  = _mm256_extract_epi16(tmp0,0);
-      *((uint32_t*)(fp+6))  = _mm256_extract_epi32(tmp0b,1);
-      *((uint16_t*)(fp+10)) = _mm256_extract_epi16(tmp0,1);
-      *((uint32_t*)(fp+12)) = _mm256_extract_epi32(tmp0b,2);
-      *((uint16_t*)(fp+16)) = _mm256_extract_epi16(tmp0,2);
-      *((uint32_t*)(fp+18)) = _mm256_extract_epi32(tmp0b,3);
-      *((uint16_t*)(fp+22)) = _mm256_extract_epi16(tmp0,3);
-      *((uint32_t*)(fp+24)) = _mm256_extract_epi32(tmp0b,4);
-      *((uint16_t*)(fp+26)) = _mm256_extract_epi16(tmp0,4);
-      *((uint32_t*)(fp+30)) = _mm256_extract_epi32(tmp0b,5);
-      *((uint16_t*)(fp+34)) = _mm256_extract_epi16(tmp0,5);
-      *((uint32_t*)(fp+36)) = _mm256_extract_epi32(tmp0,6);
-      *((uint16_t*)(fp+40)) = _mm256_extract_epi16(tmp0,6);
-      *((uint32_t*)(fp+42)) = _mm256_extract_epi32(tmp0b,7);
-      *((uint16_t*)(fp+46)) = _mm256_extract_epi16(tmp0,7);
-
-      *((uint32_t*)(fp+48)) = _mm256_extract_epi32(tmp1b,0);
-      *((uint16_t*)(fp+52)) = _mm256_extract_epi16(tmp0,8);
-      *((uint32_t*)(fp+56)) = _mm256_extract_epi32(tmp1b,1);
-      *((uint16_t*)(fp+60)) = _mm256_extract_epi16(tmp0,9);
-      *((uint32_t*)(fp+62)) = _mm256_extract_epi32(tmp1b,2);
-      *((uint16_t*)(fp+66)) = _mm256_extract_epi16(tmp0,10);
-      *((uint32_t*)(fp+68)) = _mm256_extract_epi32(tmp1b,3);
-      *((uint16_t*)(fp+72)) = _mm256_extract_epi16(tmp0,11);
-      *((uint32_t*)(fp+74)) = _mm256_extract_epi32(tmp1b,4);
-      *((uint16_t*)(fp+76)) = _mm256_extract_epi16(tmp0,12);
-      *((uint32_t*)(fp+80)) = _mm256_extract_epi32(tmp1b,5);
-      *((uint16_t*)(fp+82)) = _mm256_extract_epi16(tmp0,13);
-      *((uint32_t*)(fp+86)) = _mm256_extract_epi32(tmp1b,6);
-      *((uint16_t*)(fp+90)) = _mm256_extract_epi16(tmp0,14);
-      *((uint32_t*)(fp+92)) = _mm256_extract_epi32(tmp1b,7);
-      *((uint16_t*)(fp+94)) = _mm256_extract_epi16(tmp0,15);
-
-      tmp0   = _mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
-      tmp1   = _mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
-      tmp0b  = _mm256_unpacklo_epi16(tmp0,tmp1); // e0(i+16) e1(i+16) e2(i+16) e3(i+16) ... e0(i+23) e1(i+23) e2(i+23) e3(i+23)
-      tmp1b  = _mm256_unpackhi_epi16(tmp0,tmp1); // e0(i+24) e1(i+24) e2(i+24) e3(i+24) ... e0(i+31) e1(i+31) e2(i+31) e3(i+31)
-      tmp0   = _mm256_unpackhi_epi8(e4_256[j],e5_256[j]); // e4(i+16) e5(i+16) e4(i+17) e5(i+17) .... e4(i+31) e5(i+31)
-      *((uint32_t*)fp2)      = _mm256_extract_epi32(tmp0b,0);
-      *((uint16_t*)(fp2+4))  = _mm256_extract_epi16(tmp0,0);
-      *((uint32_t*)(fp2+6))  = _mm256_extract_epi32(tmp0b,1);
-      *((uint16_t*)(fp2+10)) = _mm256_extract_epi16(tmp0,1);
-      *((uint32_t*)(fp2+12)) = _mm256_extract_epi32(tmp0b,2);
-      *((uint16_t*)(fp2+16)) = _mm256_extract_epi16(tmp0,2);
-      *((uint32_t*)(fp2+18)) = _mm256_extract_epi32(tmp0b,3);
-      *((uint16_t*)(fp2+22)) = _mm256_extract_epi16(tmp0,3);
-      *((uint32_t*)(fp2+24)) = _mm256_extract_epi32(tmp0b,4);
-      *((uint16_t*)(fp2+26)) = _mm256_extract_epi16(tmp0,4);
-      *((uint32_t*)(fp2+30)) = _mm256_extract_epi32(tmp0b,5);
-      *((uint16_t*)(fp2+34)) = _mm256_extract_epi16(tmp0,5);
-      *((uint32_t*)(fp2+36)) = _mm256_extract_epi32(tmp0,6);
-      *((uint16_t*)(fp2+40)) = _mm256_extract_epi16(tmp0,6);
-      *((uint32_t*)(fp2+42)) = _mm256_extract_epi32(tmp0b,7);
-      *((uint16_t*)(fp2+46)) = _mm256_extract_epi16(tmp0,7);
-
-      *((uint32_t*)(fp2+48)) = _mm256_extract_epi32(tmp1b,0);
-      *((uint16_t*)(fp2+52)) = _mm256_extract_epi16(tmp0,8);
-      *((uint32_t*)(fp2+56)) = _mm256_extract_epi32(tmp1b,1);
-      *((uint16_t*)(fp2+60)) = _mm256_extract_epi16(tmp0,9);
-      *((uint32_t*)(fp2+62)) = _mm256_extract_epi32(tmp1b,2);
-      *((uint16_t*)(fp2+66)) = _mm256_extract_epi16(tmp0,10);
-      *((uint32_t*)(fp2+68)) = _mm256_extract_epi32(tmp1b,3);
-      *((uint16_t*)(fp2+72)) = _mm256_extract_epi16(tmp0,11);
-      *((uint32_t*)(fp2+74)) = _mm256_extract_epi32(tmp1b,4);
-      *((uint16_t*)(fp2+76)) = _mm256_extract_epi16(tmp0,12);
-      *((uint32_t*)(fp2+80)) = _mm256_extract_epi32(tmp1b,5);
-      *((uint16_t*)(fp2+82)) = _mm256_extract_epi16(tmp0,13);
-      *((uint32_t*)(fp2+86)) = _mm256_extract_epi32(tmp1b,6);
-      *((uint16_t*)(fp2+90)) = _mm256_extract_epi16(tmp0,14);
-      *((uint32_t*)(fp2+92)) = _mm256_extract_epi32(tmp1b,7);
-      *((uint16_t*)(fp2+94)) = _mm256_extract_epi16(tmp0,15);
+      tmp0   = simde_mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
+      tmp1   = simde_mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
+      tmp0b  = simde_mm256_unpacklo_epi16(tmp0,tmp1); // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
+      tmp1b  = simde_mm256_unpackhi_epi16(tmp0,tmp1); // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
+      tmp0   = simde_mm256_unpacklo_epi8(e4_256[j],e5_256[j]); // e4(i) e5(i) e4(i+1) e5(i+1) .... e4(i+15) e5(i+15)
+      *((uint32_t*)fp)      = simde_mm256_extract_epi32(tmp0b,0);
+      *((uint16_t*)(fp+4))  = simde_mm256_extract_epi16(tmp0,0);
+      *((uint32_t*)(fp+6))  = simde_mm256_extract_epi32(tmp0b,1);
+      *((uint16_t*)(fp+10)) = simde_mm256_extract_epi16(tmp0,1);
+      *((uint32_t*)(fp+12)) = simde_mm256_extract_epi32(tmp0b,2);
+      *((uint16_t*)(fp+16)) = simde_mm256_extract_epi16(tmp0,2);
+      *((uint32_t*)(fp+18)) = simde_mm256_extract_epi32(tmp0b,3);
+      *((uint16_t*)(fp+22)) = simde_mm256_extract_epi16(tmp0,3);
+      *((uint32_t*)(fp+24)) = simde_mm256_extract_epi32(tmp0b,4);
+      *((uint16_t*)(fp+26)) = simde_mm256_extract_epi16(tmp0,4);
+      *((uint32_t*)(fp+30)) = simde_mm256_extract_epi32(tmp0b,5);
+      *((uint16_t*)(fp+34)) = simde_mm256_extract_epi16(tmp0,5);
+      *((uint32_t*)(fp+36)) = simde_mm256_extract_epi32(tmp0,6);
+      *((uint16_t*)(fp+40)) = simde_mm256_extract_epi16(tmp0,6);
+      *((uint32_t*)(fp+42)) = simde_mm256_extract_epi32(tmp0b,7);
+      *((uint16_t*)(fp+46)) = simde_mm256_extract_epi16(tmp0,7);
+
+      *((uint32_t*)(fp+48)) = simde_mm256_extract_epi32(tmp1b,0);
+      *((uint16_t*)(fp+52)) = simde_mm256_extract_epi16(tmp0,8);
+      *((uint32_t*)(fp+56)) = simde_mm256_extract_epi32(tmp1b,1);
+      *((uint16_t*)(fp+60)) = simde_mm256_extract_epi16(tmp0,9);
+      *((uint32_t*)(fp+62)) = simde_mm256_extract_epi32(tmp1b,2);
+      *((uint16_t*)(fp+66)) = simde_mm256_extract_epi16(tmp0,10);
+      *((uint32_t*)(fp+68)) = simde_mm256_extract_epi32(tmp1b,3);
+      *((uint16_t*)(fp+72)) = simde_mm256_extract_epi16(tmp0,11);
+      *((uint32_t*)(fp+74)) = simde_mm256_extract_epi32(tmp1b,4);
+      *((uint16_t*)(fp+76)) = simde_mm256_extract_epi16(tmp0,12);
+      *((uint32_t*)(fp+80)) = simde_mm256_extract_epi32(tmp1b,5);
+      *((uint16_t*)(fp+82)) = simde_mm256_extract_epi16(tmp0,13);
+      *((uint32_t*)(fp+86)) = simde_mm256_extract_epi32(tmp1b,6);
+      *((uint16_t*)(fp+90)) = simde_mm256_extract_epi16(tmp0,14);
+      *((uint32_t*)(fp+92)) = simde_mm256_extract_epi32(tmp1b,7);
+      *((uint16_t*)(fp+94)) = simde_mm256_extract_epi16(tmp0,15);
+
+      tmp0   = simde_mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
+      tmp1   = simde_mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
+      tmp0b  = simde_mm256_unpacklo_epi16(tmp0,tmp1); // e0(i+16) e1(i+16) e2(i+16) e3(i+16) ... e0(i+23) e1(i+23) e2(i+23) e3(i+23)
+      tmp1b  = simde_mm256_unpackhi_epi16(tmp0,tmp1); // e0(i+24) e1(i+24) e2(i+24) e3(i+24) ... e0(i+31) e1(i+31) e2(i+31) e3(i+31)
+      tmp0   = simde_mm256_unpackhi_epi8(e4_256[j],e5_256[j]); // e4(i+16) e5(i+16) e4(i+17) e5(i+17) .... e4(i+31) e5(i+31)
+      *((uint32_t*)fp2)      = simde_mm256_extract_epi32(tmp0b,0);
+      *((uint16_t*)(fp2+4))  = simde_mm256_extract_epi16(tmp0,0);
+      *((uint32_t*)(fp2+6))  = simde_mm256_extract_epi32(tmp0b,1);
+      *((uint16_t*)(fp2+10)) = simde_mm256_extract_epi16(tmp0,1);
+      *((uint32_t*)(fp2+12)) = simde_mm256_extract_epi32(tmp0b,2);
+      *((uint16_t*)(fp2+16)) = simde_mm256_extract_epi16(tmp0,2);
+      *((uint32_t*)(fp2+18)) = simde_mm256_extract_epi32(tmp0b,3);
+      *((uint16_t*)(fp2+22)) = simde_mm256_extract_epi16(tmp0,3);
+      *((uint32_t*)(fp2+24)) = simde_mm256_extract_epi32(tmp0b,4);
+      *((uint16_t*)(fp2+26)) = simde_mm256_extract_epi16(tmp0,4);
+      *((uint32_t*)(fp2+30)) = simde_mm256_extract_epi32(tmp0b,5);
+      *((uint16_t*)(fp2+34)) = simde_mm256_extract_epi16(tmp0,5);
+      *((uint32_t*)(fp2+36)) = simde_mm256_extract_epi32(tmp0,6);
+      *((uint16_t*)(fp2+40)) = simde_mm256_extract_epi16(tmp0,6);
+      *((uint32_t*)(fp2+42)) = simde_mm256_extract_epi32(tmp0b,7);
+      *((uint16_t*)(fp2+46)) = simde_mm256_extract_epi16(tmp0,7);
+
+      *((uint32_t*)(fp2+48)) = simde_mm256_extract_epi32(tmp1b,0);
+      *((uint16_t*)(fp2+52)) = simde_mm256_extract_epi16(tmp0,8);
+      *((uint32_t*)(fp2+56)) = simde_mm256_extract_epi32(tmp1b,1);
+      *((uint16_t*)(fp2+60)) = simde_mm256_extract_epi16(tmp0,9);
+      *((uint32_t*)(fp2+62)) = simde_mm256_extract_epi32(tmp1b,2);
+      *((uint16_t*)(fp2+66)) = simde_mm256_extract_epi16(tmp0,10);
+      *((uint32_t*)(fp2+68)) = simde_mm256_extract_epi32(tmp1b,3);
+      *((uint16_t*)(fp2+72)) = simde_mm256_extract_epi16(tmp0,11);
+      *((uint32_t*)(fp2+74)) = simde_mm256_extract_epi32(tmp1b,4);
+      *((uint16_t*)(fp2+76)) = simde_mm256_extract_epi16(tmp0,12);
+      *((uint32_t*)(fp2+80)) = simde_mm256_extract_epi32(tmp1b,5);
+      *((uint16_t*)(fp2+82)) = simde_mm256_extract_epi16(tmp0,13);
+      *((uint32_t*)(fp2+86)) = simde_mm256_extract_epi32(tmp1b,6);
+      *((uint16_t*)(fp2+90)) = simde_mm256_extract_epi16(tmp0,14);
+      *((uint32_t*)(fp2+92)) = simde_mm256_extract_epi32(tmp1b,7);
+      *((uint16_t*)(fp2+94)) = simde_mm256_extract_epi16(tmp0,15);
     }
     break;
   case 8:
@@ -194,33 +195,33 @@ void nr_interleaving_ldpc(uint32_t E, uint8_t Qm, uint8_t *e,uint8_t *f)
     e6_256=(__m256i *)e6;
     e7_256=(__m256i *)e7;
     for (int k=0,j=0;j<EQm>>5;j++,k+=8) {
-      tmp0   = _mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
-      tmp1   = _mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
-      tmp2   = _mm256_unpacklo_epi8(e4_256[j],e5_256[j]); // e4(i) e5(i) e4(i+1) e5(i+1) .... e4(i+15) e5(i+15)
-      tmp3   = _mm256_unpacklo_epi8(e6_256[j],e7_256[j]); // e6(i) e7(i) e6(i+1) e7(i+1) .... e6(i+15) e7(i+15)
-      tmp4   = _mm256_unpacklo_epi16(tmp0,tmp1);  // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
-      tmp5   = _mm256_unpacklo_epi16(tmp2,tmp3);  // e4(i) e5(i) e6(i) e7(i) ... e4(i+7) e5(i+7) e6(i+7) e7(i+7)
-      f_256[k]   = _mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i) e1(i) e2(i) e3(i) e4(i) e5(i) e6(i) e7(i)... e0(i+3) e1(i+3) e2(i+3) e3(i+3) e4(i+3) e5(i+3) e6(i+3) e7(i+3))
-      f_256[k+1] = _mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+4) e1(i+4) e2(i+4) e3(i+4) e4(i+4) e5(i+4) e6(i+4) e7(i+4)... e0(i+7) e1(i+7) e2(i+7) e3(i+7) e4(i+7) e5(i+7) e6(i+7) e7(i+7))
-
-      tmp4   = _mm256_unpackhi_epi16(tmp0,tmp1);  // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
-      tmp5   = _mm256_unpackhi_epi16(tmp2,tmp3);  // e4(i+8) e5(i+8) e6(i+8) e7(i+8) ... e4(i+15) e5(i+15) e6(i+15) e7(i+15)
-      f_256[k+2]   = _mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+8) e1(i+8) e2(i+8) e3(i+8) e4(i+8) e5(i+8) e6(i+8) e7(i+8)... e0(i+11) e1(i+11) e2(i+11) e3(i+11) e4(i+11) e5(i+11) e6(i+11) e7(i+11))
-      f_256[k+3] = _mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+12) e1(i+12) e2(i+12) e3(i+12) e4(i+12) e5(i+12) e6(i+12) e7(i+12)... e0(i+15) e1(i+15) e2(i+15) e3(i+15) e4(i+15) e5(i+15) e6(i+15) e7(i+15))
-
-      tmp0   = _mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
-      tmp1   = _mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
-      tmp2   = _mm256_unpackhi_epi8(e4_256[j],e5_256[j]); // e4(i+16) e5(i+16) e4(i+17) e5(i+17) .... e4(i+31) e5(i+31)
-      tmp3   = _mm256_unpackhi_epi8(e6_256[j],e7_256[j]); // e6(i+16) e7(i+16) e6(i+17) e7(i+17) .... e6(i+31) e7(i+31)
-      tmp4   = _mm256_unpacklo_epi16(tmp0,tmp1);  // e0(i+!6) e1(i+16) e2(i+16) e3(i+16) ... e0(i+23) e1(i+23) e2(i+23) e3(i+23)
-      tmp5   = _mm256_unpacklo_epi16(tmp2,tmp3);  // e4(i+16) e5(i+16) e6(i+16) e7(i+16) ... e4(i+23) e5(i+23) e6(i+23) e7(i+23)
-      f_256[k+4] = _mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+16) e1(i+16) e2(i+16) e3(i+16) e4(i+16) e5(i+16) e6(i+16) e7(i+16)... e0(i+19) e1(i+19) e2(i+19) e3(i+19) e4(i+19) e5(i+19) e6(i+19) e7(i+19))
-      f_256[k+5] = _mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+20) e1(i+20) e2(i+20) e3(i+20) e4(i+20) e5(i+20) e6(i+20) e7(i+20)... e0(i+23) e1(i+23) e2(i+23) e3(i+23) e4(i+23) e5(i+23) e6(i+23) e7(i+23))
-
-      tmp4   = _mm256_unpackhi_epi16(tmp0,tmp1);  // e0(i+24) e1(i+24) e2(i+24) e3(i+24) ... e0(i+31) e1(i+31) e2(i+31) e3(i+31)
-      tmp5   = _mm256_unpackhi_epi16(tmp2,tmp3);  // e4(i+24) e5(i+24) e6(i+24) e7(i+24) ... e4(i+31) e5(i+31) e6(i+31) e7(i+31)
-      f_256[k+6] = _mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+24) e1(i+24) e2(i+24) e3(i+24) e4(i+24) e5(i+24) e6(i+24) e7(i+24)... e0(i+27) e1(i+27) e2(i+27) e3(i+27) e4(i+27) e5(i+27) e6(i+27) e7(i+27))
-      f_256[k+7] = _mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+28) e1(i+28) e2(i+28) e3(i+28) e4(i+28) e5(i+28) e6(i+28) e7(i+28)... e0(i+31) e1(i+31) e2(i+31) e3(i+31) e4(i+31) e5(i+31) e6(i+31) e7(i+31))
+      tmp0   = simde_mm256_unpacklo_epi8(e0_256[j],e1_256[j]); // e0(i) e1(i) e0(i+1) e1(i+1) .... e0(i+15) e1(i+15)
+      tmp1   = simde_mm256_unpacklo_epi8(e2_256[j],e3_256[j]); // e2(i) e3(i) e2(i+1) e3(i+1) .... e2(i+15) e3(i+15)
+      tmp2   = simde_mm256_unpacklo_epi8(e4_256[j],e5_256[j]); // e4(i) e5(i) e4(i+1) e5(i+1) .... e4(i+15) e5(i+15)
+      tmp3   = simde_mm256_unpacklo_epi8(e6_256[j],e7_256[j]); // e6(i) e7(i) e6(i+1) e7(i+1) .... e6(i+15) e7(i+15)
+      tmp4   = simde_mm256_unpacklo_epi16(tmp0,tmp1);  // e0(i) e1(i) e2(i) e3(i) ... e0(i+7) e1(i+7) e2(i+7) e3(i+7)
+      tmp5   = simde_mm256_unpacklo_epi16(tmp2,tmp3);  // e4(i) e5(i) e6(i) e7(i) ... e4(i+7) e5(i+7) e6(i+7) e7(i+7)
+      f_256[k]   = simde_mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i) e1(i) e2(i) e3(i) e4(i) e5(i) e6(i) e7(i)... e0(i+3) e1(i+3) e2(i+3) e3(i+3) e4(i+3) e5(i+3) e6(i+3) e7(i+3))
+      f_256[k+1] = simde_mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+4) e1(i+4) e2(i+4) e3(i+4) e4(i+4) e5(i+4) e6(i+4) e7(i+4)... e0(i+7) e1(i+7) e2(i+7) e3(i+7) e4(i+7) e5(i+7) e6(i+7) e7(i+7))
+
+      tmp4   = simde_mm256_unpackhi_epi16(tmp0,tmp1);  // e0(i+8) e1(i+8) e2(i+8) e3(i+8) ... e0(i+15) e1(i+15) e2(i+15) e3(i+15)
+      tmp5   = simde_mm256_unpackhi_epi16(tmp2,tmp3);  // e4(i+8) e5(i+8) e6(i+8) e7(i+8) ... e4(i+15) e5(i+15) e6(i+15) e7(i+15)
+      f_256[k+2]   = simde_mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+8) e1(i+8) e2(i+8) e3(i+8) e4(i+8) e5(i+8) e6(i+8) e7(i+8)... e0(i+11) e1(i+11) e2(i+11) e3(i+11) e4(i+11) e5(i+11) e6(i+11) e7(i+11))
+      f_256[k+3] = simde_mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+12) e1(i+12) e2(i+12) e3(i+12) e4(i+12) e5(i+12) e6(i+12) e7(i+12)... e0(i+15) e1(i+15) e2(i+15) e3(i+15) e4(i+15) e5(i+15) e6(i+15) e7(i+15))
+
+      tmp0   = simde_mm256_unpackhi_epi8(e0_256[j],e1_256[j]); // e0(i+16) e1(i+16) e0(i+17) e1(i+17) .... e0(i+31) e1(i+31)
+      tmp1   = simde_mm256_unpackhi_epi8(e2_256[j],e3_256[j]); // e2(i+16) e3(i+16) e2(i+17) e3(i+17) .... e2(i+31) e3(i+31)
+      tmp2   = simde_mm256_unpackhi_epi8(e4_256[j],e5_256[j]); // e4(i+16) e5(i+16) e4(i+17) e5(i+17) .... e4(i+31) e5(i+31)
+      tmp3   = simde_mm256_unpackhi_epi8(e6_256[j],e7_256[j]); // e6(i+16) e7(i+16) e6(i+17) e7(i+17) .... e6(i+31) e7(i+31)
+      tmp4   = simde_mm256_unpacklo_epi16(tmp0,tmp1);  // e0(i+16) e1(i+16) e2(i+16) e3(i+16) ... e0(i+23) e1(i+23) e2(i+23) e3(i+23)
+      tmp5   = simde_mm256_unpacklo_epi16(tmp2,tmp3);  // e4(i+16) e5(i+16) e6(i+16) e7(i+16) ... e4(i+23) e5(i+23) e6(i+23) e7(i+23)
+      f_256[k+4] = simde_mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+16) e1(i+16) e2(i+16) e3(i+16) e4(i+16) e5(i+16) e6(i+16) e7(i+16)... e0(i+19) e1(i+19) e2(i+19) e3(i+19) e4(i+19) e5(i+19) e6(i+19) e7(i+19))
+      f_256[k+5] = simde_mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+20) e1(i+20) e2(i+20) e3(i+20) e4(i+20) e5(i+20) e6(i+20) e7(i+20)... e0(i+23) e1(i+23) e2(i+23) e3(i+23) e4(i+23) e5(i+23) e6(i+23) e7(i+23))
+
+      tmp4   = simde_mm256_unpackhi_epi16(tmp0,tmp1);  // e0(i+24) e1(i+24) e2(i+24) e3(i+24) ... e0(i+31) e1(i+31) e2(i+31) e3(i+31)
+      tmp5   = simde_mm256_unpackhi_epi16(tmp2,tmp3);  // e4(i+24) e5(i+24) e6(i+24) e7(i+24) ... e4(i+31) e5(i+31) e6(i+31) e7(i+31)
+      f_256[k+6] = simde_mm256_unpacklo_epi16(tmp4,tmp5);  // e0(i+24) e1(i+24) e2(i+24) e3(i+24) e4(i+24) e5(i+24) e6(i+24) e7(i+24)... e0(i+27) e1(i+27) e2(i+27) e3(i+27) e4(i+27) e5(i+27) e6(i+27) e7(i+27))
+      f_256[k+7] = simde_mm256_unpackhi_epi16(tmp4,tmp5);  // e0(i+28) e1(i+28) e2(i+28) e3(i+28) e4(i+28) e5(i+28) e6(i+28) e7(i+28)... e0(i+31) e1(i+31) e2(i+31) e3(i+31) e4(i+31) e5(i+31) e6(i+31) e7(i+31))
     }
     break;
   default: AssertFatal(1==0,"Should be here!\n");
@@ -386,6 +387,39 @@ void nr_deinterleaving_ldpc(uint32_t E, uint8_t Qm, int16_t *e,int16_t *f)
 
 }
 
+int nr_get_R_ldpc_decoder(int rvidx,
+                          int E,
+                          int BG,
+                          int Z,
+                          int *llrLen,
+                          int round) {
+  // Returns one of 13, 15, 23 or 89, picked from the ratio of systematic bits to covered bits; updates *llrLen.
+  AssertFatal(BG == 1 || BG == 2, "Unknown BG %d\n", BG);
+  const int isBG1 = (BG == 1);
+
+  // Covered bits: the index_k0[BG-1][rvidx] * Z offset plus E, capped below at 66*Z (BG1) or 50*Z (BG2).
+  const int circBufLen = isBG1 ? 66 * Z : 50 * Z;
+  int coveredBits = index_k0[BG - 1][rvidx] * Z + E;
+  if (round == 0)
+    *llrLen = coveredBits; // round 0 stores the value before it is capped
+  if (coveredBits > circBufLen)
+    coveredBits = circBufLen;
+  if (coveredBits > *llrLen)
+    *llrLen = coveredBits; // otherwise *llrLen can only grow
+
+  const int systematicBits = isBG1 ? 22 * Z : 10 * Z;
+  const float rate = (float)systematicBits / (coveredBits + 2 * Z);
+
+  if (isBG1) {
+    if (rate < 0.6667)
+      return 13;
+    return (rate < 0.8889) ? 23 : 89;
+  }
+  // BG == 2
+  if (rate < 0.3333)
+    return 15;
+  return (rate < 0.6667) ? 13 : 23;
+}
 
 int nr_rate_matching_ldpc(uint32_t Tbslbrm,
                           uint8_t BG,
diff --git a/openair1/PHY/CODING/viterbi.c b/openair1/PHY/CODING/viterbi.c
index f44e2ebf8f203336cb8389d63cd00a77c5363c6e..6d900d452dfdf47d5280887dad0ae53f1ec340a5 100644
--- a/openair1/PHY/CODING/viterbi.c
+++ b/openair1/PHY/CODING/viterbi.c
@@ -193,7 +193,7 @@ void phy_viterbi_dot11_sse2(char *y,unsigned char *decoded_bytes,unsigned short
 
   __m128i *m0_ptr,*m1_ptr,*TB_ptr = &TB[offset<<2];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint8x16x2_t TB[2*4095*8];  // 2 int8x16_t per input bit, 8 bits / byte, 4095 is largest packet size in bytes
 
   uint8x16_t even0_30a,even0_30b,even32_62a,even32_62b,odd1_31a,odd1_31b,odd33_63a,odd33_63b,TBeven0_30,TBeven32_62,TBodd1_31,TBodd33_63;
@@ -224,7 +224,7 @@ void phy_viterbi_dot11_sse2(char *y,unsigned char *decoded_bytes,unsigned short
     metrics48_63 = _mm_setzero_si128();
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   if (offset == 0) {
     // set initial metrics
 
@@ -318,7 +318,7 @@ void phy_viterbi_dot11_sse2(char *y,unsigned char *decoded_bytes,unsigned short
     metrics16_31 = _mm_subs_epu8(metrics16_31,min_state);
     metrics32_47 = _mm_subs_epu8(metrics32_47,min_state);
     metrics48_63 = _mm_subs_epu8(metrics48_63,min_state);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m0_ptr = (uint8x16_t *)&m0_table[table_offset];
     m1_ptr = (uint8x16_t *)&m1_table[table_offset];
 
diff --git a/openair1/PHY/CODING/viterbi_lte.c b/openair1/PHY/CODING/viterbi_lte.c
index 35c9a0d501d5216e9964f868b93d1204895820db..bebb5417bfba9bf62e5a69ecf6715442cc642c2c 100644
--- a/openair1/PHY/CODING/viterbi_lte.c
+++ b/openair1/PHY/CODING/viterbi_lte.c
@@ -136,7 +136,7 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n)
   
   __m128i min_state,min_state2;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint8x16x2_t TB[2*8192];  // 2 int8x16_t per input bit, 8 bits / byte, 8192 is largest packet size in bits
 
   uint8x16_t even0_30a,even0_30b,even32_62a,even32_62b,odd1_31a,odd1_31b,odd33_63a,odd33_63b,TBeven0_30,TBeven32_62,TBodd1_31,TBodd33_63;
@@ -165,7 +165,7 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n)
   metrics16_31 = _mm_setzero_si128();
   metrics32_47 = _mm_setzero_si128();
   metrics48_63 = _mm_setzero_si128();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     metrics0_31.val[0]  = vdupq_n_u8(0); 
     metrics0_31.val[1]  = vdupq_n_u8(0);
     metrics32_63.val[0] = vdupq_n_u8(0);
@@ -259,7 +259,7 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n)
       metrics16_31 = _mm_subs_epu8(metrics16_31,min_state);
       metrics32_47 = _mm_subs_epu8(metrics32_47,min_state);
       metrics48_63 = _mm_subs_epu8(metrics48_63,min_state);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     m0_ptr = (uint8x16_t *)&m0_table[table_offset];
     m1_ptr = (uint8x16_t *)&m1_table[table_offset];
 
@@ -353,7 +353,7 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n)
     }
 
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   for (s=0; s<16; s++)
     if (((uint8_t *)&metrics0_31.val[0])[s] > maxm) {
       maxm = ((uint8_t *)&metrics0_31.val[0])[s];
diff --git a/openair1/PHY/INIT/nr_init.c b/openair1/PHY/INIT/nr_init.c
index 55599b7cc383ac040b65adb2b1a7829f9d2d3fa7..9aa909f278fbd68fd348a41af6bd4f50c0e585d1 100644
--- a/openair1/PHY/INIT/nr_init.c
+++ b/openair1/PHY/INIT/nr_init.c
@@ -615,6 +615,9 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
   gNB->nr_csi_info->csi_gold_init = cfg->cell_config.phy_cell_id.value;
   nr_init_csi_rs(&gNB->frame_parms, gNB->nr_csi_info->nr_gold_csi_rs, cfg->cell_config.phy_cell_id.value);
 
+  //PRS init
+  nr_init_prs(gNB);
+
   for (int id=0; id<NUMBER_OF_NR_SRS_MAX; id++) {
     gNB->nr_srs_info[id] = (nr_srs_info_t *)malloc16_clear(sizeof(nr_srs_info_t));
     gNB->nr_srs_info[id]->srs_generated_signal = (int32_t**)malloc16_clear(MAX_NUM_NR_SRS_AP*sizeof(int32_t*));
@@ -661,11 +664,7 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
   int n_buf = Prx*max_ul_mimo_layers;
 
   int nb_re_pusch = N_RB_UL * NR_NB_SC_PER_RB;
-#ifdef __AVX2__
   int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
-#else
-  int nb_re_pusch2 = nb_re_pusch;
-#endif
 
   for (int ULSCH_id=0; ULSCH_id<gNB->number_of_nr_ulsch_max; ULSCH_id++) {
     pusch_vars[ULSCH_id] = (NR_gNB_PUSCH *)malloc16_clear( sizeof(NR_gNB_PUSCH) );
@@ -793,6 +792,17 @@ void phy_free_nr_gNB(PHY_VARS_gNB *gNB)
     free_and_zero(common_vars->beam_id[i]);
   }
 
+  for (int rsc=0; rsc < gNB->prs_vars.NumPRSResources; rsc++) {
+    for (int slot=0; slot<fp->slots_per_frame; slot++) {
+      for (int symb=0; symb<fp->symbols_per_slot; symb++) {
+        free_and_zero(gNB->nr_gold_prs[rsc][slot][symb]);
+      }
+      free_and_zero(gNB->nr_gold_prs[rsc][slot]);
+    }
+    free_and_zero(gNB->nr_gold_prs[rsc]);
+  }
+  free_and_zero(gNB->nr_gold_prs);
+
   /* Do NOT free per-antenna txdataF/rxdataF: the gNB gets a pointer to the
    * RU's txdataF/rxdataF, and the RU will free that */
   free_and_zero(common_vars->txdataF);
diff --git a/openair1/PHY/INIT/nr_init_ue.c b/openair1/PHY/INIT/nr_init_ue.c
index 64bbed571c82ebc0f90dfb1c1aedefb60eba9514..77502dfc12740dee046e15636c6cf18cd7d82fc1 100644
--- a/openair1/PHY/INIT/nr_init_ue.c
+++ b/openair1/PHY/INIT/nr_init_ue.c
@@ -33,6 +33,7 @@
 #include "PHY/NR_REFSIG/refsig_defs_ue.h"
 #include "PHY/NR_REFSIG/nr_refsig.h"
 #include "PHY/MODULATION/nr_modulation.h"
+#include "openair2/COMMON/prs_nr_paramdef.h"
 
 
 extern uint16_t beta_cqi[16];
@@ -135,6 +136,164 @@ void phy_term_nr_ue__PDSCH(NR_UE_PDSCH* pdsch, const NR_DL_FRAME_PARMS *const fp
   free_and_zero(pdsch->ptrs_re_per_slot);
 }
 
+// Appends "<val>, " to buf at offset *len without writing past buf[size-1]; *len saturates at size-1 on truncation.
+static void prs_append_int(char *buf, size_t size, int16_t *len, int val)
+{
+  if (*len < 0 || (size_t)*len + 1 >= size) return; // buffer already full
+  const int w = snprintf(buf + *len, size - (size_t)*len, "%d, ", val);
+  if (w > 0) *len = ((size_t)*len + (size_t)w < size) ? (int16_t)(*len + w) : (int16_t)(size - 1);
+}
+
+// Loads the PRS config lists into ue->prs_active_gNBs and ue->prs_vars[] (cfg is cast to PHY_VARS_NR_UE *) and logs them.
+void RCconfig_nrUE_prs(void *cfg)
+{
+  int j = 0, k = 0, gNB_id = 0;
+  char aprefix[MAX_OPTNAME_SIZE*2 + 8];
+  char str[7][100] = {'\0'}; int16_t n[7] = {0};
+  PHY_VARS_NR_UE *ue  = (PHY_VARS_NR_UE *)cfg;
+  prs_config_t *prs_config = NULL;
+
+  paramlist_def_t gParamList = {CONFIG_STRING_PRS_LIST,NULL,0};
+  paramdef_t gParams[] = PRS_GLOBAL_PARAMS_DESC;
+  config_getlist( &gParamList,gParams,sizeof(gParams)/sizeof(paramdef_t), NULL);
+  if (gParamList.numelt > 0) {
+    ue->prs_active_gNBs = *(gParamList.paramarray[j][PRS_ACTIVE_GNBS_IDX].uptr);
+  } else {
+    LOG_E(PHY,"%s configuration NOT found..!! Skipped configuring UE for the PRS reception\n", CONFIG_STRING_PRS_CONFIG);
+  }
+  // ue->prs_vars[] is only populated for indices below NR_MAX_PRS_COMB_SIZE (see init_nr_prs_ue_vars)
+  AssertFatal(ue->prs_active_gNBs <= NR_MAX_PRS_COMB_SIZE, "prs_active_gNBs %d exceeds the supported maximum %d\n", (int)ue->prs_active_gNBs, (int)NR_MAX_PRS_COMB_SIZE);
+
+  paramlist_def_t PRS_ParamList = {{0},NULL,0};
+  for(int i = 0; i < ue->prs_active_gNBs; i++) {
+    paramdef_t PRS_Params[] = PRS_PARAMS_DESC;
+    snprintf(PRS_ParamList.listname, sizeof(PRS_ParamList.listname), "%s%i", CONFIG_STRING_PRS_CONFIG, i);
+    snprintf(aprefix, sizeof(aprefix), "%s.[%i]", CONFIG_STRING_PRS_LIST, 0);
+    config_getlist( &PRS_ParamList,PRS_Params,sizeof(PRS_Params)/sizeof(paramdef_t), aprefix);
+    if (PRS_ParamList.numelt > 0) {
+      for (j = 0; j < PRS_ParamList.numelt; j++) {
+        gNB_id = *(PRS_ParamList.paramarray[j][PRS_GNB_ID].uptr);
+        if(gNB_id != i)  gNB_id = i; // force gNB_id to avoid mismatch
+        memset(n,0,sizeof(n));
+        memset(str,0,sizeof(str)); // never log a previous entry's lists when this one has no resources
+        ue->prs_vars[gNB_id]->NumPRSResources = *(PRS_ParamList.paramarray[j][NUM_PRS_RESOURCES].uptr);
+        AssertFatal(ue->prs_vars[gNB_id]->NumPRSResources <= NR_MAX_PRS_RESOURCES_PER_SET, "NumPRSResources %d exceeds the supported maximum %d for gNB_id %d\n", (int)ue->prs_vars[gNB_id]->NumPRSResources, (int)NR_MAX_PRS_RESOURCES_PER_SET, gNB_id);
+        for (k = 0; k < ue->prs_vars[gNB_id]->NumPRSResources; k++) {
+          prs_config = &ue->prs_vars[gNB_id]->prs_resource[k].prs_cfg;
+          prs_config->PRSResourceSetPeriod[0]  = PRS_ParamList.paramarray[j][PRS_RESOURCE_SET_PERIOD_LIST].uptr[0];
+          prs_config->PRSResourceSetPeriod[1]  = PRS_ParamList.paramarray[j][PRS_RESOURCE_SET_PERIOD_LIST].uptr[1];
+          // per PRS resources parameters
+          prs_config->SymbolStart              = PRS_ParamList.paramarray[j][PRS_SYMBOL_START_LIST].uptr[k];
+          prs_config->NumPRSSymbols            = PRS_ParamList.paramarray[j][PRS_NUM_SYMBOLS_LIST].uptr[k];
+          prs_config->REOffset                 = PRS_ParamList.paramarray[j][PRS_RE_OFFSET_LIST].uptr[k];
+          prs_config->NPRSID                   = PRS_ParamList.paramarray[j][PRS_ID_LIST].uptr[k];
+          prs_config->PRSResourceOffset        = PRS_ParamList.paramarray[j][PRS_RESOURCE_OFFSET_LIST].uptr[k];
+          // Common parameters to all PRS resources
+          prs_config->NumRB                    = *(PRS_ParamList.paramarray[j][PRS_NUM_RB].uptr);
+          prs_config->RBOffset                 = *(PRS_ParamList.paramarray[j][PRS_RB_OFFSET].uptr);
+          prs_config->CombSize                 = *(PRS_ParamList.paramarray[j][PRS_COMB_SIZE].uptr);
+          prs_config->PRSResourceRepetition    = *(PRS_ParamList.paramarray[j][PRS_RESOURCE_REPETITION].uptr);
+          prs_config->PRSResourceTimeGap       = *(PRS_ParamList.paramarray[j][PRS_RESOURCE_TIME_GAP].uptr);
+          prs_config->MutingBitRepetition      = *(PRS_ParamList.paramarray[j][PRS_MUTING_BIT_REPETITION].uptr);
+          for (int l = 0; l < PRS_ParamList.paramarray[j][PRS_MUTING_PATTERN1_LIST].numelt; l++) {
+            prs_config->MutingPattern1[l]      = PRS_ParamList.paramarray[j][PRS_MUTING_PATTERN1_LIST].uptr[l];
+            if (k == 0) // print only for 0th resource
+              prs_append_int(str[5], sizeof(str[5]), &n[5], prs_config->MutingPattern1[l]);
+          }
+          for (int l = 0; l < PRS_ParamList.paramarray[j][PRS_MUTING_PATTERN2_LIST].numelt; l++) {
+            prs_config->MutingPattern2[l]      = PRS_ParamList.paramarray[j][PRS_MUTING_PATTERN2_LIST].uptr[l];
+            if (k == 0) // print only for 0th resource
+              prs_append_int(str[6], sizeof(str[6]), &n[6], prs_config->MutingPattern2[l]);
+          }
+
+          // print to buffer (bounded: each row of str holds at most 99 characters)
+          prs_append_int(str[0], sizeof(str[0]), &n[0], prs_config->SymbolStart);
+          prs_append_int(str[1], sizeof(str[1]), &n[1], prs_config->NumPRSSymbols);
+          prs_append_int(str[2], sizeof(str[2]), &n[2], prs_config->REOffset);
+          prs_append_int(str[3], sizeof(str[3]), &n[3], prs_config->PRSResourceOffset);
+          prs_append_int(str[4], sizeof(str[4]), &n[4], prs_config->NPRSID);
+        } // for k
+
+        prs_config = &ue->prs_vars[gNB_id]->prs_resource[0].prs_cfg;
+        LOG_I(PHY, "-----------------------------------------\n");
+        LOG_I(PHY, "PRS Config for gNB_id %d @ %p\n", gNB_id, prs_config);
+        LOG_I(PHY, "-----------------------------------------\n");
+        LOG_I(PHY, "NumPRSResources \t%d\n", ue->prs_vars[gNB_id]->NumPRSResources);
+        LOG_I(PHY, "PRSResourceSetPeriod \t[%d, %d]\n", prs_config->PRSResourceSetPeriod[0], prs_config->PRSResourceSetPeriod[1]);
+        LOG_I(PHY, "NumRB \t\t\t%d\n", prs_config->NumRB);
+        LOG_I(PHY, "RBOffset \t\t%d\n", prs_config->RBOffset);
+        LOG_I(PHY, "CombSize \t\t%d\n", prs_config->CombSize);
+        LOG_I(PHY, "PRSResourceRepetition \t%d\n", prs_config->PRSResourceRepetition);
+        LOG_I(PHY, "PRSResourceTimeGap \t%d\n", prs_config->PRSResourceTimeGap);
+        LOG_I(PHY, "MutingBitRepetition \t%d\n", prs_config->MutingBitRepetition);
+        LOG_I(PHY, "SymbolStart \t\t[%s\b\b]\n", str[0]);
+        LOG_I(PHY, "NumPRSSymbols \t\t[%s\b\b]\n", str[1]);
+        LOG_I(PHY, "REOffset \t\t[%s\b\b]\n", str[2]);
+        LOG_I(PHY, "PRSResourceOffset \t[%s\b\b]\n", str[3]);
+        LOG_I(PHY, "NPRS_ID \t\t[%s\b\b]\n", str[4]);
+        LOG_I(PHY, "MutingPattern1 \t\t[%s\b\b]\n", str[5]);
+        LOG_I(PHY, "MutingPattern2 \t\t[%s\b\b]\n", str[6]);
+        LOG_I(PHY, "-----------------------------------------\n");
+      }
+    } else {
+      LOG_E(PHY,"No %s configuration found..!!\n", PRS_ParamList.listname);
+    }
+  }
+}
+
+// Allocates the UE PRS state: one NR_UE_PRS per index below NR_MAX_PRS_COMB_SIZE with per-RX-antenna measurement
+// structs, then loads the PRS configuration, allocates ue->nr_gold_prs[gnb][rsc][slot][symb] and calls init_nr_gold_prs().
+void init_nr_prs_ue_vars(PHY_VARS_NR_UE *ue)
+{
+  NR_UE_PRS   **const prs_vars = ue->prs_vars;
+  NR_DL_FRAME_PARMS *const fp  = &ue->frame_parms;
+
+  // PRS vars init
+  for(int idx = 0; idx < NR_MAX_PRS_COMB_SIZE; idx++) {
+    prs_vars[idx]   = (NR_UE_PRS *)malloc16_clear(sizeof(NR_UE_PRS));
+    AssertFatal((prs_vars[idx]!=NULL), "%s: PRS vars malloc failed for gNB_id %d\n", __FUNCTION__, idx);
+
+    // PRS measurements: one pointer per RX antenna for every resource slot of this gNB index
+    for(int k = 0; k < NR_MAX_PRS_RESOURCES_PER_SET; k++) {
+      prs_vars[idx]->prs_resource[k].prs_meas = (prs_meas_t **)malloc16_clear( fp->nb_antennas_rx*sizeof(prs_meas_t *) );
+      AssertFatal((prs_vars[idx]->prs_resource[k].prs_meas!=NULL), "%s: PRS measurements malloc failed for gNB_id %d\n", __FUNCTION__, idx);
+      for (int j=0; j<fp->nb_antennas_rx; j++) {
+        prs_vars[idx]->prs_resource[k].prs_meas[j] = (prs_meas_t *)malloc16_clear(sizeof(prs_meas_t) );
+        AssertFatal((prs_vars[idx]->prs_resource[k].prs_meas[j]!=NULL), "%s: PRS measurements malloc failed for gNB_id %d, rx_ant %d\n", __FUNCTION__, idx, j);
+      }
+    }
+  }
+
+  // load the config file params (must precede the sequence allocation, which is sized by the configured counts)
+  RCconfig_nrUE_prs(ue);
+
+  //PRS sequence init: dimensions are [active gNB][PRS resource][slot][symbol][NR_MAX_PRS_INIT_LENGTH_DWORD words]
+  ue->nr_gold_prs = (uint32_t *****)malloc16(ue->prs_active_gNBs*sizeof(uint32_t ****));
+  uint32_t *****prs = ue->nr_gold_prs;
+  AssertFatal(prs!=NULL, "%s: positioning reference signal malloc failed\n", __FUNCTION__);
+  for (int gnb = 0; gnb < ue->prs_active_gNBs; gnb++) {
+    prs[gnb] = (uint32_t ****)malloc16(ue->prs_vars[gnb]->NumPRSResources*sizeof(uint32_t ***));
+    AssertFatal(prs[gnb]!=NULL, "%s: positioning reference signal for gnb %d - malloc failed\n", __FUNCTION__, gnb);
+
+    for (int rsc = 0; rsc < ue->prs_vars[gnb]->NumPRSResources; rsc++) {
+      prs[gnb][rsc] = (uint32_t ***)malloc16(fp->slots_per_frame*sizeof(uint32_t **));
+      AssertFatal(prs[gnb][rsc]!=NULL, "%s: positioning reference signal for gnb %d rsc %d- malloc failed\n", __FUNCTION__, gnb, rsc);
+
+      for (int slot=0; slot<fp->slots_per_frame; slot++) {
+        prs[gnb][rsc][slot] = (uint32_t **)malloc16(fp->symbols_per_slot*sizeof(uint32_t *));
+        AssertFatal(prs[gnb][rsc][slot]!=NULL, "%s: positioning reference signal for gnb %d rsc %d slot %d - malloc failed\n", __FUNCTION__, gnb, rsc, slot);
+
+        for (int symb=0; symb<fp->symbols_per_slot; symb++) {
+          prs[gnb][rsc][slot][symb] = (uint32_t *)malloc16(NR_MAX_PRS_INIT_LENGTH_DWORD*sizeof(uint32_t));
+          AssertFatal(prs[gnb][rsc][slot][symb]!=NULL, "%s: positioning reference signal for gnb %d rsc %d slot %d symbol %d - malloc failed\n", __FUNCTION__, gnb, rsc, slot, symb);
+        } // for symb
+      } // for slot
+    } // for rsc
+  } // for gnb
+
+  init_nr_gold_prs(ue);
+}
+
 int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
 {
   // create shortcuts
@@ -146,7 +305,7 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
   NR_UE_CSI_RS **const csirs_vars        = ue->csirs_vars;
   NR_UE_SRS **const srs_vars             = ue->srs_vars;
 
-  int i, slot, symb, gNB_id, th_id;
+  int i, slot, symb, gNB_id;
 
   LOG_I(PHY, "Initializing UE vars for gNB TXant %u, UE RXant %u\n", fp->nb_antennas_tx, fp->nb_antennas_rx);
 
@@ -180,15 +339,13 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
   // init NR modulation lookup tables
   nr_generate_modulation_table();
 
-  /////////////////////////PUCCH init/////////////////////////
   ///////////
-  for (th_id = 0; th_id < RX_NB_TH_MAX; th_id++) {
-    for (gNB_id = 0; gNB_id < ue->n_connected_gNB; gNB_id++) {
-      ue->pucch_vars[th_id][gNB_id] = (NR_UE_PUCCH *)malloc16(sizeof(NR_UE_PUCCH));
-      for (i=0; i<2; i++)
-        ue->pucch_vars[th_id][gNB_id]->active[i] = false;
-    }
-  }
+  ////////////////////////////////////////////////////////////////////////////////////////////
+
+  ///////////////////////// PRS init /////////////////////////
+  ///////////
+
+  init_nr_prs_ue_vars(ue);
 
   ///////////
   ////////////////////////////////////////////////////////////////////////////////////////////
@@ -248,17 +405,11 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
 
   // init RX buffers
   common_vars->rxdata   = (int32_t **)malloc16( fp->nb_antennas_rx*sizeof(int32_t *) );
-
-  for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-    common_vars->common_vars_rx_data_per_thread[th_id].rxdataF  = (int32_t **)malloc16( fp->nb_antennas_rx*sizeof(int32_t *) );
-  }
+  common_vars->rxdataF  = (int32_t **)malloc16( fp->nb_antennas_rx*sizeof(int32_t *) );
 
   for (i=0; i<fp->nb_antennas_rx; i++) {
     common_vars->rxdata[i] = (int32_t *) malloc16_clear( (2*(fp->samples_per_frame)+2048)*sizeof(int32_t) );
-
-    for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-      common_vars->common_vars_rx_data_per_thread[th_id].rxdataF[i] = (int32_t *)malloc16_clear( sizeof(int32_t)*(fp->samples_per_slot_wCP) );
-    }
+    common_vars->rxdataF[i] = (int32_t *)malloc16_clear( sizeof(int32_t)*(fp->samples_per_slot_wCP) );
   }
 
   // ceil(((NB_RB<<1)*3)/32) // 3 RE *2(QPSK)
@@ -302,29 +453,19 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
 
   // DLSCH
   for (gNB_id = 0; gNB_id < ue->n_connected_gNB+1; gNB_id++) {
-    for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-      ue->pdsch_vars[th_id][gNB_id] = (NR_UE_PDSCH *)malloc16_clear(sizeof(NR_UE_PDSCH));
-    }
-    for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-      phy_init_nr_ue_PDSCH( ue->pdsch_vars[th_id][gNB_id], fp );
-    }
+    ue->pdsch_vars[gNB_id] = (NR_UE_PDSCH *)malloc16_clear(sizeof(NR_UE_PDSCH));
+    phy_init_nr_ue_PDSCH( ue->pdsch_vars[gNB_id], fp );
 
     int nb_codewords = NR_MAX_NB_LAYERS > 4 ? 2 : 1;
-    for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-      for (i=0; i<nb_codewords; i++) {
-        ue->pdsch_vars[th_id][gNB_id]->llr[i] = (int16_t *)malloc16_clear( (8*(3*8*8448))*sizeof(int16_t) );//Q_m = 8 bits/Sym, Code_Rate=3, Number of Segments =8, Circular Buffer K_cb = 8448
-      }
-      for (i=0; i<NR_MAX_NB_LAYERS; i++) {
-        ue->pdsch_vars[th_id][gNB_id]->layer_llr[i] = (int16_t *)malloc16_clear( (8*(3*8*8448))*sizeof(int16_t) );//Q_m = 8 bits/Sym, Code_Rate=3, Number of Segments =8, Circular Buffer K_cb = 8448
-      }
+    for (i=0; i<nb_codewords; i++) {
+      ue->pdsch_vars[gNB_id]->llr[i] = (int16_t *)malloc16_clear( (8*(3*8*8448))*sizeof(int16_t) );//Q_m = 8 bits/Sym, Code_Rate=3, Number of Segments =8, Circular Buffer K_cb = 8448
+    }
+    for (i=0; i<NR_MAX_NB_LAYERS; i++) {
+      ue->pdsch_vars[gNB_id]->layer_llr[i] = (int16_t *)malloc16_clear( (8*(3*8*8448))*sizeof(int16_t) );//Q_m = 8 bits/Sym, Code_Rate=3, Number of Segments =8, Circular Buffer K_cb = 8448
     }
   }
 
   for (gNB_id = 0; gNB_id < ue->n_connected_gNB; gNB_id++) {
-    for (th_id=0; th_id<RX_NB_TH_MAX; th_id++) {
-      ue->pdcch_vars[th_id][gNB_id] = (NR_UE_PDCCH *)malloc16_clear(sizeof(NR_UE_PDCCH));
-    }
-
     prach_vars[gNB_id] = (NR_UE_PRACH *)malloc16_clear(sizeof(NR_UE_PRACH));
     pbch_vars[gNB_id] = (NR_UE_PBCH *)malloc16_clear(sizeof(NR_UE_PBCH));
     csiim_vars[gNB_id] = (NR_UE_CSI_IM *)malloc16_clear(sizeof(NR_UE_CSI_IM));
@@ -355,10 +496,6 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
 
     ue->nr_srs_info = (nr_srs_info_t *)malloc16_clear(sizeof(nr_srs_info_t));
 
-    // RACH
-    prach_vars[gNB_id]->prachF             = (int16_t *)malloc16_clear( sizeof(int)*(7*2*sizeof(int)*(fp->ofdm_symbol_size*12)) );
-    prach_vars[gNB_id]->prach              = (int16_t *)malloc16_clear( sizeof(int)*(7*2*sizeof(int)*(fp->ofdm_symbol_size*12)) );
-
   }
 
   ue->sinr_CQI_dB = (double *) malloc16_clear( fp->N_RB_DL*12*sizeof(double) );
@@ -386,12 +523,6 @@ void term_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
   const NR_DL_FRAME_PARMS* fp = &ue->frame_parms;
   phy_term_nr_top();
 
-  for (int th_id = 0; th_id < RX_NB_TH_MAX; th_id++) {
-    for (int gNB_id = 0; gNB_id < ue->n_connected_gNB; gNB_id++) {
-      free_and_zero(ue->pucch_vars[th_id][gNB_id]);
-    }
-  }
-
   for (int slot = 0; slot < fp->slots_per_frame; slot++) {
     for (int symb = 0; symb < fp->symbols_per_slot; symb++) {
       for (int q=0; q<NR_NB_NSCID; q++)
@@ -414,12 +545,9 @@ void term_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
 
   for (int i = 0; i < fp->nb_antennas_rx; i++) {
     free_and_zero(common_vars->rxdata[i]);
-    for (int th_id = 0; th_id < RX_NB_TH_MAX; th_id++)
-      free_and_zero(common_vars->common_vars_rx_data_per_thread[th_id].rxdataF[i]);
-  }
-  for (int th_id = 0; th_id < RX_NB_TH_MAX; th_id++) {
-    free_and_zero(common_vars->common_vars_rx_data_per_thread[th_id].rxdataF);
+    free_and_zero(common_vars->rxdataF[i]);
   }
+  free_and_zero(common_vars->rxdataF);
   free_and_zero(common_vars->rxdata);
 
   for (int slot = 0; slot < fp->slots_per_frame; slot++) {
@@ -442,20 +570,14 @@ void term_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
   for (int gNB_id = 0; gNB_id < ue->n_connected_gNB+1; gNB_id++) {
 
     // PDSCH
-    for (int th_id = 0; th_id < RX_NB_TH_MAX; th_id++) {
-      free_and_zero(ue->pdsch_vars[th_id][gNB_id]->llr_shifts);
-      free_and_zero(ue->pdsch_vars[th_id][gNB_id]->llr128_2ndstream);
-      phy_term_nr_ue__PDSCH(ue->pdsch_vars[th_id][gNB_id], fp);
-      free_and_zero(ue->pdsch_vars[th_id][gNB_id]);
-    }
+    free_and_zero(ue->pdsch_vars[gNB_id]->llr_shifts);
+    free_and_zero(ue->pdsch_vars[gNB_id]->llr128_2ndstream);
+    phy_term_nr_ue__PDSCH(ue->pdsch_vars[gNB_id], fp);
+    free_and_zero(ue->pdsch_vars[gNB_id]);
   }
 
   for (int gNB_id = 0; gNB_id < ue->n_connected_gNB; gNB_id++) {
 
-    for (int th_id = 0; th_id < RX_NB_TH_MAX; th_id++) {
-      free_and_zero(ue->pdcch_vars[th_id][gNB_id]);
-    }
-
     for (int i=0; i<NR_MAX_NB_PORTS; i++) {
       free_and_zero(ue->nr_csi_info->csi_rs_generated_signal[i]);
     }
@@ -476,12 +598,40 @@ void term_nr_ue_signal(PHY_VARS_NR_UE *ue, int nb_connected_gNB)
     free_and_zero(ue->srs_vars[gNB_id]);
 
     free_and_zero(ue->pbch_vars[gNB_id]);
-
-    free_and_zero(ue->prach_vars[gNB_id]->prachF);
-    free_and_zero(ue->prach_vars[gNB_id]->prach);
     free_and_zero(ue->prach_vars[gNB_id]);
   }
 
+  for (int gnb = 0; gnb < ue->prs_active_gNBs; gnb++)
+  {
+    for (int rsc = 0; rsc < ue->prs_vars[gnb]->NumPRSResources; rsc++)
+    {
+      for (int slot=0; slot<fp->slots_per_frame; slot++)
+      {
+        for (int symb=0; symb<fp->symbols_per_slot; symb++)
+        {
+          free_and_zero(ue->nr_gold_prs[gnb][rsc][slot][symb]);
+        }
+        free_and_zero(ue->nr_gold_prs[gnb][rsc][slot]);
+      }
+      free_and_zero(ue->nr_gold_prs[gnb][rsc]);
+    }
+    free_and_zero(ue->nr_gold_prs[gnb]);
+  }
+  free_and_zero(ue->nr_gold_prs);
+
+  for(int idx = 0; idx < NR_MAX_PRS_COMB_SIZE; idx++)
+  {
+    for(int k = 0; k < NR_MAX_PRS_RESOURCES_PER_SET; k++)
+    {
+      for (int j=0; j<fp->nb_antennas_rx; j++)
+      {
+        free_and_zero(ue->prs_vars[idx]->prs_resource[k].prs_meas[j]);
+      }
+      free_and_zero(ue->prs_vars[idx]->prs_resource[k].prs_meas);
+    }
+
+    free_and_zero(ue->prs_vars[idx]);
+  }
   free_and_zero(ue->sinr_CQI_dB);
 }
 
@@ -490,11 +640,9 @@ void term_nr_ue_transport(PHY_VARS_NR_UE *ue)
   const int N_RB_DL = ue->frame_parms.N_RB_DL;
   for (int i = 0; i < NUMBER_OF_CONNECTED_gNB_MAX; i++) {
     for (int j = 0; j < 2; j++) {
-      for (int k = 0; k < RX_NB_TH_MAX; k++) {
-        free_nr_ue_dlsch(&ue->dlsch[k][i][j], N_RB_DL);
-        if (j==0)
-          free_nr_ue_ulsch(&ue->ulsch[k][i], N_RB_DL, &ue->frame_parms);
-      }
+      free_nr_ue_dlsch(&ue->dlsch[i][j], N_RB_DL);
+      if (j==0)
+        free_nr_ue_ulsch(&ue->ulsch[i], N_RB_DL, &ue->frame_parms);
     }
 
     free_nr_ue_dlsch(&ue->dlsch_SI[i], N_RB_DL);
@@ -510,13 +658,11 @@ void init_nr_ue_transport(PHY_VARS_NR_UE *ue) {
 
   for (int i = 0; i < NUMBER_OF_CONNECTED_gNB_MAX; i++) {
     for (int j=0; j<num_codeword; j++) {
-      for (int k=0; k<RX_NB_TH_MAX; k++) {
-        AssertFatal((ue->dlsch[k][i][j]  = new_nr_ue_dlsch(1,NR_MAX_DLSCH_HARQ_PROCESSES,NSOFT,ue->max_ldpc_iterations,ue->frame_parms.N_RB_DL))!=NULL,"Can't get ue dlsch structures\n");
-        LOG_D(PHY,"dlsch[%d][%d][%d] => %p\n",k,i,j,ue->dlsch[k][i][j]);
-        if (j==0) {
-          AssertFatal((ue->ulsch[k][i] = new_nr_ue_ulsch(ue->frame_parms.N_RB_UL, NR_MAX_ULSCH_HARQ_PROCESSES,&ue->frame_parms))!=NULL,"Can't get ue ulsch structures\n");
-          LOG_D(PHY,"ulsch[%d][%d] => %p\n",k,i,ue->ulsch[k][i]);
-        }
+      AssertFatal((ue->dlsch[i][j]  = new_nr_ue_dlsch(1,NR_MAX_DLSCH_HARQ_PROCESSES,NSOFT,ue->max_ldpc_iterations,ue->frame_parms.N_RB_DL))!=NULL,"Can't get ue dlsch structures\n");
+      LOG_D(PHY,"dlsch[%d][%d] => %p\n",i,j,ue->dlsch[i][j]);
+      if (j==0) {
+        AssertFatal((ue->ulsch[i] = new_nr_ue_ulsch(ue->frame_parms.N_RB_UL, NR_MAX_ULSCH_HARQ_PROCESSES,&ue->frame_parms))!=NULL,"Can't get ue ulsch structures\n");
+        LOG_D(PHY,"ulsch[%d] => %p\n",i,ue->ulsch[i]);
       }
     }
 
diff --git a/openair1/PHY/INIT/nr_parms.c b/openair1/PHY/INIT/nr_parms.c
index 458c8b34038069ceba57e0512f317c23006c3529..aef01413eeece7ffcd607e45d4530c8083d5d505 100644
--- a/openair1/PHY/INIT/nr_parms.c
+++ b/openair1/PHY/INIT/nr_parms.c
@@ -212,12 +212,12 @@ uint32_t get_samples_per_slot(int slot, NR_DL_FRAME_PARMS* fp)
 uint32_t get_slot_from_timestamp(openair0_timestamp timestamp_rx, NR_DL_FRAME_PARMS* fp)
 {
    uint32_t slot_idx = 0;
-   int samples_till_the_slot = 0;
+   int samples_till_the_slot = fp->get_samples_per_slot(slot_idx,fp)-1; // inclusive upper bound for slot 0: its sample count minus one
    timestamp_rx = timestamp_rx%fp->samples_per_frame;
 
     while (timestamp_rx > samples_till_the_slot) {
-        samples_till_the_slot += fp->get_samples_per_slot(slot_idx,fp);
         slot_idx++;
+        samples_till_the_slot += fp->get_samples_per_slot(slot_idx,fp); // grow the bound by the size of the slot just advanced to (index already incremented)
      }
    return slot_idx; 
 }
diff --git a/openair1/PHY/INIT/phy_init.h b/openair1/PHY/INIT/phy_init.h
index 29940a4fbe99bd851d5fd7300470169e2775879f..2b11371de8f02eb02f1a2260a5e2681f91202b04 100644
--- a/openair1/PHY/INIT/phy_init.h
+++ b/openair1/PHY/INIT/phy_init.h
@@ -416,6 +416,9 @@ void fill_subframe_mask(PHY_VARS_eNB *eNB);
 void init_DLSCH_struct(PHY_VARS_gNB *gNB, processingData_L1tx_t *msg);
 void reset_DLSCH_struct(const PHY_VARS_gNB *gNB, processingData_L1tx_t *msg);
 
+void RCconfig_nrUE_prs(void *cfg); // invoked as RCconfig_nrUE_prs(ue) to load the config file params; NOTE(review): cfg is presumably a PHY_VARS_NR_UE * - confirm
+void init_nr_prs_ue_vars(PHY_VARS_NR_UE *ue); // PRS init step, called from init_nr_ue_signal()
+
 /** @} */
 #endif
 
diff --git a/openair1/PHY/LTE_ESTIMATION/freq_equalization.c b/openair1/PHY/LTE_ESTIMATION/freq_equalization.c
index 6d382bdd633c95870aed567cdb6b471b76a7ef8d..e864e1cfc7ccd7f6e721ae0b4f48763b13c3227a 100644
--- a/openair1/PHY/LTE_ESTIMATION/freq_equalization.c
+++ b/openair1/PHY/LTE_ESTIMATION/freq_equalization.c
@@ -301,7 +301,7 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
   rxdataF_comp128   = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
   ul_ch_mag128      = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
   ul_ch_magb128      = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
   rxdataF_comp128   = (int16x8_t*)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
   ul_ch_mag128      = (int16x8_t*)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
@@ -330,7 +330,7 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
       ul_ch_mag128[re]  = _mm_set1_epi16(316);  // this is 512*4/sqrt(42)
       ul_ch_magb128[re] = _mm_set1_epi16(158);  // this is 512*2/sqrt(42)
     }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128[re] = vmulq_s16(rxdataF_comp128[re],inv_ch[amp]);
 
     if (Qm==4)
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
index d221348b44f38a9e7f2375a47a3c63db7b3d2a9c..fc77d4ae186a76887dfb4d2f9c5cc5593f4aa776 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c
@@ -701,7 +701,7 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue,
       }
   }
 
-  T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(eNB_id),
+  T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(eNB_id), T_INT(0),
     T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].frame_rx%1024), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].subframe_rx),
     T_INT(0), T_BUFFER(&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][0][0], 512  * 4));
   return(0);
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_est_freq_offset.c b/openair1/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
index 80d9fc9cc3144576420b87459bb53185a5bd1498..22282fea5e83edbefaeaa231afd2ede71a64e1e2 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_est_freq_offset.c
@@ -30,7 +30,7 @@
 
 #if defined(__x86_64__) || defined(__i386__)
 __m128i avg128F;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 int32x4_t avg128F;
 #endif
 
@@ -42,7 +42,7 @@ int dl_channel_level(int16_t *dl_ch,
   int16_t rb;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x4_t *dl_ch128;
 #endif
   int avg;
@@ -61,7 +61,7 @@ int dl_channel_level(int16_t *dl_ch,
     dl_ch128+=3;
 
   }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   avg128F = vdupq_n_s32(0);
   dl_ch128=(int16x4_t *)dl_ch;
 
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c b/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c
index b21f526fcc05851a0253edf585983aafb395e52f..80238a58ffaa62c65c3342e828df6f57a9c07516 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_sync_time.c
@@ -131,14 +131,6 @@ void lte_sync_time_free(void) {
 }
 
 
-static inline int abs32(int x) {
-  return (((int)((short*)&x)[0])*((int)((short*)&x)[0]) + ((int)((short*)&x)[1])*((int)((short*)&x)[1]));
-}
-
-static inline double absF(struct complexd x) {
-  return x.r*x.r+x.i*x.i;
-}
-
 #define complexNull(c) bzero((void*) &(c), sizeof(c))
 
 #define SHIFT 17
@@ -196,16 +188,12 @@ int lte_sync_time(int **rxdata, ///rx data in time domain
     // calculate the absolute value of sync_corr[n]
 
     for (s=0; s<3; s++) {
-      double tmp = absF(sync_out[s]) + absF(sync_out2[s]);
+      double tmp = squaredMod(sync_out[s]) + squaredMod(sync_out2[s]);
 
       if (tmp>peak_val) {
         peak_val = tmp;
         peak_pos = n;
         sync_source = s;
-        /*
-        printf("s %d: n %d sync_out %d, sync_out2  %d (sync_corr %d,%d), (%d,%d) (%d,%d)\n",s,n,abs32(sync_out[s]),abs32(sync_out2[s]),sync_corr_ue0[n],
-               sync_corr_ue0[n+length],((int16_t*)&sync_out[s])[0],((int16_t*)&sync_out[s])[1],((int16_t*)&sync_out2[s])[0],((int16_t*)&sync_out2[s])[1]);
-        */
       }
     }
   }
@@ -337,7 +325,7 @@ int lte_sync_time_eNB(int32_t **rxdata, ///rx data in time domain
         result = dot_product((short *)primary_synch_time, (short *) &(rxdata[ar][n]), frame_parms->ofdm_symbol_size, SHIFT);
         //((short*)sync_corr)[2*n]   += ((short*) &result)[0];
         //((short*)sync_corr)[2*n+1] += ((short*) &result)[1];
-        sync_corr_eNB[n] += abs32(result);
+        sync_corr_eNB[n] += squaredMod(*(c16_t*)&result);
       }
     }
 
@@ -368,12 +356,6 @@ int lte_sync_time_eNB(int32_t **rxdata, ///rx data in time domain
 }
 
 
-static inline int64_t abs64(int64_t x) {
-  return (((int64_t)((int32_t *)&x)[0])*((int64_t)((int32_t *)&x)[0]) + ((int64_t)
-          ((int32_t *)&x)[1])*((int64_t)((int32_t *)&x)[1]));
-}
-
-
 int ru_sync_time(RU_t *ru,
                  int64_t *lev,
                  int64_t *avg) {
@@ -426,7 +408,7 @@ int ru_sync_time(RU_t *ru,
                                 shift);
       }
 
-      dmrs_corr += abs64(result);
+      dmrs_corr += (int64_t)((c32_t*)&result)->r*((c32_t*)&result)->r + (int64_t)((c32_t*)&result)->i*((c32_t*)&result)->i; // widen each 32-bit half to 64 bits before squaring, as the removed abs64() did, so large correlations cannot overflow
     }
 
     if (ru->dmrs_corr != NULL)
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
index 880c056578e6a4a0dc2fe7f7b108e798cacbb204..16b818e90f4fa89dcc1b2dd24958f6886ebd8617 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c
@@ -769,7 +769,7 @@ void dlsch_channel_level_TM34_meas(int *ch00,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 }
@@ -1019,7 +1019,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
   unsigned int limit,subband;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch0_128,*dl_ch1_128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *dl_ch0_128, *dl_ch1_128;
 #endif
   int *dl_ch0,*dl_ch1;
@@ -1218,7 +1218,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
 
         dl_ch0_128    = (__m128i *)&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][aarx][4];
         dl_ch1_128    = (__m128i *)&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][2+aarx][4];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         int32x4_t pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI0b,mmtmpPMI1b;
 
         dl_ch0_128    = (int16x8_t *)&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[subframe]].dl_ch_estimates[eNB_id][aarx][4];
@@ -1233,7 +1233,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
 
           pmi128_re = _mm_xor_si128(pmi128_re,pmi128_re);
           pmi128_im = _mm_xor_si128(pmi128_im,pmi128_im);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
           pmi128_re = vdupq_n_s32(0);
           pmi128_im = vdupq_n_s32(0);
@@ -1291,7 +1291,7 @@ void lte_ue_measurements(PHY_VARS_UE *ue,
             pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
             //print_ints(" pmi128_im 1 ",&pmi128_im);*/
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
             mmtmpPMI0 = vmull_s16(((int16x4_t*)dl_ch0_128)[0], ((int16x4_t*)dl_ch1_128)[0]);
             mmtmpPMI1 = vmull_s16(((int16x4_t*)dl_ch0_128)[1], ((int16x4_t*)dl_ch1_128)[1]);
diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
index 45c25b59ea11187be4b57d4b08aca88a3da810c2..380dce2c4fc9cf79e604df6ae7df1b54a397a083 100644
--- a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
+++ b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c
@@ -76,7 +76,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF128,*ul_ref128,*ul_ch128;
   __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF128,*ul_ref128,*ul_ch128;
   int32x4_t mmtmp0,mmtmp1,mmtmp_re,mmtmp_im;
 #endif
@@ -120,7 +120,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
       rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset];
       ul_ch128   = (__m128i *)&ul_ch_estimates[aa][symbol_offset];
       ul_ref128  = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF128 = (int16x8_t *)&rxdataF_ext[aa][symbol_offset];
       ul_ch128   = (int16x8_t *)&ul_ch_estimates[aa][symbol_offset];
       ul_ref128  = (int16x8_t *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
@@ -167,7 +167,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
         mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
         mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
         ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         mmtmp0 = vmull_s16(((int16x4_t *)ul_ref128)[0],((int16x4_t *)rxdataF128)[0]);
         mmtmp1 = vmull_s16(((int16x4_t *)ul_ref128)[1],((int16x4_t *)rxdataF128)[1]);
         mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
@@ -412,7 +412,7 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF128,*ul_ref128,*ul_ch128;
   __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF128,*ul_ref128,*ul_ch128;
   int32x4_t mmtmp0,mmtmp1,mmtmp_re,mmtmp_im;
 #endif
@@ -448,7 +448,7 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms,
     rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset];
     ul_ch128   = (__m128i *)&ul_ch_estimates[aa][symbol_offset];
     ul_ref128  = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF128 = (int16x8_t *)&rxdataF_ext[aa][symbol_offset];
     ul_ch128   = (int16x8_t *)&ul_ch_estimates[aa][symbol_offset];
     ul_ref128  = (int16x8_t *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
@@ -495,7 +495,7 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms,
       mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
       mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
       ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       mmtmp0 = vmull_s16(((int16x4_t *)ul_ref128)[0],((int16x4_t *)rxdataF128)[0]);
       mmtmp1 = vmull_s16(((int16x4_t *)ul_ref128)[1],((int16x4_t *)rxdataF128)[1]);
       mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
@@ -906,7 +906,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
     phase_idx = -phase_idx;
 
   return(phase_idx);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   return(0);
 #endif
 }
diff --git a/openair1/PHY/LTE_TRANSPORT/if4_tools.c b/openair1/PHY/LTE_TRANSPORT/if4_tools.c
index 09c0b4ba0f80a39ddf83c403035e92506b0b9062..a161a3ba28becae269e1f152a0f3ecd8baa5c6be 100644
--- a/openair1/PHY/LTE_TRANSPORT/if4_tools.c
+++ b/openair1/PHY/LTE_TRANSPORT/if4_tools.c
@@ -38,8 +38,7 @@
 //#include "SCHED/sched_eNB.h"
 lte_subframe_t subframe_select(LTE_DL_FRAME_PARMS *frame_parms,uint8_t subframe);
 
-//#include "targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "common/utils/LOG/vcd_signal_dumper.h"
 
 const uint8_t lin2alaw_if4p5[65536] = {213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 211, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 210, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 221, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 217, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 196, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 195, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 205, 205, 205, 205, 205, 205, 205, 205, 
205, 205, 205, 205, 205, 205, 205, 205, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 203, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 245, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 244, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 247, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 246, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 241, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 240, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 243, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 
253, 253, 253, 253, 253, 253, 253, 253, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 248, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 
231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 237, 
237, 237, 237, 237, 237, 237, 237, 237, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 238, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 232, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 235, 234, 234, 234, 234, 234, 234, 234, 234, 
234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 
151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 151, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 146, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 
157, 157, 157, 157, 157, 157, 157, 157, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 153, 153, 153, 153, 153, 153, 153, 153, 
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 152, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 
154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 
132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 
135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 
129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 129, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 
130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
141, 141, 141, 141, 141, 141, 141, 141, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 
143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 142, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 
137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 
136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 
138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 
181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 
180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 
180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 180, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 
183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 183, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 
182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 182, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 
177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 
176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 
176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 
179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 179, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 
178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 
189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 
189, 189, 189, 189, 189, 189, 189, 189, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 
188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 188, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 
191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 191, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 
190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 
185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 
185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 
184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 
187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 
186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 186, 165, 165, 165, 165, 165, 165, 165, 165, 
165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 
165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 
165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 
164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 
164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 
164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 
167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 
167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 
166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 
166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 
166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 166, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 
161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 
161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 
163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 
163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 
162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 
162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 
162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 
173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 
173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 
173, 173, 173, 173, 173, 173, 173, 173, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 
172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 
172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 
175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 
175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 
175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 175, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 
174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 
174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 174, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 
169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 
169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 
169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 169, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 
168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 
168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 168, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 
170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 
170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 
43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 
43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 
43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 
38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 
38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 
58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 
59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 
56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 
57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 
62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 
63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 
60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 
61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 
51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 54, 54, 
54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 
54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 
53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 
111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 
127, 127, 127, 127, 127, 127, 127, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 125, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 
79, 79, 79, 79, 79, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85};
diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c
index 5a203ab331683c5d9d7d9f98ae0551266114b5cb..51e4d40a3392c2330b711e0cd96a0d3f376518c0 100644
--- a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c
+++ b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c
@@ -624,17 +624,9 @@ unsigned int  ulsch_decoding(PHY_VARS_eNB *eNB,
     }
     */
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
-    ((__m128i *)cseq)[i2++] = ((__m128i *)unscrambling_lut)[(s&65535)<<1];
-    ((__m128i *)cseq)[i2++] = ((__m128i *)unscrambling_lut)[1+((s&65535)<<1)];
-    s>>=16;
-    ((__m128i *)cseq)[i2++] = ((__m128i *)unscrambling_lut)[(s&65535)<<1];
-    ((__m128i *)cseq)[i2++] = ((__m128i *)unscrambling_lut)[1+((s&65535)<<1)];
-#else
     ((__m256i *)cseq)[i2++] = ((__m256i *)unscrambling_lut)[s&65535];
     ((__m256i *)cseq)[i2++] = ((__m256i *)unscrambling_lut)[(s>>16)&65535];
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     ((int16x8_t *)cseq)[i2++] = ((int16x8_t *)unscrambling_lut)[(s&65535)<<1];
     ((int16x8_t *)cseq)[i2++] = ((int16x8_t *)unscrambling_lut)[1+((s&65535)<<1)];
     s>>=16;
@@ -973,14 +965,9 @@ unsigned int  ulsch_decoding(PHY_VARS_eNB *eNB,
 
     /* To be improved according to alignment of j2
     #if defined(__x86_64__)||defined(__i386__)
-    #ifndef __AVX2__
-    for (iprime=0; iprime<G;iprime+=8,j2+=8)
-      *((__m128i *)&ulsch_harq->e[iprime]) = *((__m128i *)&y[j2]);
-    #else
     for (iprime=0; iprime<G;iprime+=16,j2+=16)
       *((__m256i *)&ulsch_harq->e[iprime]) = *((__m256i *)&y[j2]);
-    #endif
-    #elif defined(__arm__)
+    #elif defined(__arm__) || defined(__aarch64__)
     for (iprime=0; iprime<G;iprime+=8,j2+=8)
       *((int16x8_t *)&ulsch_harq->e[iprime]) = *((int16x8_t *)&y[j2]);
     #endif
diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c
index 3529df8e44504435ef2d7948925dbdde41423976..39d1a86e5baed98b017872f7b1178f813262901e 100644
--- a/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c
+++ b/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c
@@ -55,7 +55,7 @@ void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH) {
 #if defined(__x86_64__) || defined(__i386__)
   __m128i idft_in128[3][1200],idft_out128[3][1200];
   __m128i norm128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t idft_in128[3][1200],idft_out128[3][1200];
   int16x8_t norm128;
 #endif
@@ -117,7 +117,7 @@ void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH) {
       * &(((__m128i *)z11)[i])=_mm_sign_epi16( *&(((__m128i *)z11)[i]),*(__m128i *)&conjugate2[0]);
     }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     * &(((int16x8_t *)z0)[i])=vmulq_s16( *&(((int16x8_t *)z0)[i]),*(int16x8_t *)&conjugate2[0]);
     * &(((int16x8_t *)z1)[i])=vmulq_s16( *&(((int16x8_t *)z1)[i]),*(int16x8_t *)&conjugate2[0]);
     * &(((int16x8_t *)z2)[i])=vmulq_s16( *&(((int16x8_t *)z2)[i]),*(int16x8_t *)&conjugate2[0]);
@@ -162,7 +162,7 @@ void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH) {
       dft(DFT_12,(int16_t *)idft_in2,(int16_t *)idft_out2,0);
 #if defined(__x86_64__)||defined(__i386__)
       norm128 = _mm_set1_epi16(9459);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       norm128 = vdupq_n_s16(9459);
 #endif
 
@@ -171,7 +171,7 @@ void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH) {
         ((__m128i *)idft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i *)idft_out0)[i],norm128),1);
         ((__m128i *)idft_out1)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i *)idft_out1)[i],norm128),1);
         ((__m128i *)idft_out2)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i *)idft_out2)[i],norm128),1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         ((int16x8_t *)idft_out0)[i] = vqdmulhq_s16(((int16x8_t *)idft_out0)[i],norm128);
         ((int16x8_t *)idft_out1)[i] = vqdmulhq_s16(((int16x8_t *)idft_out1)[i],norm128);
         ((int16x8_t *)idft_out2)[i] = vqdmulhq_s16(((int16x8_t *)idft_out2)[i],norm128);
@@ -430,7 +430,7 @@ void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH) {
       ((__m128i *)z11)[i]=_mm_sign_epi16(((__m128i *)z11)[i],*(__m128i *)&conjugate2[0]);
     }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     * &(((int16x8_t *)z0)[i])=vmulq_s16( *&(((int16x8_t *)z0)[i]),*(int16x8_t *)&conjugate2[0]);
     * &(((int16x8_t *)z1)[i])=vmulq_s16( *&(((int16x8_t *)z1)[i]),*(int16x8_t *)&conjugate2[0]);
     * &(((int16x8_t *)z2)[i])=vmulq_s16( *&(((int16x8_t *)z2)[i]),*(int16x8_t *)&conjugate2[0]);
@@ -470,7 +470,7 @@ int32_t ulsch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxF=(__m128i *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
   __m128i **llrp128 = (__m128i **)llrp;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF= (int16x8_t *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
   int16x8_t **llrp128 = (int16x8_t **)llrp;
 #endif
@@ -503,7 +503,7 @@ void ulsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
   __m128i mmtmpU0;
   __m128i **llrp128=(__m128i **)llrp;
   ch_mag =(__m128i *)&ul_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF=(int16x8_t *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
   int16x8_t *ch_mag;
   int16x8_t xmm0;
@@ -518,7 +518,7 @@ void ulsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     (*llrp128)[0] = _mm_unpacklo_epi32(rxF[i],mmtmpU0);
     (*llrp128)[1] = _mm_unpackhi_epi32(rxF[i],mmtmpU0);
     (*llrp128)+=2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vqsubq_s16(ch_mag[i],xmm0);
     (*llrp16)[0] = vgetq_lane_s16(rxF[i],0);
@@ -565,7 +565,7 @@ void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
   __m128i mmtmpU1,mmtmpU2;
   ch_mag =(__m128i *)&ul_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
   ch_magb =(__m128i *)&ul_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF=(int16x8_t *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
   int16x8_t *ch_mag,*ch_magb;
   int16x8_t mmtmpU1,mmtmpU2;
@@ -595,7 +595,7 @@ void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     (*llrp32)[9]  = _mm_extract_epi32(rxF[i],3);
     (*llrp32)[10] = _mm_extract_epi32(mmtmpU1,3);
     (*llrp32)[11] = _mm_extract_epi32(mmtmpU2,3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     mmtmpU1 = vabsq_s16(rxF[i]);
     mmtmpU1 = vqsubq_s16(ch_mag[i],mmtmpU1);
     mmtmpU2 = vabsq_s16(mmtmpU1);
@@ -633,7 +633,7 @@ void ulsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
   __m128i *rxdataF_comp128_1=NULL,*ul_ch_mag128_1=NULL,*ul_ch_mag128_1b=NULL;
   __m128i *rxdataF_comp128_2=NULL,*ul_ch_mag128_2=NULL,*ul_ch_mag128_2b=NULL;
   __m128i *rxdataF_comp128_3=NULL,*ul_ch_mag128_3=NULL,*ul_ch_mag128_3b=NULL;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*ul_ch_mag128_0,*ul_ch_mag128_0b;
   int16x8_t *rxdataF_comp128_1,*ul_ch_mag128_1,*ul_ch_mag128_1b;
   int16x8_t *rxdataF_comp128_2,*ul_ch_mag128_2,*ul_ch_mag128_2b;
@@ -682,7 +682,7 @@ void ulsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
         ul_ch_mag128_0b[i]   = _mm_srai_epi16(_mm_adds_epi16(ul_ch_mag128_0b[i],_mm_adds_epi16(ul_ch_mag128_1b[i],_mm_adds_epi16(ul_ch_mag128_2b[i],ul_ch_mag128_3b[i]))),2);
         rxdataF_comp128_0[i] = _mm_add_epi16(rxdataF_comp128_0[i],(*(__m128i *)&jitterc[0]));
       }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
     ul_ch_mag128_0      = (int16x8_t *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
@@ -786,7 +786,7 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext,
   uint8_t aarx;//,symbol_mod;
   __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x4_t *ul_ch128,*rxdataF128;
   int16x8_t *ul_ch_mag128,*ul_ch_mag128b,*rxdataF_comp128;
   uint8_t aarx;//,symbol_mod;
@@ -803,7 +803,7 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext,
     ul_ch_mag128b     = (__m128i *)&ul_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
     rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128   = (__m128i *)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     ul_ch128          = (int16x4_t *)&ul_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
     ul_ch_mag128      = (int16x8_t *)&ul_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12];
     ul_ch_mag128b     = (int16x8_t *)&ul_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
@@ -826,7 +826,7 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext,
       mmtmpU1 = _mm_packs_epi32(mmtmpU0,mmtmpU0);
       ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpU1,mmtmpU1);
       //LOG_I(PHY,"comp: ant %d symbol %d rb %d => %d,%d,%d (output_shift %d)\n",aarx,symbol,rb,*((int16_t *)&ul_ch_mag128[0]),*((int16_t *)&ul_ch_mag128[1]),*((int16_t *)&ul_ch_mag128[2]),output_shift);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       mmtmpU0 = vmull_s16(ul_ch128[0], ul_ch128[0]);
       mmtmpU0 = vqshlq_s32(vqaddq_s32(mmtmpU0,vrev64q_s32(mmtmpU0)),-output_shift128);
       mmtmpU1 = vmull_s16(ul_ch128[1], ul_ch128[1]);
@@ -917,7 +917,7 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext,
       ul_ch_mag128b+=3;
       rxdataF128+=3;
       rxdataF_comp128+=3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       mmtmpU0 = vmull_s16(ul_ch128[0], rxdataF128[0]);
       //mmtmpU0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])]
       mmtmpU1 = vmull_s16(ul_ch128[1], rxdataF128[1]);
@@ -986,7 +986,7 @@ void ulsch_channel_level(int32_t **drs_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *ul_ch128;
   __m128 avg128U;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg128U;
   int16x4_t *ul_ch128;
 #endif
@@ -1004,7 +1004,7 @@ void ulsch_channel_level(int32_t **drs_ch_estimates_ext,
       ul_ch128+=3;
     }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     avg128U = vdupq_n_s32(0);
     ul_ch128=(int16x4_t *)drs_ch_estimates_ext[aarx];
 
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c
index 0d8c3c9960b594508417371e03a78db11c06eff1..44168dfa95f45158ff86b727c8b9a10b99b7cfc0 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c
@@ -356,7 +356,7 @@ void pdcch_channel_level(int32_t **dl_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128;
   __m128i avg128P;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *dl_ch128;
   int32x4_t *avg128P;
 #else
@@ -370,9 +370,9 @@ void pdcch_channel_level(int32_t **dl_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
       avg128P = _mm_setzero_si128();
       dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
      dl_ch128=&dl_ch_estimates_ext[(aatx<<1)+aarx][0];
-#error __arm__ not yet implemented, cannot build __FILE__
+#error __arm__ or __aarch64__ not yet implemented, cannot build __FILE__
 #else
       dl_ch128=&dl_ch_estimates_ext[(aatx<<1)+aarx][0];
 #error Unsupported CPU architecture, cannot build __FILE__
@@ -383,7 +383,7 @@ void pdcch_channel_level(int32_t **dl_ch_estimates_ext,
         avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[0],dl_ch128[0]));
         avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[1],dl_ch128[1]));
         avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[2],dl_ch128[2]));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #else
 #error Unsupported CPU architecture, cannot build __FILE__
 #endif
@@ -427,7 +427,7 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
 
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1;
 #else
 #error Unsupported CPU architecture, cannot build __FILE__
@@ -441,7 +441,7 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12];
       rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12];
       rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12];
 #else
@@ -451,7 +451,7 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
       for (i=0; i<frame_parms->N_RB_DL*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
 #endif
       }
@@ -460,14 +460,14 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
     rho128_0 = (__m128i *) &rho[0][symbol*frame_parms->N_RB_DL*12];
     rho128_1 = (__m128i *) &rho[1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rho128_0 = (int16x8_t *) &rho[0][symbol*frame_parms->N_RB_DL*12];
     rho128_1 = (int16x8_t *) &rho[1][symbol*frame_parms->N_RB_DL*12];
 #endif
     for (i=0; i<frame_parms->N_RB_DL*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
       rho128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rho128_0[i],1),_mm_srai_epi16(rho128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rho128_0[i] = vhaddq_s16(rho128_0[i],rho128_1[i]);
 #endif
     }
@@ -477,7 +477,7 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
     rho128_i1 = (__m128i *) &rho_i[1][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_i0   = (__m128i *)&rxdataF_comp_i[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_i1   = (__m128i *)&rxdataF_comp_i[1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rho128_i0 = (int16x8_t*) &rho_i[0][symbol*frame_parms->N_RB_DL*12];
     rho128_i1 = (int16x8_t*) &rho_i[1][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_i0   = (int16x8_t *)&rxdataF_comp_i[0][symbol*frame_parms->N_RB_DL*12];
@@ -489,7 +489,7 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_i0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_i0[i],1),_mm_srai_epi16(rxdataF_comp128_i1[i],1));
       rho128_i0[i]          = _mm_adds_epi16(_mm_srai_epi16(rho128_i0[i],1),_mm_srai_epi16(rho128_i1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_i0[i] = vhaddq_s16(rxdataF_comp128_i0[i],rxdataF_comp128_i1[i]);
       rho128_i0[i]          = vhaddq_s16(rho128_i0[i],rho128_i1[i]);
 
@@ -989,7 +989,7 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext,
   __m128i *dl_ch128,*rxdataF128,*rxdataF_comp128;
   __m128i *dl_ch128_2, *rho128;
   __m128i mmtmpPD0,mmtmpPD1,mmtmpPD2,mmtmpPD3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   uint8_t aatx,aarx,pilots=0;
@@ -1014,7 +1014,7 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext,
       rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
       rxdataF_comp128   = (__m128i *)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -1093,7 +1093,7 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext,
           rxdataF128+=2;
           rxdataF_comp128+=2;
         }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
       }
@@ -1110,7 +1110,7 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext,
       dl_ch128      = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
       dl_ch128_2    = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
       for (rb=0; rb<frame_parms->N_RB_DL; rb++) {
@@ -1205,7 +1205,7 @@ void pdcch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
 
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
  int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1;
 #endif
   int32_t i;
@@ -1215,7 +1215,7 @@ void pdcch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12];
       rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[(aatx<<1)][symbol*frame_parms->N_RB_DL*12];
       rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[(aatx<<1)+1][symbol*frame_parms->N_RB_DL*12];
 #endif
@@ -1223,7 +1223,7 @@ void pdcch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
       for (i=0; i<frame_parms->N_RB_DL*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
 #endif
       }
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_decoding.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_decoding.c
index 39900113b7bf3ebcf83fd5cdcebf56cd4db8f6f2..ca5d6d28defb4b70090bf0d26d3785914117af95 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_decoding.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_decoding.c
@@ -173,7 +173,7 @@ uint32_t  dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
 #ifdef DEBUG_DLSCH_DECODING
   uint16_t i;
 #endif
-  //#ifdef __AVX2__
+  //#ifdef __WASAVX2__
 #if 0
   int Kr_last,skipped_last=0;
   uint8_t (*tc_2cw)(int16_t *y,
@@ -402,7 +402,7 @@ uint32_t  dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
     printf("%d : %d\n",i,harq_process->d[r][96+i]);
     printf("\n");
     */
-    //#ifndef __AVX2__
+    //#ifndef __WASAVX2__
 #if 1
 
     if (err_flag == 0) {
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c
index 8f5e33d6e002467a8c13a3518d4bfafc554e7d2d..b9b442f6f5bbaffbf5cab95add77a31121eab932 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c
@@ -1453,7 +1453,7 @@ void dlsch_channel_compensation(int **rxdataF_ext,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   unsigned short rb;
   unsigned char aatx,aarx,symbol_mod,pilots=0;
   int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128;
@@ -1863,7 +1863,7 @@ void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) {
   _mm_empty();
   _m_empty();
 }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) {
   // sqrt(2) is already taken into account in computation sqrt_rho_a, sqrt_rho_b,
   //so removed it
@@ -2159,7 +2159,7 @@ void dlsch_channel_compensation_TM56(int **rxdataF_ext,
   measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,measurements->n0_power_tot);
   //printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol,
   //   measurements->precoded_cqi_dB[eNB_id][0]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   uint32_t rb,Nre;
   uint32_t aarx,symbol_mod,pilots=0;
   int16x4_t *dl_ch0_128,*dl_ch1_128,*rxdataF128;
@@ -2714,7 +2714,7 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms,
   //  measurements->precoded_cqi_dB[eNB_id][0]);
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   unsigned short rb,Nre;
   unsigned char aarx,symbol_mod,pilots=0;
   int precoded_signal_strength0=0,precoded_signal_strength1=0, rx_power_correction;
@@ -3064,7 +3064,7 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
@@ -3140,7 +3140,7 @@ void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   unsigned char aatx;
   int i;
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1,
@@ -3366,7 +3366,7 @@ void dlsch_scale_channel(int **dl_ch_estimates_ext,
     }
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
@@ -3433,7 +3433,7 @@ void dlsch_channel_level(int **dl_ch_estimates_ext,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   short rb;
   unsigned char aatx,aarx,nre=12,symbol_mod;
   int32x4_t avg128D;
@@ -3533,7 +3533,7 @@ void dlsch_channel_level_core(int **dl_ch_estimates_ext,
   _mm_empty();
   _m_empty();
   /* FIXME This part needs to be adapted like the one above */
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   short rb;
   unsigned char aatx,aarx,nre=12,symbol_mod;
   int32x4_t avg128D;
@@ -3627,7 +3627,7 @@ void dlsch_channel_level_median(int **dl_ch_estimates_ext,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   short rb;
   unsigned char aatx,aarx,nre=12,symbol_mod;
   int32x4_t norm128D;
@@ -3997,7 +3997,7 @@ void dlsch_channel_aver_band(int **dl_ch_estimates_ext,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
@@ -4369,7 +4369,7 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext,
   avg_1[0] = avg_0[0];
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
@@ -4438,7 +4438,7 @@ void dlsch_channel_level_TM56(int **dl_ch_estimates_ext,
   avg[0] = cmax(avg[0],avg[1]);
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
@@ -4501,7 +4501,7 @@ void dlsch_channel_level_TM7(int **dl_bf_ch_estimates_ext,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 //#define ONE_OVER_2_Q15 16384
@@ -4585,7 +4585,7 @@ void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms,
 
   _mm_empty();
   _m_empty();
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 }
 
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation.c
index 43f3b5451fc2de1fe86590983631395a9bb3a62d..b1ff7357daf712f96db063735dcc49b9cefbb9cd 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation.c
@@ -618,7 +618,7 @@ __m128i tmp_result4 __attribute__ ((aligned(16)));
 // calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
 #define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq)  tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,3); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,3); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -886,7 +886,7 @@ void qam16_llr(int16_t *stream0_in,
   __m128i *ch_mag_128 = (__m128i*)chan_magn;
   __m128i llr128[2];
   int32_t *llr32 = (int32_t*) llr;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF_128 = (int16x8_t*)stream0_in;
   int16x8_t *ch_mag_128 = (int16x8_t*)chan_magn;
   int16x8_t xmm0;
@@ -912,7 +912,7 @@ void qam16_llr(int16_t *stream0_in,
     llr32[6] = _mm_extract_epi32(llr128[1],2); //((uint32_t *)&llr128[1])[2];
     llr32[7] = _mm_extract_epi32(llr128[1],3); //((uint32_t *)&llr128[1])[3];
     llr32+=8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vqsubq_s16(ch_mag[i],xmm0);
     // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2
@@ -1139,7 +1139,7 @@ void qam64_llr(int16_t *stream0_in,
   __m128i *rxF_128 = (__m128i*)stream0_in;
   __m128i *ch_mag_128 = (__m128i*)chan_magn;
   __m128i *ch_magb_128 = (__m128i*)chan_magn_b;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF_128 = (int16x8_t*)stream0_in;
   int16x8_t *ch_mag_128 = (int16x8_t*)chan_magn;
   int16x8_t *ch_magb_128 = (int16x8_t*)chan_magn_b;
@@ -1158,7 +1158,7 @@ void qam64_llr(int16_t *stream0_in,
     xmm1 = _mm_subs_epi16(ch_mag_128[i],xmm1);
     xmm2 = _mm_abs_epi16(xmm1);
     xmm2 = _mm_subs_epi16(ch_magb_128[i],xmm2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm1 = vabsq_s16(rxF_128[i]);
     xmm1 = vsubq_s16(ch_mag_128[i],xmm1);
     xmm2 = vabsq_s16(xmm1);
@@ -1184,7 +1184,7 @@ void qam64_llr(int16_t *stream0_in,
     llr[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1];
     llr[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j];
     llr[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr[2] = vgetq_lane_s16(xmm1,0);
     llr[3] = vgetq_lane_s16(xmm1,1);//((short *)&xmm1)[j+1];
     llr[4] = vgetq_lane_s16(xmm2,0);//((short *)&xmm2)[j];
@@ -1199,7 +1199,7 @@ void qam64_llr(int16_t *stream0_in,
     llr[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1];
     llr[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j];
     llr[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr[2] = vgetq_lane_s16(xmm1,2);
     llr[3] = vgetq_lane_s16(xmm1,3);//((short *)&xmm1)[j+1];
     llr[4] = vgetq_lane_s16(xmm2,2);//((short *)&xmm2)[j];
@@ -1214,7 +1214,7 @@ void qam64_llr(int16_t *stream0_in,
     llr[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1];
     llr[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j];
     llr[5] = _mm_extract_epi16(xmm2,5);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr[2] = vgetq_lane_s16(xmm1,4);
     llr[3] = vgetq_lane_s16(xmm1,5);//((short *)&xmm1)[j+1];
     llr[4] = vgetq_lane_s16(xmm2,4);//((short *)&xmm2)[j];
@@ -1228,7 +1228,7 @@ void qam64_llr(int16_t *stream0_in,
     llr[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1];
     llr[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j];
     llr[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr[2] = vgetq_lane_s16(xmm1,6);
     llr[3] = vgetq_lane_s16(xmm1,7);//((short *)&xmm1)[j+1];
     llr[4] = vgetq_lane_s16(xmm2,6);//((short *)&xmm2)[j];
@@ -1500,7 +1500,7 @@ void qpsk_qpsk(short *stream0_in,
   __m128i *stream1_128i_in = (__m128i *)stream1_in;
   __m128i *stream0_128i_out = (__m128i *)stream0_out;
   __m128i ONE_OVER_SQRT_8 = _mm_set1_epi16(23170); //round(2^16/sqrt(8))
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rho01_128i = (int16x8_t *)rho01;
   int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
   int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
@@ -1536,7 +1536,7 @@ void qpsk_qpsk(short *stream0_in,
     // divide by sqrt(8), no shift needed ONE_OVER_SQRT_8 = Q1.16
     rho_rpi = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_8);
     rho_rmi = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_8);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 
 #endif
@@ -1560,7 +1560,7 @@ void qpsk_qpsk(short *stream0_in,
 
     y0r_over2  = _mm_srai_epi16(y0r,1);   // divide by 2
     y0i_over2  = _mm_srai_epi16(y0i,1);   // divide by 2
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 
 #endif
@@ -1762,7 +1762,7 @@ void qpsk_qam16(int16_t *stream0_in,
   __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
   __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
   __m128i ch_mag_int __attribute__((aligned(16)));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rho01_128i = (int16x8_t *)rho01;
   int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
   int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
@@ -1937,7 +1937,7 @@ void qpsk_qam16(int16_t *stream0_in,
     if (i<((length>>1) - 1)) // false if only 2 REs remain
       stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2050,7 +2050,7 @@ void qpsk_qam64(short *stream0_in,
   __m128i ch_mag_int_with_sigma2;
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -2235,7 +2235,7 @@ void qpsk_qam64(short *stream0_in,
     if (i<((length>>1) - 1)) // false if only 2 REs remain
       stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2311,7 +2311,7 @@ void qam16_qpsk(short *stream0_in,
   __m128i ch_mag_over_10;
   __m128i ch_mag_over_2;
   __m128i ch_mag_9_over_10;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -2702,7 +2702,7 @@ void qam16_qpsk(short *stream0_in,
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2820,7 +2820,7 @@ void qam16_qam16(short *stream0_in,
   __m128i ch_mag_over_10;
   __m128i ch_mag_over_2;
   __m128i ch_mag_9_over_10;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -3254,7 +3254,7 @@ void qam16_qam16(short *stream0_in,
     stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -3385,7 +3385,7 @@ void qam16_qam64(int16_t *stream0_in,
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   int i;
@@ -3887,7 +3887,7 @@ void qam16_qam64(int16_t *stream0_in,
     stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -4068,7 +4068,7 @@ void qam64_qpsk(int16_t *stream0_in,
   __m128i  y0i_three_over_sqrt_21;
   __m128i  y0i_five_over_sqrt_21;
   __m128i  y0i_seven_over_sqrt_21;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -5453,7 +5453,7 @@ void qam64_qpsk(int16_t *stream0_in,
     stream0_out[j + 45] = ((short *)&y0i)[7];
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -5593,7 +5593,7 @@ void qam64_qam16(short *stream0_in,
   __m128i  y0i_five_over_sqrt_21;
   __m128i  y0i_seven_over_sqrt_21;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   int i,j;
@@ -6993,7 +6993,7 @@ void qam64_qam16(short *stream0_in,
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -7139,7 +7139,7 @@ void qam64_qam64(short *stream0_in,
   __m128i ch_mag_int_with_sigma2;
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -8803,7 +8803,7 @@ void qam64_qam64(short *stream0_in,
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -8878,8 +8878,6 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
              llr16,
              pllr_symbol);*/
 
-#ifdef __AVX2__
-
   // Round length up to multiple of 16 words
   uint32_t len256i = ((len+16)>>4)*16;
   int32_t *rxF_256i      = (int32_t*) malloc16_clear(len256i*4);
@@ -8908,16 +8906,6 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
   free16(ch_mag_i_256i, sizeof(ch_mag_i_256i));
   free16(rho_256i, sizeof(rho_256i));
 
-#else
-  qam64_qam64((short *)rxF,
-              (short *)rxF_i,
-              (short *)ch_mag,
-              (short *)ch_mag_i,
-              (short *)llr16,
-              (short *)rho,
-              len);
-#endif
-
   llr16 += (6*len);
   //*llr16p = (short *)llr16;
 
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c
index f6938f32343badabe40defad97b45b19802e928e..719295a5efc88be3059b6d628c452ed18690de10 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c
@@ -589,20 +589,20 @@ static __m256i tmp_result4 __attribute__ ((aligned(32)));
 // Auxiliary Makros
 
 // calculate interference magnitude
-#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result2 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result = _mm256_and_si256(tmp_result,c1); tmp_result2 = _mm256_and_si256(tmp_result2,c2); int_mag = _mm256_or_si256(tmp_result,tmp_result2);
+#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = simde_mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result2 = simde_mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result = simde_mm256_and_si256(tmp_result,c1); tmp_result2 = simde_mm256_and_si256(tmp_result2,c2); int_mag = simde_mm256_or_si256(tmp_result,tmp_result2);
 
 // calculate interference magnitude
 // tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 interval x>6
-#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm256_cmpgt_epi16(int_two_ch_mag,psi); tmp_result3 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result2 = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result = _mm256_xor_si256(tmp_result,tmp_result2); tmp_result4 = _mm256_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm256_xor_si256(tmp_result3,tmp_result4); tmp_result = _mm256_and_si256(tmp_result,c3); tmp_result2 = _mm256_and_si256(tmp_result2,c1); tmp_result3 = _mm256_and_si256(tmp_result3,c5); tmp_result4 = _mm256_and_si256(tmp_result4,c7); tmp_result = _mm256_or_si256(tmp_result,tmp_result2); tmp_result3 = _mm256_or_si256(tmp_result3,tmp_result4); a = _mm256_or_si256(tmp_result,tmp_result3);
+#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = simde_mm256_cmpgt_epi16(int_two_ch_mag,psi); tmp_result3 = simde_mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result2 = simde_mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result = simde_mm256_xor_si256(tmp_result,tmp_result2); tmp_result4 = simde_mm256_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = simde_mm256_xor_si256(tmp_result3,tmp_result4); tmp_result = simde_mm256_and_si256(tmp_result,c3); tmp_result2 = simde_mm256_and_si256(tmp_result2,c1); tmp_result3 = simde_mm256_and_si256(tmp_result3,c5); tmp_result4 = simde_mm256_and_si256(tmp_result4,c7); tmp_result = simde_mm256_or_si256(tmp_result,tmp_result2); tmp_result3 = simde_mm256_or_si256(tmp_result3,tmp_result4); a = simde_mm256_or_si256(tmp_result,tmp_result3);
 
 // calculates psi_a = psi_r*a_r + psi_i*a_i
-#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm256_mulhi_epi16(psi_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); psi_a = _mm256_adds_epi16(tmp_result,tmp_result2);
+#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = simde_mm256_mulhi_epi16(psi_r,a_r); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result2 = simde_mm256_mulhi_epi16(psi_i,a_i); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); psi_a = simde_mm256_adds_epi16(tmp_result,tmp_result2);
 
 // calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor
-#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2);
+#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = simde_mm256_mulhi_epi16(a_r,a_r); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result = simde_mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result = simde_mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result2 = simde_mm256_mulhi_epi16(a_i,a_i); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); tmp_result2 = simde_mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); tmp_result2 = simde_mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); a_sq = simde_mm256_adds_epi16(tmp_result,tmp_result2);
 
 // calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
-#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq)  tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,3); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,3); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2);
+#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq)  tmp_result = simde_mm256_mulhi_epi16(a_r,a_r); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result = simde_mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = simde_mm256_slli_epi16(tmp_result,3); tmp_result = simde_mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = simde_mm256_slli_epi16(tmp_result,1); tmp_result2 = simde_mm256_mulhi_epi16(a_i,a_i); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); tmp_result2 = simde_mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,3); tmp_result2 = simde_mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = simde_mm256_slli_epi16(tmp_result2,1); a_sq = simde_mm256_adds_epi16(tmp_result,tmp_result2);
 
 void seperate_real_imag_parts(__m256i *out_re,
                               __m256i *out_im,
@@ -612,24 +612,24 @@ void seperate_real_imag_parts(__m256i *out_re,
     __m256i tmp0;
     __m256i tmp1;
 
-    in0 = _mm256_shufflelo_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    in0 = _mm256_shufflehi_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    in0 = _mm256_shuffle_epi32(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in0 = simde_mm256_shufflelo_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in0 = simde_mm256_shufflehi_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in0 = simde_mm256_shuffle_epi32(in0,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
-    in1 = _mm256_shufflelo_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    in1 = _mm256_shufflehi_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    in1 = _mm256_shuffle_epi32(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in1 = simde_mm256_shufflelo_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in1 = simde_mm256_shufflehi_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    in1 = simde_mm256_shuffle_epi32(in1,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
     //in0 = [Re(0,1,2,3)   Im(0,1,2,3)   Re(4,5,6,7)     Im(4,5,6,7)]
     //in0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)]
 
-    tmp0 = _mm256_unpacklo_epi64(in0, in1);
+    tmp0 = simde_mm256_unpacklo_epi64(in0, in1);
     //axmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)]
-    tmp0 = _mm256_permute4x64_epi64(tmp0,0xd8); // Re(rho)
+    tmp0 = simde_mm256_permute4x64_epi64(tmp0,0xd8); // Re(rho)
 
-    tmp1 = _mm256_unpackhi_epi64(in0, in1);
+    tmp1 = simde_mm256_unpackhi_epi64(in0, in1);
     //axmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)]
-    tmp1 = _mm256_permute4x64_epi64(tmp1,0xd8); // Im(rho)
+    tmp1 = simde_mm256_permute4x64_epi64(tmp1,0xd8); // Im(rho)
 
     *out_re = tmp0;
     *out_im = tmp1;
@@ -668,22 +668,22 @@ void qam64_qam16_avx2(short *stream0_in,
   __m256i *ch_mag_256i     = (__m256i *)ch_mag;
   __m256i *ch_mag_256i_i   = (__m256i *)ch_mag_i;
 
-  __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
-  __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
-  __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
-  __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(5/sqrt(42)*2^15)
-  __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
-  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
-  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
-  __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14
-  __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
-  __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
-  __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
-  __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15)
-  __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15)
-  __m256i ONE_OVER_SQRT_10_Q15 = _mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // round(1/sqrt(10)*2^15)
-  __m256i THREE_OVER_SQRT_10 = _mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15)
-  __m256i SQRT_10_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15)
+  __m256i ONE_OVER_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
+  __m256i THREE_OVER_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
+  __m256i FIVE_OVER_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
+  __m256i SEVEN_OVER_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14), Q2.14
+  __m256i FORTYNINE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
+  __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^14), Q2.14
+  __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
+  __m256i NINE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
+  __m256i THIRTEEN_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
+  __m256i FIVE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_SQRT_10_Q15 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // round(1/sqrt(10)*2^15)
+  __m256i THREE_OVER_SQRT_10 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15)
+  __m256i SQRT_10_OVER_FOUR = simde_mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15)
 
 
   __m256i ch_mag_int;
@@ -706,7 +706,7 @@ void qam64_qam16_avx2(short *stream0_in,
   __m256i  y0i_five_over_sqrt_21;
   __m256i  y0i_seven_over_sqrt_21;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   int i,j;
@@ -732,360 +732,360 @@ void qam64_qam16_avx2(short *stream0_in,
       */
     seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]);
 
-    rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
-    rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+    rho_rpi = simde_mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = simde_mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
 
     // Compute the different rhos
-    rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
-    rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
-    rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
-    rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
-    rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
-    rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
-    rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
-    rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
-
-    rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1);
-    rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1);
-    rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2);
-    rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
-    xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
-    xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
-    xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
-    xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
-    xmm7 = _mm256_slli_epi16(xmm7, 1);
-    xmm8 = _mm256_slli_epi16(xmm8, 2);
-
-    rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7);
-    rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
-    rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7);
-    rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
-    xmm4 = _mm256_slli_epi16(xmm4, 1);
-    rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
-    xmm4 = _mm256_slli_epi16(xmm4, 2);
-    rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_1 = simde_mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = simde_mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = simde_mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = simde_mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = simde_mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = simde_mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = simde_mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = simde_mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = simde_mm256_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = simde_mm256_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = simde_mm256_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = simde_mm256_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = simde_mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = simde_mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = simde_mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = simde_mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = simde_mm256_slli_epi16(xmm7, 1);
+    xmm8 = simde_mm256_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = simde_mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = simde_mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = simde_mm256_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = simde_mm256_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = simde_mm256_subs_epi16(xmm4, xmm7);
 
     // Rearrange interfering MF output
     /*
     xmm0 = stream1_128i_in[i];
     xmm1 = stream1_128i_in[i+1];
-    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
     //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
     //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
-    y1r = _mm256_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
-    y1i = _mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
+    y1r = simde_mm256_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
+    y1i = simde_mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
     */
 
     seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]);
 
     // Psi_r calculation from rho_rpi or rho_rmi
-    xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r);
-    psi_r_p7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r);
-    psi_r_p7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r);
-    psi_r_p7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r);
-    psi_r_p7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r);
-    psi_r_p7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r);
-    psi_r_p7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r);
-    psi_r_p7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r);
-    psi_r_p7_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r);
-    psi_r_p5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r);
-    psi_r_p5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r);
-    psi_r_p5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r);
-    psi_r_p5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r);
-    psi_r_p5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r);
-    psi_r_p5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r);
-    psi_r_p5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r);
-    psi_r_p5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r);
-    psi_r_p3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r);
-    psi_r_p3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r);
-    psi_r_p3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r);
-    psi_r_p3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r);
-    psi_r_p3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r);
-    psi_r_p3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r);
-    psi_r_p3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r);
-    psi_r_p3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r);
-    psi_r_p1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r);
-    psi_r_p1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r);
-    psi_r_p1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r);
-    psi_r_p1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r);
-    psi_r_p1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r);
-    psi_r_p1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r);
-    psi_r_p1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r);
-    psi_r_p1_m7 = _mm256_abs_epi16(xmm2);
-
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r);
-    psi_r_m1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r);
-    psi_r_m1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r);
-    psi_r_m1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r);
-    psi_r_m1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r);
-    psi_r_m1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r);
-    psi_r_m1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r);
-    psi_r_m1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r);
-    psi_r_m1_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r);
-    psi_r_m3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r);
-    psi_r_m3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r);
-    psi_r_m3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r);
-    psi_r_m3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r);
-    psi_r_m3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r);
-    psi_r_m3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r);
-    psi_r_m3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r);
-    psi_r_m3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r);
-    psi_r_m5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r);
-    psi_r_m5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r);
-    psi_r_m5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r);
-    psi_r_m5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r);
-    psi_r_m5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r);
-    psi_r_m5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r);
-    psi_r_m5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r);
-    psi_r_m5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r);
-    psi_r_m7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r);
-    psi_r_m7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r);
-    psi_r_m7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r);
-    psi_r_m7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r);
-    psi_r_m7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r);
-    psi_r_m7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r);
-    psi_r_m7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r);
-    psi_r_m7_m7 = _mm256_abs_epi16(xmm2);
+    xmm0 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_7, y1r);
+    psi_r_p7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = simde_mm256_abs_epi16(xmm2);
+
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = simde_mm256_abs_epi16(xmm2);
 
     // Psi_i calculation from rho_rpi or rho_rmi
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i);
-    psi_i_p7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i);
-    psi_i_p7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i);
-    psi_i_p7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i);
-    psi_i_p7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i);
-    psi_i_p7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i);
-    psi_i_p7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i);
-    psi_i_p7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i);
-    psi_i_p7_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i);
-    psi_i_p5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i);
-    psi_i_p5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i);
-    psi_i_p5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i);
-    psi_i_p5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i);
-    psi_i_p5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i);
-    psi_i_p5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i);
-    psi_i_p5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i);
-    psi_i_p5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i);
-    psi_i_p3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i);
-    psi_i_p3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i);
-    psi_i_p3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i);
-    psi_i_p3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i);
-    psi_i_p3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i);
-    psi_i_p3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i);
-    psi_i_p3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i);
-    psi_i_p3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i);
-    psi_i_p1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i);
-    psi_i_p1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i);
-    psi_i_p1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i);
-    psi_i_p1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i);
-    psi_i_p1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i);
-    psi_i_p1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i);
-    psi_i_p1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i);
-    psi_i_p1_m7 = _mm256_abs_epi16(xmm2);
-
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i);
-    psi_i_m1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i);
-    psi_i_m1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i);
-    psi_i_m1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i);
-    psi_i_m1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i);
-    psi_i_m1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i);
-    psi_i_m1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i);
-    psi_i_m1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i);
-    psi_i_m1_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i);
-    psi_i_m3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i);
-    psi_i_m3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i);
-    psi_i_m3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i);
-    psi_i_m3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i);
-    psi_i_m3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i);
-    psi_i_m3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i);
-    psi_i_m3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i);
-    psi_i_m3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i);
-    psi_i_m5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i);
-    psi_i_m5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i);
-    psi_i_m5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i);
-    psi_i_m5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i);
-    psi_i_m5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i);
-    psi_i_m5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i);
-    psi_i_m5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i);
-    psi_i_m5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i);
-    psi_i_m7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i);
-    psi_i_m7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i);
-    psi_i_m7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i);
-    psi_i_m7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i);
-    psi_i_m7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i);
-    psi_i_m7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i);
-    psi_i_m7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i);
-    psi_i_m7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = simde_mm256_abs_epi16(xmm2);
+
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = simde_mm256_abs_epi16(xmm2);
 
 /*
     // Rearrange desired MF output
     xmm0 = stream0_128i_in[i];
     xmm1 = stream0_128i_in[i+1];
-    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
     //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)]
     //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)]
-    y0r = _mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
-    y0i = _mm256_unpackhi_epi64(xmm0,xmm1);
+    y0r = simde_mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)]
+    y0i = simde_mm256_unpackhi_epi64(xmm0,xmm1);
 */
     seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]);
 
@@ -1093,13 +1093,13 @@ void qam64_qam16_avx2(short *stream0_in,
     // Rearrange desired channel magnitudes
     xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10))
     xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10))
-    xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    ch_mag_des = _mm256_unpacklo_epi64(xmm2,xmm3);
+    xmm2 = simde_mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = simde_mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = simde_mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_des = simde_mm256_unpacklo_epi64(xmm2,xmm3);
     */
 
     seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]);
@@ -1108,64 +1108,64 @@ void qam64_qam16_avx2(short *stream0_in,
     /*
     xmm2 = ch_mag_128i_i[i];
     xmm3 = ch_mag_128i_i[i+1];
-    xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    ch_mag_int  = _mm256_unpacklo_epi64(xmm2,xmm3);
+    xmm2 = simde_mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = simde_mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm2 = simde_mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm3 = simde_mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    ch_mag_int  = simde_mm256_unpacklo_epi64(xmm2,xmm3);
     */
 
     seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]);
 
-    y0r_one_over_sqrt_21   = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
-    y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
-    y0r_five_over_sqrt_21  = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
-    y0r_five_over_sqrt_21  = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
-    y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
-    y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
-
-    y0i_one_over_sqrt_21   = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
-    y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
-    y0i_five_over_sqrt_21  = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
-    y0i_five_over_sqrt_21  = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
-    y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
-    y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
-
-    y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
-
-    y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0r_one_over_sqrt_21   = simde_mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = simde_mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = simde_mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = simde_mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = simde_mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = simde_mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = simde_mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = simde_mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = simde_mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = simde_mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = simde_mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = simde_mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+    y0_p_7_1 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
 
     interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
     interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10);
@@ -1431,637 +1431,637 @@ void qam64_qam16_avx2(short *stream0_in,
 
     // Computing different multiples of ||h0||^2
     // x=1, y=1
-    ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
-    ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    ch_mag_2_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
     // x=1, y=3
-    ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
-    ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    ch_mag_10_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
     // x=1, x=5
-    ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
-    ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    ch_mag_26_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
     // x=1, y=7
-    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
-    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
     // x=3, y=3
-    ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
-    ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    ch_mag_18_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
     // x=3, y=5
-    ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
-    ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    ch_mag_34_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
     // x=3, y=7
-    ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
-    ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    ch_mag_58_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
     // x=5, y=5
-    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
-    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
     // x=5, y=7
-    ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
-    ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    ch_mag_74_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
     // x=7, y=7
-    ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
-    ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+    ch_mag_98_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
 
     // Computing Metrics
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7);
-    bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5);
-    bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3);
-    bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1);
-    bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1);
-    bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3);
-    bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5);
-    bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7);
-    bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7);
-    bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5);
-    bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3);
-    bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1);
-    bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1);
-    bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3);
-    bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5);
-    bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7);
-    bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7);
-    bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5);
-    bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3);
-    bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1);
-    bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1);
-    bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3);
-    bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5);
-    bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7);
-    bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7);
-    bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5);
-    bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3);
-    bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1);
-    bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1);
-    bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3);
-    bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5);
-    bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7);
-    bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7);
-    bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5);
-    bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3);
-    bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1);
-    bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1);
-    bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3);
-    bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5);
-    bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7);
-    bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7);
-    bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5);
-    bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3);
-    bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1);
-    bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1);
-    bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3);
-    bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5);
-    bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7);
-    bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7);
-    bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5);
-    bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3);
-    bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1);
-    bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1);
-    bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3);
-    bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5);
-    bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7);
-    bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7);
-    bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5);
-    bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3);
-    bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1);
-    bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1);
-    bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3);
-    bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5);
-    bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7);
-    bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
 
     // Detection for 1st bit (LTE mapping)
     // bit = 1
-    xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
 
     // bit = 0
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 2nd bit (LTE mapping)
     // bit = 1
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
 
     // bit = 0
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 3rd bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 4th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); // reduce all eight bit_met_*_p5 metrics
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); // m5 paired with m7 so the m7 metric is not dropped
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4); // fold both partial maxima into the running denominator
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
 
     // Detection for 5th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 6th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); // was (m5_p1, m5_p1): m7_p1 never entered the max
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
     // RE 1
@@ -2179,7 +2179,7 @@ void qam64_qam16_avx2(short *stream0_in,
     stream0_out[j + 94] = ((short *)&y1i)[15];
     stream0_out[j + 95] = ((short *)&y2i)[15];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2224,25 +2224,25 @@ void qam64_qam64_avx2(int32_t *stream0_in,
   __m256i *ch_mag_256i     = (__m256i *)ch_mag;
   __m256i *ch_mag_256i_i   = (__m256i *)ch_mag_i;
 
-  __m256i ONE_OVER_SQRT_42              = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
-  __m256i THREE_OVER_SQRT_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
-  __m256i FIVE_OVER_SQRT_42             = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
-  __m256i SEVEN_OVER_SQRT_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14
-  __m256i ONE_OVER_SQRT_2               = _mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15)
-  __m256i ONE_OVER_SQRT_2_42            = _mm256_broadcastw_epi16(_mm_set1_epi16(3575));  // round(1/sqrt(2*42)*2^15)
-  __m256i THREE_OVER_SQRT_2_42          = _mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15)
-  __m256i FIVE_OVER_SQRT_2_42           = _mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15)
-  __m256i SEVEN_OVER_SQRT_2_42          = _mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15)
-  __m256i FORTYNINE_OVER_FOUR_SQRT_42   = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
-  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
-  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42  = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
-  __m256i TWENTYNINE_OVER_FOUR_SQRT_42  = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14
-  __m256i SEVENTEEN_OVER_FOUR_SQRT_42   = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
-  __m256i NINE_OVER_FOUR_SQRT_42        = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
-  __m256i THIRTEEN_OVER_FOUR_SQRT_42    = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
-  __m256i FIVE_OVER_FOUR_SQRT_42        = _mm256_broadcastw_epi16(_mm_set1_epi16(6320));  // round(5/(4*sqrt(42))*2^15)
-  __m256i ONE_OVER_FOUR_SQRT_42         = _mm256_broadcastw_epi16(_mm_set1_epi16(1264));  // round(1/(4*sqrt(42))*2^15)
-  __m256i SQRT_42_OVER_FOUR             = _mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.12
+  __m256i ONE_OVER_SQRT_42              = simde_mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16)
+  __m256i THREE_OVER_SQRT_42            = simde_mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16)
+  __m256i FIVE_OVER_SQRT_42             = simde_mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15)
+  __m256i SEVEN_OVER_SQRT_42            = simde_mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14
+  __m256i ONE_OVER_SQRT_2               = simde_mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15)
+  __m256i ONE_OVER_SQRT_2_42            = simde_mm256_broadcastw_epi16(_mm_set1_epi16(3575));  // round(1/sqrt(2*42)*2^15)
+  __m256i THREE_OVER_SQRT_2_42          = simde_mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15)
+  __m256i FIVE_OVER_SQRT_2_42           = simde_mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15)
+  __m256i SEVEN_OVER_SQRT_2_42          = simde_mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15)
+  __m256i FORTYNINE_OVER_FOUR_SQRT_42   = simde_mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14
+  __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14
+  __m256i TWENTYFIVE_OVER_FOUR_SQRT_42  = simde_mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15)
+  __m256i TWENTYNINE_OVER_FOUR_SQRT_42  = simde_mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^14), Q2.14
+  __m256i SEVENTEEN_OVER_FOUR_SQRT_42   = simde_mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15)
+  __m256i NINE_OVER_FOUR_SQRT_42        = simde_mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15)
+  __m256i THIRTEEN_OVER_FOUR_SQRT_42    = simde_mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15)
+  __m256i FIVE_OVER_FOUR_SQRT_42        = simde_mm256_broadcastw_epi16(_mm_set1_epi16(6320));  // round(5/(4*sqrt(42))*2^15)
+  __m256i ONE_OVER_FOUR_SQRT_42         = simde_mm256_broadcastw_epi16(_mm_set1_epi16(1264));  // round(1/(4*sqrt(42))*2^15)
+  __m256i SQRT_42_OVER_FOUR             = simde_mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.13
 
   __m256i ch_mag_des;
   __m256i ch_mag_int;
@@ -2266,7 +2266,7 @@ void qam64_qam64_avx2(int32_t *stream0_in,
   __m256i ch_mag_int_with_sigma2;
   __m256i two_ch_mag_int_with_sigma2;
   __m256i three_ch_mag_int_with_sigma2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -2281,371 +2281,371 @@ void qam64_qam64_avx2(int32_t *stream0_in,
       /*
     xmm0 = rho01_256i[i];
     xmm1 = rho01_256i[i+1];
-    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
-    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
     //xmm0 = [Re(0,1,2,3)   Im(0,1,2,3)   Re(4,5,6,7)     Im(4,5,6,7)]
     //xmm0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)]
 
-    xmm2 = _mm256_unpacklo_epi64(xmm0, xmm1);
+    xmm2 = simde_mm256_unpacklo_epi64(xmm0, xmm1);
     //xmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)]
-    xmm2 = _mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho)
+    xmm2 = simde_mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho)
 
-    xmm3 = _mm256_unpackhi_epi64(xmm0, xmm1);
+    xmm3 = simde_mm256_unpackhi_epi64(xmm0, xmm1);
     //xmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)]
-    xmm3 = _mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho)
+    xmm3 = simde_mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho)
       */
 
     seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]);
 
-    rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
-    rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
+    rho_rpi = simde_mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho)
+    rho_rmi = simde_mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho)
 
     // Compute the different rhos
-    rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
-    rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
-    rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
-    rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
-    rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
-    rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
-    rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
-    rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
-
-    rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1);
-    rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1);
-    rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2);
-    rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
-    xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
-    xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
-    xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
-    xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
-    xmm7 = _mm256_slli_epi16(xmm7, 1);
-    xmm8 = _mm256_slli_epi16(xmm8, 2);
-
-    rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7);
-    rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
-    rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7);
-    rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
-    xmm4 = _mm256_slli_epi16(xmm4, 1);
-    rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8);
-    rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8);
-
-    xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
-    xmm4 = _mm256_slli_epi16(xmm4, 2);
-    rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5);
-    rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5);
-    rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6);
-    rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6);
-    rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7);
-    rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_1 = simde_mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42);
+    rho_rmi_1_1 = simde_mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42);
+    rho_rpi_3_3 = simde_mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42);
+    rho_rmi_3_3 = simde_mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42);
+    rho_rpi_5_5 = simde_mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42);
+    rho_rmi_5_5 = simde_mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42);
+    rho_rpi_7_7 = simde_mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42);
+    rho_rmi_7_7 = simde_mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42);
+
+    rho_rpi_5_5 = simde_mm256_slli_epi16(rho_rpi_5_5, 1);
+    rho_rmi_5_5 = simde_mm256_slli_epi16(rho_rmi_5_5, 1);
+    rho_rpi_7_7 = simde_mm256_slli_epi16(rho_rpi_7_7, 2);
+    rho_rmi_7_7 = simde_mm256_slli_epi16(rho_rmi_7_7, 2);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42);
+    xmm5 = simde_mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42);
+    xmm6 = simde_mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42);
+    xmm7 = simde_mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42);
+    xmm8 = simde_mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42);
+    xmm7 = simde_mm256_slli_epi16(xmm7, 1);
+    xmm8 = simde_mm256_slli_epi16(xmm8, 2);
+
+    rho_rpi_1_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_1_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_1_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_1_5 = simde_mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_1_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_1_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42);
+    rho_rpi_3_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_3_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_3_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_3_5 = simde_mm256_subs_epi16(xmm4, xmm7);
+    rho_rpi_3_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_3_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42);
+    xmm4 = simde_mm256_slli_epi16(xmm4, 1);
+    rho_rpi_5_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_5_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_5_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_5_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_5_7 = simde_mm256_adds_epi16(xmm4, xmm8);
+    rho_rmi_5_7 = simde_mm256_subs_epi16(xmm4, xmm8);
+
+    xmm4 = simde_mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42);
+    xmm4 = simde_mm256_slli_epi16(xmm4, 2);
+    rho_rpi_7_1 = simde_mm256_adds_epi16(xmm4, xmm5);
+    rho_rmi_7_1 = simde_mm256_subs_epi16(xmm4, xmm5);
+    rho_rpi_7_3 = simde_mm256_adds_epi16(xmm4, xmm6);
+    rho_rmi_7_3 = simde_mm256_subs_epi16(xmm4, xmm6);
+    rho_rpi_7_5 = simde_mm256_adds_epi16(xmm4, xmm7);
+    rho_rmi_7_5 = simde_mm256_subs_epi16(xmm4, xmm7);
 
     // Rearrange interfering MF output
     /*
     xmm0 = stream1_256i_in[i];
     xmm1 = stream1_256i_in[i+1];
-    xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm0 = simde_mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
-    xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
-    xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
+    xmm1 = simde_mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3));
 
-    y1r = _mm256_unpacklo_epi64(xmm0, xmm1);
-    y1r = _mm256_permute4x64_epi64(y1r,0xd8); // Re(y1)
+    y1r = simde_mm256_unpacklo_epi64(xmm0, xmm1);
+    y1r = simde_mm256_permute4x64_epi64(y1r,0xd8); // Re(y1)
 
-    y1i = _mm256_unpackhi_epi64(xmm0, xmm1);
-    y1i = _mm256_permute4x64_epi64(y1i,0xd8); // Im(y1)
+    y1i = simde_mm256_unpackhi_epi64(xmm0, xmm1);
+    y1i = simde_mm256_permute4x64_epi64(y1i,0xd8); // Im(y1)
     */
 
     seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]);
 
     // Psi_r calculation from rho_rpi or rho_rmi
-    xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r);
-
-    psi_r_p7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r);
-    psi_r_p7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r);
-    psi_r_p7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r);
-    psi_r_p7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r);
-    psi_r_p7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r);
-    psi_r_p7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r);
-    psi_r_p7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r);
-    psi_r_p7_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r);
-    psi_r_p5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r);
-    psi_r_p5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r);
-    psi_r_p5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r);
-    psi_r_p5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r);
-    psi_r_p5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r);
-    psi_r_p5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r);
-    psi_r_p5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r);
-    psi_r_p5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r);
-    psi_r_p3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r);
-    psi_r_p3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r);
-    psi_r_p3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r);
-    psi_r_p3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r);
-    psi_r_p3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r);
-    psi_r_p3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r);
-    psi_r_p3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r);
-    psi_r_p3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r);
-    psi_r_p1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r);
-    psi_r_p1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r);
-    psi_r_p1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r);
-    psi_r_p1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r);
-    psi_r_p1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r);
-    psi_r_p1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r);
-    psi_r_p1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r);
-    psi_r_p1_m7 = _mm256_abs_epi16(xmm2);
-
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r);
-    psi_r_m1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r);
-    psi_r_m1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r);
-    psi_r_m1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r);
-    psi_r_m1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r);
-    psi_r_m1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r);
-    psi_r_m1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r);
-    psi_r_m1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r);
-    psi_r_m1_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r);
-    psi_r_m3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r);
-    psi_r_m3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r);
-    psi_r_m3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r);
-    psi_r_m3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r);
-    psi_r_m3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r);
-    psi_r_m3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r);
-    psi_r_m3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r);
-    psi_r_m3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r);
-    psi_r_m5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r);
-    psi_r_m5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r);
-    psi_r_m5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r);
-    psi_r_m5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r);
-    psi_r_m5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r);
-    psi_r_m5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r);
-    psi_r_m5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r);
-    psi_r_m5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r);
-    psi_r_m7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r);
-    psi_r_m7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r);
-    psi_r_m7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r);
-    psi_r_m7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r);
-    psi_r_m7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r);
-    psi_r_m7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r);
-    psi_r_m7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r);
-    psi_r_m7_m7 = _mm256_abs_epi16(xmm2);
+    xmm0 = simde_mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_7, y1r);
+
+    psi_r_p7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_5, y1r);
+    psi_r_p7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_3, y1r);
+    psi_r_p7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_1, y1r);
+    psi_r_p7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_1, y1r);
+    psi_r_p7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_3, y1r);
+    psi_r_p7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_5, y1r);
+    psi_r_p7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_7, y1r);
+    psi_r_p7_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_7, y1r);
+    psi_r_p5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_5, y1r);
+    psi_r_p5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_3, y1r);
+    psi_r_p5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_1, y1r);
+    psi_r_p5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_1, y1r);
+    psi_r_p5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_3, y1r);
+    psi_r_p5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_5, y1r);
+    psi_r_p5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_7, y1r);
+    psi_r_p5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_7, y1r);
+    psi_r_p3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_5, y1r);
+    psi_r_p3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_3, y1r);
+    psi_r_p3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_1, y1r);
+    psi_r_p3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_1, y1r);
+    psi_r_p3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_3, y1r);
+    psi_r_p3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_5, y1r);
+    psi_r_p3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_7, y1r);
+    psi_r_p3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_7, y1r);
+    psi_r_p1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_5, y1r);
+    psi_r_p1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_3, y1r);
+    psi_r_p1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_1, y1r);
+    psi_r_p1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_1, y1r);
+    psi_r_p1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_3, y1r);
+    psi_r_p1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_5, y1r);
+    psi_r_p1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_7, y1r);
+    psi_r_p1_m7 = simde_mm256_abs_epi16(xmm2);
+
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_7, y1r);
+    psi_r_m1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_5, y1r);
+    psi_r_m1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_3, y1r);
+    psi_r_m1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_1, y1r);
+    psi_r_m1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_1, y1r);
+    psi_r_m1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_3, y1r);
+    psi_r_m1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_5, y1r);
+    psi_r_m1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_7, y1r);
+    psi_r_m1_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_7, y1r);
+    psi_r_m3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_5, y1r);
+    psi_r_m3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_3, y1r);
+    psi_r_m3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_1, y1r);
+    psi_r_m3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_1, y1r);
+    psi_r_m3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_3, y1r);
+    psi_r_m3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_5, y1r);
+    psi_r_m3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_7, y1r);
+    psi_r_m3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_7, y1r);
+    psi_r_m5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_5, y1r);
+    psi_r_m5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_3, y1r);
+    psi_r_m5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_1, y1r);
+    psi_r_m5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_1, y1r);
+    psi_r_m5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_3, y1r);
+    psi_r_m5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_5, y1r);
+    psi_r_m5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_7, y1r);
+    psi_r_m5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_7, y1r);
+    psi_r_m7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_5, y1r);
+    psi_r_m7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_3, y1r);
+    psi_r_m7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_1, y1r);
+    psi_r_m7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_1, y1r);
+    psi_r_m7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_3, y1r);
+    psi_r_m7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_5, y1r);
+    psi_r_m7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_7, y1r);
+    psi_r_m7_m7 = simde_mm256_abs_epi16(xmm2);
 
     // Psi_i calculation from rho_rpi or rho_rmi
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i);
-    psi_i_p7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i);
-    psi_i_p7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i);
-    psi_i_p7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i);
-    psi_i_p7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i);
-    psi_i_p7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i);
-    psi_i_p7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i);
-    psi_i_p7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i);
-    psi_i_p7_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i);
-    psi_i_p5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i);
-    psi_i_p5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i);
-    psi_i_p5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i);
-    psi_i_p5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i);
-    psi_i_p5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i);
-    psi_i_p5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i);
-    psi_i_p5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i);
-    psi_i_p5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i);
-    psi_i_p3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i);
-    psi_i_p3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i);
-    psi_i_p3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i);
-    psi_i_p3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i);
-    psi_i_p3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i);
-    psi_i_p3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i);
-    psi_i_p3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i);
-    psi_i_p3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i);
-    psi_i_p1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i);
-    psi_i_p1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i);
-    psi_i_p1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i);
-    psi_i_p1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i);
-    psi_i_p1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i);
-    psi_i_p1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i);
-    psi_i_p1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i);
-    psi_i_p1_m7 = _mm256_abs_epi16(xmm2);
-
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i);
-    psi_i_m1_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i);
-    psi_i_m1_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i);
-    psi_i_m1_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i);
-    psi_i_m1_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i);
-    psi_i_m1_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i);
-    psi_i_m1_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i);
-    psi_i_m1_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i);
-    psi_i_m1_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i);
-    psi_i_m3_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i);
-    psi_i_m3_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i);
-    psi_i_m3_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i);
-    psi_i_m3_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i);
-    psi_i_m3_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i);
-    psi_i_m3_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i);
-    psi_i_m3_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i);
-    psi_i_m3_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i);
-    psi_i_m5_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i);
-    psi_i_m5_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i);
-    psi_i_m5_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i);
-    psi_i_m5_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i);
-    psi_i_m5_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i);
-    psi_i_m5_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i);
-    psi_i_m5_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i);
-    psi_i_m5_m7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i);
-    psi_i_m7_p7 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i);
-    psi_i_m7_p5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i);
-    psi_i_m7_p3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i);
-    psi_i_m7_p1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i);
-    psi_i_m7_m1 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i);
-    psi_i_m7_m3 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i);
-    psi_i_m7_m5 = _mm256_abs_epi16(xmm2);
-    xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i);
-    psi_i_m7_m7 = _mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_7, y1i);
+    psi_i_p7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_7, y1i);
+    psi_i_p7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_7, y1i);
+    psi_i_p7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_7, y1i);
+    psi_i_p7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_7, y1i);
+    psi_i_p7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_7, y1i);
+    psi_i_p7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_7, y1i);
+    psi_i_p7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_7, y1i);
+    psi_i_p7_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_5, y1i);
+    psi_i_p5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_5, y1i);
+    psi_i_p5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_5, y1i);
+    psi_i_p5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_5, y1i);
+    psi_i_p5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_5, y1i);
+    psi_i_p5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_5, y1i);
+    psi_i_p5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_5, y1i);
+    psi_i_p5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_5, y1i);
+    psi_i_p5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_3, y1i);
+    psi_i_p3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_3, y1i);
+    psi_i_p3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_3, y1i);
+    psi_i_p3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_3, y1i);
+    psi_i_p3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_3, y1i);
+    psi_i_p3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_3, y1i);
+    psi_i_p3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_3, y1i);
+    psi_i_p3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_3, y1i);
+    psi_i_p3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_7_1, y1i);
+    psi_i_p1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_5_1, y1i);
+    psi_i_p1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_3_1, y1i);
+    psi_i_p1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rmi_1_1, y1i);
+    psi_i_p1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_1_1, y1i);
+    psi_i_p1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_3_1, y1i);
+    psi_i_p1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_5_1, y1i);
+    psi_i_p1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rpi_7_1, y1i);
+    psi_i_p1_m7 = simde_mm256_abs_epi16(xmm2);
+
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_1, y1i);
+    psi_i_m1_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_1, y1i);
+    psi_i_m1_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_1, y1i);
+    psi_i_m1_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_1, y1i);
+    psi_i_m1_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_1, y1i);
+    psi_i_m1_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_1, y1i);
+    psi_i_m1_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_1, y1i);
+    psi_i_m1_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_1, y1i);
+    psi_i_m1_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_3, y1i);
+    psi_i_m3_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_3, y1i);
+    psi_i_m3_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_3, y1i);
+    psi_i_m3_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_3, y1i);
+    psi_i_m3_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_3, y1i);
+    psi_i_m3_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_3, y1i);
+    psi_i_m3_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_3, y1i);
+    psi_i_m3_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_3, y1i);
+    psi_i_m3_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_5, y1i);
+    psi_i_m5_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_5, y1i);
+    psi_i_m5_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_5, y1i);
+    psi_i_m5_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_5, y1i);
+    psi_i_m5_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_5, y1i);
+    psi_i_m5_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_5, y1i);
+    psi_i_m5_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_5, y1i);
+    psi_i_m5_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_5, y1i);
+    psi_i_m5_m7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_7_7, y1i);
+    psi_i_m7_p7 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_5_7, y1i);
+    psi_i_m7_p5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_3_7, y1i);
+    psi_i_m7_p3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_subs_epi16(rho_rpi_1_7, y1i);
+    psi_i_m7_p1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_1_7, y1i);
+    psi_i_m7_m1 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_3_7, y1i);
+    psi_i_m7_m3 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_5_7, y1i);
+    psi_i_m7_m5 = simde_mm256_abs_epi16(xmm2);
+    xmm2 = simde_mm256_adds_epi16(rho_rmi_7_7, y1i);
+    psi_i_m7_m7 = simde_mm256_abs_epi16(xmm2);
 
     /*
     // Rearrange desired MF output
@@ -2695,59 +2695,59 @@ void qam64_qam64_avx2(int32_t *stream0_in,
     */
     seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]);
 
-    y0r_one_over_sqrt_21   = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
-    y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
-    y0r_five_over_sqrt_21  = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
-    y0r_five_over_sqrt_21  = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
-    y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
-    y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
-
-    y0i_one_over_sqrt_21   = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
-    y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
-    y0i_five_over_sqrt_21  = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
-    y0i_five_over_sqrt_21  = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
-    y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
-    y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
-
-
-    y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
-
-    y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
-    y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
-    y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
-    y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
-    y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0r_one_over_sqrt_21   = simde_mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42);
+    y0r_three_over_sqrt_21 = simde_mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = simde_mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42);
+    y0r_five_over_sqrt_21  = simde_mm256_slli_epi16(y0r_five_over_sqrt_21, 1);
+    y0r_seven_over_sqrt_21 = simde_mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42);
+    y0r_seven_over_sqrt_21 = simde_mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14
+
+    y0i_one_over_sqrt_21   = simde_mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42);
+    y0i_three_over_sqrt_21 = simde_mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = simde_mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42);
+    y0i_five_over_sqrt_21  = simde_mm256_slli_epi16(y0i_five_over_sqrt_21, 1);
+    y0i_seven_over_sqrt_21 = simde_mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42);
+    y0i_seven_over_sqrt_21 = simde_mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14
+
+
+    y0_p_7_1 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_7_3 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_7_5 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_7_7 = simde_mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_5_1 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_5_3 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_5_5 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_5_7 = simde_mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_3_1 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_3_3 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_3_5 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_3_7 = simde_mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_p_1_1 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_p_1_3 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_p_1_5 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_p_1_7 = simde_mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+
+    y0_m_1_1 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_1_3 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_1_5 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_1_7 = simde_mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_3_1 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_3_3 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_3_5 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_3_7 = simde_mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_5_1 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_5_3 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_5_5 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_5_7 = simde_mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21);
+    y0_m_7_1 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21);
+    y0_m_7_3 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21);
+    y0_m_7_5 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21);
+    y0_m_7_7 = simde_mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21);
 
     // Detection of interference term
-    ch_mag_int_with_sigma2       = _mm256_srai_epi16(ch_mag_int, 1); // *2
+    ch_mag_int_with_sigma2       = simde_mm256_srai_epi16(ch_mag_int, 1); // *2
     two_ch_mag_int_with_sigma2   = ch_mag_int; // *4
-    three_ch_mag_int_with_sigma2 = _mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
+    three_ch_mag_int_with_sigma2 = simde_mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6
 
     interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42,
                                  SEVEN_OVER_SQRT_2_42);
@@ -3074,134 +3074,134 @@ void qam64_qam64_avx2(int32_t *stream0_in,
     prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7);
 
     // Multiply by sqrt(2)
-    psi_a_p7_p7 = _mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2);
-    psi_a_p7_p7 = _mm256_slli_epi16(psi_a_p7_p7, 2);
-    psi_a_p7_p5 = _mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2);
-    psi_a_p7_p5 = _mm256_slli_epi16(psi_a_p7_p5, 2);
-    psi_a_p7_p3 = _mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2);
-    psi_a_p7_p3 = _mm256_slli_epi16(psi_a_p7_p3, 2);
-    psi_a_p7_p1 = _mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2);
-    psi_a_p7_p1 = _mm256_slli_epi16(psi_a_p7_p1, 2);
-    psi_a_p7_m1 = _mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2);
-    psi_a_p7_m1 = _mm256_slli_epi16(psi_a_p7_m1, 2);
-    psi_a_p7_m3 = _mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2);
-    psi_a_p7_m3 = _mm256_slli_epi16(psi_a_p7_m3, 2);
-    psi_a_p7_m5 = _mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2);
-    psi_a_p7_m5 = _mm256_slli_epi16(psi_a_p7_m5, 2);
-    psi_a_p7_m7 = _mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2);
-    psi_a_p7_m7 = _mm256_slli_epi16(psi_a_p7_m7, 2);
-    psi_a_p5_p7 = _mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2);
-    psi_a_p5_p7 = _mm256_slli_epi16(psi_a_p5_p7, 2);
-    psi_a_p5_p5 = _mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2);
-    psi_a_p5_p5 = _mm256_slli_epi16(psi_a_p5_p5, 2);
-    psi_a_p5_p3 = _mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2);
-    psi_a_p5_p3 = _mm256_slli_epi16(psi_a_p5_p3, 2);
-    psi_a_p5_p1 = _mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2);
-    psi_a_p5_p1 = _mm256_slli_epi16(psi_a_p5_p1, 2);
-    psi_a_p5_m1 = _mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2);
-    psi_a_p5_m1 = _mm256_slli_epi16(psi_a_p5_m1, 2);
-    psi_a_p5_m3 = _mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2);
-    psi_a_p5_m3 = _mm256_slli_epi16(psi_a_p5_m3, 2);
-    psi_a_p5_m5 = _mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2);
-    psi_a_p5_m5 = _mm256_slli_epi16(psi_a_p5_m5, 2);
-    psi_a_p5_m7 = _mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2);
-    psi_a_p5_m7 = _mm256_slli_epi16(psi_a_p5_m7, 2);
-    psi_a_p3_p7 = _mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2);
-    psi_a_p3_p7 = _mm256_slli_epi16(psi_a_p3_p7, 2);
-    psi_a_p3_p5 = _mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2);
-    psi_a_p3_p5 = _mm256_slli_epi16(psi_a_p3_p5, 2);
-    psi_a_p3_p3 = _mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2);
-    psi_a_p3_p3 = _mm256_slli_epi16(psi_a_p3_p3, 2);
-    psi_a_p3_p1 = _mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2);
-    psi_a_p3_p1 = _mm256_slli_epi16(psi_a_p3_p1, 2);
-    psi_a_p3_m1 = _mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2);
-    psi_a_p3_m1 = _mm256_slli_epi16(psi_a_p3_m1, 2);
-    psi_a_p3_m3 = _mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2);
-    psi_a_p3_m3 = _mm256_slli_epi16(psi_a_p3_m3, 2);
-    psi_a_p3_m5 = _mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2);
-    psi_a_p3_m5 = _mm256_slli_epi16(psi_a_p3_m5, 2);
-    psi_a_p3_m7 = _mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2);
-    psi_a_p3_m7 = _mm256_slli_epi16(psi_a_p3_m7, 2);
-    psi_a_p1_p7 = _mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2);
-    psi_a_p1_p7 = _mm256_slli_epi16(psi_a_p1_p7, 2);
-    psi_a_p1_p5 = _mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2);
-    psi_a_p1_p5 = _mm256_slli_epi16(psi_a_p1_p5, 2);
-    psi_a_p1_p3 = _mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2);
-    psi_a_p1_p3 = _mm256_slli_epi16(psi_a_p1_p3, 2);
-    psi_a_p1_p1 = _mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
-    psi_a_p1_p1 = _mm256_slli_epi16(psi_a_p1_p1, 2);
-    psi_a_p1_m1 = _mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
-    psi_a_p1_m1 = _mm256_slli_epi16(psi_a_p1_m1, 2);
-    psi_a_p1_m3 = _mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2);
-    psi_a_p1_m3 = _mm256_slli_epi16(psi_a_p1_m3, 2);
-    psi_a_p1_m5 = _mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2);
-    psi_a_p1_m5 = _mm256_slli_epi16(psi_a_p1_m5, 2);
-    psi_a_p1_m7 = _mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2);
-    psi_a_p1_m7 = _mm256_slli_epi16(psi_a_p1_m7, 2);
-    psi_a_m1_p7 = _mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2);
-    psi_a_m1_p7 = _mm256_slli_epi16(psi_a_m1_p7, 2);
-    psi_a_m1_p5 = _mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2);
-    psi_a_m1_p5 = _mm256_slli_epi16(psi_a_m1_p5, 2);
-    psi_a_m1_p3 = _mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2);
-    psi_a_m1_p3 = _mm256_slli_epi16(psi_a_m1_p3, 2);
-    psi_a_m1_p1 = _mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
-    psi_a_m1_p1 = _mm256_slli_epi16(psi_a_m1_p1, 2);
-    psi_a_m1_m1 = _mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
-    psi_a_m1_m1 = _mm256_slli_epi16(psi_a_m1_m1, 2);
-    psi_a_m1_m3 = _mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2);
-    psi_a_m1_m3 = _mm256_slli_epi16(psi_a_m1_m3, 2);
-    psi_a_m1_m5 = _mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2);
-    psi_a_m1_m5 = _mm256_slli_epi16(psi_a_m1_m5, 2);
-    psi_a_m1_m7 = _mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2);
-    psi_a_m1_m7 = _mm256_slli_epi16(psi_a_m1_m7, 2);
-    psi_a_m3_p7 = _mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2);
-    psi_a_m3_p7 = _mm256_slli_epi16(psi_a_m3_p7, 2);
-    psi_a_m3_p5 = _mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2);
-    psi_a_m3_p5 = _mm256_slli_epi16(psi_a_m3_p5, 2);
-    psi_a_m3_p3 = _mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2);
-    psi_a_m3_p3 = _mm256_slli_epi16(psi_a_m3_p3, 2);
-    psi_a_m3_p1 = _mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2);
-    psi_a_m3_p1 = _mm256_slli_epi16(psi_a_m3_p1, 2);
-    psi_a_m3_m1 = _mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2);
-    psi_a_m3_m1 = _mm256_slli_epi16(psi_a_m3_m1, 2);
-    psi_a_m3_m3 = _mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2);
-    psi_a_m3_m3 = _mm256_slli_epi16(psi_a_m3_m3, 2);
-    psi_a_m3_m5 = _mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2);
-    psi_a_m3_m5 = _mm256_slli_epi16(psi_a_m3_m5, 2);
-    psi_a_m3_m7 = _mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2);
-    psi_a_m3_m7 = _mm256_slli_epi16(psi_a_m3_m7, 2);
-    psi_a_m5_p7 = _mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2);
-    psi_a_m5_p7 = _mm256_slli_epi16(psi_a_m5_p7, 2);
-    psi_a_m5_p5 = _mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2);
-    psi_a_m5_p5 = _mm256_slli_epi16(psi_a_m5_p5, 2);
-    psi_a_m5_p3 = _mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2);
-    psi_a_m5_p3 = _mm256_slli_epi16(psi_a_m5_p3, 2);
-    psi_a_m5_p1 = _mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2);
-    psi_a_m5_p1 = _mm256_slli_epi16(psi_a_m5_p1, 2);
-    psi_a_m5_m1 = _mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2);
-    psi_a_m5_m1 = _mm256_slli_epi16(psi_a_m5_m1, 2);
-    psi_a_m5_m3 = _mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2);
-    psi_a_m5_m3 = _mm256_slli_epi16(psi_a_m5_m3, 2);
-    psi_a_m5_m5 = _mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2);
-    psi_a_m5_m5 = _mm256_slli_epi16(psi_a_m5_m5, 2);
-    psi_a_m5_m7 = _mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2);
-    psi_a_m5_m7 = _mm256_slli_epi16(psi_a_m5_m7, 2);
-    psi_a_m7_p7 = _mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2);
-    psi_a_m7_p7 = _mm256_slli_epi16(psi_a_m7_p7, 2);
-    psi_a_m7_p5 = _mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2);
-    psi_a_m7_p5 = _mm256_slli_epi16(psi_a_m7_p5, 2);
-    psi_a_m7_p3 = _mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2);
-    psi_a_m7_p3 = _mm256_slli_epi16(psi_a_m7_p3, 2);
-    psi_a_m7_p1 = _mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2);
-    psi_a_m7_p1 = _mm256_slli_epi16(psi_a_m7_p1, 2);
-    psi_a_m7_m1 = _mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2);
-    psi_a_m7_m1 = _mm256_slli_epi16(psi_a_m7_m1, 2);
-    psi_a_m7_m3 = _mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2);
-    psi_a_m7_m3 = _mm256_slli_epi16(psi_a_m7_m3, 2);
-    psi_a_m7_m5 = _mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2);
-    psi_a_m7_m5 = _mm256_slli_epi16(psi_a_m7_m5, 2);
-    psi_a_m7_m7 = _mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2);
-    psi_a_m7_m7 = _mm256_slli_epi16(psi_a_m7_m7, 2);
+    psi_a_p7_p7 = simde_mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2);
+    psi_a_p7_p7 = simde_mm256_slli_epi16(psi_a_p7_p7, 2);
+    psi_a_p7_p5 = simde_mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2);
+    psi_a_p7_p5 = simde_mm256_slli_epi16(psi_a_p7_p5, 2);
+    psi_a_p7_p3 = simde_mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2);
+    psi_a_p7_p3 = simde_mm256_slli_epi16(psi_a_p7_p3, 2);
+    psi_a_p7_p1 = simde_mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2);
+    psi_a_p7_p1 = simde_mm256_slli_epi16(psi_a_p7_p1, 2);
+    psi_a_p7_m1 = simde_mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2);
+    psi_a_p7_m1 = simde_mm256_slli_epi16(psi_a_p7_m1, 2);
+    psi_a_p7_m3 = simde_mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2);
+    psi_a_p7_m3 = simde_mm256_slli_epi16(psi_a_p7_m3, 2);
+    psi_a_p7_m5 = simde_mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2);
+    psi_a_p7_m5 = simde_mm256_slli_epi16(psi_a_p7_m5, 2);
+    psi_a_p7_m7 = simde_mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2);
+    psi_a_p7_m7 = simde_mm256_slli_epi16(psi_a_p7_m7, 2);
+    psi_a_p5_p7 = simde_mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2);
+    psi_a_p5_p7 = simde_mm256_slli_epi16(psi_a_p5_p7, 2);
+    psi_a_p5_p5 = simde_mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2);
+    psi_a_p5_p5 = simde_mm256_slli_epi16(psi_a_p5_p5, 2);
+    psi_a_p5_p3 = simde_mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2);
+    psi_a_p5_p3 = simde_mm256_slli_epi16(psi_a_p5_p3, 2);
+    psi_a_p5_p1 = simde_mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2);
+    psi_a_p5_p1 = simde_mm256_slli_epi16(psi_a_p5_p1, 2);
+    psi_a_p5_m1 = simde_mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2);
+    psi_a_p5_m1 = simde_mm256_slli_epi16(psi_a_p5_m1, 2);
+    psi_a_p5_m3 = simde_mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2);
+    psi_a_p5_m3 = simde_mm256_slli_epi16(psi_a_p5_m3, 2);
+    psi_a_p5_m5 = simde_mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2);
+    psi_a_p5_m5 = simde_mm256_slli_epi16(psi_a_p5_m5, 2);
+    psi_a_p5_m7 = simde_mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2);
+    psi_a_p5_m7 = simde_mm256_slli_epi16(psi_a_p5_m7, 2);
+    psi_a_p3_p7 = simde_mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2);
+    psi_a_p3_p7 = simde_mm256_slli_epi16(psi_a_p3_p7, 2);
+    psi_a_p3_p5 = simde_mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2);
+    psi_a_p3_p5 = simde_mm256_slli_epi16(psi_a_p3_p5, 2);
+    psi_a_p3_p3 = simde_mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2);
+    psi_a_p3_p3 = simde_mm256_slli_epi16(psi_a_p3_p3, 2);
+    psi_a_p3_p1 = simde_mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2);
+    psi_a_p3_p1 = simde_mm256_slli_epi16(psi_a_p3_p1, 2);
+    psi_a_p3_m1 = simde_mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2);
+    psi_a_p3_m1 = simde_mm256_slli_epi16(psi_a_p3_m1, 2);
+    psi_a_p3_m3 = simde_mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2);
+    psi_a_p3_m3 = simde_mm256_slli_epi16(psi_a_p3_m3, 2);
+    psi_a_p3_m5 = simde_mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2);
+    psi_a_p3_m5 = simde_mm256_slli_epi16(psi_a_p3_m5, 2);
+    psi_a_p3_m7 = simde_mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2);
+    psi_a_p3_m7 = simde_mm256_slli_epi16(psi_a_p3_m7, 2);
+    psi_a_p1_p7 = simde_mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2);
+    psi_a_p1_p7 = simde_mm256_slli_epi16(psi_a_p1_p7, 2);
+    psi_a_p1_p5 = simde_mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2);
+    psi_a_p1_p5 = simde_mm256_slli_epi16(psi_a_p1_p5, 2);
+    psi_a_p1_p3 = simde_mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2);
+    psi_a_p1_p3 = simde_mm256_slli_epi16(psi_a_p1_p3, 2);
+    psi_a_p1_p1 = simde_mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2);
+    psi_a_p1_p1 = simde_mm256_slli_epi16(psi_a_p1_p1, 2);
+    psi_a_p1_m1 = simde_mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2);
+    psi_a_p1_m1 = simde_mm256_slli_epi16(psi_a_p1_m1, 2);
+    psi_a_p1_m3 = simde_mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2);
+    psi_a_p1_m3 = simde_mm256_slli_epi16(psi_a_p1_m3, 2);
+    psi_a_p1_m5 = simde_mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2);
+    psi_a_p1_m5 = simde_mm256_slli_epi16(psi_a_p1_m5, 2);
+    psi_a_p1_m7 = simde_mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2);
+    psi_a_p1_m7 = simde_mm256_slli_epi16(psi_a_p1_m7, 2);
+    psi_a_m1_p7 = simde_mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2);
+    psi_a_m1_p7 = simde_mm256_slli_epi16(psi_a_m1_p7, 2);
+    psi_a_m1_p5 = simde_mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2);
+    psi_a_m1_p5 = simde_mm256_slli_epi16(psi_a_m1_p5, 2);
+    psi_a_m1_p3 = simde_mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2);
+    psi_a_m1_p3 = simde_mm256_slli_epi16(psi_a_m1_p3, 2);
+    psi_a_m1_p1 = simde_mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2);
+    psi_a_m1_p1 = simde_mm256_slli_epi16(psi_a_m1_p1, 2);
+    psi_a_m1_m1 = simde_mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2);
+    psi_a_m1_m1 = simde_mm256_slli_epi16(psi_a_m1_m1, 2);
+    psi_a_m1_m3 = simde_mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2);
+    psi_a_m1_m3 = simde_mm256_slli_epi16(psi_a_m1_m3, 2);
+    psi_a_m1_m5 = simde_mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2);
+    psi_a_m1_m5 = simde_mm256_slli_epi16(psi_a_m1_m5, 2);
+    psi_a_m1_m7 = simde_mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2);
+    psi_a_m1_m7 = simde_mm256_slli_epi16(psi_a_m1_m7, 2);
+    psi_a_m3_p7 = simde_mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2);
+    psi_a_m3_p7 = simde_mm256_slli_epi16(psi_a_m3_p7, 2);
+    psi_a_m3_p5 = simde_mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2);
+    psi_a_m3_p5 = simde_mm256_slli_epi16(psi_a_m3_p5, 2);
+    psi_a_m3_p3 = simde_mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2);
+    psi_a_m3_p3 = simde_mm256_slli_epi16(psi_a_m3_p3, 2);
+    psi_a_m3_p1 = simde_mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2);
+    psi_a_m3_p1 = simde_mm256_slli_epi16(psi_a_m3_p1, 2);
+    psi_a_m3_m1 = simde_mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2);
+    psi_a_m3_m1 = simde_mm256_slli_epi16(psi_a_m3_m1, 2);
+    psi_a_m3_m3 = simde_mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2);
+    psi_a_m3_m3 = simde_mm256_slli_epi16(psi_a_m3_m3, 2);
+    psi_a_m3_m5 = simde_mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2);
+    psi_a_m3_m5 = simde_mm256_slli_epi16(psi_a_m3_m5, 2);
+    psi_a_m3_m7 = simde_mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2);
+    psi_a_m3_m7 = simde_mm256_slli_epi16(psi_a_m3_m7, 2);
+    psi_a_m5_p7 = simde_mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2);
+    psi_a_m5_p7 = simde_mm256_slli_epi16(psi_a_m5_p7, 2);
+    psi_a_m5_p5 = simde_mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2);
+    psi_a_m5_p5 = simde_mm256_slli_epi16(psi_a_m5_p5, 2);
+    psi_a_m5_p3 = simde_mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2);
+    psi_a_m5_p3 = simde_mm256_slli_epi16(psi_a_m5_p3, 2);
+    psi_a_m5_p1 = simde_mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2);
+    psi_a_m5_p1 = simde_mm256_slli_epi16(psi_a_m5_p1, 2);
+    psi_a_m5_m1 = simde_mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2);
+    psi_a_m5_m1 = simde_mm256_slli_epi16(psi_a_m5_m1, 2);
+    psi_a_m5_m3 = simde_mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2);
+    psi_a_m5_m3 = simde_mm256_slli_epi16(psi_a_m5_m3, 2);
+    psi_a_m5_m5 = simde_mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2);
+    psi_a_m5_m5 = simde_mm256_slli_epi16(psi_a_m5_m5, 2);
+    psi_a_m5_m7 = simde_mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2);
+    psi_a_m5_m7 = simde_mm256_slli_epi16(psi_a_m5_m7, 2);
+    psi_a_m7_p7 = simde_mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2);
+    psi_a_m7_p7 = simde_mm256_slli_epi16(psi_a_m7_p7, 2);
+    psi_a_m7_p5 = simde_mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2);
+    psi_a_m7_p5 = simde_mm256_slli_epi16(psi_a_m7_p5, 2);
+    psi_a_m7_p3 = simde_mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2);
+    psi_a_m7_p3 = simde_mm256_slli_epi16(psi_a_m7_p3, 2);
+    psi_a_m7_p1 = simde_mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2);
+    psi_a_m7_p1 = simde_mm256_slli_epi16(psi_a_m7_p1, 2);
+    psi_a_m7_m1 = simde_mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2);
+    psi_a_m7_m1 = simde_mm256_slli_epi16(psi_a_m7_m1, 2);
+    psi_a_m7_m3 = simde_mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2);
+    psi_a_m7_m3 = simde_mm256_slli_epi16(psi_a_m7_m3, 2);
+    psi_a_m7_m5 = simde_mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2);
+    psi_a_m7_m5 = simde_mm256_slli_epi16(psi_a_m7_m5, 2);
+    psi_a_m7_m7 = simde_mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2);
+    psi_a_m7_m7 = simde_mm256_slli_epi16(psi_a_m7_m7, 2);
 
     // Calculation of a group of two terms in the bit metric involving squares of interference
     square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7);
@@ -3271,637 +3271,637 @@ void qam64_qam64_avx2(int32_t *stream0_in,
 
     // Computing different multiples of ||h0||^2
     // x=1, y=1
-    ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
-    ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
+    ch_mag_2_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42);
+    ch_mag_2_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1);
     // x=1, y=3
-    ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
-    ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
+    ch_mag_10_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42);
+    ch_mag_10_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1);
     // x=1, x=5
-    ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
-    ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
+    ch_mag_26_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_26_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1);
     // x=1, y=7
-    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
-    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
     // x=3, y=3
-    ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
-    ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
+    ch_mag_18_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42);
+    ch_mag_18_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1);
     // x=3, y=5
-    ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
-    ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
+    ch_mag_34_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42);
+    ch_mag_34_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1);
     // x=3, y=7
-    ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
-    ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
+    ch_mag_58_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_58_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2);
     // x=5, y=5
-    ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
-    ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42);
+    ch_mag_50_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1);
     // x=5, y=7
-    ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
-    ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
+    ch_mag_74_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42);
+    ch_mag_74_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2);
     // x=7, y=7
-    ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
-    ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
+    ch_mag_98_over_42_with_sigma2 = simde_mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42);
+    ch_mag_98_over_42_with_sigma2 = simde_mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2);
 
     // Computing Metrics
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7);
-    bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5);
-    bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3);
-    bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1);
-    bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1);
-    bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3);
-    bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5);
-    bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7);
-    bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7);
-    bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5);
-    bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3);
-    bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1);
-    bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1);
-    bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3);
-    bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5);
-    bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7);
-    bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7);
-    bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5);
-    bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3);
-    bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1);
-    bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1);
-    bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3);
-    bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5);
-    bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7);
-    bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7);
-    bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5);
-    bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3);
-    bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1);
-    bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1);
-    bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3);
-    bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5);
-    bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
-    xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7);
-    bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7);
-    bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5);
-    bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3);
-    bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1);
-    bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1);
-    bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3);
-    bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5);
-    bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7);
-    bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7);
-    bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5);
-    bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3);
-    bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1);
-    bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1);
-    bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3);
-    bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5);
-    bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7);
-    bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7);
-    bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5);
-    bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3);
-    bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1);
-    bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1);
-    bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3);
-    bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5);
-    bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7);
-    bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7);
-    bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5);
-    bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3);
-    bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1);
-    bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1);
-    bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3);
-    bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5);
-    bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
-    xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
-    xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7);
-    bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_7);
+    bit_met_p7_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_5);
+    bit_met_p7_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_3);
+    bit_met_p7_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_7_1);
+    bit_met_p7_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_1);
+    bit_met_p7_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_3);
+    bit_met_p7_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_5);
+    bit_met_p7_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_7_7);
+    bit_met_p7_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_7);
+    bit_met_p5_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_5);
+    bit_met_p5_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_3);
+    bit_met_p5_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_5_1);
+    bit_met_p5_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_1);
+    bit_met_p5_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_3);
+    bit_met_p5_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_5);
+    bit_met_p5_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_5_7);
+    bit_met_p5_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_7);
+    bit_met_p3_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_5);
+    bit_met_p3_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_3);
+    bit_met_p3_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_3_1);
+    bit_met_p3_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_1);
+    bit_met_p3_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_3);
+    bit_met_p3_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_5);
+    bit_met_p3_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_3_7);
+    bit_met_p3_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_7);
+    bit_met_p1_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_5);
+    bit_met_p1_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_3);
+    bit_met_p1_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_p_1_1);
+    bit_met_p1_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_1);
+    bit_met_p1_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_3);
+    bit_met_p1_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_5);
+    bit_met_p1_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7);
+    xmm1 = simde_mm256_adds_epi16(xmm0, y0_m_1_7);
+    bit_met_p1_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_7);
+    bit_met_m1_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_5);
+    bit_met_m1_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_3);
+    bit_met_m1_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_1_1);
+    bit_met_m1_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_1);
+    bit_met_m1_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_3);
+    bit_met_m1_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_5);
+    bit_met_m1_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_1_7);
+    bit_met_m1_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_7);
+    bit_met_m3_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_5);
+    bit_met_m3_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_3);
+    bit_met_m3_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_3_1);
+    bit_met_m3_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_1);
+    bit_met_m3_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_3);
+    bit_met_m3_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_5);
+    bit_met_m3_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_3_7);
+    bit_met_m3_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_7);
+    bit_met_m5_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_5);
+    bit_met_m5_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_3);
+    bit_met_m5_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_5_1);
+    bit_met_m5_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_1);
+    bit_met_m5_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_3);
+    bit_met_m5_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_5);
+    bit_met_m5_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_5_7);
+    bit_met_m5_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_7);
+    bit_met_m7_p7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_5);
+    bit_met_m7_p5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_3);
+    bit_met_m7_p3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_m_7_1);
+    bit_met_m7_p1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_1);
+    bit_met_m7_m1 = simde_mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_3);
+    bit_met_m7_m3 = simde_mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_5);
+    bit_met_m7_m5 = simde_mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2);
+    xmm0 = simde_mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7);
+    xmm1 = simde_mm256_subs_epi16(xmm0, y0_p_7_7);
+    bit_met_m7_m7 = simde_mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2);
 
     // Detection for 1st bit (LTE mapping)
     // bit = 1
-    xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
 
     // bit = 0
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 2nd bit (LTE mapping)
     // bit = 1
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
 
     // bit = 0
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 3rd bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2r = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 4th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); // was m5_p5 twice, which left m7_p5 out of the maximum
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y0i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
 
     // Detection for 5th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
-    xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
-    xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y1i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // Detection for 6th bit (LTE mapping)
-    xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4);
-    logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5);
-
-    xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-    xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
-    xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
-    xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
-    xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
-    xmm4 = _mm256_max_epi16(xmm0, xmm1);
-    xmm5 = _mm256_max_epi16(xmm2, xmm3);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4);
-    logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5);
-
-    y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm4);
+    logmax_den_re0 = simde_mm256_max_epi16(logmax_den_re0, xmm5);
+
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(xmm4, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+    xmm0 = simde_mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5);
+    xmm1 = simde_mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5);
+    xmm2 = simde_mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5);
+    xmm3 = simde_mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5);
+    xmm4 = simde_mm256_max_epi16(xmm0, xmm1);
+    xmm5 = simde_mm256_max_epi16(xmm2, xmm3);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm4);
+    logmax_num_re0 = simde_mm256_max_epi16(logmax_num_re0, xmm5);
+
+    y2i = simde_mm256_subs_epi16(logmax_num_re0, logmax_den_re0);
 
     // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs
     // RE 1
@@ -4019,7 +4019,7 @@ void qam64_qam64_avx2(int32_t *stream0_in,
     stream0_out[j + 94] = ((short *)&y1i)[15];
     stream0_out[j + 95] = ((short *)&y2i)[15];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c
index ab65bede5eac22909080bc04905265999e29b395..3abd1007d6f7ae4ab50e297f3ed87306a3ac5818 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c
@@ -151,7 +151,7 @@ int pbch_channel_level(int **dl_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i avg128;
   __m128i *dl_ch128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg128;
   int16x8_t *dl_ch128;
 #endif
@@ -165,7 +165,7 @@ int pbch_channel_level(int **dl_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
       avg128 = _mm_setzero_si128();
       dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol_mod*6*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       avg128 = vdupq_n_s32(0);
       dl_ch128=(int16x8_t *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol_mod*6*12];
 #endif
@@ -175,7 +175,7 @@ int pbch_channel_level(int **dl_ch_estimates_ext,
         avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[0],dl_ch128[0]));
         avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[1],dl_ch128[1]));
         avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[2],dl_ch128[2]));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         // to be filled in
 #endif
         dl_ch128+=3;
@@ -208,7 +208,7 @@ int pbch_channel_level(int **dl_ch_estimates_ext,
 
 #if defined(__x86_64__) || defined(__i386__)
   __m128i mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
 #endif
 void pbch_channel_compensation(int **rxdataF_ext,
@@ -221,7 +221,7 @@ void pbch_channel_compensation(int **rxdataF_ext,
   uint8_t aatx,aarx,symbol_mod;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,*rxdataF128,*rxdataF_comp128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
   symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
 
@@ -231,7 +231,7 @@ void pbch_channel_compensation(int **rxdataF_ext,
       dl_ch128          = (__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol_mod*6*12];
       rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol_mod*6*12];
       rxdataF_comp128   = (__m128i *)&rxdataF_comp[(aatx<<1)+aarx][symbol_mod*6*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       // to be filled in
 #endif
 
@@ -303,7 +303,7 @@ void pbch_channel_compensation(int **rxdataF_ext,
           rxdataF_comp128+=2;
         }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         // to be filled in
 #endif
       }
@@ -322,7 +322,7 @@ void pbch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
   int i, nb_rb=6;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1;
 #endif
   symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
@@ -332,7 +332,7 @@ void pbch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[(aatx<<1)][symbol_mod*6*12];
       rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[(aatx<<1)+1][symbol_mod*6*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[(aatx<<1)][symbol_mod*6*12];
       rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[(aatx<<1)+1][symbol_mod*6*12];
 #endif
@@ -341,7 +341,7 @@ void pbch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
       for (i=0; i<nb_rb*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
 #endif
       }
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c
index 3c22c64a9e56a00c2c145ebd14a37e59e43b73e8..ae5a608da1c216dab6a93ffa8f87478fbaf4f67b 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c
@@ -223,7 +223,7 @@ void mch_channel_level(int **dl_ch_estimates_ext,
   int i,aarx,nre;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,avg128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg128;
 #endif
 
@@ -233,7 +233,7 @@ void mch_channel_level(int **dl_ch_estimates_ext,
     avg128 = _mm_setzero_si128();
     // 5 is always a symbol with no pilots for both normal and extended prefix
     dl_ch128=(__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
     if ((symbol == 2) || (symbol == 6) || (symbol == 10))
@@ -244,7 +244,7 @@ void mch_channel_level(int **dl_ch_estimates_ext,
     for (i=0; i<(nre>>2); i++) {
 #if defined(__x86_64__) || defined(__i386__)
       avg128 = _mm_add_epi32(avg128,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),log2_approx(nre>>2)-1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
     }
 
@@ -270,7 +270,7 @@ void mch_channel_level_khz_1dot25(int **dl_ch_estimates_ext,
   int i,aarx,nre;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,avg128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg128;
 #endif
 
@@ -280,7 +280,7 @@ void mch_channel_level_khz_1dot25(int **dl_ch_estimates_ext,
     avg128 = _mm_setzero_si128();
     // 5 is always a symbol with no pilots for both normal and extended prefix
     dl_ch128=(__m128i *)&dl_ch_estimates_ext[aarx][0/*symbol*frame_parms->N_RB_DL*12*/];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
     /*if ((symbol == 2) || (symbol == 6) || (symbol == 10))
       nre = (frame_parms->N_RB_DL*6);
@@ -293,7 +293,7 @@ void mch_channel_level_khz_1dot25(int **dl_ch_estimates_ext,
 #if defined(__x86_64__) || defined(__i386__)
       //avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[0],dl_ch128[0]));
       avg128 = _mm_add_epi32(avg128,_mm_srai_epi32(_mm_madd_epi16(dl_ch128[0],dl_ch128[0]),log2_approx(nre>>2)-1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
     }
 
@@ -329,7 +329,7 @@ void mch_channel_compensation(int **rxdataF_ext,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128;
   __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128={0},QAM_amp128b={0};
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
   if ((symbol == 2) || (symbol == 6) || (symbol == 10))
@@ -347,7 +347,7 @@ void mch_channel_compensation(int **rxdataF_ext,
     QAM_amp128b = _mm_set1_epi16(QAM64_n2);
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
   for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
@@ -357,7 +357,7 @@ void mch_channel_compensation(int **rxdataF_ext,
     dl_ch_mag128b     = (__m128i *)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
     rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128   = (__m128i *)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
     for (i=0; i<(nre>>2); i+=2) {
@@ -382,7 +382,7 @@ void mch_channel_compensation(int **rxdataF_ext,
         dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1);
         dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b);
         dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
       }
 
@@ -430,7 +430,7 @@ void mch_channel_compensation(int **rxdataF_ext,
       dl_ch_mag128b+=2;
       rxdataF128+=2;
       rxdataF_comp128+=2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
     }
   }
@@ -456,7 +456,7 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128;
   __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128={0},QAM_amp128b={0};
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
   /*if ((symbol == 2) || (symbol == 6) || (symbol == 10))
     nre = frame_parms->N_RB_DL*6;
@@ -473,7 +473,7 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext,
     QAM_amp128b = _mm_set1_epi16(QAM64_n2);
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
   for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
@@ -483,7 +483,7 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext,
     dl_ch_mag128b     = (__m128i *)&dl_ch_magb[aarx][0];
     rxdataF128        = (__m128i *)&rxdataF_ext[aarx][0];
     rxdataF_comp128   = (__m128i *)&rxdataF_comp[aarx][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
     for (i=0; i<(nre>>2); i+=2) {
@@ -508,7 +508,7 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext,
         dl_ch_mag128b[0] = _mm_slli_epi16(dl_ch_mag128b[0],1);
         dl_ch_mag128b[1] = _mm_mulhi_epi16(dl_ch_mag128b[1],QAM_amp128b);
         dl_ch_mag128b[1] = _mm_slli_epi16(dl_ch_mag128b[1],1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
       }
 
@@ -556,7 +556,7 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext,
       dl_ch_mag128b+=2;
       rxdataF128+=2;
       rxdataF_comp128+=2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
     }
   }
@@ -577,7 +577,7 @@ void mch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
   int i;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
 #endif
 
@@ -589,7 +589,7 @@ void mch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
     dl_ch_mag128_1      = (__m128i *)&dl_ch_mag[1][symbol*frame_parms->N_RB_DL*12];
     dl_ch_mag128_0b     = (__m128i *)&dl_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
     dl_ch_mag128_1b     = (__m128i *)&dl_ch_magb[1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
     dl_ch_mag128_0      = (int16x8_t *)&dl_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
@@ -604,7 +604,7 @@ void mch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
       rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
       dl_ch_mag128_0[i]    = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0[i],1),_mm_srai_epi16(dl_ch_mag128_1[i],1));
       dl_ch_mag128_0b[i]   = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0b[i],1),_mm_srai_epi16(dl_ch_mag128_1b[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
       dl_ch_mag128_0[i]    = vhaddq_s16(dl_ch_mag128_0[i],dl_ch_mag128_1[i]);
       dl_ch_mag128_0b[i]   = vhaddq_s16(dl_ch_mag128_0b[i],dl_ch_mag128_1b[i]);
@@ -627,7 +627,7 @@ void mch_detection_mrc_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
   int i;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b;
 #endif
 
@@ -639,7 +639,7 @@ void mch_detection_mrc_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     dl_ch_mag128_1      = (__m128i *)&dl_ch_mag[1][0];
     dl_ch_mag128_0b     = (__m128i *)&dl_ch_magb[0][0];
     dl_ch_mag128_1b     = (__m128i *)&dl_ch_magb[1][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][0];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][0];
     dl_ch_mag128_0      = (int16x8_t *)&dl_ch_mag[0][0];
@@ -654,7 +654,7 @@ void mch_detection_mrc_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
       rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
       dl_ch_mag128_0[i]    = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0[i],1),_mm_srai_epi16(dl_ch_mag128_1[i],1));
       dl_ch_mag128_0b[i]   = _mm_adds_epi16(_mm_srai_epi16(dl_ch_mag128_0b[i],1),_mm_srai_epi16(dl_ch_mag128_1b[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
       dl_ch_mag128_0[i]    = vhaddq_s16(dl_ch_mag128_0[i],dl_ch_mag128_1[i]);
       dl_ch_mag128_0b[i]   = vhaddq_s16(dl_ch_mag128_0b[i],dl_ch_mag128_1b[i]);
@@ -760,7 +760,7 @@ void mch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
   __m128i *ch_mag;
   __m128i llr128[2],xmm0;
   uint32_t *llr32;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
   int16x8_t *ch_mag;
   int16x8_t llr128[2],xmm0;
@@ -776,7 +776,7 @@ void mch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr32 = (uint32_t *)*llr32p;
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   if (symbol==2) {
     llr16 = (int16_t *)dlsch_llr;
@@ -787,7 +787,7 @@ void mch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
 #endif
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i *)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t *)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
 #endif
 
@@ -823,7 +823,7 @@ void mch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr32[6] = ((uint32_t *)&llr128[1])[2];
     llr32[7] = ((uint32_t *)&llr128[1])[3];
     llr32+=8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vsubq_s16(ch_mag[i],xmm0);
     // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2
@@ -865,7 +865,7 @@ void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
   __m128i *ch_mag;
   __m128i llr128[2],xmm0;
   uint32_t *llr32;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t *)&rxdataF_comp[0][0];
   int16x8_t *ch_mag;
   int16x8_t llr128[2],xmm0;
@@ -879,7 +879,7 @@ void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
   //} else {
   //llr32 = (uint32_t*)*llr32p;
   //}
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   //if (symbol==2) {
   llr16 = (int16_t *)dlsch_llr;
   //} else {
@@ -888,7 +888,7 @@ void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
 #endif
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i *)&dl_ch_mag[0][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t *)&dl_ch_mag[0][0];
 #endif
   len = frame_parms->N_RB_DL*12*10;
@@ -917,7 +917,7 @@ void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     llr32[6] = ((uint32_t *)&llr128[1])[2];
     llr32[7] = ((uint32_t *)&llr128[1])[3];
     llr32+=8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vsubq_s16(ch_mag[i],xmm0);
     // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2
@@ -962,7 +962,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i xmm1,xmm2,*ch_mag,*ch_magb;
   __m128i *rxF = (__m128i *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t xmm1,xmm2,*ch_mag,*ch_magb;
   int16x8_t *rxF = (int16x8_t *)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
 #endif
@@ -980,7 +980,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i *)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
   ch_magb = (__m128i *)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t *)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
   ch_magb = (int16x8_t *)&dl_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];
 #endif
@@ -1003,7 +1003,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     xmm1  = _mm_subs_epi16(ch_mag[i],xmm1);
     xmm2 = _mm_abs_epi16(xmm1);
     xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm1 = vabsq_s16(rxF[i]);
     xmm1 = vsubq_s16(ch_mag[i],xmm1);
     xmm2 = vabsq_s16(xmm1);
@@ -1029,7 +1029,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,0);
     llr2[3] = vgetq_lane_s16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,0);//((short *)&xmm2)[j];
@@ -1043,7 +1043,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,2);
     llr2[3] = vgetq_lane_s16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,2);//((short *)&xmm2)[j];
@@ -1057,7 +1057,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,5);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,4);
     llr2[3] = vgetq_lane_s16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,4);//((short *)&xmm2)[j];
@@ -1071,7 +1071,7 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,6);
     llr2[3] = vgetq_lane_s16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,6);//((short *)&xmm2)[j];
@@ -1097,7 +1097,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i xmm1,xmm2,*ch_mag,*ch_magb;
   __m128i *rxF = (__m128i *)&rxdataF_comp[0][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t xmm1,xmm2,*ch_mag,*ch_magb;
   int16x8_t *rxF = (int16x8_t *)&rxdataF_comp[0][0];
 #endif
@@ -1113,7 +1113,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i *)&dl_ch_mag[0][0];
   ch_magb = (__m128i *)&dl_ch_magb[0][0];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t *)&dl_ch_mag[0][0];
   ch_magb = (int16x8_t *)&dl_ch_magb[0][0];
 #endif
@@ -1130,7 +1130,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     xmm1  = _mm_subs_epi16(ch_mag[i],xmm1);
     xmm2 = _mm_abs_epi16(xmm1);
     xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm1 = vabsq_s16(rxF[i]);
     xmm1 = vsubq_s16(ch_mag[i],xmm1);
     xmm2 = vabsq_s16(xmm1);
@@ -1156,7 +1156,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,0);
     llr2[3] = vgetq_lane_s16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,0);//((short *)&xmm2)[j];
@@ -1170,7 +1170,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,2);
     llr2[3] = vgetq_lane_s16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,2);//((short *)&xmm2)[j];
@@ -1184,7 +1184,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,5);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,4);
     llr2[3] = vgetq_lane_s16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,4);//((short *)&xmm2)[j];
@@ -1198,7 +1198,7 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,6);
     llr2[3] = vgetq_lane_s16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,6);//((short *)&xmm2)[j];
diff --git a/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c b/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c
index 6a663e89404e5a91a0187a20f52c4bf152863d16..341e1e90b648d1a54ead6d1dc020204211fcf63f 100644
--- a/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c
+++ b/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c
@@ -47,7 +47,7 @@ void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsym
 
 #if defined(__x86_64__) || defined(__i386__)
   __m128i dft_in128[4][1200],dft_out128[4][1200];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t dft_in128[4][1200],dft_out128[4][1200];
 #endif
   uint32_t *dft_in0=(uint32_t*)dft_in128[0],*dft_out0=(uint32_t*)dft_out128[0];
@@ -61,7 +61,7 @@ void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsym
   uint32_t i,ip;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i norm128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t norm128;
 #endif
   //  printf("Doing lte_dft for Msc_PUSCH %d\n",Msc_PUSCH);
@@ -119,7 +119,7 @@ void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsym
     */
 #if defined(__x86_64__) || defined(__i386__)
     norm128 = _mm_set1_epi16(9459);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     norm128 = vdupq_n_s16(9459);
 #endif
     for (i=0; i<12; i++) {
@@ -127,7 +127,7 @@ void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsym
       ((__m128i*)dft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out0)[i],norm128),1);
       ((__m128i*)dft_out1)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out1)[i],norm128),1);
       ((__m128i*)dft_out2)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out2)[i],norm128),1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       ((int16x8_t*)dft_out0)[i] = vqdmulhq_s16(((int16x8_t*)dft_out0)[i],norm128);
       ((int16x8_t*)dft_out1)[i] = vqdmulhq_s16(((int16x8_t*)dft_out1)[i],norm128);
       ((int16x8_t*)dft_out2)[i] = vqdmulhq_s16(((int16x8_t*)dft_out2)[i],norm128);
diff --git a/openair1/PHY/MODULATION/modulation_UE.h b/openair1/PHY/MODULATION/modulation_UE.h
index 20fdaa7ad84f0fef7433432c7ac0c847f9fd1f99..6ed10a8c77c3cf355235500d93ed10a175646f9b 100644
--- a/openair1/PHY/MODULATION/modulation_UE.h
+++ b/openair1/PHY/MODULATION/modulation_UE.h
@@ -57,7 +57,8 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
                           UE_nr_rxtx_proc_t *proc,
                           unsigned char symbol,
                           unsigned char Ns,
-                          int sample_offset);
+                          int sample_offset,
+                          bool pbch_decoded);
 
 int slot_fep_mbsfn(PHY_VARS_UE *phy_vars_ue,
                    unsigned char l,
diff --git a/openair1/PHY/MODULATION/nr_modulation.c b/openair1/PHY/MODULATION/nr_modulation.c
index 2522e146e29279d548670b50801531c9c47f670d..5d55cb142d65e0167518844db17733452e3dbf60 100644
--- a/openair1/PHY/MODULATION/nr_modulation.c
+++ b/openair1/PHY/MODULATION/nr_modulation.c
@@ -339,7 +339,7 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
 {
 #if defined(__x86_64__) || +defined(__i386__)
   __m128i dft_in128[1][3240], dft_out128[1][3240];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t dft_in128[1][3240], dft_out128[1][3240];
 #endif
   uint32_t *dft_in0 = (uint32_t*)dft_in128[0], *dft_out0 = (uint32_t*)dft_out128[0];
@@ -348,7 +348,7 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
 
 #if defined(__x86_64__) || defined(__i386__)
   __m128i norm128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t norm128;
 #endif
 
@@ -364,13 +364,13 @@ void nr_dft(int32_t *z, int32_t *d, uint32_t Msc_PUSCH)
 
 #if defined(__x86_64__) || defined(__i386__)
       norm128 = _mm_set1_epi16(9459);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       norm128 = vdupq_n_s16(9459);
 #endif
       for (i=0; i<12; i++) {
 #if defined(__x86_64__) || defined(__i386__)
         ((__m128i*)dft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)dft_out0)[i], norm128), 1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         ((int16x8_t*)dft_out0)[i] = vqdmulhq_s16(((int16x8_t*)dft_out0)[i], norm128);
 #endif
       }
diff --git a/openair1/PHY/MODULATION/ofdm_mod.c b/openair1/PHY/MODULATION/ofdm_mod.c
index 0f79c4532bef1a22166ed5aaf2db3d4d52da6c2e..93994b9ab6f867beab8655a5b691812ff2ccaeed 100644
--- a/openair1/PHY/MODULATION/ofdm_mod.c
+++ b/openair1/PHY/MODULATION/ofdm_mod.c
@@ -153,6 +153,10 @@ void PHY_ofdm_mod(int *input,                       /// pointer to complex input
     idftsize = IDFT_512;
     break;
 
+  case 768:
+    idftsize = IDFT_768;
+    break;
+
   case 1024:
     idftsize = IDFT_1024;
     break;
@@ -203,19 +207,11 @@ void PHY_ofdm_mod(int *input,                       /// pointer to complex input
     printf("[PHY] symbol %d/%d offset %d (%p,%p -> %p)\n",i,nb_symbols,i*fftsize+(i*nb_prefix_samples),input,&input[i*fftsize],&output[(i*fftsize) + ((i)*nb_prefix_samples)]);
 #endif
 
-#ifndef __AVX2__
-    // handle 128-bit alignment for 128-bit SIMD (SSE4,NEON,AltiVEC)
-    idft(idftsize,(int16_t *)&input[i*fftsize],
-         (fftsize==128) ? (int16_t *)temp : (int16_t *)&output[(i*fftsize) + ((1+i)*nb_prefix_samples)],
-         1);
-#else
     // on AVX2 need 256-bit alignment
     idft(idftsize,(int16_t *)&input[i*fftsize],
          (int16_t *)temp,
          1);
 
-#endif
-
     // Copy to frame buffer with Cyclic Extension
     // Note:  will have to adjust for synchronization offset!
 
@@ -227,9 +223,6 @@ void PHY_ofdm_mod(int *input,                       /// pointer to complex input
 
       //      msg("Doing cyclic prefix method\n");
 
-#ifndef __AVX2__
-      if (fftsize==128) 
-#endif
       {
         memcpy((void*)output_ptr,(void*)temp_ptr,fftsize<<2);
       }
diff --git a/openair1/PHY/MODULATION/slot_fep_nr.c b/openair1/PHY/MODULATION/slot_fep_nr.c
index 71be5ba6ed843232ca948f92eed66cb92b9e6e2f..a8bbe343551a3c9949b58c564a5495eed2363492 100644
--- a/openair1/PHY/MODULATION/slot_fep_nr.c
+++ b/openair1/PHY/MODULATION/slot_fep_nr.c
@@ -75,7 +75,7 @@ int nr_slot_fep(PHY_VARS_NR_UE *ue,
   //#endif
 
   for (unsigned char aa=0; aa<frame_parms->nb_antennas_rx; aa++) {
-    memset(&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int32_t));
+    memset(&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int32_t));
 
     int16_t *rxdata_ptr = (int16_t *)&common_vars->rxdata[aa][rx_offset];
 
@@ -92,7 +92,7 @@ int nr_slot_fep(PHY_VARS_NR_UE *ue,
 
     dft(dftsize,
         rxdata_ptr,
-        (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+        (int16_t *)&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
         1);
 
     stop_meas(&ue->rx_dft_stats);
@@ -108,7 +108,7 @@ int nr_slot_fep(PHY_VARS_NR_UE *ue,
 #endif
 
     c16_t *shift_rot = frame_parms->timeshift_symbol_rotation;
-    c16_t *this_symbol = (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol];
+    c16_t *this_symbol = (c16_t *)&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol];
 
     if (frame_parms->N_RB_DL & 1) {
       rotate_cpx_vector(this_symbol, &rot2, this_symbol,
@@ -150,7 +150,8 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
                           UE_nr_rxtx_proc_t *proc,
                           unsigned char symbol,
                           unsigned char Ns,
-                          int sample_offset)
+                          int sample_offset,
+                          bool pbch_decoded)
 {
   NR_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
   NR_UE_COMMON *common_vars   = &ue->common_vars;
@@ -160,7 +161,7 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
 
   unsigned int nb_prefix_samples;
   unsigned int nb_prefix_samples0;
-  if (ue->is_synchronized) {
+  if (pbch_decoded) {
     nb_prefix_samples  = frame_parms->nb_prefix_samples;
     nb_prefix_samples0 = frame_parms->nb_prefix_samples0;
   }
@@ -188,7 +189,7 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
 #endif
 
   for (unsigned char aa=0; aa<frame_parms->nb_antennas_rx; aa++) {
-    memset(&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int32_t));
+    memset(&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],0,frame_parms->ofdm_symbol_size*sizeof(int32_t));
 
     int16_t *rxdata_ptr;
     rx_offset%=frame_length_samples*2;
@@ -224,7 +225,7 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
 
     dft(dftsize,
         rxdata_ptr,
-        (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
+        (int16_t *)&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol],
         1);
 
     stop_meas(&ue->rx_dft_stats);
@@ -239,7 +240,7 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue,
 	   symbol+symb_offset,rot2.r,rot2.i);
 #endif
 
-    c16_t *this_symbol = (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol];
+    c16_t *this_symbol = (c16_t *)&common_vars->rxdataF[aa][frame_parms->ofdm_symbol_size*symbol];
     rotate_cpx_vector(this_symbol, &rot2, this_symbol, frame_parms->ofdm_symbol_size, 15);
   }
 
diff --git a/openair1/PHY/MODULATION/ul_7_5_kHz.c b/openair1/PHY/MODULATION/ul_7_5_kHz.c
index 384b9d621c59ddb0246b83d96e4c5602b70eaa54..a5e6122071b6d85b9e9b17e87849e4eb1a65f634 100644
--- a/openair1/PHY/MODULATION/ul_7_5_kHz.c
+++ b/openair1/PHY/MODULATION/ul_7_5_kHz.c
@@ -34,7 +34,7 @@ void remove_7_5_kHz(RU_t *ru,uint8_t slot)
   uint32_t *kHz7_5ptr;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxptr128,*rxptr128_7_5kHz,*kHz7_5ptr128,kHz7_5_2,mmtmp_re,mmtmp_im,mmtmp_re2,mmtmp_im2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxptr128,*kHz7_5ptr128,*rxptr128_7_5kHz;
   int32x4_t mmtmp_re,mmtmp_im;
   int32x4_t mmtmp0,mmtmp1;
@@ -88,7 +88,7 @@ void remove_7_5_kHz(RU_t *ru,uint8_t slot)
     rxptr128        = (__m128i *)&rxdata[aa][slot_offset];
     rxptr128_7_5kHz = (__m128i *)&rxdata_7_5kHz[aa][slot_offset2];
     kHz7_5ptr128    = (__m128i *)kHz7_5ptr;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxptr128        = (int16x8_t *)&rxdata[aa][slot_offset];
     rxptr128_7_5kHz = (int16x8_t *)&rxdata_7_5kHz[aa][slot_offset2];
     kHz7_5ptr128    = (int16x8_t *)kHz7_5ptr;
@@ -116,7 +116,7 @@ void remove_7_5_kHz(RU_t *ru,uint8_t slot)
       rxptr128_7_5kHz++;
       kHz7_5ptr128++;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
       kHz7_5ptr128[0] = vmulq_s16(kHz7_5ptr128[0],((int16x8_t*)conjugate75_2)[0]);
       mmtmp0 = vmull_s16(((int16x4_t*)rxptr128)[0],((int16x4_t*)kHz7_5ptr128)[0]);
diff --git a/openair1/PHY/MODULATION/ul_7_5_kHz_ue.c b/openair1/PHY/MODULATION/ul_7_5_kHz_ue.c
index 3e3a839521b3e5be66c530b0af89172ac0d76ce6..ff00d251981884a5cda73e2c77673e7552d84bee 100644
--- a/openair1/PHY/MODULATION/ul_7_5_kHz_ue.c
+++ b/openair1/PHY/MODULATION/ul_7_5_kHz_ue.c
@@ -37,7 +37,7 @@ void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
   uint32_t *kHz7_5ptr;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *txptr128,*kHz7_5ptr128,mmtmp_re,mmtmp_im,mmtmp_re2,mmtmp_im2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *txptr128,*kHz7_5ptr128;
   int32x4_t mmtmp_re,mmtmp_im;
   int32x4_t mmtmp0,mmtmp1;
@@ -84,7 +84,7 @@ void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
 #if defined(__x86_64__) || defined(__i386__)
   txptr128 = (__m128i *)&txdata[slot_offset];
   kHz7_5ptr128 = (__m128i *)kHz7_5ptr;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   txptr128 = (int16x8_t*)&txdata[slot_offset];
   kHz7_5ptr128 = (int16x8_t*)kHz7_5ptr;
 #endif
@@ -106,7 +106,7 @@ void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
     txptr128[0] = _mm_packs_epi32(mmtmp_re2,mmtmp_im2);
     txptr128++;
     kHz7_5ptr128++;  
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
     mmtmp0 = vmull_s16(((int16x4_t*)txptr128)[0],((int16x4_t*)kHz7_5ptr128)[0]);
         //mmtmp0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] 
diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
index 41338751396aa5947ecf815a1f5bc457a88fb260..fef8583000b64fb94788fc3114bb48a0ecb72c4f 100644
--- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
+++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
@@ -89,7 +89,7 @@ __attribute__((always_inline)) inline c16_t c32x16cumulVectVectWithSteps(c16_t *
 
   int localOffset1=*offset1;
   int localOffset2=*offset2;
-  c32_t cumul={0}; 
+  c32_t cumul={0};
   for (int i=0; i<N; i++) {
     cumul=c32x16maddShift(in1[localOffset1], in2[localOffset2], cumul, 15);
     localOffset1+=step1;
@@ -202,14 +202,14 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
   //------------------------------------------------//
 
 #ifdef DEBUG_PUSCH
-  
+
   for (int i = 0; i < (6 * nb_rb_pusch); i++) {
     LOG_I(PHY, "In %s: %d + j*(%d)\n", __FUNCTION__, pilot[i].r,pilot[i].i);
   }
-  
+
 #endif
   const uint8_t b_shift = pusch_pdu->nrOfLayers == 1;
-  
+
   for (int aarx=0; aarx<gNB->frame_parms.nb_antennas_rx; aarx++) {
     c16_t *rxdataF = (c16_t *)&gNB->common_vars.rxdataF[aarx][symbol_offset];
     c16_t *ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset];
@@ -221,20 +221,20 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
     LOG_I(PHY, "In %s bwp_start_subcarrier %d, k0 %d, first_carrier %d, nb_rb_pusch %d\n", __FUNCTION__, bwp_start_subcarrier, k0, gNB->frame_parms.first_carrier_offset, nb_rb_pusch);
     LOG_I(PHY, "In %s ul_ch addr %p nushift %d\n", __FUNCTION__, ul_ch, nushift);
 #endif
-    
+
     if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1 && chest_freq == 0) {
-      c16_t *pil   = pilot;    
+      c16_t *pil   = pilot;
       int re_offset = k0;
       LOG_D(PHY,"PUSCH estimation DMRS type 1, Freq-domain interpolation");
       // For configuration type 1: k = 4*n + 2*k' + delta,
       // where k' is 0 or 1, and delta is in Table 6.4.1.1.3-1 from TS 38.211
       int pilot_cnt = 0;
       int delta = nr_pusch_dmrs_delta(pusch_dmrs_type1, p);
-      
+
       for (int n = 0; n < 3*nb_rb_pusch; n++) {
         // LS estimation
         c32_t ch = {0};
-        
+
         for (int k_line = 0; k_line <= 1; k_line++) {
           re_offset = (k0 + (n << 2) + (k_line << 1) + delta) % symbolSize;
           ch=c32x16maddShift(*pil,
@@ -243,9 +243,9 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
                              15+b_shift);
           pil++;
         }
-        
+
         c16_t ch16= {.r=(int16_t)ch.r, .i=(int16_t)ch.i};
-        
+
         // Channel interpolation
         for (int k_line = 0; k_line <= 1; k_line++) {
 #ifdef DEBUG_PUSCH
@@ -254,7 +254,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
           printf("pilot %4u: pil -> (%6d,%6d), rxF -> (%4d,%4d), ch -> (%4d,%4d)\n",
                  pilot_cnt, pil->r, pil->i, rxF->r, rxF->i, ch.r, ch.i);
 #endif
-          
+
           if (pilot_cnt == 0) {
             c16multaddVectRealComplex(fl, &ch16, ul_ch, 8);
           } else if (pilot_cnt == 1) {
@@ -270,7 +270,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
           } else {
             c16multaddVectRealComplex(fm, &ch16, ul_ch, 8);
           }
-          
+
           pilot_cnt++;
         }
       }
@@ -365,7 +365,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
         ch_offset++;
         re_offset = (re_offset + 1)%symbolSize;
       }
-      
+
       // Treat last pilot specially (right edge)
       c16_t ch_l=c16mulShift(*pil,
                              rxdataF[soffset+nushift+re_offset],
@@ -381,7 +381,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
                                                       ul_ch);
       __m128i *ul_ch_128 = (__m128i *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset];
       ul_ch_128[0] = _mm_slli_epi16 (ul_ch_128[0], 2);
-    } 
+    }
 
     else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1) { // this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 6 DMRS REs and use a common value for the whole PRB
       LOG_D(PHY,"PUSCH estimation DMRS type 1, no Freq-domain interpolation\n");
@@ -389,7 +389,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
       int pil_offset = 0;
       int re_offset = k0;
       c16_t ch;
-      
+
       // First PRB
       ch=c32x16cumulVectVectWithSteps(pilot, &pil_offset, 1, rxF, &re_offset, 2, symbolSize, 6);
 
@@ -428,20 +428,20 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
       }
       // Last PRB
       ch=c32x16cumulVectVectWithSteps(pilot, &pil_offset, 1, rxF, &re_offset, 2, symbolSize, 6);
-      
+
 #if NO_INTERP
       for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++)
         *ul_ch=ch;
 #else
       ul_ch[3].r += (ch.r * 1365)>>15; // 1/12*16384
       ul_ch[3].i += (ch.i * 1365)>>15; // 1/12*16384
-      
+
       ul_ch += 4;
       c16multaddVectRealComplex(filt8_avlip3,
                                          ch,
                                          ul_ch,
                                          8);
-      
+
       ul_ch += 8;
       c16multaddVectRealComplex(filt8_avlip6,
                                          ch,
@@ -529,19 +529,19 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
       ch0=c32x16mulShift(*pil, rxdataF[nushift+re_offset], 15);
       pil++;
       re_offset = (re_offset+1) % symbolSize;
-      
+
       ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15);
       pil++;
       re_offset = (re_offset+5) % symbolSize;
-      
+
       ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15);
       pil++;
       re_offset = (re_offset+1) % symbolSize;
-      
+
       ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15);
       pil++;
       re_offset = (re_offset+5) % symbolSize;
-      
+
       ch=c16x32div(ch0, 4);
 #if NO_INTERP
       for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++)
@@ -554,7 +554,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
       c16multaddVectRealComplex(filt8_avlip6, &ch, ul_ch, 8);
 #endif
     }
-    
+
 #ifdef DEBUG_PUSCH
     ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset];
 
@@ -727,13 +727,9 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
                               const nr_srs_info_t *nr_srs_info,
                               const int32_t **srs_generated_signal,
                               int32_t srs_received_signal[][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
-                              int32_t srs_ls_estimated_channel[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
                               int32_t srs_estimated_channel_freq[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
                               int32_t srs_estimated_channel_time[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
                               int32_t srs_estimated_channel_time_shifted[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
-                              uint32_t *signal_power,
-                              uint32_t *noise_power_per_rb,
-                              uint32_t *noise_power,
                               int8_t *snr_per_rb,
                               int8_t *snr) {
 
@@ -753,11 +749,12 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
     fd_cdm = 2;
   }
 
+  c16_t srs_ls_estimated_channel[frame_parms->ofdm_symbol_size*(1<<srs_pdu->num_symbols)];
+  uint32_t noise_power_per_rb[srs_pdu->bwp_size];
   int16_t ch_real[frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS];
   int16_t ch_imag[frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS];
   int16_t noise_real[frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS];
   int16_t noise_imag[frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS];
-
   int16_t ls_estimated[2];
 
   uint8_t mem_offset = ((16 - ((long)&srs_estimated_channel_freq[0][0][subcarrier_offset + nr_srs_info->k_0_p[0][0]])) & 0xF) >> 2; // >> 2 <=> /sizeof(int32_t)
@@ -772,7 +769,7 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
 
     for (int p_index = 0; p_index < N_ap; p_index++) {
 
-      memset(srs_ls_estimated_channel[ant][p_index], 0, frame_parms->ofdm_symbol_size*(1<<srs_pdu->num_symbols)*sizeof(int32_t));
+      memset(srs_ls_estimated_channel, 0, frame_parms->ofdm_symbol_size*(1<<srs_pdu->num_symbols)*sizeof(c16_t));
       memset(srs_est, 0, (frame_parms->ofdm_symbol_size*(1<<srs_pdu->num_symbols) + mem_offset)*sizeof(int32_t));
 
 #ifdef SRS_DEBUG
@@ -815,7 +812,8 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
           }
         }
 
-        srs_ls_estimated_channel[ant][p_index][subcarrier] = ls_estimated[0] + (((int32_t)ls_estimated[1] << 16) & 0xFFFF0000);
+        srs_ls_estimated_channel[subcarrier].r = ls_estimated[0];
+        srs_ls_estimated_channel[subcarrier].i = ls_estimated[1];
 
 #ifdef SRS_DEBUG
         int subcarrier_log = subcarrier-subcarrier_offset;
@@ -828,8 +826,8 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
         }
         LOG_I(NR_PHY,"(%4i) %6i\t%6i  |  %6i\t%6i  |  %6i\t%6i\n",
               subcarrier_log,
-              (int16_t)(srs_generated_signal[p_index][subcarrier] & 0xFFFF), (int16_t)((srs_generated_signal[p_index][subcarrier] >> 16) & 0xFFFF),
-              (int16_t)(srs_received_signal[ant][subcarrier] & 0xFFFF), (int16_t)((srs_received_signal[ant][subcarrier] >> 16) & 0xFFFF),
+              ((c16_t*)srs_generated_signal[p_index])[subcarrier].r, ((c16_t*)srs_generated_signal[p_index])[subcarrier].i,
+              ((c16_t*)srs_received_signal[ant])[subcarrier].r, ((c16_t*)srs_received_signal[ant])[subcarrier].i,
               ls_estimated[0], ls_estimated[1]);
 #endif
 
@@ -896,8 +894,8 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
       for (int k = 0; k < M_sc_b_SRS; k++) {
         ch_real[base_idx+k] = ((c16_t*)srs_estimated_channel_freq[ant][p_index])[subcarrier].r;
         ch_imag[base_idx+k] = ((c16_t*)srs_estimated_channel_freq[ant][p_index])[subcarrier].i;
-        noise_real[base_idx+k] = abs(((c16_t*)srs_ls_estimated_channel[ant][p_index])[subcarrier].r - ch_real[base_idx+k]);
-        noise_imag[base_idx+k] = abs(((c16_t*)srs_ls_estimated_channel[ant][p_index])[subcarrier].i - ch_imag[base_idx+k]);
+        noise_real[base_idx+k] = abs(srs_ls_estimated_channel[subcarrier].r - ch_real[base_idx+k]);
+        noise_imag[base_idx+k] = abs(srs_ls_estimated_channel[subcarrier].i - ch_imag[base_idx+k]);
         subcarrier += K_TC;
         if (subcarrier >= frame_parms->ofdm_symbol_size) {
           subcarrier=subcarrier-frame_parms->ofdm_symbol_size;
@@ -924,10 +922,10 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
 
         LOG_I(NR_PHY,"(%4i) %6i\t%6i  |  %6i\t%6i  |  %6i\t%6i\n",
               subcarrier_log,
-              (int16_t)(srs_ls_estimated_channel[ant][p_index][subcarrier]&0xFFFF),
-              (int16_t)((srs_ls_estimated_channel[ant][p_index][subcarrier]>>16)&0xFFFF),
-              (int16_t)(srs_estimated_channel_freq[ant][p_index][subcarrier]&0xFFFF),
-              (int16_t)((srs_estimated_channel_freq[ant][p_index][subcarrier]>>16)&0xFFFF),
+              srs_ls_estimated_channel[subcarrier].r,
+              srs_ls_estimated_channel[subcarrier].i,
+              ((c16_t*)srs_estimated_channel_freq[ant][p_index])[subcarrier].r,
+              ((c16_t*)srs_estimated_channel_freq[ant][p_index])[subcarrier].i,
               noise_real[base_idx+(k/K_TC)], noise_imag[base_idx+(k/K_TC)]);
 
         // Subcarrier increment
@@ -955,21 +953,21 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
   } // for (int ant = 0; ant < frame_parms->nb_antennas_rx; ant++)
 
   // Compute signal power
-  *signal_power = calc_power(ch_real,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS)
-                  + calc_power(ch_imag,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS);
+  uint32_t signal_power = calc_power(ch_real,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS)
+                          + calc_power(ch_imag,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS);
 
 #ifdef SRS_DEBUG
-  LOG_I(NR_PHY,"signal_power = %u\n", *signal_power);
+  LOG_I(NR_PHY,"signal_power = %u\n", signal_power);
 #endif
 
-  if (*signal_power == 0) {
+  if (signal_power == 0) {
     LOG_W(NR_PHY, "Received SRS signal power is 0\n");
     return -1;
   }
 
   // Compute noise power
 
-  const uint8_t signal_power_bits = log2_approx(*signal_power);
+  const uint8_t signal_power_bits = log2_approx(signal_power);
   const uint8_t factor_bits = signal_power_bits < 32 ? 32 - signal_power_bits : 0; // 32 due to input of dB_fixed(uint32_t x)
   const int32_t factor_dB = dB_fixed(1<<factor_bits);
 
@@ -1001,7 +999,7 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
 
     noise_power_per_rb[rb] = max(sum_re2 / n_noise_est - (sum_re / n_noise_est) * (sum_re / n_noise_est) +
                                  sum_im2 / n_noise_est - (sum_im / n_noise_est) * (sum_im / n_noise_est), 1);
-    snr_per_rb[rb] = dB_fixed((int32_t)((*signal_power<<factor_bits)/noise_power_per_rb[rb])) - factor_dB;
+    snr_per_rb[rb] = dB_fixed((int32_t)((signal_power<<factor_bits)/noise_power_per_rb[rb])) - factor_dB;
 
 #ifdef SRS_DEBUG
     LOG_I(NR_PHY,"noise_power_per_rb[%i] = %i, snr_per_rb[%i] = %i dB\n", rb, noise_power_per_rb[rb], rb, snr_per_rb[rb]);
@@ -1009,13 +1007,13 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
 
   } // for (int rb = 0; rb < m_SRS_b; rb++)
 
-  *noise_power = max(calc_power(noise_real,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS)
-                     + calc_power(noise_imag,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS), 1);
+  uint32_t noise_power = max(calc_power(noise_real,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS)
+                             + calc_power(noise_imag,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS), 1);
 
-    *snr = dB_fixed((int32_t)((*signal_power<<factor_bits)/(*noise_power))) - factor_dB;
+  *snr = dB_fixed((int32_t)((signal_power<<factor_bits)/(noise_power))) - factor_dB;
 
 #ifdef SRS_DEBUG
-  LOG_I(NR_PHY,"noise_power = %u, SNR = %i dB\n", *noise_power, *snr);
+  LOG_I(NR_PHY,"noise_power = %u, SNR = %i dB\n", noise_power, *snr);
 #endif
 
   return 0;
diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_estimation.h b/openair1/PHY/NR_ESTIMATION/nr_ul_estimation.h
index f00db212f755541a8429cb560b029d77fe228bff..3fcb81c961ee3db3da025666c861a4a3a8f9473c 100644
--- a/openair1/PHY/NR_ESTIMATION/nr_ul_estimation.h
+++ b/openair1/PHY/NR_ESTIMATION/nr_ul_estimation.h
@@ -73,13 +73,9 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB,
                               const nr_srs_info_t *nr_srs_info,
                               const int32_t **srs_generated_signal,
                               int32_t srs_received_signal[][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
-                              int32_t srs_ls_estimated_channel[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
                               int32_t srs_estimated_channel_freq[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size*(1<<srs_pdu->num_symbols)],
                               int32_t srs_estimated_channel_time[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
                               int32_t srs_estimated_channel_time_shifted[][1<<srs_pdu->num_ant_ports][gNB->frame_parms.ofdm_symbol_size],
-                              uint32_t *signal_power,
-                              uint32_t *noise_power_per_rb,
-                              uint32_t *noise_power,
                               int8_t *snr_per_rb,
                               int8_t *snr);
 #endif
diff --git a/openair1/PHY/NR_REFSIG/nr_dmrs_rx.c b/openair1/PHY/NR_REFSIG/nr_dmrs_rx.c
index 4f089562f6cb30c3a9bbb45a2faa626cd1ae767b..45078d73984e94dcf5fb3e75f4fa85a60c0e1ffd 100644
--- a/openair1/PHY/NR_REFSIG/nr_dmrs_rx.c
+++ b/openair1/PHY/NR_REFSIG/nr_dmrs_rx.c
@@ -137,29 +137,33 @@ int nr_pdsch_dmrs_rx(PHY_VARS_NR_UE *ue,
   if (config_type > 1)
     LOG_E(PHY,"Bad PDSCH DMRS config type %d\n", config_type);
 
-  if ((p>=1000) && (p<((config_type==NFAPI_NR_DMRS_TYPE1) ? 1008 : 1012))) {
-      if (ue->frame_parms.Ncp == NORMAL) {
-
-        for (int i=0; i<nb_pdsch_rb*((config_type==NFAPI_NR_DMRS_TYPE1) ? 6:4); i++) {
-
-        	w = (wf[p-1000][i&1])*(wt[p-1000][lp]);
-        	mod_table = (w==1) ? nr_rx_mod_table : nr_rx_nmod_table;
-
-        	idx = ((((nr_gold_pdsch[(i<<1)>>5])>>((i<<1)&0x1f))&1)<<1) ^ (((nr_gold_pdsch[((i<<1)+1)>>5])>>(((i<<1)+1)&0x1f))&1);
-    		((int16_t*)output)[i<<1] = mod_table[(NR_MOD_TABLE_QPSK_OFFSET + idx)<<1];
-    		((int16_t*)output)[(i<<1)+1] = mod_table[((NR_MOD_TABLE_QPSK_OFFSET + idx)<<1) + 1];
+  if ((p >= 1000) && (p < ((config_type == NFAPI_NR_DMRS_TYPE1) ? 1008 : 1012))) {
+    if (ue->frame_parms.Ncp == NORMAL) {
+      for (int i = 0; i < nb_pdsch_rb * ((config_type == NFAPI_NR_DMRS_TYPE1) ? 6 : 4); i++) {
+        w = (wf[p - 1000][i & 1]) * (wt[p - 1000][lp]);
+        mod_table = (w == 1) ? nr_rx_mod_table : nr_rx_nmod_table;
+
+        idx = ((((nr_gold_pdsch[(i << 1) >> 5]) >> ((i << 1) & 0x1f)) & 1) << 1) ^ (((nr_gold_pdsch[((i << 1) + 1) >> 5]) >> (((i << 1) + 1) & 0x1f)) & 1);
+        ((int16_t *)output)[i << 1] = mod_table[(NR_MOD_TABLE_QPSK_OFFSET + idx) << 1];
+        ((int16_t *)output)[(i << 1) + 1] = mod_table[((NR_MOD_TABLE_QPSK_OFFSET + idx) << 1) + 1];
 #ifdef DEBUG_PDSCH
-    		printf("nr_pdsch_dmrs_rx dmrs config type %d port %d nb_pdsch_rb %d\n", config_type, p, nb_pdsch_rb);
-    		printf("wf[%d] = %d wt[%d]= %d\n", i&1, wf[p-1000][i&1], lp, wt[p-1000][lp]);
-    		printf("i %d idx %d pdsch gold %u b0-b1 %d-%d mod_dmrs %d %d\n", i, idx, nr_gold_pdsch[(i<<1)>>5], (((nr_gold_pdsch[(i<<1)>>5])>>((i<<1)&0x1f))&1),
-    				(((nr_gold_pdsch[((i<<1)+1)>>5])>>(((i<<1)+1)&0x1f))&1), ((int16_t*)output)[i<<1], ((int16_t*)output)[(i<<1)+1]);
+        printf("nr_pdsch_dmrs_rx dmrs config type %d port %d nb_pdsch_rb %d\n", config_type, p, nb_pdsch_rb);
+        printf("wf[%d] = %d wt[%d]= %d\n", i & 1, wf[p - 1000][i & 1], lp, wt[p - 1000][lp]);
+        printf("i %d idx %d pdsch gold %u b0-b1 %d-%d mod_dmrs %d %d\n",
+               i,
+               idx,
+               nr_gold_pdsch[(i << 1) >> 5],
+               (((nr_gold_pdsch[(i << 1) >> 5]) >> ((i << 1) & 0x1f)) & 1),
+               (((nr_gold_pdsch[((i << 1) + 1) >> 5]) >> (((i << 1) + 1) & 0x1f)) & 1),
+               ((int16_t *)output)[i << 1],
+               ((int16_t *)output)[(i << 1) + 1]);
 #endif
-       	}
-      } else {
-        LOG_E(PHY,"extended cp not supported for PDSCH DMRS yet\n");
       }
+    } else {
+      LOG_E(PHY, "extended cp not supported for PDSCH DMRS yet\n");
+    }
   } else {
-    LOG_E(PHY,"Illegal p %d PDSCH DMRS port\n",p);
+    LOG_E(PHY, "Illegal p %d PDSCH DMRS port\n", p);
   }
 
   return(0);
@@ -167,7 +171,6 @@ int nr_pdsch_dmrs_rx(PHY_VARS_NR_UE *ue,
 
 
 int nr_pdcch_dmrs_rx(PHY_VARS_NR_UE *ue,
-                     uint8_t eNB_offset,
                      unsigned int Ns,
                      unsigned int *nr_gold_pdcch,
                      int32_t *output,
diff --git a/openair1/PHY/NR_REFSIG/nr_gold.c b/openair1/PHY/NR_REFSIG/nr_gold.c
index 9b4cc50953b430b698bd0d5a060ddef8c58d84c1..134b44fc41ea3f392e8a411707ca63e8ffde5d3f 100644
--- a/openair1/PHY/NR_REFSIG/nr_gold.c
+++ b/openair1/PHY/NR_REFSIG/nr_gold.c
@@ -123,3 +123,50 @@ void nr_gold_pusch(PHY_VARS_gNB* gNB, int nscid, uint32_t nid) {
     }
   }
 }
+
+
+/*
+ * Allocates gNB->nr_gold_prs[resource][slot][symbol] and fills each entry with
+ * NR_MAX_PRS_INIT_LENGTH_DWORD Gold-sequence words for PRS generation. A failed
+ * allocation aborts through AssertFatal.
+ *
+ * NOTE(review): TS 38.211 7.4.1.7.2 appears to weight the slot number by the
+ * number of symbols per slot inside c_init, while this code uses
+ * slot + symbol + 1. Kept as is because init_nr_gold_prs() on the UE side uses
+ * the same expression - confirm before changing either one.
+ */
+void nr_init_prs(PHY_VARS_gNB* gNB)
+{
+  NR_DL_FRAME_PARMS *fp = &gNB->frame_parms;
+
+  gNB->nr_gold_prs = (uint32_t ****)malloc16(gNB->prs_vars.NumPRSResources * sizeof(uint32_t ***));
+  uint32_t ****prs = gNB->nr_gold_prs;
+  AssertFatal(prs != NULL, "NR init: positioning reference signal malloc failed\n");
+
+  for (int rsc = 0; rsc < gNB->prs_vars.NumPRSResources; rsc++) {
+    const uint16_t Nid = gNB->prs_vars.prs_cfg[rsc].NPRSID; // seed value
+    const uint32_t nid_lsb = Nid % 1024;
+    LOG_I(PHY, "Initiaized NR-PRS sequence with PRS_ID %3d for resource %d\n", Nid, rsc);
+
+    prs[rsc] = (uint32_t ***)malloc16(fp->slots_per_frame * sizeof(uint32_t **));
+    AssertFatal(prs[rsc] != NULL, "NR init: positioning reference signal for rsc %d - malloc failed\n", rsc);
+
+    for (int slot = 0; slot < fp->slots_per_frame; slot++) {
+      prs[rsc][slot] = (uint32_t **)malloc16(fp->symbols_per_slot * sizeof(uint32_t *));
+      AssertFatal(prs[rsc][slot] != NULL, "NR init: positioning reference signal for slot %d - malloc failed\n", slot);
+
+      for (int symb = 0; symb < fp->symbols_per_slot; symb++) {
+        prs[rsc][slot][symb] = (uint32_t *)malloc16(NR_MAX_PRS_INIT_LENGTH_DWORD * sizeof(uint32_t));
+        AssertFatal(prs[rsc][slot][symb] != NULL, "NR init: positioning reference signal for rsc %d slot %d symbol %d - malloc failed\n", rsc, slot, symb);
+
+        // Seed: Nid / 1024 is an integer division, so no rounding call is needed
+        unsigned int x1;
+        unsigned int x2 = ((uint32_t)(Nid / 1024) << 22) + (((uint32_t)(slot + symb + 1) * (2 * nid_lsb + 1)) << 10) + nid_lsb;
+
+        // Reset the generator for the first word only, then keep advancing it
+        for (int n = 0; n < NR_MAX_PRS_INIT_LENGTH_DWORD; n++)
+          prs[rsc][slot][symb][n] = lte_gold_generic(&x1, &x2, n == 0);
+      }
+    }
+  }
+}
diff --git a/openair1/PHY/NR_REFSIG/nr_gold_ue.c b/openair1/PHY/NR_REFSIG/nr_gold_ue.c
index 203d6c8625213a24d64e47bcaaff196af9a16158..4f3a7d76f006384842cdc78c9f1973e5b58dd3ef 100644
--- a/openair1/PHY/NR_REFSIG/nr_gold_ue.c
+++ b/openair1/PHY/NR_REFSIG/nr_gold_ue.c
@@ -124,3 +124,38 @@ void nr_init_pusch_dmrs(PHY_VARS_NR_UE* ue,
     }
   }
 }
+
+/*
+ * Fills ue->nr_gold_prs[gNB][resource][slot][symbol] with
+ * NR_MAX_PRS_INIT_LENGTH_DWORD Gold-sequence words for every PRS resource of
+ * every active gNB. The table is only written here, never allocated.
+ *
+ * NOTE(review): TS 38.211 7.4.1.7.2 appears to weight the slot number by the
+ * number of symbols per slot inside c_init, while this code uses
+ * slot + symbol + 1. Kept as is because nr_init_prs() on the gNB side uses the
+ * same expression - confirm before changing either one.
+ */
+void init_nr_gold_prs(PHY_VARS_NR_UE* ue)
+{
+  NR_DL_FRAME_PARMS *fp = &ue->frame_parms;
+
+  for (int gnb = 0; gnb < ue->prs_active_gNBs; gnb++) {
+    for (int rsc = 0; rsc < ue->prs_vars[gnb]->NumPRSResources; rsc++) {
+      const uint16_t Nid = ue->prs_vars[gnb]->prs_resource[rsc].prs_cfg.NPRSID; // seed value
+      const uint32_t nid_lsb = Nid % 1024;
+      LOG_I(PHY,"Initialised NR-PRS sequence with PRS_ID %3d for resource %d\n",Nid, rsc);
+
+      for (int slot = 0; slot < fp->slots_per_frame; slot++) {
+        for (int symb = 0; symb < fp->symbols_per_slot; symb++) {
+          // Seed: Nid / 1024 is an integer division, so no rounding call is needed
+          unsigned int x1;
+          unsigned int x2 = ((uint32_t)(Nid / 1024) << 22) + (((uint32_t)(slot + symb + 1) * (2 * nid_lsb + 1)) << 10) + nid_lsb;
+
+          // Reset the generator for the first word only, then keep advancing it
+          for (int n = 0; n < NR_MAX_PRS_INIT_LENGTH_DWORD; n++)
+            ue->nr_gold_prs[gnb][rsc][slot][symb][n] = lte_gold_generic(&x1, &x2, n == 0);
+        }
+      }
+    } // for rsc
+  } // for gnb
+}
diff --git a/openair1/PHY/NR_REFSIG/nr_refsig.h b/openair1/PHY/NR_REFSIG/nr_refsig.h
index 110ff321bd992c538e0d88f8bf6a44a59329c151..fd89f122ce909879a399cbc04d7fc4f7b4d2f210 100644
--- a/openair1/PHY/NR_REFSIG/nr_refsig.h
+++ b/openair1/PHY/NR_REFSIG/nr_refsig.h
@@ -32,6 +32,13 @@
 @param PHY_VARS_gNB* gNB structure provides configuration, frame parameters and the pointers to the 32 bits sequence storage tables
  */
 void nr_init_pbch_dmrs(PHY_VARS_gNB* gNB);
+
+/*
+This function generates NR Gold Sequence(ts 138.211) for the PRS.
+@param PHY_VARS_gNB* gNB structure provides configuration, frame parameters and the pointers to the 32 bits sequence storage tables
+*/
+void nr_init_prs(PHY_VARS_gNB* gNB);
+
 /*!\brief This function generates the NR Gold sequence (38-211, Sec 5.2.1) for the PDCCH DMRS.
 @param PHY_VARS_gNB* gNB structure provides configuration, frame parameters and the pointers to the 32 bits sequence storage tables
 @param Nid is used for the initialization of x2, Physical cell Id by default or upper layer configured pdcch_scrambling_ID
diff --git a/openair1/PHY/NR_REFSIG/refsig_defs_ue.h b/openair1/PHY/NR_REFSIG/refsig_defs_ue.h
index a6e57fb29826410ecc09241ca0f9794e14d2c522..bdd1f50069f19ee7e0047b10e45e6d1d66a59606 100644
--- a/openair1/PHY/NR_REFSIG/refsig_defs_ue.h
+++ b/openair1/PHY/NR_REFSIG/refsig_defs_ue.h
@@ -39,7 +39,6 @@ int nr_pbch_dmrs_rx(int dmrss,
 @param PHY_VARS_NR_UE* ue structure provides configuration, frame parameters and the pointers to the 32 bits sequence storage tables
  */
 int nr_pdcch_dmrs_rx(PHY_VARS_NR_UE *ue,
-                     uint8_t eNB_offset,
                      unsigned int Ns,
                      unsigned int *nr_gold_pdcch,
                      int32_t *output,
@@ -69,5 +68,6 @@ void nr_init_pusch_dmrs(PHY_VARS_NR_UE* ue,
                         uint8_t n_scid);
 
 void nr_init_csi_rs(const NR_DL_FRAME_PARMS *fp, uint32_t ***csi_rs, uint32_t Nid);
+void init_nr_gold_prs(PHY_VARS_NR_UE* ue);
 
 #endif
diff --git a/openair1/PHY/NR_TRANSPORT/nr_dci.c b/openair1/PHY/NR_TRANSPORT/nr_dci.c
index c8b5af6ea595ba24c30c0b7cb66bb07c79bdea05..a80389a2004d665ea01c7dc76fc8700e307533e1 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_dci.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_dci.c
@@ -77,17 +77,16 @@ void nr_generate_dci(PHY_VARS_gNB *gNB,
   uint8_t cset_start_symb, cset_nsymb;
   int k,l,k_prime,dci_idx, dmrs_idx;
 
-  // find coreset descriptor
-    
+  // fill reg list per symbol
+  int reg_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL * NR_NB_REG_PER_CCE];
+  nr_fill_reg_list(reg_list, pdcch_pdu_rel15);
+  // compute rb_offset and n_prb based on frequency allocation
   int rb_offset;
   int n_rb;
-  // compute rb_offset and n_prb based on frequency allocation
-  nr_cce_t cce_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL];
-  nr_fill_cce_list(cce_list, pdcch_pdu_rel15);
   get_coreset_rballoc(pdcch_pdu_rel15->FreqDomainResource,&n_rb,&rb_offset);
   cset_start_sc = frame_parms->first_carrier_offset + (pdcch_pdu_rel15->BWPStart + rb_offset) * NR_NB_SC_PER_RB;
 
-  int16_t mod_dmrs[pdcch_pdu_rel15->StartSymbolIndex+pdcch_pdu_rel15->DurationSymbols][(n_rb+rb_offset)*6] __attribute__((aligned(16))); // 3 for the max coreset duration
+  int16_t mod_dmrs[pdcch_pdu_rel15->StartSymbolIndex+pdcch_pdu_rel15->DurationSymbols][(((n_rb+rb_offset+pdcch_pdu_rel15->BWPStart)*6+15)>>4)<<4] __attribute__((aligned(16))); // 3 for the max coreset duration
 
   for (int d=0;d<pdcch_pdu_rel15->numDlDci;d++) {
     /*The coreset is initialised
@@ -109,7 +108,7 @@ void nr_generate_dci(PHY_VARS_gNB *gNB,
     LOG_D(PHY, "pdcch: Coreset rb_offset %d, nb_rb %d BWP Start %d\n",rb_offset,n_rb,pdcch_pdu_rel15->BWPStart);
     LOG_D(PHY, "pdcch: Coreset starting subcarrier %d on symbol %d (%d symbols)\n", cset_start_sc, cset_start_symb, cset_nsymb);
     // DMRS length is per OFDM symbol
-    uint32_t dmrs_length = n_rb*6; //2(QPSK)*3(per RB)*6(REG per CCE)
+    uint32_t dmrs_length = (n_rb+pdcch_pdu_rel15->BWPStart)*6; //2(QPSK)*3(per RB)*6(REG per CCE)
     uint32_t encoded_length = dci_pdu->AggregationLevel*108; //2(QPSK)*9(per RB)*6(REG per CCE)
     if (dci_pdu->RNTI != 0xFFFF)
       LOG_D(PHY, "DL_DCI : rb_offset %d, nb_rb %d, DMRS length per symbol %d\t DCI encoded length %d (precoder_granularity %d, reg_mapping %d), Scrambling_Id %d, ScramblingRNTI %x, PayloadSizeBits %d\n",
@@ -177,80 +176,65 @@ void nr_generate_dci(PHY_VARS_gNB *gNB,
     if (cset_start_sc >= frame_parms->ofdm_symbol_size)
       cset_start_sc -= frame_parms->ofdm_symbol_size;
 
-    // Get cce_list indices by reg_idx in ascending order
-    int reg_list_index = 0;
-    int N_regs = n_rb*pdcch_pdu_rel15->DurationSymbols; // nb of REGs per coreset
-    int N_cces = N_regs / NR_NB_REG_PER_CCE; // nb of cces in coreset
-    int reg_list_order[NR_MAX_PDCCH_AGG_LEVEL] = {};
-    for (int p = 0; p < N_cces; p++) {
-      for(int p2 = 0; p2 < dci_pdu->AggregationLevel; p2++) {
-        if(cce_list[d][p2].reg_list[0].reg_idx == p * NR_NB_REG_PER_CCE) {
-          reg_list_order[reg_list_index] = p2;
-          reg_list_index++;
-          break;
-        }
-      }
-    }
-
+    int num_regs = dci_pdu->AggregationLevel * NR_NB_REG_PER_CCE / pdcch_pdu_rel15->DurationSymbols;
     /*Mapping the encoded DCI along with the DMRS */
     for(int symbol_idx = 0; symbol_idx < pdcch_pdu_rel15->DurationSymbols; symbol_idx++) {
-      for (int cce_count = 0; cce_count < dci_pdu->AggregationLevel; cce_count++) {
-
-        int8_t cce_idx = reg_list_order[cce_count];
+      // allocating rbs per symbol
+      for (int reg_count = 0; reg_count < num_regs; reg_count++) {
+        k = cset_start_sc + reg_list[d][reg_count] * NR_NB_SC_PER_RB;
+        LOG_D(PHY, "REG %d k %d\n", reg_list[d][reg_count], k);
+        if (k >= frame_parms->ofdm_symbol_size)
+          k -= frame_parms->ofdm_symbol_size;
 
-        for (int reg_in_cce_idx = symbol_idx; reg_in_cce_idx < NR_NB_REG_PER_CCE; reg_in_cce_idx+=pdcch_pdu_rel15->DurationSymbols) {
+        l = cset_start_symb + symbol_idx;
 
-          k = cset_start_sc + cce_list[d][cce_idx].reg_list[reg_in_cce_idx].start_sc_idx;
-          LOG_D(PHY,"CCE %d REG %d k %d\n",cce_idx,reg_in_cce_idx,k);
-          if (k >= frame_parms->ofdm_symbol_size)
-            k -= frame_parms->ofdm_symbol_size;
-
-          l = cset_start_symb + symbol_idx;
-
-          // dmrs index depends on reference point for k according to 38.211 7.4.1.3.2
-          int eff_reg_idx = cce_list[d][cce_idx].reg_list[reg_in_cce_idx].reg_idx/pdcch_pdu_rel15->DurationSymbols;
-          if (pdcch_pdu_rel15->CoreSetType == NFAPI_NR_CSET_CONFIG_PDCCH_CONFIG)
-            dmrs_idx = eff_reg_idx * 3;
-          else
-            dmrs_idx = (eff_reg_idx + rb_offset) * 3;
+        // dmrs index depends on reference point for k according to 38.211 7.4.1.3.2
+        if (pdcch_pdu_rel15->CoreSetType == NFAPI_NR_CSET_CONFIG_PDCCH_CONFIG)
+          dmrs_idx = (reg_list[d][reg_count] + pdcch_pdu_rel15->BWPStart) * 3;
+        else
+          dmrs_idx = (reg_list[d][reg_count] + rb_offset) * 3;
 
-          k_prime = 0;
+        k_prime = 0;
 
-          for (int m = 0; m < NR_NB_SC_PER_RB; m++) {
-            if (m == (k_prime << 2) + 1) { // DMRS if not already mapped
-              ((int16_t *) txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1] =
-                  (amp * mod_dmrs[l][dmrs_idx << 1]) >> 15;
-              ((int16_t *) txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1] =
-                  (amp * mod_dmrs[l][(dmrs_idx << 1) + 1]) >> 15;
+        for (int m = 0; m < NR_NB_SC_PER_RB; m++) {
+          if (m == (k_prime << 2) + 1) { // DMRS if not already mapped
+            ((int16_t *)txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1] = (amp * mod_dmrs[l][dmrs_idx << 1]) >> 15;
+            ((int16_t *)txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1] = (amp * mod_dmrs[l][(dmrs_idx << 1) + 1]) >> 15;
 
 #ifdef DEBUG_PDCCH_DMRS
-              LOG_D(PHY,"PDCCH DMRS %d: l %d position %d => (%d,%d)\n",dmrs_idx,l,k,((int16_t *)txdataF)[(l*frame_parms->ofdm_symbol_size + k)<<1],
-               ((int16_t *)txdataF)[((l*frame_parms->ofdm_symbol_size + k)<<1)+1]);
+            LOG_I(PHY,
+                  "PDCCH DMRS %d: l %d position %d => (%d,%d)\n",
+                  dmrs_idx,
+                  l,
+                  k,
+                  ((int16_t *)txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1],
+                  ((int16_t *)txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1]);
 #endif
 
-              dmrs_idx++;
-              k_prime++;
+            dmrs_idx++;
+            k_prime++;
 
-            } else { // DCI payload
-              ((int16_t *) txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1] = (amp * mod_dci[dci_idx << 1]) >> 15;
-              ((int16_t *) txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1] =
-                  (amp * mod_dci[(dci_idx << 1) + 1]) >> 15;
+          } else { // DCI payload
+            ((int16_t *)txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1] = (amp * mod_dci[dci_idx << 1]) >> 15;
+            ((int16_t *)txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1] = (amp * mod_dci[(dci_idx << 1) + 1]) >> 15;
 #ifdef DEBUG_DCI
-              LOG_D(PHY,"PDCCH: l %d position %d => (%d,%d)\n",l,k,((int16_t *)txdataF)[(l*frame_parms->ofdm_symbol_size + k)<<1],
-               ((int16_t *)txdataF)[((l*frame_parms->ofdm_symbol_size + k)<<1)+1]);
+            LOG_I(PHY,
+                  "PDCCH: l %d position %d => (%d,%d)\n",
+                  l,
+                  k,
+                  ((int16_t *)txdataF)[(l * frame_parms->ofdm_symbol_size + k) << 1],
+                  ((int16_t *)txdataF)[((l * frame_parms->ofdm_symbol_size + k) << 1) + 1]);
 #endif
 
-              dci_idx++;
-            }
+            dci_idx++;
+          }
 
-            k++;
+          k++;
 
-            if (k >= frame_parms->ofdm_symbol_size)
-              k -= frame_parms->ofdm_symbol_size;
-
-          } // m
-        } // reg_in_cce_idx
-      } // cce_count
+          if (k >= frame_parms->ofdm_symbol_size)
+            k -= frame_parms->ofdm_symbol_size;
+        } // m
+      } // reg_count
     } // symbol_idx
 
     LOG_D(PHY,
diff --git a/openair1/PHY/NR_TRANSPORT/nr_dci.h b/openair1/PHY/NR_TRANSPORT/nr_dci.h
index 0bf6932bb50685c622236c2b4dfdd095f1ae0e49..c36d672d18320d8fde5ef5c1e7bb8aa0c180dd8a 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_dci.h
+++ b/openair1/PHY/NR_TRANSPORT/nr_dci.h
@@ -55,6 +55,6 @@ void nr_fill_ul_dci(PHY_VARS_gNB *gNB,
 		    int slot,
 		    nfapi_nr_ul_dci_request_pdus_t *pdcch_pdu);
 
-void nr_fill_cce_list(nr_cce_t cce_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL], nfapi_nr_dl_tti_pdcch_pdu_rel15_t *pdcch_pdu_rel15);
+void nr_fill_reg_list(int reg_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL * NR_NB_REG_PER_CCE], nfapi_nr_dl_tti_pdcch_pdu_rel15_t *pdcch_pdu_rel15);
 
 #endif //__PHY_NR_TRANSPORT_DCI__H
diff --git a/openair1/PHY/NR_TRANSPORT/nr_dci_tools.c b/openair1/PHY/NR_TRANSPORT/nr_dci_tools.c
index 3d166fd87dd169ad03c1c64bd32dc02b98a96444..2558861e78638c117821b1db78f2aaea51a29ce8 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_dci_tools.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_dci_tools.c
@@ -37,12 +37,17 @@
 
 #include "nr_dlsch.h"
 
+// qsort() comparator for int arrays, ascending order. Returns only the sign of
+// the comparison, so it cannot overflow the way a plain subtraction can.
+int compfunc(const void *a, const void *b)
+{
+  const int lhs = *(const int *)a;
+  const int rhs = *(const int *)b;
+  return (lhs > rhs) - (lhs < rhs);
+}
 
-void nr_fill_cce_list(nr_cce_t cce_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL], nfapi_nr_dl_tti_pdcch_pdu_rel15_t *pdcch_pdu_rel15) {
-
-  nr_cce_t* cce;
-  nr_reg_t* reg;
-
+void nr_fill_reg_list(int reg_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL * NR_NB_REG_PER_CCE], nfapi_nr_dl_tti_pdcch_pdu_rel15_t *pdcch_pdu_rel15)
+{
   int bsize = pdcch_pdu_rel15->RegBundleSize;
   int R = pdcch_pdu_rel15->InterleaverSize;
   int n_shift = pdcch_pdu_rel15->ShiftIndex;
@@ -74,25 +75,25 @@ void nr_fill_cce_list(nr_cce_t cce_list[MAX_DCI_CORESET][NR_MAX_PDCCH_AGG_LEVEL]
     if (pdcch_pdu_rel15->dci_pdu[d].RNTI != 0xFFFF)
       LOG_D(PHY, "CCE list generation for candidate %d: bundle size %d ilv size %d CceIndex %d\n", d, bsize, R, pdcch_pdu_rel15->dci_pdu[d].CceIndex);
 
+    int list_idx = 0;
     for (uint8_t cce_idx=0; cce_idx<L; cce_idx++) {
-      cce = &cce_list[d][cce_idx];
-      cce->cce_idx = pdcch_pdu_rel15->dci_pdu[d].CceIndex + cce_idx;
-      LOG_D(PHY, "cce_idx %d\n", cce->cce_idx);
-
-      uint8_t j = cce->cce_idx;
+      int cce = pdcch_pdu_rel15->dci_pdu[d].CceIndex + cce_idx;
+      LOG_D(PHY, "cce_idx %d\n", cce);
       for (uint8_t bundle_idx=0; bundle_idx<NR_NB_REG_PER_CCE/bsize; bundle_idx++) {
-        uint8_t k = 6*j/bsize + bundle_idx;
+        uint8_t k = 6 * cce / bsize + bundle_idx;
         int f = cce_to_reg_interleaving(R, k, n_shift, C, bsize, N_regs);
-
-	for (uint8_t reg_idx=0; reg_idx<bsize; reg_idx++) {
-	  reg = &cce->reg_list[bundle_idx*bsize+reg_idx];
-	  reg->reg_idx = bsize*f + reg_idx;
-	  reg->start_sc_idx = (reg->reg_idx/dur) * NR_NB_SC_PER_RB;
-	  reg->symb_idx = reg->reg_idx%dur;
-	  LOG_D(PHY, "reg %d symbol %d start subcarrier %d\n", reg->reg_idx, reg->symb_idx, reg->start_sc_idx);
-	}
+        LOG_D(PHY, "Bundle index %d: f(%d) = %d\n", bundle_idx, k, f);
+        // reg_list contains the regs to be allocated per symbol
+        // the same rbs are allocated in each symbol
+        for (uint8_t reg_idx = 0; reg_idx < bsize / dur; reg_idx++) {
+          reg_list[d][list_idx] = f * bsize / dur + reg_idx;
+          LOG_D(PHY, "rb %d nb of symbols per rb %d start subcarrier %d\n", reg_list[d][list_idx], dur, reg_list[d][list_idx] * NR_NB_SC_PER_RB);
+          list_idx++;
+        }
       }
     }
+    // sorting the elements of the list (smaller goes first)
+    qsort(reg_list[d], L * NR_NB_REG_PER_CCE / dur, sizeof(int), compfunc);
   }
 }
 
diff --git a/openair1/PHY/NR_TRANSPORT/nr_prs.c b/openair1/PHY/NR_TRANSPORT/nr_prs.c
new file mode 100644
index 0000000000000000000000000000000000000000..2167990abfc6c0d96110a54af7bf41577bb9e684
--- /dev/null
+++ b/openair1/PHY/NR_TRANSPORT/nr_prs.c
@@ -0,0 +1,95 @@
+#include "PHY/defs_gNB.h"
+#include "PHY/NR_TRANSPORT/nr_transport_proto.h"
+#include "PHY/LTE_REFSIG/lte_refsig.h"
+#include "PHY/NR_REFSIG/nr_refsig.h"
+#include "PHY/sse_intrin.h"
+
+//#define DEBUG_PRS_MOD
+//#define DEBUG_PRS_MAP
+
+extern short nr_qpsk_mod_table[8];
+
+/*
+ * Maps the QPSK-modulated PRS of one resource onto the frequency-domain TX
+ * buffer (TS 38.211 7.4.1.7).
+ *
+ * nr_gold_prs  Gold-sequence words, indexed [symbol][dword]
+ * txdataF      frequency-domain buffer; symbol l starts at l * ofdm_symbol_size
+ * amp          amplitude applied to every sample (product is shifted right by 15)
+ * prs_cfg      PRS resource configuration (comb size, offsets, symbols, RBs)
+ * config       not used by this function
+ * frame_parms  supplies first_carrier_offset and ofdm_symbol_size
+ *
+ * Returns 0 on success, -1 when the comb size is not 2, 4, 6 or 12.
+ */
+int nr_generate_prs(uint32_t **nr_gold_prs,
+                          int32_t *txdataF,
+                          int16_t amp,
+                          prs_config_t *prs_cfg,
+                          nfapi_nr_config_request_scf_t *config,
+                          NR_DL_FRAME_PARMS *frame_parms)
+{
+  int16_t k_prime_table[K_PRIME_TABLE_ROW_SIZE][K_PRIME_TABLE_COL_SIZE] = PRS_K_PRIME_TABLE;
+
+  // Row of k_prime_table for the configured comb size. Any other value used to
+  // leave k_prime stale (and a comb size of 0 divided by zero), so reject it.
+  int row;
+  switch (prs_cfg->CombSize) {
+    case 2:
+      row = 0;
+      break;
+    case 4:
+      row = 1;
+      break;
+    case 6:
+      row = 2;
+      break;
+    case 12:
+      row = 3;
+      break;
+    default:
+      LOG_E(PHY, "Unsupported PRS comb size %d\n", (int)prs_cfg->CombSize);
+      return -1;
+  }
+
+  // PRS resource mapping with combsize=k which means PRS symbols exist in every k-th subcarrier in frequency domain
+  // According to ts138.211 sec.7.4.1.7.2
+  for (int l = prs_cfg->SymbolStart; l < prs_cfg->SymbolStart + prs_cfg->NumPRSSymbols; l++) {
+    const int k_prime = k_prime_table[row][l - prs_cfg->SymbolStart];
+    int16_t *symbol = (int16_t *)&txdataF[l * frame_parms->ofdm_symbol_size];
+
+    // First PRS subcarrier of this symbol. Wrap it right away: with a large RBOffset the
+    // sum can exceed the FFT size, and the first RE would otherwise land in the next symbol.
+    int k = ((prs_cfg->REOffset + k_prime) % prs_cfg->CombSize + prs_cfg->RBOffset * 12 + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size;
+
+    for (int m = 0; m < (12 / prs_cfg->CombSize) * prs_cfg->NumRB; m++) {
+      // QPSK modulation: two Gold-sequence bits select one constellation point
+      const int idx = (nr_gold_prs[l][(m << 1) >> 5] >> ((m << 1) & 0x1f)) & 3;
+      const int16_t mod_re = nr_qpsk_mod_table[idx << 1];
+      const int16_t mod_im = nr_qpsk_mod_table[(idx << 1) + 1];
+
+#ifdef DEBUG_PRS_MOD
+      LOG_D(PHY, "m %d idx %d gold seq %u mod_prs %d %d\n", m, idx, nr_gold_prs[l][(m << 1) >> 5], mod_re, mod_im);
+#endif
+
+#ifdef DEBUG_PRS_MAP
+      LOG_D(PHY, "m %d at k %d of l %d reIdx %d\n", m, k, l, (l * frame_parms->ofdm_symbol_size + k) << 1);
+#endif
+
+      symbol[k << 1] = (amp * mod_re) >> 15;
+      symbol[(k << 1) + 1] = (amp * mod_im) >> 15;
+
+#ifdef DEBUG_PRS_MAP
+      LOG_D(PHY, "(%d,%d)\n", symbol[k << 1], symbol[(k << 1) + 1]);
+#endif
+
+      k += prs_cfg->CombSize;
+      if (k >= frame_parms->ofdm_symbol_size)
+        k -= frame_parms->ofdm_symbol_size;
+    }
+  }
+#ifdef DEBUG_PRS_MAP
+  LOG_M("nr_prs.m", "prs",(int16_t *)&txdataF[prs_cfg->SymbolStart*frame_parms->ofdm_symbol_size],prs_cfg->NumPRSSymbols*frame_parms->ofdm_symbol_size, 1, 1);
+#endif
+  return 0;
+}
diff --git a/openair1/PHY/NR_TRANSPORT/nr_scrambling.c b/openair1/PHY/NR_TRANSPORT/nr_scrambling.c
index 8a49a7ffe0d1f3b4920e77c218d1f77e3a0db6e1..202e0580149b6cb470303c69dacef06e491be44e 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_scrambling.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_scrambling.c
@@ -34,45 +34,14 @@ void nr_codeword_scrambling(uint8_t *in,
   uint32_t x2 = (n_RNTI<<15) + (q<<14) + Nid;
   uint32_t s = 0;
 
-#if defined(__AVX2__)
   s=lte_gold_generic(&x1, &x2, 1);
   for (int i=0; i<((size>>5)+((size&0x1f) > 0 ? 1 : 0)); i++) {
     __m256i c = ((__m256i*)in)[i];
-    uint32_t in32 = _mm256_movemask_epi8(_mm256_slli_epi16(c,7));
+    uint32_t in32 = simde_mm256_movemask_epi8(simde_mm256_slli_epi16(c,7));
     out[i]=(in32^s);
     LOG_D(PHY,"in[%d] %x => %x\n",i,in32,out[i]);
     s=lte_gold_generic(&x1, &x2, 0);
   }
-#elif defined(__SSE4__)
-  s=lte_gold_generic(&x1, &x2, 1);
-  __m128i *in128;
-  for (int i=0; i<((size>>5)+((size&0x1f) > 0 ? 1 : 0)); i++) {
-    in128=&((__m128i*)in)[i<<1];
-    uint32_t in32;
-    ((uint16_t*)&in32)[0] = _mm_movemask_epi8(_mm_slli_epi16(in128[0],7));
-    ((uint16_t*)&in32)[1] = _mm_movemask_epi8(_mm_slli_epi16(in128[1],7));
-    out[i]=(in32^s);
-    LOG_D(PHY,"in[%d] %x => %x\n",i,in32,out[i]);
-    s=lte_gold_generic(&x1, &x2, 0);
-  }
-//#elsif defined(__arm__) || defined(__aarch64)
-#else
-  uint8_t reset = 1;
-
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_gNB_PDSCH_CODEWORD_SCRAMBLING, 1);
-  for (int i = 0; i < size; i++) {
-    const uint8_t b_idx = i&0x1f;
-    if (b_idx==0) {
-      s = lte_gold_generic(&x1, &x2, reset);
-      reset = 0;
-      if (i)
-        out++;
-    }
-    *out ^= (((in[i])&1) ^ ((s>>b_idx)&1))<<b_idx;
-    //printf("i %d b_idx %d in %d s 0x%08x out 0x%08x\n", i, b_idx, in[i], s, *out);
-  }
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_gNB_PDSCH_CODEWORD_SCRAMBLING, 0);
-#endif
 }
 
 void nr_codeword_unscrambling(int16_t* llr, uint32_t size, uint8_t q, uint32_t Nid, uint32_t n_RNTI)
diff --git a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
index e5b043c395acf9474c4358baae59920442fc4f53..ec65ee4a004f48ab3819c6dacbecd29aa994a59d 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
+++ b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
@@ -38,6 +38,14 @@
 
 #define NR_PBCH_PDU_BITS 24
 
+/* Prototype only (no body in this header). NOTE(review): presumably generates the PRS signal - confirm against the definition. */
+int nr_generate_prs(uint32_t **nr_gold_prs,                 /* NOTE(review): presumably the PRS Gold-sequence table - confirm */
+                    int32_t *txdataF,                       /* NOTE(review): looks like the frequency-domain TX buffer - confirm */
+                    int16_t amp,                            /* NOTE(review): presumably an amplitude scaling value - confirm */
+                    prs_config_t *prs_cfg,                  /* pointer to a prs_config_t; type is declared outside this hunk */
+                    nfapi_nr_config_request_scf_t *config,  /* pointer to the nFAPI config request; type is declared outside this hunk */
+                    NR_DL_FRAME_PARMS *frame_parms);        /* pointer to NR_DL_FRAME_PARMS; type is declared outside this hunk */
+
 /*!
 \fn int nr_generate_pss
 \brief Generation of the NR PSS
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c
index e9038f25d87f037255f3b766be9feed58e1aec91..4118145f64da4c2fa12b08899409aa26a647f93c 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_decoding.c
@@ -234,6 +234,12 @@ void nr_processULSegment(void* arg) {
   short* ulsch_llr = rdata->ulsch_llr;
   int max_ldpc_iterations = p_decoderParms->numMaxIter;
   int8_t llrProcBuf[OAI_UL_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
+  p_decoderParms->R = nr_get_R_ldpc_decoder(rv_index,
+                                            E,
+                                            p_decoderParms->BG,
+                                            p_decoderParms->Z,
+                                            &ulsch_harq->llrLen,
+                                            ulsch_harq->round);
 
   int16_t  z [68*384 + 16] __attribute__ ((aligned(16)));
   int8_t   l [68*384 + 16] __attribute__ ((aligned(16)));
@@ -465,26 +471,8 @@ uint32_t nr_ulsch_decoding(PHY_VARS_gNB *phy_vars_gNB,
   int kc;
   if (p_decParams->BG == 2){
     kc = 52;
-    if (Coderate < 0.3333) {
-      p_decParams->R = 15;
-    }
-    else if (Coderate <0.6667) {
-      p_decParams->R = 13;
-    }
-    else {
-      p_decParams->R = 23;
-    }
   } else {
     kc = 68;
-    if (Coderate < 0.6667) {
-      p_decParams->R = 13;
-    }
-    else if (Coderate <0.8889) {
-      p_decParams->R = 23;
-    }
-    else {
-      p_decParams->R = 89;
-    }
   }
   
   NR_gNB_SCH_STATS_t *stats=NULL;
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
index ac4c6f35e07c2447ee77674fe26f5158ef3bb80d..f1c9ff70d8aecb890c32fef0c9dd6a785ef20af1 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
@@ -21,7 +21,7 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
 #if defined(__x86_64__) || defined(__i386__)
   __m128i idft_in128[1][3240], idft_out128[1][3240];
   __m128i norm128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t idft_in128[1][3240], idft_out128[1][3240];
   int16x8_t norm128;
 #endif
@@ -36,7 +36,7 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
     for (i = 0; i < (Msc_PUSCH>>2); i++) {
 #if defined(__x86_64__)||defined(__i386__)
       *&(((__m128i*)z)[i]) = _mm_sign_epi16(*&(((__m128i*)z)[i]), *(__m128i*)&conjugate2[0]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       *&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]);
 #endif
     }
@@ -50,14 +50,14 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
 
 #if defined(__x86_64__)||defined(__i386__)
       norm128 = _mm_set1_epi16(9459);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       norm128 = vdupq_n_s16(9459);
 #endif
 
       for (i = 0; i < 12; i++) {
 #if defined(__x86_64__)||defined(__i386__)
         ((__m128i*)idft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)idft_out0)[i], norm128), 1);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
         ((int16x8_t*)idft_out0)[i] = vqdmulhq_s16(((int16x8_t*)idft_out0)[i], norm128);
 #endif
       }
@@ -288,7 +288,7 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
     for (i = 0; i < (Msc_PUSCH>>2); i++) {
 #if defined(__x86_64__) || defined(__i386__)
       ((__m128i*)z)[i] = _mm_sign_epi16(((__m128i*)z)[i], *(__m128i*)&conjugate2[0]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       *&(((int16x8_t*)z)[i]) = vmulq_s16(*&(((int16x8_t*)z)[i]), *(int16x8_t*)&conjugate2[0]);
 #endif
     }
@@ -328,11 +328,7 @@ void nr_ulsch_extract_rbs(int32_t **rxdataF,
   start_re = (frame_parms->first_carrier_offset + (pusch_pdu->rb_start + pusch_pdu->bwp_start) * NR_NB_SC_PER_RB)%frame_parms->ofdm_symbol_size;
   nb_re_pusch = NR_NB_SC_PER_RB * pusch_pdu->rb_size;
 
-#ifdef __AVX2__
   int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
-#else
-  int nb_re_pusch2 = nb_re_pusch;
-#endif
 
   for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
 
@@ -426,11 +422,7 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
 
   ch_amp128 = _mm_set1_epi16(ch_amp); // Q3.13
 
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
 
   for (aatx = 0; aatx < nrOfLayers; aatx++) {
     for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) {
@@ -472,11 +464,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
   
   uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
 
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
 
   for (aatx = 0; aatx < nrOfLayers; aatx++) {
     for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
@@ -502,7 +490,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   short rb;
   unsigned char aatx, aarx, nre = 12, symbol_mod;
@@ -578,11 +566,7 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
                                    unsigned short nb_rb,
                                    unsigned char output_shift) {
 
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
 
 #ifdef DEBUG_CH_COMP
   int16_t *rxF, *ul_ch;
@@ -899,7 +883,7 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   unsigned short rb;
   unsigned char aatx,aarx,symbol_mod,is_dmrs_symbol=0;
@@ -1141,18 +1125,14 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
   int n_rx = frame_parms->nb_antennas_rx;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128[2],*ul_ch_mag128[2],*ul_ch_mag128b[2];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*ul_ch_mag128_0,*ul_ch_mag128_0b;
   int16x8_t *rxdataF_comp128_1,*ul_ch_mag128_1,*ul_ch_mag128_1b;
 #endif
   int32_t i;
   uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
 
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
 
   if (n_rx > 1) {
     #if defined(__x86_64__) || defined(__i386__)
@@ -1177,7 +1157,7 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
         }
       }
     }
-    #elif defined(__arm__)
+    #elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
     ul_ch_mag128_0      = (int16x8_t *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
@@ -1548,11 +1528,7 @@ uint8_t nr_ulsch_zero_forcing_rx_2layers(int **rxdataF_comp,
   int *ch20, *ch30, *ch21, *ch31;
   uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
 
-  #ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-  #else
-  int off = 0;
-  #endif
 
   /* we need at least alignment to 16 bytes, let's put 32 to be sure
    * (maybe not necessary but doesn't hurt)
@@ -1976,11 +1952,7 @@ void nr_rx_pusch(PHY_VARS_gNB *gNB,
   }
   stop_meas(&gNB->ulsch_channel_estimation_stats);
 
-#ifdef __AVX2__
   int off = ((rel15_ul->rb_size&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
   uint32_t rxdataF_ext_offset = 0;
 
   for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
@@ -2099,12 +2071,8 @@ void nr_rx_pusch(PHY_VARS_gNB *gNB,
       stop_meas(&gNB->ulsch_mrc_stats);
 
       if (rel15_ul->transform_precoding == transformPrecoder_enabled) {
-         #ifdef __AVX2__
         // For odd number of resource blocks need byte alignment to multiple of 8
         int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
-        #else
-        int nb_re_pusch2 = nb_re_pusch;
-        #endif
 
         // perform IDFT operation on the compensated rxdata if transform precoding is enabled
         nr_idft(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[0][symbol * nb_re_pusch2], nb_re_pusch);
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
index f6eefe846ac5e3de53c76f3c044039a2c8babb3f..42fc1820ac006c3f31b2247a1a5ac43c61990757 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
@@ -72,20 +72,12 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
 {
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
   __m256i *rxF = (__m256i*)rxdataF_comp;
   __m256i *ch_mag;
   __m256i llr256[2];
   register __m256i xmm0;
-#else
-  __m128i *rxF = (__m128i*)rxdataF_comp;
-  __m128i *ch_mag;
-  __m128i llr128[2];
-  register __m128i xmm0;
-#endif
   uint32_t *llr32;
-
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t*)&rxdataF_comp;
   int16x8_t *ch_mag;
   int16x8_t xmm0;
@@ -95,107 +87,65 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
 
   int i;
 
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
-
 
 #if defined(__x86_64__) || defined(__i386__)
     llr32 = (uint32_t*)ulsch_llr;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr16 = (int16_t*)ulsch_llr;
 #endif
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
     ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
-#else
-    ch_mag = (__m128i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
 #endif
-
-#ifdef __AVX2__
   unsigned char len_mod8 = nb_re&7;
   nb_re >>= 3;  // length in quad words (4 REs)
   nb_re += (len_mod8 == 0 ? 0 : 1);
-#else
-  unsigned char len_mod4 = nb_re&3;
-  nb_re >>= 2;  // length in quad words (4 REs)
-  nb_re += (len_mod4 == 0 ? 0 : 1);
-#endif
 
   for (i=0; i<nb_re; i++) {
 #if defined(__x86_64__) || defined(__i386)
-#ifdef __AVX2__
-    xmm0 = _mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
-    xmm0 = _mm256_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
+    xmm0 = simde_mm256_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
+    xmm0 = simde_mm256_subs_epi16(ch_mag[i],xmm0); // saturating ch_mag[i]-xmm0: registers of even index in xmm0-> |h|^2-|y_R|, registers of odd index in xmm0-> |h|^2-|y_I|
  
-    llr256[0] = _mm256_unpacklo_epi32(rxF[i],xmm0); // llr128[0] contains the llrs of the 1st,2nd,5th and 6th REs
-    llr256[1] = _mm256_unpackhi_epi32(rxF[i],xmm0); // llr128[1] contains the llrs of the 3rd, 4th, 7th and 8th REs
+    llr256[0] = simde_mm256_unpacklo_epi32(rxF[i],xmm0); // llr256[0] contains the llrs of the 1st,2nd,5th and 6th REs
+    llr256[1] = simde_mm256_unpackhi_epi32(rxF[i],xmm0); // llr256[1] contains the llrs of the 3rd, 4th, 7th and 8th REs
     
     // 1st RE
-    llr32[0] = _mm256_extract_epi32(llr256[0],0); // llr32[0] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[1] = _mm256_extract_epi32(llr256[0],1); // llr32[1] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[0] = simde_mm256_extract_epi32(llr256[0],0); // llr32[0] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[1] = simde_mm256_extract_epi32(llr256[0],1); // llr32[1] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 2nd RE
-    llr32[2] = _mm256_extract_epi32(llr256[0],2); // llr32[2] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[3] = _mm256_extract_epi32(llr256[0],3); // llr32[3] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[2] = simde_mm256_extract_epi32(llr256[0],2); // llr32[2] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[3] = simde_mm256_extract_epi32(llr256[0],3); // llr32[3] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 3rd RE
-    llr32[4] = _mm256_extract_epi32(llr256[1],0); // llr32[4] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[5] = _mm256_extract_epi32(llr256[1],1); // llr32[5] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[4] = simde_mm256_extract_epi32(llr256[1],0); // llr32[4] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[5] = simde_mm256_extract_epi32(llr256[1],1); // llr32[5] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 4th RE
-    llr32[6] = _mm256_extract_epi32(llr256[1],2); // llr32[6] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[7] = _mm256_extract_epi32(llr256[1],3); // llr32[7] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[6] = simde_mm256_extract_epi32(llr256[1],2); // llr32[6] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[7] = simde_mm256_extract_epi32(llr256[1],3); // llr32[7] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 5th RE
-    llr32[8] = _mm256_extract_epi32(llr256[0],4); // llr32[8] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[9] = _mm256_extract_epi32(llr256[0],5); // llr32[9] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[8] = simde_mm256_extract_epi32(llr256[0],4); // llr32[8] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[9] = simde_mm256_extract_epi32(llr256[0],5); // llr32[9] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 6th RE
-    llr32[10] = _mm256_extract_epi32(llr256[0],6); // llr32[10] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[11] = _mm256_extract_epi32(llr256[0],7); // llr32[11] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[10] = simde_mm256_extract_epi32(llr256[0],6); // llr32[10] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[11] = simde_mm256_extract_epi32(llr256[0],7); // llr32[11] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 7th RE
-    llr32[12] = _mm256_extract_epi32(llr256[1],4); // llr32[12] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[13] = _mm256_extract_epi32(llr256[1],5); // llr32[13] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[12] = simde_mm256_extract_epi32(llr256[1],4); // llr32[12] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[13] = simde_mm256_extract_epi32(llr256[1],5); // llr32[13] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     // 8th RE
-    llr32[14] = _mm256_extract_epi32(llr256[1],6); // llr32[14] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[15] = _mm256_extract_epi32(llr256[1],7); // llr32[15] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
+    llr32[14] = simde_mm256_extract_epi32(llr256[1],6); // llr32[14] low 16 bits-> y_R        , high 16 bits-> y_I
+    llr32[15] = simde_mm256_extract_epi32(llr256[1],7); // llr32[15] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
 
     llr32+=16;
-#else
-    xmm0 = _mm_abs_epi16(rxF[i]); // registers of even index in xmm0-> |y_R|, registers of odd index in xmm0-> |y_I|
-    xmm0 = _mm_subs_epi16(ch_mag[i],xmm0); // registers of even index in xmm0-> |y_R|-|h|^2, registers of odd index in xmm0-> |y_I|-|h|^2
-
-    llr128[0] = _mm_unpacklo_epi32(rxF[i],xmm0); // llr128[0] contains the llrs of the 1st and 2nd REs
-    llr128[1] = _mm_unpackhi_epi32(rxF[i],xmm0); // llr128[1] contains the llrs of the 3rd and 4th REs
-    
-    // 1st RE
-    llr32[0] = _mm_extract_epi32(llr128[0],0); // llr32[0] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[1] = _mm_extract_epi32(llr128[0],1); // llr32[1] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
-
-    // 2nd RE
-    llr32[2] = _mm_extract_epi32(llr128[0],2); // llr32[2] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[3] = _mm_extract_epi32(llr128[0],3); // llr32[3] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
-
-    // 3rd RE
-    llr32[4] = _mm_extract_epi32(llr128[1],0); // llr32[4] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[5] = _mm_extract_epi32(llr128[1],1); // llr32[5] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
-
-    // 4th RE
-    llr32[6] = _mm_extract_epi32(llr128[1],2); // llr32[6] low 16 bits-> y_R        , high 16 bits-> y_I
-    llr32[7] = _mm_extract_epi32(llr128[1],3); // llr32[7] low 16 bits-> |h|-|y_R|^2, high 16 bits-> |h|-|y_I|^2
-
-    llr32+=8;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vqsubq_s16((*(__m128i*)&ones[0]),xmm0);
 
@@ -238,24 +188,13 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
                         uint32_t nb_re,
                         uint8_t  symbol)
 {
-
-#ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
-#else
-  int off = 0;
-#endif
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
   __m256i *rxF = (__m256i*)rxdataF_comp;
   __m256i *ch_mag,*ch_magb;
   register __m256i xmm0,xmm1,xmm2;
-#else
-  __m128i *rxF = (__m128i*)rxdataF_comp;
-  __m128i *ch_mag,*ch_magb;
-  register __m128i xmm0,xmm1,xmm2;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t*)&rxdataF_comp;
   int16x8_t *ch_mag,*ch_magb; // [hna] This should be uncommented once channel estimation is implemented
   int16x8_t xmm0,xmm1,xmm2;
@@ -264,43 +203,25 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
   int i;
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
   ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
   ch_magb = (__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
-#else
-  ch_mag = (__m128i*)&ul_ch_mag[(symbol*nb_rb*12)];
-  ch_magb = (__m128i*)&ul_ch_magb[(symbol*nb_rb*12)];
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
   ch_magb = (int16x8_t*)&ul_ch_magb[(symbol*nb_rb*12)];
 #endif
 
-#ifdef __AVX2__
   int len_mod8 = nb_re&7;
   nb_re    = nb_re>>3;  // length in quad words (4 REs)
   nb_re   += ((len_mod8 == 0) ? 0 : 1);
-#else
-  int len_mod4 = nb_re&3;
-  nb_re    = nb_re>>2;  // length in quad words (4 REs)
-  nb_re   += ((len_mod4 == 0) ? 0 : 1);
-#endif
 
   for (i=0; i<nb_re; i++) {
     xmm0 = rxF[i];
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-    xmm1 = _mm256_abs_epi16(xmm0);
-    xmm1 = _mm256_subs_epi16(ch_mag[i],xmm1);
-    xmm2 = _mm256_abs_epi16(xmm1);
-    xmm2 = _mm256_subs_epi16(ch_magb[i],xmm2);
-#else
-    xmm1 = _mm_abs_epi16(xmm0);
-    xmm1 = _mm_subs_epi16(ch_mag[i],xmm1);
-    xmm2 = _mm_abs_epi16(xmm1);
-    xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
-#endif
-#elif defined(__arm__)
+    xmm1 = simde_mm256_abs_epi16(xmm0);
+    xmm1 = simde_mm256_subs_epi16(ch_mag[i],xmm1);
+    xmm2 = simde_mm256_abs_epi16(xmm1);
+    xmm2 = simde_mm256_subs_epi16(ch_magb[i],xmm2);
+#elif defined(__arm__) || defined(__aarch64__)
     xmm1 = vabsq_s16(xmm0);
     xmm1 = vsubq_s16(ch_mag[i],xmm1);
     xmm2 = vabsq_s16(xmm1);
@@ -311,22 +232,13 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     // 1st RE
     // ---------------------------------------
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-    ulsch_llr[0] = _mm256_extract_epi16(xmm0,0);
-    ulsch_llr[1] = _mm256_extract_epi16(xmm0,1);
-    ulsch_llr[2] = _mm256_extract_epi16(xmm1,0);
-    ulsch_llr[3] = _mm256_extract_epi16(xmm1,1);
-    ulsch_llr[4] = _mm256_extract_epi16(xmm2,0);
-    ulsch_llr[5] = _mm256_extract_epi16(xmm2,1);
-#else
-    ulsch_llr[0] = _mm_extract_epi16(xmm0,0);
-    ulsch_llr[1] = _mm_extract_epi16(xmm0,1);
-    ulsch_llr[2] = _mm_extract_epi16(xmm1,0);
-    ulsch_llr[3] = _mm_extract_epi16(xmm1,1);
-    ulsch_llr[4] = _mm_extract_epi16(xmm2,0);
-    ulsch_llr[5] = _mm_extract_epi16(xmm2,1);
-#endif
-#elif defined(__arm__)
+    ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,0);
+    ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,1);
+    ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,0);
+    ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,1);
+    ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,0);
+    ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,1);
+#elif defined(__arm__) || defined(__aarch64__)
     ulsch_llr[0] = vgetq_lane_s16(xmm0,0);
     ulsch_llr[1] = vgetq_lane_s16(xmm0,1);
     ulsch_llr[2] = vgetq_lane_s16(xmm1,0);
@@ -342,22 +254,13 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     // 2nd RE
     // ---------------------------------------
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-    ulsch_llr[0] = _mm256_extract_epi16(xmm0,2);
-    ulsch_llr[1] = _mm256_extract_epi16(xmm0,3);
-    ulsch_llr[2] = _mm256_extract_epi16(xmm1,2);
-    ulsch_llr[3] = _mm256_extract_epi16(xmm1,3);
-    ulsch_llr[4] = _mm256_extract_epi16(xmm2,2);
-    ulsch_llr[5] = _mm256_extract_epi16(xmm2,3);
-#else
-    ulsch_llr[0] = _mm_extract_epi16(xmm0,2);
-    ulsch_llr[1] = _mm_extract_epi16(xmm0,3);
-    ulsch_llr[2] = _mm_extract_epi16(xmm1,2);
-    ulsch_llr[3] = _mm_extract_epi16(xmm1,3);
-    ulsch_llr[4] = _mm_extract_epi16(xmm2,2);
-    ulsch_llr[5] = _mm_extract_epi16(xmm2,3);
-#endif
-#elif defined(__arm__)
+    ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,2);
+    ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,3);
+    ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,2);
+    ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,3);
+    ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,2);
+    ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,3);
+#elif defined(__arm__) || defined(__aarch64__)
     ulsch_llr[2] = vgetq_lane_s16(xmm0,2);
     ulsch_llr[3] = vgetq_lane_s16(xmm0,3);
     ulsch_llr[2] = vgetq_lane_s16(xmm1,2);
@@ -373,22 +276,13 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     // 3rd RE
     // ---------------------------------------
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-    ulsch_llr[0] = _mm256_extract_epi16(xmm0,4);
-    ulsch_llr[1] = _mm256_extract_epi16(xmm0,5);
-    ulsch_llr[2] = _mm256_extract_epi16(xmm1,4);
-    ulsch_llr[3] = _mm256_extract_epi16(xmm1,5);
-    ulsch_llr[4] = _mm256_extract_epi16(xmm2,4);
-    ulsch_llr[5] = _mm256_extract_epi16(xmm2,5);
-#else
-    ulsch_llr[0] = _mm_extract_epi16(xmm0,4);
-    ulsch_llr[1] = _mm_extract_epi16(xmm0,5);
-    ulsch_llr[2] = _mm_extract_epi16(xmm1,4);
-    ulsch_llr[3] = _mm_extract_epi16(xmm1,5);
-    ulsch_llr[4] = _mm_extract_epi16(xmm2,4);
-    ulsch_llr[5] = _mm_extract_epi16(xmm2,5);
-#endif
-#elif defined(__arm__)
+    ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,4);
+    ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,5);
+    ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,4);
+    ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,5);
+    ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,4);
+    ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,5);
+#elif defined(__arm__) || defined(__aarch64__)
     ulsch_llr[0] = vgetq_lane_s16(xmm0,4);
     ulsch_llr[1] = vgetq_lane_s16(xmm0,5);
     ulsch_llr[2] = vgetq_lane_s16(xmm1,4);
@@ -404,22 +298,13 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     // 4th RE
     // ---------------------------------------
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-    ulsch_llr[0] = _mm256_extract_epi16(xmm0,6);
-    ulsch_llr[1] = _mm256_extract_epi16(xmm0,7);
-    ulsch_llr[2] = _mm256_extract_epi16(xmm1,6);
-    ulsch_llr[3] = _mm256_extract_epi16(xmm1,7);
-    ulsch_llr[4] = _mm256_extract_epi16(xmm2,6);
-    ulsch_llr[5] = _mm256_extract_epi16(xmm2,7);
-#else
-    ulsch_llr[0] = _mm_extract_epi16(xmm0,6);
-    ulsch_llr[1] = _mm_extract_epi16(xmm0,7);
-    ulsch_llr[2] = _mm_extract_epi16(xmm1,6);
-    ulsch_llr[3] = _mm_extract_epi16(xmm1,7);
-    ulsch_llr[4] = _mm_extract_epi16(xmm2,6);
-    ulsch_llr[5] = _mm_extract_epi16(xmm2,7);
-#endif
-#elif defined(__arm__)
+    ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,6);
+    ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,7);
+    ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,6);
+    ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,7);
+    ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,6);
+    ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,7);
+#elif defined(__arm__) || defined(__aarch64__)
     ulsch_llr[0] = vgetq_lane_s16(xmm0,6);
     ulsch_llr[1] = vgetq_lane_s16(xmm0,7);
     ulsch_llr[2] = vgetq_lane_s16(xmm1,6);
@@ -430,37 +315,35 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     // ---------------------------------------
 
     ulsch_llr+=6;
-#ifdef __AVX2__
-    ulsch_llr[0] = _mm256_extract_epi16(xmm0,8);
-    ulsch_llr[1] = _mm256_extract_epi16(xmm0,9);
-    ulsch_llr[2] = _mm256_extract_epi16(xmm1,8);
-    ulsch_llr[3] = _mm256_extract_epi16(xmm1,9);
-    ulsch_llr[4] = _mm256_extract_epi16(xmm2,8);
-    ulsch_llr[5] = _mm256_extract_epi16(xmm2,9);
-
-    ulsch_llr[6] = _mm256_extract_epi16(xmm0,10);
-    ulsch_llr[7] = _mm256_extract_epi16(xmm0,11);
-    ulsch_llr[8] = _mm256_extract_epi16(xmm1,10);
-    ulsch_llr[9] = _mm256_extract_epi16(xmm1,11);
-    ulsch_llr[10] = _mm256_extract_epi16(xmm2,10);
-    ulsch_llr[11] = _mm256_extract_epi16(xmm2,11);
-
-    ulsch_llr[12] = _mm256_extract_epi16(xmm0,12);
-    ulsch_llr[13] = _mm256_extract_epi16(xmm0,13);
-    ulsch_llr[14] = _mm256_extract_epi16(xmm1,12);
-    ulsch_llr[15] = _mm256_extract_epi16(xmm1,13);
-    ulsch_llr[16] = _mm256_extract_epi16(xmm2,12);
-    ulsch_llr[17] = _mm256_extract_epi16(xmm2,13);
-
-    ulsch_llr[18] = _mm256_extract_epi16(xmm0,14);
-    ulsch_llr[19] = _mm256_extract_epi16(xmm0,15);
-    ulsch_llr[20] = _mm256_extract_epi16(xmm1,14);
-    ulsch_llr[21] = _mm256_extract_epi16(xmm1,15);
-    ulsch_llr[22] = _mm256_extract_epi16(xmm2,14);
-    ulsch_llr[23] = _mm256_extract_epi16(xmm2,15);
+    ulsch_llr[0] = simde_mm256_extract_epi16(xmm0,8);
+    ulsch_llr[1] = simde_mm256_extract_epi16(xmm0,9);
+    ulsch_llr[2] = simde_mm256_extract_epi16(xmm1,8);
+    ulsch_llr[3] = simde_mm256_extract_epi16(xmm1,9);
+    ulsch_llr[4] = simde_mm256_extract_epi16(xmm2,8);
+    ulsch_llr[5] = simde_mm256_extract_epi16(xmm2,9);
+
+    ulsch_llr[6] = simde_mm256_extract_epi16(xmm0,10);
+    ulsch_llr[7] = simde_mm256_extract_epi16(xmm0,11);
+    ulsch_llr[8] = simde_mm256_extract_epi16(xmm1,10);
+    ulsch_llr[9] = simde_mm256_extract_epi16(xmm1,11);
+    ulsch_llr[10] = simde_mm256_extract_epi16(xmm2,10);
+    ulsch_llr[11] = simde_mm256_extract_epi16(xmm2,11);
+
+    ulsch_llr[12] = simde_mm256_extract_epi16(xmm0,12);
+    ulsch_llr[13] = simde_mm256_extract_epi16(xmm0,13);
+    ulsch_llr[14] = simde_mm256_extract_epi16(xmm1,12);
+    ulsch_llr[15] = simde_mm256_extract_epi16(xmm1,13);
+    ulsch_llr[16] = simde_mm256_extract_epi16(xmm2,12);
+    ulsch_llr[17] = simde_mm256_extract_epi16(xmm2,13);
+
+    ulsch_llr[18] = simde_mm256_extract_epi16(xmm0,14);
+    ulsch_llr[19] = simde_mm256_extract_epi16(xmm0,15);
+    ulsch_llr[20] = simde_mm256_extract_epi16(xmm1,14);
+    ulsch_llr[21] = simde_mm256_extract_epi16(xmm1,15);
+    ulsch_llr[22] = simde_mm256_extract_epi16(xmm2,14);
+    ulsch_llr[23] = simde_mm256_extract_epi16(xmm2,15);
 
     ulsch_llr+=24;
-#endif
   }
 
 #if defined(__x86_64__) || defined(__i386__)
diff --git a/openair1/PHY/NR_TRANSPORT/pucch_rx.c b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
index ecef5e53cd6b07c7bb98fa9fefcddb1ee09ef9ad..dd138e5265b5553a98c61038c306319df553e1e3 100644
--- a/openair1/PHY/NR_TRANSPORT/pucch_rx.c
+++ b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
@@ -1009,69 +1009,69 @@ void init_pucch2_luts() {
       __m256i *lut_i=&pucch2_lut[b-3][i<<1];
       __m256i *lut_ip1=&pucch2_lut[b-3][1+(i<<1)];
       bit = (out&0x1) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,0);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,0);
       bit = (out&0x2) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,0);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,0);
       bit = (out&0x4) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,1);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,1);
       bit = (out&0x8) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,1);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,1);
       bit = (out&0x10) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,2);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,2);
       bit = (out&0x20) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,2);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,2);
       bit = (out&0x40) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,3);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,3);
       bit = (out&0x80) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,3);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,3);
       bit = (out&0x100) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,4);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,4);
       bit = (out&0x200) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,4);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,4);
       bit = (out&0x400) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,5);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,5);
       bit = (out&0x800) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,5);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,5);
       bit = (out&0x1000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,6);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,6);
       bit = (out&0x2000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,6);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,6);
       bit = (out&0x4000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,7);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,7);
       bit = (out&0x8000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,7);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,7);
       bit = (out&0x10000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,8);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,8);
       bit = (out&0x20000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,8);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,8);
       bit = (out&0x40000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,9);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,9);
       bit = (out&0x80000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,9);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,9);
       bit = (out&0x100000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,10);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,10);
       bit = (out&0x200000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,10);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,10);
       bit = (out&0x400000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,11);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,11);
       bit = (out&0x800000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,11);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,11);
       bit = (out&0x1000000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,12);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,12);
       bit = (out&0x2000000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,12);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,12);
       bit = (out&0x4000000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,13);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,13);
       bit = (out&0x8000000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,13);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,13);
       bit = (out&0x10000000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,14);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,14);
       bit = (out&0x20000000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,14);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,14);
       bit = (out&0x40000000) > 0 ? -1 : 1;
-      *lut_i = _mm256_insert_epi16(*lut_i,bit,15);
+      *lut_i = simde_mm256_insert_epi16(*lut_i,bit,15);
       bit = (out&0x80000000) > 0 ? -1 : 1;
-      *lut_ip1 = _mm256_insert_epi16(*lut_ip1,bit,15);
+      *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,15);
     }
   }
   for (uint16_t i=0;i<16;i++) {
@@ -1497,10 +1497,10 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB,
         for (int group=0;group<ngroup;group++) {
           // do complex correlation
           for (int aa=0;aa<Prx;aa++) {
-            prod_re[aa] = /*_mm256_srai_epi16(*/_mm256_adds_epi16(_mm256_mullo_epi16(pucch2_lut[nb_bit-3][cw<<1],rp_re[aa][symb][group]),
-                                                                  _mm256_mullo_epi16(pucch2_lut[nb_bit-3][(cw<<1)+1],rp_im[aa][symb][group]))/*,5)*/;
-            prod_im[aa] = /*_mm256_srai_epi16(*/_mm256_subs_epi16(_mm256_mullo_epi16(pucch2_lut[nb_bit-3][cw<<1],rp2_im[aa][symb][group]),
-                                                                  _mm256_mullo_epi16(pucch2_lut[nb_bit-3][(cw<<1)+1],rp2_re[aa][symb][group]))/*,5)*/;
+            prod_re[aa] = /*simde_mm256_srai_epi16(*/simde_mm256_adds_epi16(simde_mm256_mullo_epi16(pucch2_lut[nb_bit-3][cw<<1],rp_re[aa][symb][group]),
+                                                                  simde_mm256_mullo_epi16(pucch2_lut[nb_bit-3][(cw<<1)+1],rp_im[aa][symb][group]))/*,5)*/;
+            prod_im[aa] = /*simde_mm256_srai_epi16(*/simde_mm256_subs_epi16(simde_mm256_mullo_epi16(pucch2_lut[nb_bit-3][cw<<1],rp2_im[aa][symb][group]),
+                                                                  simde_mm256_mullo_epi16(pucch2_lut[nb_bit-3][(cw<<1)+1],rp2_re[aa][symb][group]))/*,5)*/;
 #ifdef DEBUG_NR_PUCCH_RX
             printf("prod_re[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa,
                    ((int16_t*)&prod_re[aa])[0],((int16_t*)&prod_re[aa])[1],((int16_t*)&prod_re[aa])[2],((int16_t*)&prod_re[aa])[3],
@@ -1514,14 +1514,14 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB,
                    ((int16_t*)&prod_im[aa])[12],((int16_t*)&prod_im[aa])[13],((int16_t*)&prod_im[aa])[14],((int16_t*)&prod_im[aa])[15]);
 
 #endif
-            prod_re[aa] = _mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1
-            prod_im[aa] = _mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
-            prod_re[aa] = _mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3
-            prod_im[aa] = _mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
-            prod_re[aa] = _mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3+4+5+6+7
-            prod_im[aa] = _mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
-            prod_re[aa] = _mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3+4+5+6+7+8+9+10+11+12+13+14+15
-            prod_im[aa] = _mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
+            prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1
+            prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
+            prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3
+            prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
+            prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3+4+5+6+7
+            prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
+            prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3+4+5+6+7+8+9+10+11+12+13+14+15
+            prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]);
           }
           int64_t corr_re=0,corr_im=0;
 
diff --git a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
index 809cc4dd3b06ce7d6486805c7909720522c20671..0fde06b0cb30df1aa8c16c4f20251bca0b2dbe28 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
+++ b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
@@ -23,6 +23,9 @@
 short filt16a_l0[16] = {
 16384,12288,8192,4096,0,0,0,0,0,0,0,0,0,0,0,0};
 
+short filt16a_mm0[16] = { // triangular taps peaking (16384) at index 4; nr_prs_channel_estimation() selects it as fml and fmm for CombSize 4, k_prime 0
+0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0,0,0,0};
+
 short filt16a_r0[16] = {
 0,0,0,0,0,4096,8192,12288,16384,20480,24576,28672,0,0,0,0};
 
@@ -32,6 +35,15 @@ short filt16a_m0[16] = {
 short filt16a_l1[16] = {
 20480,16384,12288,8192,4096,0,0,0,0,0,0,0,0,0,0,0};
 
+short filt16a_mm1[16] = { // symmetric triangle peaking (16384) at index 5; selected as fmm for CombSize 4, k_prime 1 in nr_prs_channel_estimation()
+0,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0,0,0};
+
+short filt16a_ml1[16] = { // same as filt16a_mm1 with the rising slope continued down to index 0 (-4096); selected as fml for k_prime 1
+-4096,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0,0,0};
+
+short filt16a_mr1[16] = { // same as filt16a_mm1 with the falling slope continued to index 11 (-8192); selected as fmr for k_prime 1
+0,0,4096,8192,12288,16384,12288,8192,4096,0,-4096,-8192,0,0,0,0};
+
 short filt16a_r1[16] = {
 0,0,0,0,0,0,4096,8192,12288,16384,20480,24576,0,0,0,0};
 
@@ -41,6 +53,15 @@ short filt16a_m1[16] = {
 short filt16a_l2[16] = {
 24576,20480,16384,12288,8192,4096,0,0,0,0,0,0,0,0,0,0};
 
+short filt16a_mm2[16] = { // symmetric triangle peaking (16384) at index 6; selected as fmm for CombSize 4, k_prime 2 in nr_prs_channel_estimation()
+0,0,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0,0};
+
+short filt16a_ml2[16] = { // same as filt16a_mm2 with the rising slope continued down to index 0 (-8192); selected as fml for k_prime 2
+-8192,-4096,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0,0};
+
+short filt16a_mr2[16] = { // same as filt16a_mm2 with the falling slope continued to index 11 (-4096); selected as fmr for k_prime 2
+0,0,0,4096,8192,12288,16384,12288,8192,4096,0,-4096,0,0,0,0};
+
 short filt16a_r2[16] = {
 0,0,0,0,0,0,0,4096,8192,12288,16384,20480,0,0,0,0};
 
@@ -50,6 +71,12 @@ short filt16a_m2[16] = {
 short filt16a_l3[16] = {
 28672,24576,20480,16384,12288,8192,4096,0,0,0,0,0,0,0,0,0};
 
+short filt16a_mm3[16] = { // symmetric triangle peaking (16384) at index 7; selected as fmm and fmr for CombSize 4, k_prime 3 in nr_prs_channel_estimation()
+0,0,0,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0};
+
+short filt16a_ml3[16] = { // same as filt16a_mm3 with the rising slope continued down to index 0 (-12288); selected as fml for k_prime 3
+-12288,-8192,-4096,0,4096,8192,12288,16384,12288,8192,4096,0,0,0,0,0};
+
 short filt16a_r3[16] = {
 0,0,0,0,0,0,0,0,4096,8192,12288,16384,0,0,0,0};
 
diff --git a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.h b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.h
index 4e987e5ae4b9baef58f2344efe8a49c2b2cb6f35..d8c3342af97459cdf208d3829437a80fe2dbeca2 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.h
+++ b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.h
@@ -28,24 +28,42 @@ extern short filt16a_r0[16];
 
 extern short filt16a_m0[16];
 
+extern short filt16a_mm0[16];
+
 extern short filt16a_l1[16];
 
 extern short filt16a_r1[16];
 
 extern short filt16a_m1[16];
 
+extern short filt16a_mm1[16];
+
+extern short filt16a_mr1[16];
+
+extern short filt16a_ml1[16];
+
 extern short filt16a_l2[16];
 
 extern short filt16a_r2[16];
 
 extern short filt16a_m2[16];
 
+extern short filt16a_mm2[16];
+
+extern short filt16a_mr2[16];
+
+extern short filt16a_ml2[16];
+
 extern short filt16a_l3[16];
 
 extern short filt16a_r3[16];
 
 extern short filt16a_m3[16];
 
+extern short filt16a_mm3[16];
+
+extern short filt16a_ml3[16];
+
 extern short filt16a_l0_dc[16];
 
 extern short filt16a_r0_dc[16];
diff --git a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c
index ba7e61afe67b6972d542d35144c9ced773845b9c..f15b99fb1c32f0102cb943a02fdc36fb78063093 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c
+++ b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c
@@ -28,11 +28,542 @@
 #include "PHY/NR_REFSIG/dmrs_nr.h"
 #include "PHY/NR_REFSIG/ptrs_nr.h"
 #include "PHY/NR_TRANSPORT/nr_sch_dmrs.h"
+#include "PHY/NR_TRANSPORT/nr_transport_proto.h"
+#include "common/utils/nr/nr_common.h"
 #include "filt16a_32.h"
+#include "T.h"
 #include <openair1/PHY/TOOLS/phy_scope_interface.h>
 
 //#define DEBUG_PDSCH
 //#define DEBUG_PDCCH
+//#define DEBUG_PBCH
+//#define DEBUG_PRS_CHEST   // To enable PRS Matlab dumps
+//#define DEBUG_PRS_PRINTS  // To enable PRS channel estimation debug logs
+extern short nr_qpsk_mod_table[8];
+
+/* SNR contribution, in dB, of a single PRS pilot.
+ * rxF      : received resource element as interleaved (re, im) int16 samples
+ * noiseFig : residual (rxF - ch*pil) of the same resource element, same layout
+ * Both log10() arguments are clamped to >= 1: a zero residual, or a residual at least as
+ * large as the received sample, would otherwise hand log10() a zero or out-of-range value,
+ * and converting the resulting inf/NaN to the integer SNR accumulator is undefined. */
+static inline double prs_pilot_snr_dB(const int16_t *rxF, const int16_t *noiseFig)
+{
+  const double rx_pwr    = squaredMod(*(const c16_t *)rxF);
+  const double noise_pwr = squaredMod(*(const c16_t *)noiseFig);
+  return 10*log10(fmax(rx_pwr - noise_pwr, 1.0)) - 10*log10(fmax(noise_pwr, 1.0));
+}
+
+/* Estimate the DL PRS channel of one PRS resource and derive its ToA and SNR measurements.
+ *
+ * For each PRS symbol and RX antenna the QPSK pilots are rebuilt from ue->nr_gold_prs, the
+ * received pilots in ue->common_vars.rxdataF are multiplied by the conjugate pilots, and the
+ * per-pilot estimates are spread over neighbouring subcarriers with the filt8_* (CombSize 2)
+ * or filt16a_* (CombSize 4) interpolation filters into a frequency-domain buffer. The SNR of
+ * an antenna is the mean of the per-pilot values returned by prs_pilot_snr_dB(). The buffer
+ * is then scaled by 1/NumPRSSymbols, copied FFT-shifted into an oversampled buffer,
+ * transformed with idft() and scanned with peak_estimator(); results are stored in
+ * ue->prs_vars[gNB_id]->prs_resource[rsc_id].prs_meas[rxAnt].
+ *
+ * gNB_id       : index into ue->prs_vars and ue->nr_gold_prs
+ * rsc_id       : PRS resource index within that entry
+ * rep_num      : repetition index; rep_num*PRSResourceTimeGap slots are subtracted from
+ *                proc->nr_slot_rx to select the Gold sequence slot
+ * ue           : UE PHY context (rxdataF, PRS configuration and measurement storage)
+ * proc         : supplies frame_rx and nr_slot_rx
+ * frame_params : antenna count, OFDM symbol size, first carrier offset, slots per frame
+ *
+ * Returns 0 on success and -1 on an invalid k_prime or an unsupported IDFT size. CombSize
+ * values other than 2 and 4 trigger AssertFatal.
+ *
+ * NOTE(review): ch_tmp_buf is a single buffer shared by all RX antennas and is rescaled
+ * once per antenna in the final loop, while snr is restarted for every symbol and
+ * re_offset keeps the value of the last symbol. This looks right only for one RX antenna
+ * and a symbol-independent offset - confirm before relying on multi-antenna results. */
+int nr_prs_channel_estimation(uint8_t gNB_id,
+                              uint8_t rsc_id,
+                              uint8_t rep_num,
+                              PHY_VARS_NR_UE *ue,
+                              UE_nr_rxtx_proc_t *proc,
+                              NR_DL_FRAME_PARMS *frame_params)
+{
+  uint8_t rxAnt = 0, idx = 0;
+  int32_t **rxdataF      = ue->common_vars.rxdataF;
+  prs_config_t *prs_cfg  = &ue->prs_vars[gNB_id]->prs_resource[rsc_id].prs_cfg;
+  prs_meas_t **prs_meas  = ue->prs_vars[gNB_id]->prs_resource[rsc_id].prs_meas;
+  c16_t ch_tmp_buf[ue->frame_parms.ofdm_symbol_size] __attribute__((aligned(32)));
+  int32_t chF_interpol[frame_params->nb_antennas_rx][NR_PRS_IDFT_OVERSAMP_FACTOR*ue->frame_parms.ofdm_symbol_size] __attribute__((aligned(32)));
+  int32_t chT_interpol[frame_params->nb_antennas_rx][NR_PRS_IDFT_OVERSAMP_FACTOR*ue->frame_parms.ofdm_symbol_size] __attribute__((aligned(32)));
+  memset(ch_tmp_buf,0,sizeof(ch_tmp_buf));
+  memset(chF_interpol,0,sizeof(chF_interpol));
+  memset(chT_interpol,0,sizeof(chT_interpol));
+
+  int slot_prs           = (proc->nr_slot_rx - rep_num*prs_cfg->PRSResourceTimeGap + frame_params->slots_per_frame)%frame_params->slots_per_frame;
+  uint32_t **nr_gold_prs = ue->nr_gold_prs[gNB_id][rsc_id][slot_prs];
+
+  int16_t *rxF, *pil, *fl, *fm, *fmm, *fml, *fmr, *fr, mod_prs[NR_MAX_PRS_LENGTH<<1];
+  int16_t ch[2] = {0}, noiseFig[2] = {0};
+  int16_t k_prime = 0, k = 0, re_offset = 0, first_half = 0, second_half = 0;
+  int32_t ch_pwr = 0, snr = 0;
+#ifdef DEBUG_PRS_CHEST
+  char filename[64] = {0}, varname[64] = {0};
+#endif
+  int16_t *ch_tmp     = (int16_t *)ch_tmp_buf;
+  // 1/NumPRSSymbols as a Q15 factor, applied after all symbols have been accumulated
+  int16_t scale_factor = (1.0f/(float)(prs_cfg->NumPRSSymbols))*(1<<15);
+  int16_t num_pilots   = (12/prs_cfg->CombSize)*prs_cfg->NumRB;
+  // first bin of the central ofdm_symbol_size-wide window inside the oversampled buffers
+  int16_t start_offset = (NR_PRS_IDFT_OVERSAMP_FACTOR-1)*frame_params->ofdm_symbol_size>>1;
+
+  int16_t k_prime_table[K_PRIME_TABLE_ROW_SIZE][K_PRIME_TABLE_COL_SIZE] = PRS_K_PRIME_TABLE;
+  for(int l = prs_cfg->SymbolStart; l < prs_cfg->SymbolStart+prs_cfg->NumPRSSymbols; l++)
+  {
+    int symInd = l-prs_cfg->SymbolStart;
+    if (prs_cfg->CombSize == 2) {
+      k_prime = k_prime_table[0][symInd];
+    }
+    else if (prs_cfg->CombSize == 4){
+      k_prime = k_prime_table[1][symInd];
+    }
+    else if (prs_cfg->CombSize == 6){
+      k_prime = k_prime_table[2][symInd];
+    }
+    else if (prs_cfg->CombSize == 12){
+      k_prime = k_prime_table[3][symInd];
+    }
+
+#ifdef DEBUG_PRS_PRINTS
+    printf("[gNB %d][rsc %d] PRS config l %d k_prime %d:\nprs_cfg->SymbolStart %d\nprs_cfg->NumPRSSymbols %d\nprs_cfg->NumRB %d\nprs_cfg->CombSize %d\n", gNB_id, rsc_id, l, k_prime, prs_cfg->SymbolStart, prs_cfg->NumPRSSymbols, prs_cfg->NumRB, prs_cfg->CombSize);
+#endif
+    // Pilots generation and modulation
+    for (int m = 0; m < num_pilots; m++)
+    {
+      idx = (((nr_gold_prs[l][(m<<1)>>5])>>((m<<1)&0x1f))&3);
+      mod_prs[m<<1]     = nr_qpsk_mod_table[idx<<1];
+      mod_prs[(m<<1)+1] = nr_qpsk_mod_table[(idx<<1) + 1];
+    }
+
+    for (rxAnt=0; rxAnt < frame_params->nb_antennas_rx; rxAnt++)
+    {
+      snr = 0;
+
+      // calculate RE offset
+      k = re_offset = (prs_cfg->REOffset+k_prime) % prs_cfg->CombSize + prs_cfg->RBOffset*12 + frame_params->first_carrier_offset;
+
+      // Channel estimation and interpolation
+      pil       = (int16_t *)&mod_prs[0];
+      rxF       = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+
+      if(prs_cfg->CombSize == 2)
+      {
+        // Choose the interpolation filters
+        switch (k_prime) {
+          case 0:
+            fl  = filt8_l0;
+            fml = filt8_m0;
+            fmm = filt8_mm0;
+            fmr = filt8_mr0;
+            fm  = filt8_m0;
+            fr  = filt8_r0;
+            break;
+
+          case 1:
+            fl  = filt8_l1;
+            fmm = filt8_mm1;
+            fml = filt8_ml1;
+            fmr = fmm;
+            fm  = filt8_m1;
+            fr  = filt8_r1;
+            break;
+
+          default:
+            LOG_I(PHY, "%s: ERROR!! Invalid k_prime=%d for PRS comb_size %d, symbol %d\n",__FUNCTION__, k_prime, prs_cfg->CombSize, l);
+            return(-1);
+            break;
+        }
+
+        //Start pilot
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fl,
+                                           ch,
+                                           ch_tmp,
+                                           8);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, 0, snr, rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        pil +=2;
+        k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+        rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+
+        //Middle pilots
+        // two pilots are consumed per iteration; ch_tmp then advances by 8 int16 (4 complex REs)
+        for(int pIdx = 1; pIdx < num_pilots-1; pIdx+=2)
+        {
+          ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+          ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+          if(pIdx == 1) // 2nd pilot
+          {
+            multadd_real_vector_complex_scalar(fml,
+                                               ch,
+                                               ch_tmp,
+                                               8);
+          }
+          else
+          {
+            multadd_real_vector_complex_scalar(fm,
+                                               ch,
+                                               ch_tmp,
+                                               8);
+          }
+
+          //SNR estimation
+          noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+          noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+          snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+          printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, pIdx, snr/(pIdx+1), rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+          pil +=2;
+          k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+          rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+
+          ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+          ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+          if(pIdx == (num_pilots-3)) // 2nd last pilot
+          {
+            multadd_real_vector_complex_scalar(fmr,
+                                               ch,
+                                               ch_tmp,
+                                               8);
+          }
+          else
+          {
+            multadd_real_vector_complex_scalar(fmm,
+                                               ch,
+                                               ch_tmp,
+                                               8);
+          }
+
+          //SNR estimation
+          noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+          noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+          snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+          printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, pIdx+1, snr/(pIdx+2), rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+          pil +=2;
+          k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+          rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+          ch_tmp +=8;
+        }
+
+        //End pilot
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fr,
+                                           ch,
+                                           ch_tmp,
+                                           8);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, num_pilots-1, snr/num_pilots, rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        // average out the SNR computed
+        snr = snr/num_pilots;
+        prs_meas[rxAnt]->snr = snr;
+      }
+      else if(prs_cfg->CombSize == 4)
+      {
+        // Choose the interpolation filters
+        switch (k_prime) {
+          case 0:
+            fl  = filt16a_l0;
+            fml = filt16a_mm0;
+            fmm = filt16a_mm0;
+            fmr = filt16a_m0;
+            fm  = filt16a_m0;
+            fr  = filt16a_r0;
+            break;
+
+          case 1:
+            fl  = filt16a_l1;
+            fml = filt16a_ml1;
+            fmm = filt16a_mm1;
+            fmr = filt16a_mr1;
+            fm  = filt16a_m1;
+            fr  = filt16a_r1;
+            break;
+
+          case 2:
+            fl  = filt16a_l2;
+            fml = filt16a_ml2;
+            fmm = filt16a_mm2;
+            fmr = filt16a_mr2;
+            fm  = filt16a_m2;
+            fr  = filt16a_r2;
+            break;
+
+          case 3:
+            fl  = filt16a_l3;
+            fml = filt16a_ml3;
+            fmm = filt16a_mm3;
+            fmr = filt16a_mm3;
+            fm  = filt16a_m3;
+            fr  = filt16a_r3;
+            break;
+
+          default:
+            LOG_I(PHY, "%s: ERROR!! Invalid k_prime=%d for PRS comb_size %d, symbol %d\n",__FUNCTION__, k_prime, prs_cfg->CombSize, l);
+            return(-1);
+            break;
+        }
+
+        //Start pilot
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fl,
+                                           ch,
+                                           ch_tmp,
+                                           16);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, 0, snr, rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        pil +=2;
+        k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+        rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fml,
+                                           ch,
+                                           ch_tmp,
+                                           16);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, 1, snr/2, rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        pil +=2;
+        k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+        rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+        // 8 int16 = 4 complex REs, i.e. one comb-4 pilot spacing
+        ch_tmp +=8;
+
+        //Middle pilots
+        for(int pIdx = 2; pIdx < num_pilots-2; pIdx++)
+        {
+          ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+          ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+          multadd_real_vector_complex_scalar(fmm,
+                                             ch,
+                                             ch_tmp,
+                                             16);
+
+          //SNR estimation
+          noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+          noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+          snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+          printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, pIdx, snr/(pIdx+1), rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+          pil +=2;
+          k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+          rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+          ch_tmp +=8;
+        }
+
+        //End pilot
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fmr,
+                                           ch,
+                                           ch_tmp,
+                                           16);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, num_pilots-2, snr/(num_pilots-1), rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        pil +=2;
+        k   = (k+prs_cfg->CombSize) % frame_params->ofdm_symbol_size;
+        rxF = (int16_t *)&rxdataF[rxAnt][l*frame_params->ofdm_symbol_size + k];
+
+        ch[0] = (int16_t)(((int32_t)rxF[0]*pil[0] + (int32_t)rxF[1]*pil[1])>>15);
+        ch[1] = (int16_t)(((int32_t)rxF[1]*pil[0] - (int32_t)rxF[0]*pil[1])>>15);
+        multadd_real_vector_complex_scalar(fr,
+                                           ch,
+                                           ch_tmp,
+                                           16);
+
+        //SNR estimation
+        noiseFig[0] = rxF[0] - (int16_t)(((int32_t)ch[0]*pil[0] - (int32_t)ch[1]*pil[1])>>15);
+        noiseFig[1] = rxF[1] - (int16_t)(((int32_t)ch[1]*pil[0] + (int32_t)ch[0]*pil[1])>>15);
+        snr += prs_pilot_snr_dB(rxF, noiseFig);
+#ifdef DEBUG_PRS_PRINTS
+        printf("[Rx %d] pilot %3d, SNR %+2d dB: rxF - > (%+3d, %+3d) addr %p  ch -> (%+3d, %+3d), pil -> (%+d, %+d) \n", rxAnt, num_pilots-1, snr/num_pilots, rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
+#endif
+        // average out the SNR computed
+        snr = snr/num_pilots;
+        prs_meas[rxAnt]->snr = snr;
+      }
+      else
+      {
+        AssertFatal((prs_cfg->CombSize == 2)||(prs_cfg->CombSize == 4), "[%s] DL PRS CombSize other than 2 and 4 are NOT supported currently. Exiting!!!", __FUNCTION__);
+      }
+
+      //reset channel pointer
+      ch_tmp = (int16_t*)ch_tmp_buf;
+    } // for rxAnt
+  } //for l
+
+  for (rxAnt=0; rxAnt < frame_params->nb_antennas_rx; rxAnt++)
+  {
+    // scale by averaging factor 1/NumPrsSymbols
+    multadd_complex_vector_real_scalar(ch_tmp,
+                                       scale_factor,
+                                       ch_tmp,
+                                       1,
+                                       frame_params->ofdm_symbol_size);
+
+#ifdef DEBUG_PRS_PRINTS
+    for (int rb = 0; rb < prs_cfg->NumRB; rb++)
+    {
+      printf("================================================================\n");
+      printf("\t\t\t[gNB %d][Rx %d][RB %d]\n", gNB_id, rxAnt, rb);
+      printf("================================================================\n");
+      const int reIdx = (12*rb)<<1; // int on purpose: the uint8_t idx wraps for rb >= 11
+      printf("%4d %4d  %4d %4d  %4d %4d  %4d %4d  %4d %4d  %4d %4d\n", ch_tmp[reIdx], ch_tmp[reIdx+1], ch_tmp[reIdx+2], ch_tmp[reIdx+3], ch_tmp[reIdx+4], ch_tmp[reIdx+5], ch_tmp[reIdx+6], ch_tmp[reIdx+7], ch_tmp[reIdx+8], ch_tmp[reIdx+9], ch_tmp[reIdx+10], ch_tmp[reIdx+11]);
+      printf("%4d %4d  %4d %4d  %4d %4d  %4d %4d  %4d %4d  %4d %4d\n", ch_tmp[reIdx+12], ch_tmp[reIdx+13], ch_tmp[reIdx+14], ch_tmp[reIdx+15], ch_tmp[reIdx+16], ch_tmp[reIdx+17], ch_tmp[reIdx+18], ch_tmp[reIdx+19], ch_tmp[reIdx+20], ch_tmp[reIdx+21], ch_tmp[reIdx+22], ch_tmp[reIdx+23]);
+      printf("\n");
+    }
+#endif
+
+    // Place PRS channel estimates in FFT shifted format
+    // ch_tmp[0] belongs to subcarrier re_offset: the first_half samples up to the end of the
+    // symbol go to window bin re_offset, the second_half samples that wrapped go to window bin 0
+    first_half  = frame_params->ofdm_symbol_size - re_offset;
+    second_half = (prs_cfg->NumRB*12) - first_half;
+    if(first_half > 0)
+      memcpy((int16_t *)&chF_interpol[rxAnt][start_offset+re_offset], &ch_tmp[0], first_half*sizeof(int32_t));
+    if(second_half > 0)
+      memcpy((int16_t *)&chF_interpol[rxAnt][start_offset], &ch_tmp[(first_half<<1)], second_half*sizeof(int32_t));
+
+    // Time domain IMPULSE response
+    idft_size_idx_t idftsizeidx;
+    switch (NR_PRS_IDFT_OVERSAMP_FACTOR*frame_params->ofdm_symbol_size) {
+    case 128:   idftsizeidx = IDFT_128;   break;
+    case 256:   idftsizeidx = IDFT_256;   break;
+    case 512:   idftsizeidx = IDFT_512;   break;
+    case 768:   idftsizeidx = IDFT_768;   break;
+    case 1024:  idftsizeidx = IDFT_1024;  break;
+    case 1536:  idftsizeidx = IDFT_1536;  break;
+    case 2048:  idftsizeidx = IDFT_2048;  break;
+    case 3072:  idftsizeidx = IDFT_3072;  break;
+    case 4096:  idftsizeidx = IDFT_4096;  break;
+    // 16x IDFT oversampling
+    case 8192:  idftsizeidx = IDFT_8192;  break;
+    case 12288: idftsizeidx = IDFT_12288; break;
+    case 16384: idftsizeidx = IDFT_16384; break;
+    case 24576: idftsizeidx = IDFT_24576; break;
+    case 32768: idftsizeidx = IDFT_32768; break;
+    case 49152: idftsizeidx = IDFT_49152; break;
+    case 65536: idftsizeidx = IDFT_65536; break;
+    default:
+      LOG_I(PHY, "%s: unsupported ofdm symbol size \n", __FUNCTION__);
+      assert(0);
+      return(-1); // reached only when assert() is compiled out: never call idft() with an unset index
+    }
+
+    idft(idftsizeidx,
+         (int16_t *)&chF_interpol[rxAnt][0],
+         (int16_t *)&chT_interpol[rxAnt][0],1);
+
+    // peak estimator
+    peak_estimator(&chT_interpol[rxAnt][start_offset],
+                   frame_params->ofdm_symbol_size,
+                   &prs_meas[rxAnt]->dl_toa,
+                   &ch_pwr);
+
+    //prs measurements
+    prs_meas[rxAnt]->gNB_id     = gNB_id;
+    prs_meas[rxAnt]->sfn        = proc->frame_rx;
+    prs_meas[rxAnt]->slot       = proc->nr_slot_rx;
+    prs_meas[rxAnt]->rxAnt_idx  = rxAnt;
+    // NOTE(review): dl_aoa is loaded with the PRS resource id, not an angle - confirm this is intended
+    prs_meas[rxAnt]->dl_aoa     = rsc_id;
+    LOG_I(PHY, "[gNB %d][rsc %d][Rx %d][sfn %d][slot %d] DL PRS ToA ==> %d / %d samples, peak channel power %.1f dBm, SNR %+2d dB\n", gNB_id, rsc_id, rxAnt, proc->frame_rx, proc->nr_slot_rx, prs_meas[rxAnt]->dl_toa, frame_params->ofdm_symbol_size, 10*log10(ch_pwr/frame_params->ofdm_symbol_size)-30, prs_meas[rxAnt]->snr);
+
+#ifdef DEBUG_PRS_CHEST
+    sprintf(filename, "%s%i%s", "PRSpilot_", rxAnt, ".m");
+    LOG_M(filename, "prs_loc", &mod_prs[0], num_pilots,1,1);
+    sprintf(filename, "%s%i%s", "rxSigF_", rxAnt, ".m");
+    sprintf(varname, "%s%i", "rxF_", rxAnt);
+    LOG_M(filename, varname, &rxdataF[rxAnt][0], prs_cfg->NumPRSSymbols*frame_params->ofdm_symbol_size,1,1);
+    sprintf(filename, "%s%i%s", "prsChestF_", rxAnt, ".m");
+    sprintf(varname, "%s%i", "prsChF_", rxAnt);
+    LOG_M(filename, varname, &chF_interpol[rxAnt][start_offset], frame_params->ofdm_symbol_size,1,1);
+    sprintf(filename, "%s%i%s", "prsChestT_", rxAnt, ".m");
+    sprintf(varname, "%s%i", "prsChT_", rxAnt);
+    LOG_M(filename, varname, &chT_interpol[rxAnt][start_offset], frame_params->ofdm_symbol_size,1,1);
+#endif
+
+    // T tracer dump
+    T(T_UE_PHY_INPUT_SIGNAL, T_INT(gNB_id),
+      T_INT(proc->frame_rx), T_INT(proc->nr_slot_rx),
+      T_INT(rxAnt), T_BUFFER(&rxdataF[rxAnt][0], frame_params->samples_per_slot_wCP*sizeof(int32_t)));
+
+    T(T_UE_PHY_DL_CHANNEL_ESTIMATE_FREQ, T_INT(gNB_id), T_INT(rsc_id),
+      T_INT(proc->frame_rx), T_INT(proc->nr_slot_rx),
+      T_INT(rxAnt), T_BUFFER(&chF_interpol[rxAnt][start_offset], frame_params->ofdm_symbol_size*sizeof(int32_t)));
+
+    T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(gNB_id), T_INT(rsc_id),
+      T_INT(proc->frame_rx), T_INT(proc->nr_slot_rx),
+      T_INT(rxAnt), T_BUFFER(&chT_interpol[rxAnt][start_offset], frame_params->ofdm_symbol_size*sizeof(int32_t)));
+  }
+
+  return(0);
+}
+
+/* Locate the strongest tap of a complex channel-estimate buffer.
+ * buffer   : buf_len samples; the int32_t array is reinterpreted as an array of c16_t
+ * buf_len  : number of complex samples to scan
+ * peak_idx : out, index of the first sample with the largest squared magnitude (0 if none is > 0)
+ * peak_val : out, that squared magnitude as returned by squaredMod() (0 if none is > 0) */
+void peak_estimator(int32_t *buffer, int32_t buf_len, int32_t *peak_idx, int32_t *peak_val)
+{
+  const c16_t *samples = (const c16_t *)buffer;
+  int32_t best_val = 0, best_idx = 0;
+  for (int k = 0; k < buf_len; k++) {
+    const int32_t pwr = squaredMod(samples[k]);
+    if (pwr > best_val) {
+      best_val = pwr;
+      best_idx = k;
+    }
+  }
+  *peak_val = best_val;
+  *peak_idx = best_idx;
+}
 
 #define CH_INTERP 0
 #define NO_INTERP 1
@@ -55,7 +586,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
   uint8_t nushift;
   uint8_t ssb_index=current_ssb->i_ssb;
   uint8_t n_hf=current_ssb->n_hf;
-  int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+  int **rxdataF=ue->common_vars.rxdataF;
 
   nushift =  ue->frame_parms.Nid_cell%4;
   ue->frame_parms.nushift = nushift;
@@ -71,9 +602,8 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
 
   k = nushift;
 
-#ifdef DEBUG_CH
-  printf("PBCH DMRS Correlation : ThreadId %d, gNB_id %d , OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n",proc->thread_id, gNB_id,ue->frame_parms.ofdm_symbol_size,
-         ue->frame_parms.Ncp,Ns,k, symbol);
+#ifdef DEBUG_PBCH
+  printf("PBCH DMRS Correlation : gNB_id %d , OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n", gNB_id, ue->frame_parms.ofdm_symbol_size, ue->frame_parms.Ncp, Ns, k, symbol);
 #endif
 
   // generate pilot
@@ -85,7 +615,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
     pil   = (int16_t *)&pilot[0];
     rxF   = (int16_t *)&rxdataF[aarx][(symbol_offset+k+re_offset)];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pbch ch est pilot addr %p RB_DL %d\n",&pilot[0], ue->frame_parms.N_RB_DL);
     printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset);
     printf("rxF addr %p\n", rxF);
@@ -99,7 +629,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
     current_ssb->c_re += ch[0];
     current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("ch 0 %d\n",((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1]));
     printf("pilot 0 : rxF - > (%d,%d) addr %p  ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
 #endif
@@ -115,7 +645,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
     current_ssb->c_re += ch[0];
     current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -126,7 +656,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
     current_ssb->c_re += ch[0];
     current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pilot 2 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -151,7 +681,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
       current_ssb->c_re += ch[0];
       current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -166,7 +696,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
       current_ssb->c_re += ch[0];
       current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
       pil += 2;
@@ -180,7 +710,7 @@ int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
       current_ssb->c_re += ch[0];
       current_ssb->c_im += ch[1];
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+2,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -218,7 +748,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
   //int slot_pbch;
 
   uint8_t nushift;
-   int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+   int **rxdataF=ue->common_vars.rxdataF;
 
   nushift =  ue->frame_parms.Nid_cell%4;
   ue->frame_parms.nushift = nushift;
@@ -236,9 +766,8 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
 
   k = nushift;
 
-#ifdef DEBUG_CH
-  printf("PBCH Channel Estimation : ThreadId %d, gNB_id %d ch_offset %d, OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n",proc->thread_id, gNB_id,ch_offset,ue->frame_parms.ofdm_symbol_size,
-         ue->frame_parms.Ncp,Ns,k, symbol);
+#ifdef DEBUG_PBCH
+  printf("PBCH Channel Estimation : gNB_id %d ch_offset %d, OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n", gNB_id, ch_offset, ue->frame_parms.ofdm_symbol_size, ue->frame_parms.Ncp, Ns, k, symbol);
 #endif
 
   switch (k) {
@@ -287,6 +816,10 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
     idftsizeidx = IDFT_512;
     break;
     
+  case 768:
+    idftsizeidx = IDFT_768;
+    break;
+
   case 1024:
     idftsizeidx = IDFT_1024;
     break;
@@ -324,7 +857,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
 
     memset(dl_ch,0,sizeof(struct complex16)*(ue->frame_parms.ofdm_symbol_size));
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pbch ch est pilot addr %p RB_DL %d\n",&pilot[0], ue->frame_parms.N_RB_DL);
     printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset);
     printf("rxF addr %p\n", rxF);
@@ -336,7 +869,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
     ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
     ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("ch 0 %d\n",((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1]));
     printf("pilot 0 : rxF - > (%d,%d) addr %p  ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],&rxF[0],ch[0],ch[1],pil[0],pil[1]);
 #endif
@@ -355,7 +888,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
     ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
     multadd_real_vector_complex_scalar(fm,
@@ -369,7 +902,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
     ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
     ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
     printf("pilot 2 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -397,7 +930,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
       ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
       ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
       multadd_real_vector_complex_scalar(fl,
@@ -416,7 +949,7 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
       ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
       ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
       multadd_real_vector_complex_scalar(fm,
@@ -426,12 +959,11 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
       pil += 2;
       re_offset = (re_offset+4) % ue->frame_parms.ofdm_symbol_size;
       rxF   = (int16_t *)&rxdataF[aarx][(symbol_offset+k+re_offset)];
-        
 
       ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
       ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
 
-#ifdef DEBUG_CH
+#ifdef DEBUG_PBCH
       printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+2,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
 #endif
 
@@ -463,16 +995,16 @@ int nr_pbch_channel_estimation(PHY_VARS_NR_UE *ue,
   return(0);
 }
 
-int nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
-                                UE_nr_rxtx_proc_t *proc,
-                                uint8_t gNB_id,
-                                unsigned char Ns,
-                                unsigned char symbol,
-                                unsigned short scrambling_id,
-                                unsigned short coreset_start_subcarrier,
-                                unsigned short nb_rb_coreset,
-                                int32_t pdcch_est_size,
-                                int32_t pdcch_dl_ch_estimates[][pdcch_est_size])
+void nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
+                                 UE_nr_rxtx_proc_t *proc,
+                                 uint8_t gNB_id,
+                                 unsigned char Ns,
+                                 unsigned char symbol,
+                                 fapi_nr_coreset_t *coreset,
+                                 uint16_t first_carrier_offset,
+                                 uint16_t BWPStart,
+                                 int32_t pdcch_est_size,
+                                 int32_t pdcch_dl_ch_estimates[][pdcch_est_size])
 {
 
   unsigned char aarx;
@@ -481,16 +1013,27 @@ int nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
   int16_t ch[2],*pil,*rxF,*dl_ch;
   int ch_offset,symbol_offset;
 
-  int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+  int **rxdataF=ue->common_vars.rxdataF;
 
   ch_offset     = ue->frame_parms.ofdm_symbol_size*symbol;
 
   symbol_offset = ue->frame_parms.ofdm_symbol_size*symbol;
 
+  int nb_rb_coreset=0;
+  int coreset_start_rb=0;
+  get_coreset_rballoc(coreset->frequency_domain_resource,&nb_rb_coreset,&coreset_start_rb);
+  if(nb_rb_coreset==0) return;
+
+#ifdef DEBUG_PDCCH
+  printf("pdcch_channel_estimation: first_carrier_offset %d, BWPStart %d, coreset_start_rb %d, coreset_nb_rb %d\n",
+         first_carrier_offset, BWPStart, coreset_start_rb, nb_rb_coreset);
+#endif
+
+  unsigned short coreset_start_subcarrier = first_carrier_offset+(BWPStart + coreset_start_rb)*12;
 
 #ifdef DEBUG_PDCCH
-  printf("PDCCH Channel Estimation : ThreadId %d, gNB_id %d ch_offset %d, OFDM size %d, Ncp=%d, Ns=%d, symbol %d\n",
-         proc->thread_id, gNB_id,ch_offset,ue->frame_parms.ofdm_symbol_size,ue->frame_parms.Ncp,Ns,symbol);
+  printf("PDCCH Channel Estimation : gNB_id %d ch_offset %d, OFDM size %d, Ncp=%d, Ns=%d, symbol %d\n",
+         gNB_id,ch_offset,ue->frame_parms.ofdm_symbol_size,ue->frame_parms.Ncp,Ns,symbol);
 #endif
 
 #if CH_INTERP
@@ -499,28 +1042,32 @@ int nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
   int16_t *fr = filt16a_r1;
 #endif
 
+  unsigned short scrambling_id = coreset->pdcch_dmrs_scrambling_id;
   // checking if re-initialization of scrambling IDs is needed (should be done here but scrambling ID for PDCCH is not taken from RRC)
   if (scrambling_id != ue->scramblingID_pdcch){
     ue->scramblingID_pdcch = scrambling_id;
     nr_gold_pdcch(ue,ue->scramblingID_pdcch);
   }
 
-  // generate pilot
-  int pilot[nb_rb_coreset * 3] __attribute__((aligned(16))); 
-  nr_pdcch_dmrs_rx(ue,gNB_id,Ns,ue->nr_gold_pdcch[gNB_id][Ns][symbol], &pilot[0],2000,nb_rb_coreset);
+  int dmrs_ref = 0;
+  if (coreset->CoreSetType == NFAPI_NR_CSET_CONFIG_PDCCH_CONFIG)
+    dmrs_ref = BWPStart;
 
+  // generate pilot
+  int pilot[(nb_rb_coreset + dmrs_ref) * 3] __attribute__((aligned(16)));
+  nr_pdcch_dmrs_rx(ue,Ns,ue->nr_gold_pdcch[gNB_id][Ns][symbol], &pilot[0],2000,(nb_rb_coreset+dmrs_ref));
 
   for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) {
 
     k = coreset_start_subcarrier;
-    pil   = (int16_t *)&pilot[0];
+    pil   = (int16_t *)&pilot[dmrs_ref*3];
     rxF   = (int16_t *)&rxdataF[aarx][(symbol_offset+k+1)];
     dl_ch = (int16_t *)&pdcch_dl_ch_estimates[aarx][ch_offset];
 
     memset(dl_ch,0,4*(ue->frame_parms.ofdm_symbol_size));
 
 #ifdef DEBUG_PDCCH
-    printf("pdcch ch est pilot addr %p RB_DL %d\n",&pilot[0], ue->frame_parms.N_RB_DL);
+    printf("pdcch ch est pilot addr %p RB_DL %d\n",&pilot[dmrs_ref*3], ue->frame_parms.N_RB_DL);
     printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset);
     printf("rxF addr %p\n", rxF);
 
@@ -687,8 +1234,6 @@ int nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
     //}
 
   }
-
-  return(0);
 }
 
 int nr_pdsch_channel_estimation(PHY_VARS_NR_UE *ue,
@@ -714,8 +1259,8 @@ int nr_pdsch_channel_estimation(PHY_VARS_NR_UE *ue,
   int ch_offset,symbol_offset;
 
   uint8_t nushift;
-  int **dl_ch_estimates = ue->pdsch_vars[proc->thread_id][gNB_id]->dl_ch_estimates;
-  int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+  int **dl_ch_estimates = ue->pdsch_vars[gNB_id]->dl_ch_estimates;
+  int **rxdataF=ue->common_vars.rxdataF;
 
   ch_offset     = ue->frame_parms.ofdm_symbol_size*symbol;
 
@@ -725,8 +1270,15 @@ int nr_pdsch_channel_estimation(PHY_VARS_NR_UE *ue,
   int re_offset = k;
 
 #ifdef DEBUG_PDSCH
-  printf("PDSCH Channel Estimation : ThreadId %d, gNB_id %d ch_offset %d, symbol_offset %d OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n",proc->thread_id, gNB_id,ch_offset,symbol_offset,ue->frame_parms.ofdm_symbol_size,
-         ue->frame_parms.Ncp,Ns,k, symbol);
+  printf("PDSCH Channel Estimation : gNB_id %d ch_offset %d, symbol_offset %d OFDM size %d, Ncp=%d, Ns=%d, k=%d symbol %d\n",
+         gNB_id,
+         ch_offset,
+         symbol_offset,
+         ue->frame_parms.ofdm_symbol_size,
+         ue->frame_parms.Ncp,
+         Ns,
+         k,
+         symbol);
 #endif
 
   // generate pilot for gNB port number 1000+p
diff --git a/openair1/PHY/NR_UE_ESTIMATION/nr_estimation.h b/openair1/PHY/NR_UE_ESTIMATION/nr_estimation.h
index 52b3af9a96519ee5ccd181e8e1b152c1b0b523d4..f01a2af60618642c23a308683d82d75754374abb 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/nr_estimation.h
+++ b/openair1/PHY/NR_UE_ESTIMATION/nr_estimation.h
@@ -32,6 +32,17 @@
 /*!\brief Timing drift hysterisis in samples*/
 #define SYNCH_HYST 2
 
+/* A function to perform the channel estimation of DL PRS signal */
+int nr_prs_channel_estimation(uint8_t gNB_id,
+                              uint8_t rsc_id,
+                              uint8_t rep_num,
+                              PHY_VARS_NR_UE *ue,
+                              UE_nr_rxtx_proc_t *proc,
+                              NR_DL_FRAME_PARMS *frame_params);
+
+/* Generic function to find the peak of channel estimation buffer */
+void peak_estimator(int32_t *buffer, int32_t buf_len, int32_t *peak_idx, int32_t *peak_val);
+
 /*!
 \brief This function performs channel estimation including frequency and temporal interpolation
 \param ue Pointer to UE PHY variables
@@ -39,16 +50,16 @@
 \param Ns slot number (0..19)
 \param symbol symbol within slot
 */
-int nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
-                                UE_nr_rxtx_proc_t *proc,
-                                uint8_t gNB_id,
-                                unsigned char Ns,
-                                unsigned char symbol,
-                                unsigned short scrambling_id,
-                                unsigned short coreset_start_subcarrier,
-                                unsigned short nb_rb_coreset,
-                                int32_t pdcch_est_size,
-                                int32_t pdcch_dl_ch_estimates[][pdcch_est_size]);
+void nr_pdcch_channel_estimation(PHY_VARS_NR_UE *ue,
+                                 UE_nr_rxtx_proc_t *proc,
+                                 uint8_t gNB_id,
+                                 unsigned char Ns,
+                                 unsigned char symbol,
+                                 fapi_nr_coreset_t *coreset,
+                                 uint16_t first_carrier_offset,
+                                 uint16_t BWPStart,
+                                 int32_t pdcch_est_size,
+                                 int32_t pdcch_dl_ch_estimates[][pdcch_est_size]);
 
 int nr_pbch_dmrs_correlation(PHY_VARS_NR_UE *ue,
                              UE_nr_rxtx_proc_t *proc,
diff --git a/openair1/PHY/NR_UE_ESTIMATION/nr_ue_measurements.c b/openair1/PHY/NR_UE_ESTIMATION/nr_ue_measurements.c
index bf7b93b180bb8fb4478b3353071c0152a5bfb728..b0756282a3dadf8877a66f2fb71f6d26e286459f 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/nr_ue_measurements.c
+++ b/openair1/PHY/NR_UE_ESTIMATION/nr_ue_measurements.c
@@ -117,7 +117,7 @@ void nr_ue_measurements(PHY_VARS_NR_UE *ue,
   int aarx, aatx, gNB_id = 0;
   NR_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
   int ch_offset = frame_parms->ofdm_symbol_size*2;
-  NR_UE_DLSCH_t *dlsch = ue->dlsch[proc->thread_id][gNB_id][0];
+  NR_UE_DLSCH_t *dlsch = ue->dlsch[gNB_id][0];
   uint8_t harq_pid = dlsch->current_harq_pid;
   int N_RB_DL = dlsch->harq_processes[harq_pid]->nb_rb;
 
@@ -134,7 +134,7 @@ void nr_ue_measurements(PHY_VARS_NR_UE *ue,
 
       for (aatx = 0; aatx < frame_parms->nb_antenna_ports_gNB; aatx++){
 
-        ue->measurements.rx_spatial_power[gNB_id][aatx][aarx] = (signal_energy_nodc(&ue->pdsch_vars[proc->thread_id][0]->dl_ch_estimates[gNB_id][ch_offset], N_RB_DL*NR_NB_SC_PER_RB));
+        ue->measurements.rx_spatial_power[gNB_id][aatx][aarx] = (signal_energy_nodc(&ue->pdsch_vars[0]->dl_ch_estimates[gNB_id][ch_offset], N_RB_DL*NR_NB_SC_PER_RB));
 
         if (ue->measurements.rx_spatial_power[gNB_id][aatx][aarx]<0)
           ue->measurements.rx_spatial_power[gNB_id][aatx][aarx] = 0;
@@ -229,7 +229,7 @@ void nr_ue_ssb_rsrp_measurements(PHY_VARS_NR_UE *ue,
 
   for (int aarx = 0; aarx < ue->frame_parms.nb_antennas_rx; aarx++) {
 
-    int16_t *rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aarx][(l_sss*ue->frame_parms.ofdm_symbol_size) + ssb_offset];
+    int16_t *rxF_sss = (int16_t *)&ue->common_vars.rxdataF[aarx][(l_sss*ue->frame_parms.ofdm_symbol_size) + ssb_offset];
 
     for(int k = k_start; k < k_end; k++){
 
@@ -283,7 +283,7 @@ void nr_ue_rrc_measurements(PHY_VARS_NR_UE *ue,
 
     nb_nulls = 0;
     ue->measurements.n0_power[aarx] = 0;
-    rxF_sss = (int16_t *)&ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aarx][(l_sss*ue->frame_parms.ofdm_symbol_size) + ssb_offset];
+    rxF_sss = (int16_t *)&ue->common_vars.rxdataF[aarx][(l_sss*ue->frame_parms.ofdm_symbol_size) + ssb_offset];
 
     //-ve spectrum from SSS
     for(k = k_left; k < k_left + k_length; k++){
diff --git a/openair1/PHY/NR_UE_ESTIMATION/plot_prs_Ttracer_dumps.m b/openair1/PHY/NR_UE_ESTIMATION/plot_prs_Ttracer_dumps.m
new file mode 100644
index 0000000000000000000000000000000000000000..e552a81338fdc896ade8247fe38e8484b1cefd9d
--- /dev/null
+++ b/openair1/PHY/NR_UE_ESTIMATION/plot_prs_Ttracer_dumps.m
@@ -0,0 +1,63 @@
+% Plot the PRS channel estimates stored in T tracer dump files.
+% For each gNB index i and PRS resource index j the script reads
+%   <dump_dir>/chT_gnb<i>_<j>.raw  (plotted as channel impulse response)
+%   <dump_dir>/chF_gnb<i>_<j>.raw  (plotted as channel frequency response)
+% as int16 samples, pairs them up as (real, imag) and plots the magnitude.
+clc; clear all;
+% input() evaluates what is typed, so the path must be entered in quotes.
+dump_dir       = input('Enter the directory path to T tracer dumps: ');
+fft_size       = input('Enter the OFDM FFT size used for file parsing: ');
+num_resources  = input('Enter number of PRS resources: ');
+num_gnb        = input('Enter number of active gNBs: ');
+num_prs_symb   = 1;   % number of fft_size-long symbols taken from each file
+start_resource = 0;   % index of the first fft_size-long symbol to take
+buff_offset    = start_resource*2*fft_size;   % 2 int16 values per complex sample
+% Last int16 index of the window; it has to include buff_offset, otherwise
+% the range is wrong (or empty) as soon as start_resource is non-zero.
+buff_end       = buff_offset + num_prs_symb*2*fft_size;
+
+%% Channel Impulse Response(CIR)
+figure, hold on;
+for i=0:num_gnb-1
+  for j=0:num_resources-1
+    file = [dump_dir '/chT_gnb', num2str(i), '_', num2str(j), '.raw'];
+    fid = fopen(file, 'r');
+    if (fid > 0)
+        x = fread(fid, Inf, 'int16');
+    else
+        disp(['Failed to open the file ', file, '..!!'])
+        return;
+    end
+    fclose(fid);
+
+    % Odd entries are the real parts, even entries the imaginary parts.
+    y = x(buff_offset+1:2:buff_end) + 1j*x(buff_offset+2:2:buff_end);
+    plot(abs(fftshift(y)));
+  end
+end
+xlabel('FFT Index'); ylabel('ABS');
+title('CHANNEL IMPULSE RESPONSE');
+hold off;
+
+%% Channel Frequency Response(CFR)
+figure, hold on;
+for i=0:num_gnb-1
+  for j=0:num_resources-1
+    file = [dump_dir '/chF_gnb', num2str(i), '_', num2str(j), '.raw'];
+    fid = fopen(file, 'r');
+    if (fid > 0)
+        x = fread(fid, Inf, 'int16');
+    else
+        disp(['Failed to open the file ', file, '..!!'])
+        return;
+    end
+    fclose(fid);
+
+    % Odd entries are the real parts, even entries the imaginary parts.
+    y = x(buff_offset+1:2:buff_end) + 1j*x(buff_offset+2:2:buff_end);
+    plot(abs(y));
+  end
+end
+xlabel('FFT Index'); ylabel('ABS');
+title('CHANNEL FREQUENCY RESPONSE');
+hold off;
\ No newline at end of file
diff --git a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
index ae8b733301a5cce8b7a9315e4e1506772315c30b..64222cab9a34dd7e71b473a7c7987f48a23e765e 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c
@@ -192,7 +192,7 @@ int nr_get_csi_rs_signal(const PHY_VARS_NR_UE *ue,
                          uint32_t *rsrp,
                          int *rsrp_dBm) {
 
-  int32_t **rxdataF  =  ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+  int32_t **rxdataF  =  ue->common_vars.rxdataF;
   const NR_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
   uint16_t meas_count = 0;
   uint32_t rsrp_sum = 0;
@@ -731,7 +731,7 @@ int nr_csi_im_power_estimation(const PHY_VARS_NR_UE *ue,
                                const fapi_nr_dl_config_csiim_pdu_rel15_t *csiim_config_pdu,
                                uint32_t *interference_plus_noise_power) {
 
-  int32_t **rxdataF = ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+  int32_t **rxdataF = ue->common_vars.rxdataF;
   const NR_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
 
   const uint16_t end_rb = csiim_config_pdu->start_rb + csiim_config_pdu->nr_of_rbs > csiim_config_pdu->bwp_size ?
diff --git a/openair1/PHY/NR_UE_TRANSPORT/dci_nr.c b/openair1/PHY/NR_UE_TRANSPORT/dci_nr.c
index 68a0ec56d8fb5ef0b78f3d2f8c1f49960218a4aa..956a910decfb61f53108927d32f3fe30edcacced 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/dci_nr.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/dci_nr.c
@@ -148,7 +148,8 @@ static void nr_pdcch_demapping_deinterleaving(uint32_t *llr,
 
   // for each bundle
   for (int nb = 0; nb < max_bundles; nb++) {
-    if (coreset_interleaved == 0) f_bundle_j = nb;
+    if (coreset_interleaved == 0)
+      f_bundle_j = nb;
     else {
       if (r == coreset_interleaver_size_R) {
         r = 0;
@@ -283,7 +284,7 @@ void nr_pdcch_channel_level(int32_t rx_size,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128;
   __m128i avg128P;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *dl_ch128;
   int32x4_t *avg128P;
 #endif
@@ -293,7 +294,7 @@ void nr_pdcch_channel_level(int32_t rx_size,
 #if defined(__x86_64__) || defined(__i386__)
     avg128P = _mm_setzero_si128();
     dl_ch128=(__m128i *)&dl_ch_estimates_ext[aarx][symbol*nb_rb*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     dl_ch128=(int16x8_t *)&dl_ch_estimates_ext[aarx][symbol*nb_rb*12];
 #endif
 
@@ -302,7 +303,7 @@ void nr_pdcch_channel_level(int32_t rx_size,
       avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[0],dl_ch128[0]));
       avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[1],dl_ch128[1]));
       avg128P = _mm_add_epi32(avg128P,_mm_madd_epi16(dl_ch128[2],dl_ch128[2]));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
       //      for (int i=0;i<24;i+=2) printf("pdcch channel re %d (%d,%d)\n",(rb*12)+(i>>1),((int16_t*)dl_ch128)[i],((int16_t*)dl_ch128)[i+1]);
       dl_ch128+=3;
@@ -331,7 +332,7 @@ void nr_pdcch_channel_level(int32_t rx_size,
 
 #if defined(__x86_64) || defined(__i386__)
   __m128i mmtmpPD0,mmtmpPD1,mmtmpPD2,mmtmpPD3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -529,12 +530,12 @@ void nr_pdcch_channel_compensation(int32_t rx_size, int32_t rxdataF_ext[][rx_siz
   uint8_t aarx;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
 #endif
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *dl_ch128,*rxdataF128,*rxdataF_comp128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #endif
 
   for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
@@ -545,7 +546,7 @@ void nr_pdcch_channel_compensation(int32_t rx_size, int32_t rxdataF_ext[][rx_siz
     //printf("ch compensation dl_ch ext addr %p \n", &dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*20*12]);
     //printf("rxdataf ext addr %p symbol %d\n", &rxdataF_ext[aarx][symbol*20*12], symbol);
     //printf("rxdataf_comp addr %p\n",&rxdataF_comp[(aatx<<1)+aarx][symbol*20*12]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     // to be filled in
 #endif
 
@@ -616,7 +617,7 @@ void nr_pdcch_channel_compensation(int32_t rx_size, int32_t rxdataF_ext[][rx_siz
       dl_ch128+=3;
       rxdataF128+=3;
       rxdataF_comp128+=3;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       // to be filled in
 #endif
     }
@@ -635,7 +636,7 @@ void nr_pdcch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
                          uint8_t symbol) {
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1;
 #endif
   int32_t i;
@@ -644,7 +645,7 @@ void nr_pdcch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
     rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
 #endif
@@ -653,7 +654,7 @@ void nr_pdcch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
     for (i=0; i<frame_parms->N_RB_DL*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
 #endif
     }
@@ -698,7 +699,7 @@ int32_t nr_rx_pdcch(PHY_VARS_NR_UE *ue,
   for (int s=rel15->coreset.StartSymbolIndex; s<(rel15->coreset.StartSymbolIndex+rel15->coreset.duration); s++) {
     LOG_D(PHY,"in nr_pdcch_extract_rbs_single(rxdataF -> rxdataF_ext || dl_ch_estimates -> dl_ch_estimates_ext)\n");
 
-    nr_pdcch_extract_rbs_single(common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF,
+    nr_pdcch_extract_rbs_single(common_vars->rxdataF,
                                 pdcch_est_size,
                                 pdcch_dl_ch_estimates,
                                 rx_size,
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
index 0087a307ef3eaad0f4bf2197ff8e2fca7db671fe..e858e8d5644b0fe71039832864f135353a118315 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -251,6 +251,12 @@ void nr_processDLSegment(void* arg) {
   short* dlsch_llr = rdata->dlsch_llr;
   rdata->decodeIterations = dlsch->max_ldpc_iterations + 1;
   int8_t llrProcBuf[OAI_UL_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
+  p_decoderParms->R = nr_get_R_ldpc_decoder(rdata->rv_index,
+                                            E,
+                                            p_decoderParms->BG,
+                                            p_decoderParms->Z,
+                                            &harq_process->llrLen,
+                                            harq_process->DLround);
 
   int16_t  z [68*384 + 16] __attribute__ ((aligned(16)));
   int8_t   l [68*384 + 16] __attribute__ ((aligned(16)));
@@ -476,25 +482,9 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
   if ((A <=292) || ((A <= NR_MAX_PDSCH_TBS) && (Coderate <= 0.6667)) || Coderate <= 0.25) {
     p_decParams->BG = 2;
     kc = 52;
-
-    if (Coderate < 0.3333) {
-      p_decParams->R = 15;
-    } else if (Coderate <0.6667) {
-      p_decParams->R = 13;
-    } else {
-      p_decParams->R = 23;
-    }
   } else {
     p_decParams->BG = 1;
     kc = 68;
-
-    if (Coderate < 0.6667) {
-      p_decParams->R = 13;
-    } else if (Coderate <0.8889) {
-      p_decParams->R = 23;
-    } else {
-      p_decParams->R = 89;
-    }
   }
 
   if (harq_process->first_rx == 1) {
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
index 93bbfd1737d3c44749bb250fa671c6b6fe97474d..75e3717c36df0f4900725117c2c664da770c60d5 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c
@@ -177,22 +177,22 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
 
   switch (type) {
   case SI_PDSCH:
-    pdsch_vars = ue->pdsch_vars[proc->thread_id];
+    pdsch_vars = ue->pdsch_vars;
     dlsch = &ue->dlsch_SI[gNB_id];
     dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
 
     break;
 
   case RA_PDSCH:
-    pdsch_vars = ue->pdsch_vars[proc->thread_id];
+    pdsch_vars = ue->pdsch_vars;
     dlsch = &ue->dlsch_ra[gNB_id];
     dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
 
     break;
 
   case PDSCH:
-    pdsch_vars = ue->pdsch_vars[proc->thread_id];
-    dlsch = ue->dlsch[proc->thread_id][gNB_id];
+    pdsch_vars = ue->pdsch_vars;
+    dlsch = ue->dlsch[gNB_id];
     dlsch0_harq = dlsch[0]->harq_processes[harq_pid];
     if (NR_MAX_NB_LAYERS>4)
       dlsch1_harq = dlsch[1]->harq_processes[harq_pid];
@@ -306,8 +306,8 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
   //----------------------------------------------------------
   //--------------------- RBs extraction ---------------------
   //----------------------------------------------------------
-  start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
-  nr_dlsch_extract_rbs(common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF,
+  start_meas(&ue->generic_stat_bis[slot]);
+  nr_dlsch_extract_rbs(common_vars->rxdataF,
                        pdsch_vars[gNB_id]->dl_ch_estimates,
                        pdsch_vars[gNB_id]->rxdataF_ext,
                        pdsch_vars[gNB_id]->dl_ch_estimates_ext,
@@ -321,10 +321,10 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
                        frame_parms,
                        dlsch0_harq->dlDmrsSymbPos,
                        ue->chest_time);
-  stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  stop_meas(&ue->generic_stat_bis[slot]);
   if (cpumeas(CPUMEAS_GETSTATE))
     LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d type %d: Pilot/Data extraction %5.2f \n",
-	  frame,nr_slot_rx,slot,symbol,type,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
+	  frame,nr_slot_rx,slot,symbol,type,ue->generic_stat_bis[slot].p_time/(cpuf*1000.0));
 
   int nl = dlsch0_harq->Nl;
   int n_rx = frame_parms->nb_antennas_rx;
@@ -332,7 +332,7 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
   //----------------------------------------------------------
   //--------------------- Channel Scaling --------------------
   //----------------------------------------------------------
-  start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  start_meas(&ue->generic_stat_bis[slot]);
   nr_dlsch_scale_channel(pdsch_vars[gNB_id]->dl_ch_estimates_ext,
                          frame_parms,
                          nl,
@@ -342,16 +342,16 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
                          pilots,
                          nb_re_pdsch,
                          nb_rb_pdsch);
-  stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  stop_meas(&ue->generic_stat_bis[slot]);
 
   if (cpumeas(CPUMEAS_GETSTATE))
     LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: Channel Scale  %5.2f \n",
-          frame,nr_slot_rx,slot,symbol,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
+          frame,nr_slot_rx,slot,symbol,ue->generic_stat_bis[slot].p_time/(cpuf*1000.0));
 
   //----------------------------------------------------------
   //--------------------- Channel Level Calc. ----------------
   //----------------------------------------------------------
-  start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  start_meas(&ue->generic_stat_bis[slot]);
   if (first_symbol_flag==1) {
     nr_dlsch_channel_level(pdsch_vars[gNB_id]->dl_ch_estimates_ext,
                            frame_parms,
@@ -392,7 +392,7 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
           avg[0],
           avgs);
   }
-  stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  stop_meas(&ue->generic_stat_bis[slot]);
 
 #if T_TRACER
   if (type == PDSCH)
@@ -403,13 +403,15 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
 #endif
 
   if (cpumeas(CPUMEAS_GETSTATE))
-    LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level  %5.2f \n",frame,nr_slot_rx,slot,symbol,first_symbol_flag,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
+    LOG_D(PHY,
+          "[AbsSFN %u.%d] Slot%d Symbol %d first_symbol_flag %d: Channel Level  %5.2f \n",
+          frame, nr_slot_rx, slot, symbol, first_symbol_flag, ue->generic_stat_bis[slot].p_time / (cpuf * 1000.0));
 
   //----------------------------------------------------------
   //--------------------- channel compensation ---------------
   //----------------------------------------------------------
   // Disable correlation measurement for optimizing UE
-  start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  start_meas(&ue->generic_stat_bis[slot]);
   nr_dlsch_channel_compensation(pdsch_vars[gNB_id]->rxdataF_ext,
                                 pdsch_vars[gNB_id]->dl_ch_estimates_ext,
                                 pdsch_vars[gNB_id]->dl_ch_mag0,
@@ -426,11 +428,11 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
                                 nb_rb_pdsch,
                                 pdsch_vars[gNB_id]->log2_maxh,
                                 measurements); // log2_maxh+I0_shift
-    stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+    stop_meas(&ue->generic_stat_bis[slot]);
     if (cpumeas(CPUMEAS_GETSTATE))
-      LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp  %5.2f \n", frame, nr_slot_rx, slot, symbol, pdsch_vars[gNB_id]->log2_maxh, proc->channel_level, ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
+      LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d log2_maxh %d channel_level %d: Channel Comp  %5.2f \n", frame, nr_slot_rx, slot, symbol, pdsch_vars[gNB_id]->log2_maxh, proc->channel_level, ue->generic_stat_bis[slot].p_time/(cpuf*1000.0));
 
-    start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+    start_meas(&ue->generic_stat_bis[slot]);
 
   if (n_rx > 1) {
     nr_dlsch_detection_mrc(pdsch_vars[gNB_id]->rxdataF_comp0,
@@ -457,16 +459,16 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
                                  symbol,
                                  nb_re_pdsch);
   }
-  stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  stop_meas(&ue->generic_stat_bis[slot]);
 
   //printf("start compute LLR\n");
   rxdataF_comp_ptr = pdsch_vars[gNB_id_i]->rxdataF_comp0;
   dl_ch_mag_ptr = pdsch_vars[gNB_id_i]->dl_ch_mag0;
-  
+
   if (cpumeas(CPUMEAS_GETSTATE))
-    LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: Channel Combine and zero forcing %5.2f \n",frame,nr_slot_rx,slot,symbol,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
+    LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: Channel Combine and zero forcing %5.2f \n", frame, nr_slot_rx, slot, symbol, ue->generic_stat_bis[slot].p_time / (cpuf * 1000.0));
 
-  start_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  start_meas(&ue->generic_stat_bis[slot]);
   /* Store the valid DL RE's */
   pdsch_vars[gNB_id]->dl_valid_re[symbol-1] = nb_re_pdsch;
 
@@ -542,17 +544,17 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
                              pdsch_vars[gNB_id]->layer_llr);    
   }
   
-  stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]);
+  stop_meas(&ue->generic_stat_bis[slot]);
   if (cpumeas(CPUMEAS_GETSTATE))
-    LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: LLR Computation  %5.2f \n",frame,nr_slot_rx,slot,symbol,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0));
-  
+    LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: LLR Computation  %5.2f \n", frame, nr_slot_rx, slot, symbol, ue->generic_stat_bis[slot].p_time / (cpuf * 1000.0));
+
   // Please keep it: useful for debugging
 #ifdef DEBUG_PDSCH_RX
   char filename[50];
   uint8_t aa = 0;
   
   snprintf(filename, 50, "rxdataF0_symb_%d_nr_slot_rx_%d.m", symbol, nr_slot_rx);
-  write_output(filename, "rxdataF0", &common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0][0], NR_SYMBOLS_PER_SLOT*frame_parms->ofdm_symbol_size, 1, 1);
+  write_output(filename, "rxdataF0", &common_vars->rxdataF[0][0], NR_SYMBOLS_PER_SLOT*frame_parms->ofdm_symbol_size, 1, 1);
 
   snprintf(filename, 50, "dl_ch_estimates0%d_symb_%d_nr_slot_rx_%d.m", aa, symbol, nr_slot_rx);
   write_output(filename, "dl_ch_estimates", &pdsch_vars[gNB_id]->dl_ch_estimates[aa][0], NR_SYMBOLS_PER_SLOT*frame_parms->ofdm_symbol_size, 1, 1);
@@ -635,21 +637,21 @@ void nr_dlsch_deinterleaving(uint8_t symbol,
 //==============================================================================================
 
 void nr_dlsch_channel_compensation(int **rxdataF_ext,
-                                int **dl_ch_estimates_ext,
-                                int **dl_ch_mag,
-                                int **dl_ch_magb,
-                                int **dl_ch_magr,
-                                int **rxdataF_comp,
-                                int ***rho,
-                                NR_DL_FRAME_PARMS *frame_parms,
-                                uint8_t nb_aatx,
-                                unsigned char symbol,
-                                int length,
-                                uint8_t first_symbol_flag,
-                                unsigned char mod_order,
-                                unsigned short nb_rb,
-                                unsigned char output_shift,
-                                PHY_NR_MEASUREMENTS *measurements)
+                                   int **dl_ch_estimates_ext,
+                                   int **dl_ch_mag,
+                                   int **dl_ch_magb,
+                                   int **dl_ch_magr,
+                                   int **rxdataF_comp,
+                                   int ***rho,
+                                   NR_DL_FRAME_PARMS *frame_parms,
+                                   uint8_t nb_aatx,
+                                   unsigned char symbol,
+                                   int length,
+                                   uint8_t first_symbol_flag,
+                                   unsigned char mod_order,
+                                   unsigned short nb_rb,
+                                   unsigned char output_shift,
+                                   PHY_NR_MEASUREMENTS *measurements)
 {
 
 #if defined(__i386) || defined(__x86_64)
@@ -951,7 +953,7 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   unsigned short rb;
   unsigned char aatx,aarx,symbol_mod;
@@ -1388,7 +1390,7 @@ void nr_dlsch_scale_channel(int **dl_ch_estimates_ext,
     }
   }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 }
@@ -1443,7 +1445,7 @@ void nr_dlsch_channel_level(int **dl_ch_estimates_ext,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   short rb;
   unsigned char aatx,aarx,nre=12,symbol_mod;
@@ -1553,7 +1555,7 @@ void nr_dlsch_channel_level_median(int **dl_ch_estimates_ext,
   _mm_empty();
   _m_empty();
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   short rb;
   unsigned char aatx,aarx,nre=12,symbol_mod;
@@ -1647,7 +1649,6 @@ void nr_dlsch_extract_rbs(int **rxdataF,
           } else {
             int neg_length = frame_parms->ofdm_symbol_size - start_re;
             int pos_length = nb_rb_pdsch * NR_NB_SC_PER_RB - neg_length;
-
             memcpy(rxF_ext, &rxF[start_re], neg_length * sizeof(int32_t));
             memcpy(&rxF_ext[neg_length], rxF, pos_length * sizeof(int32_t));
           }
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c
index ca15a5b5be8433cc6521819c203e2ce5cc970871..97efa049329851b2dcca5b3a3e4355f5545d62bb 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c
@@ -616,7 +616,7 @@ __m128i tmp_result4 __attribute__ ((aligned(16)));
 // calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM
 #define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq)  tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,3); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,3); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -658,7 +658,7 @@ int nr_dlsch_qpsk_llr(NR_DL_FRAME_PARMS *frame_parms,
     //*llr32 = *rxF;
     llr32->r = rxF->r >> 3;
     llr32->i = rxF->i >> 3;
-    //printf("dlsch_qpsk_llr %d : (%d,%d)\n", i, llr32->r, llr32->i);
+    LOG_D(PHY,"dlsch_qpsk_llr %d : (%d,%d)\n", i, llr32->r, llr32->i); // debug trace of the LLR pair just written (rxF->r >> 3, rxF->i >> 3); NOTE(review): appears to run once per index i -- confirm this per-sample trace is wanted in normal builds
     rxF++;
     llr32++;
   }
@@ -686,7 +686,7 @@ void nr_dlsch_16qam_llr(NR_DL_FRAME_PARMS *frame_parms,
   __m128i *ch_mag;
   __m128i llr128[2];
   uint32_t *llr32;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[(symbol*nb_rb*12)];
   int16x8_t *ch_mag;
   int16x8_t xmm0;
@@ -700,13 +700,13 @@ void nr_dlsch_16qam_llr(NR_DL_FRAME_PARMS *frame_parms,
 
 #if defined(__x86_64__) || defined(__i386__)
     llr32 = (uint32_t*)dlsch_llr;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr16 = (int16_t*)dlsch_llr;
 #endif
 
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i*)&dl_ch_mag[(symbol*nb_rb*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t*)&dl_ch_mag[(symbol*nb_rb*12)];
 #endif
 
@@ -736,7 +736,7 @@ void nr_dlsch_16qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     llr32[6] = _mm_extract_epi32(llr128[1],2); //((uint32_t *)&llr128[1])[2];
     llr32[7] = _mm_extract_epi32(llr128[1],3); //((uint32_t *)&llr128[1])[3];
     llr32+=8;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm0 = vabsq_s16(rxF[i]);
     xmm0 = vqsubq_s16(ch_mag[i],xmm0);
     // lambda_1=y_R, lambda_2=|y_R|-|h|^2, lamda_3=y_I, lambda_4=|y_I|-|h|^2
@@ -786,7 +786,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxF = (__m128i*)&rxdataF_comp[(symbol*nb_rb*12)];
   __m128i *ch_mag,*ch_magb;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxF = (int16x8_t*)&rxdataF_comp[(symbol*nb_rb*12)];
   int16x8_t *ch_mag,*ch_magb,xmm1,xmm2;
 #endif
@@ -799,7 +799,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
   ch_mag = (__m128i*)&dl_ch_mag[(symbol*nb_rb*12)];
   ch_magb = (__m128i*)&dl_ch_magb[(symbol*nb_rb*12)];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   ch_mag = (int16x8_t*)&dl_ch_mag[(symbol*nb_rb*12)];
   ch_magb = (int16x8_t*)&dl_ch_magb[(symbol*nb_rb*12)];
 #endif
@@ -824,7 +824,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     xmm1 = _mm_subs_epi16(ch_mag[i],xmm1);
     xmm2 = _mm_abs_epi16(xmm1);
     xmm2 = _mm_subs_epi16(ch_magb[i],xmm2);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     xmm1 = vabsq_s16(rxF[i]);
     xmm1 = vsubq_s16(ch_mag[i],xmm1);
     xmm2 = vabsq_s16(xmm1);
@@ -850,7 +850,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,0);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,1);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,0);
     llr2[3] = vgetq_lane_s16(xmm1,1);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,0);//((short *)&xmm2)[j];
@@ -865,7 +865,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,2);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,3);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,2);
     llr2[3] = vgetq_lane_s16(xmm1,3);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,2);//((short *)&xmm2)[j];
@@ -880,7 +880,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,4);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,5);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,4);
     llr2[3] = vgetq_lane_s16(xmm1,5);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,4);//((short *)&xmm2)[j];
@@ -894,7 +894,7 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
     llr2[3] = _mm_extract_epi16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = _mm_extract_epi16(xmm2,6);//((short *)&xmm2)[j];
     llr2[5] = _mm_extract_epi16(xmm2,7);//((short *)&xmm2)[j+1];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     llr2[2] = vgetq_lane_s16(xmm1,6);
     llr2[3] = vgetq_lane_s16(xmm1,7);//((short *)&xmm1)[j+1];
     llr2[4] = vgetq_lane_s16(xmm2,6);//((short *)&xmm2)[j];
@@ -1241,7 +1241,7 @@ void nr_qpsk_qpsk(short *stream0_in,
   __m128i *stream1_128i_in = (__m128i *)stream1_in;
   __m128i *stream0_128i_out = (__m128i *)stream0_out;
   __m128i ONE_OVER_SQRT_8 = _mm_set1_epi16(23170); //round(2^16/sqrt(8))
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rho01_128i = (int16x8_t *)rho01;
   int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
   int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
@@ -1277,7 +1277,7 @@ void nr_qpsk_qpsk(short *stream0_in,
     // divide by sqrt(8), no shift needed ONE_OVER_SQRT_8 = Q1.16
     rho_rpi = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_8);
     rho_rmi = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_8);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 
 #endif
@@ -1301,7 +1301,7 @@ void nr_qpsk_qpsk(short *stream0_in,
 
     y0r_over2  = _mm_srai_epi16(y0r,1);   // divide by 2
     y0i_over2  = _mm_srai_epi16(y0i,1);   // divide by 2
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 
 #endif
@@ -1503,7 +1503,7 @@ void nr_qpsk_qam16(int16_t *stream0_in,
   __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15)
   __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15)
   __m128i ch_mag_int __attribute__((aligned(16)));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rho01_128i = (int16x8_t *)rho01;
   int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in;
   int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in;
@@ -1678,7 +1678,7 @@ void nr_qpsk_qam16(int16_t *stream0_in,
     if (i<((length>>1) - 1)) // false if only 2 REs remain
       stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -1791,7 +1791,7 @@ void nr_qpsk_qam64(short *stream0_in,
   __m128i ch_mag_int_with_sigma2;
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -1976,7 +1976,7 @@ void nr_qpsk_qam64(short *stream0_in,
     if (i<((length>>1) - 1)) // false if only 2 REs remain
       stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2052,7 +2052,7 @@ void nr_qam16_qpsk(short *stream0_in,
   __m128i ch_mag_over_10;
   __m128i ch_mag_over_2;
   __m128i ch_mag_9_over_10;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -2443,7 +2443,7 @@ void nr_qam16_qpsk(short *stream0_in,
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -2561,7 +2561,7 @@ void nr_qam16_qam16(short *stream0_in,
   __m128i ch_mag_over_10;
   __m128i ch_mag_over_2;
   __m128i ch_mag_9_over_10;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -2995,7 +2995,7 @@ void nr_qam16_qam16(short *stream0_in,
     stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -3112,7 +3112,7 @@ void nr_qam16_qam64(int16_t *stream0_in,
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   int i;
@@ -3614,7 +3614,7 @@ void nr_qam16_qam64(int16_t *stream0_in,
     stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2);
     stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3);
     stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -3795,7 +3795,7 @@ void nr_qam64_qpsk(int16_t *stream0_in,
   __m128i  y0i_three_over_sqrt_21;
   __m128i  y0i_five_over_sqrt_21;
   __m128i  y0i_seven_over_sqrt_21;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -5180,7 +5180,7 @@ void nr_qam64_qpsk(int16_t *stream0_in,
     stream0_out[j + 45] = ((short *)&y0i)[7];
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -5320,7 +5320,7 @@ void nr_qam64_qam16(short *stream0_in,
   __m128i  y0i_five_over_sqrt_21;
   __m128i  y0i_seven_over_sqrt_21;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   int i,j;
@@ -6720,7 +6720,7 @@ void nr_qam64_qam16(short *stream0_in,
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
   }
@@ -6867,7 +6867,7 @@ void qam64_qam64(short *stream0_in,
   __m128i ch_mag_int_with_sigma2;
   __m128i two_ch_mag_int_with_sigma2;
   __m128i three_ch_mag_int_with_sigma2;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -8531,7 +8531,7 @@ void qam64_qam64(short *stream0_in,
     stream0_out[j + 46] = ((short *)&y1i)[7];
     stream0_out[j + 47] = ((short *)&y2i)[7];
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 #endif
 
@@ -8593,8 +8593,6 @@ int nr_dlsch_64qam_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
              llr16,
              pllr_symbol);*/
 
-#ifdef __AVX2__
-
   // Round length up to multiple of 16 words
   uint32_t len256i = ((len+16)>>4)*16;
   int32_t *rxF_256i      = (int32_t*) malloc16_clear(len256i*4);
@@ -8633,16 +8631,6 @@ int nr_dlsch_64qam_64qam_llr(NR_DL_FRAME_PARMS *frame_parms,
   free16(ch_mag_i_256i, sizeof(ch_mag_i_256i));
   free16(rho_256i, sizeof(rho_256i));
 
-#else
-  qam64_qam64((short *)rxF,
-              (short *)rxF_i,
-              (short *)ch_mag,
-              (short *)ch_mag_i,
-              (short *)llr16,
-              (short *)rho,
-              len);
-#endif
-
   llr16 += (6*len);
   //*llr16p = (short *)llr16;
 
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c b/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c
index aa2d6171129db8bd779313fc1bec637701cda737..efaf386fe1e7cfe53efa5981073a072503cee33a 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c
@@ -298,7 +298,8 @@ int nr_initial_sync(UE_nr_rxtx_proc_t *proc,
                               proc,
                               i,
                               0,
-                              is*fp->samples_per_frame+ue->ssb_offset);
+                              is*fp->samples_per_frame+ue->ssb_offset,
+                              false);
 
 #ifdef DEBUG_INITIAL_SYNCH
       LOG_I(PHY,"Calling sss detection (normal CP)\n");
@@ -339,6 +340,7 @@ int nr_initial_sync(UE_nr_rxtx_proc_t *proc,
       }
 
       if (ret == 0) {
+
         // sync at symbol ue->symbol_offset
         // computing the offset wrt the beginning of the frame
         int mu = fp->numerology_index;
@@ -529,36 +531,33 @@ int nr_initial_sync(UE_nr_rxtx_proc_t *proc,
   if (sa==1 && ret==0) {
     bool dec = false;
     int gnb_id = 0; //FIXME
-    int coreset_nb_rb=0;
-    int coreset_start_rb=0;
 
     // Hold the channel estimates in frequency domain.
     int32_t pdcch_est_size = ((((fp->symbols_per_slot*(fp->ofdm_symbol_size+LTE_CE_FILTER_LENGTH))+15)/16)*16);
     __attribute__ ((aligned(16))) int32_t pdcch_dl_ch_estimates[4*fp->nb_antennas_rx][pdcch_est_size];
 
-
     for(int n_ss = 0; n_ss<phy_pdcch_config.nb_search_space; n_ss++) {
+      proc->nr_slot_rx = phy_pdcch_config.slot; // store the PDCCH slot in proc before the per-symbol processing of this search space
       uint8_t nb_symb_pdcch = phy_pdcch_config.pdcch_config[n_ss].coreset.duration;
       int start_symb = phy_pdcch_config.pdcch_config[n_ss].coreset.StartSymbolIndex;
-      get_coreset_rballoc(phy_pdcch_config.pdcch_config[n_ss].coreset.frequency_domain_resource,&coreset_nb_rb,&coreset_start_rb);
       for (uint16_t l=start_symb; l<start_symb+nb_symb_pdcch; l++) {
         nr_slot_fep_init_sync(ue,
                               proc,
                               l, // the UE PHY has no notion of the symbols to be monitored in the search space
                               phy_pdcch_config.slot,
-                              is*fp->samples_per_frame+phy_pdcch_config.sfn*fp->samples_per_frame+ue->rx_offset);
-
-        if (coreset_nb_rb > 0)
-          nr_pdcch_channel_estimation(ue,
-                                      proc,
-                                      0,
-                                      phy_pdcch_config.slot,
-                                      l,
-                                      fp->Nid_cell,
-                                      fp->first_carrier_offset+(phy_pdcch_config.pdcch_config[n_ss].BWPStart + coreset_start_rb)*12,
-                                      coreset_nb_rb,
-                                      pdcch_est_size,
-                                      pdcch_dl_ch_estimates);
+                              is*fp->samples_per_frame+phy_pdcch_config.sfn*fp->samples_per_frame+ue->rx_offset,
+                              true);
+
+        nr_pdcch_channel_estimation(ue,
+                                    proc,
+                                    0,
+                                    phy_pdcch_config.slot,
+                                    l,
+                                    &phy_pdcch_config.pdcch_config[n_ss].coreset,
+                                    fp->first_carrier_offset,
+                                    phy_pdcch_config.pdcch_config[n_ss].BWPStart,
+                                    pdcch_est_size,
+                                    pdcch_dl_ch_estimates);
 
       }
       int  dci_cnt = nr_ue_pdcch_procedures(gnb_id, ue, proc, pdcch_est_size, pdcch_dl_ch_estimates, &phy_pdcch_config, n_ss);
@@ -575,7 +574,8 @@ int nr_initial_sync(UE_nr_rxtx_proc_t *proc,
                                   proc,
                                   m,
                                   phy_pdcch_config.slot,  // same slot and offset as pdcch
-                                  is*fp->samples_per_frame+phy_pdcch_config.sfn*fp->samples_per_frame+ue->rx_offset);
+                                  is*fp->samples_per_frame+phy_pdcch_config.sfn*fp->samples_per_frame+ue->rx_offset,
+                                  true);
           }
 
           int ret = nr_ue_pdsch_procedures(ue,
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c b/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c
index d7b1c93e1d78eabe56009b2f0016a00b32bfb9ba..d26aaca3330b100bc16aed07959737642753f2d0 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c
@@ -197,7 +197,7 @@ int nr_pbch_channel_level(struct complex16 dl_ch_estimates_ext[][PBCH_MAX_RE_PER
 #if defined(__x86_64__) || defined(__i386__)
   __m128i avg128;
   __m128i *dl_ch128;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t avg128;
   int16x8_t *dl_ch128;
 #endif
@@ -208,7 +208,7 @@ int nr_pbch_channel_level(struct complex16 dl_ch_estimates_ext[][PBCH_MAX_RE_PER
 #if defined(__x86_64__) || defined(__i386__)
     avg128 = _mm_setzero_si128();
     dl_ch128=(__m128i *)dl_ch_estimates_ext[aarx];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     avg128 = vdupq_n_s32(0);
     dl_ch128=(int16x8_t *)dl_ch_estimates_ext[aarx];
 #endif
@@ -218,7 +218,7 @@ int nr_pbch_channel_level(struct complex16 dl_ch_estimates_ext[][PBCH_MAX_RE_PER
       avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[0],dl_ch128[0]));
       avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[1],dl_ch128[1]));
       avg128 = _mm_add_epi32(avg128,_mm_madd_epi16(dl_ch128[2],dl_ch128[2]));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       abort();
       // to be filled in
 #endif
@@ -271,7 +271,7 @@ void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
   int i, nb_rb=6;
 #if defined(__x86_64__) || defined(__i386__)
   __m128i *rxdataF_comp128_0,*rxdataF_comp128_1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *rxdataF_comp128_0,*rxdataF_comp128_1;
 #endif
   symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
@@ -280,7 +280,7 @@ void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
 #if defined(__x86_64__) || defined(__i386__)
     rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[0][symbol_mod*6*12];
     rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[1][symbol_mod*6*12];
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol_mod*6*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol_mod*6*12];
 #endif
@@ -289,7 +289,7 @@ void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
     for (i=0; i<nb_rb*3; i++) {
 #if defined(__x86_64__) || defined(__i386__)
       rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
       rxdataF_comp128_0[i] = vhaddq_s16(rxdataF_comp128_0[i],rxdataF_comp128_1[i]);
 #endif
     }
@@ -422,9 +422,9 @@ int nr_rx_pbch( PHY_VARS_NR_UE *ue,
     symbol_offset=0;
 
 #ifdef DEBUG_PBCH
-  //printf("address dataf %p",nr_ue_common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF);
+  //printf("address dataf %p",nr_ue_common_vars->rxdataF);
   write_output("rxdataF0_pbch.m","rxF0pbch",
-               &nr_ue_common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0][(symbol_offset+1)*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size*3,1,1);
+               &nr_ue_common_vars->rxdataF[0][(symbol_offset+1)*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size*3,1,1);
 #endif
   // symbol refers to symbol within SSB. symbol_offset is the offset of the SSB wrt start of slot
   double log2_maxh;
@@ -434,7 +434,7 @@ int nr_rx_pbch( PHY_VARS_NR_UE *ue,
     __attribute__ ((aligned(32))) struct complex16 rxdataF_ext[frame_parms->nb_antennas_rx][PBCH_MAX_RE_PER_SYMBOL];
     __attribute__ ((aligned(32))) struct complex16 dl_ch_estimates_ext[frame_parms->nb_antennas_rx][PBCH_MAX_RE_PER_SYMBOL];
     memset(dl_ch_estimates_ext,0, sizeof  dl_ch_estimates_ext);
-    nr_pbch_extract(nr_ue_common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF,
+    nr_pbch_extract(nr_ue_common_vars->rxdataF,
                     estimateSz,
                     dl_ch_estimates,
                     rxdataF_ext,
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_prach.c b/openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
index 8437b8f8f3782eda710138cffe054c74ea02a40f..f56f01f764daefbb826d8cf5d67eab2a63a39c9a 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_prach.c
@@ -67,7 +67,8 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, int frame, uint8_t
   uint8_t Mod_id, fd_occasion, preamble_index, restricted_set, not_found;
   uint16_t rootSequenceIndex, prach_fmt_id, NCS, *prach_root_sequence_map, preamble_offset = 0;
   uint16_t preamble_shift = 0, preamble_index0, n_shift_ra, n_shift_ra_bar, d_start=INT16_MAX, numshift, N_ZC, u, offset, offset2, first_nonzero_root_idx;
-  int16_t prach_tmp[(4688+4*24576)*4*2] __attribute__((aligned(32)));
+  int16_t prach_tmp[(4688+4*24576)*4*2] __attribute__((aligned(32))) = {0}; // 32-byte aligned local scratch buffer, zero-filled on every call; `prach` is pointed at it below
+  int16_t prachF_tmp[(4688+4*24576)*4*2] __attribute__((aligned(32))) = {0}; // 32-byte aligned local buffer, zero-filled on every call; `prachF` is pointed at it below. NOTE(review): together with prach_tmp this puts two (4688+4*24576)*4*2-element int16_t arrays in automatic storage -- confirm the calling thread's stack is large enough
 
   int16_t Ncp = 0, amp, *prach, *prach2, *prachF, *Xu;
   int32_t Xu_re, Xu_im;
@@ -79,8 +80,8 @@ int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, int frame, uint8_t
   dftlen                  = 0;
   first_nonzero_root_idx  = 0;
   prach                   = prach_tmp;
-  prachF                  = ue->prach_vars[gNB_id]->prachF;
   amp                     = ue->prach_vars[gNB_id]->amp;
+  prachF                  = prachF_tmp;
   Mod_id                  = ue->Mod_id;
   prach_sequence_length   = nrUE_config->prach_config.prach_sequence_length;
   N_ZC                    = (prach_sequence_length == 0) ? 839:139;
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
index 82b56754ec14cf2ba426465c1010b4382b7e2a87..d0ecc50fe13a1af15888008a769a16ed22c5a30a 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
@@ -1009,7 +1009,6 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
                                unsigned char harq_pid,
                                uint32_t frame,
                                uint8_t slot,
-                               uint8_t thread_id,
                                int gNB_id);
 
 
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
index 96f31a1624be335f01edb38492edc5ad05dd34d5..dc9ca67de4c90393a1cb260671db79cd7dc09d04 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
@@ -259,6 +259,9 @@ typedef struct {
   uint16_t dlDmrsScramblingId;
   /// PDU BITMAP 
   uint16_t pduBitmap;
+  /// Last index of LLR buffer that contains information.
+  /// Used for computing LDPC decoder R
+  int llrLen;
 } NR_DL_UE_HARQ_t;
 
 typedef struct {
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
index 2e8344acfe6e1d7eb1cbe06a0d8a9088137cff64..fad9019835edd26fc34b0519e353ae338e3cce57 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
@@ -110,7 +110,6 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
                             unsigned char harq_pid,
                             uint32_t frame,
                             uint8_t slot,
-                            uint8_t thread_id,
                             int gNB_id) {
 
   LOG_D(PHY,"nr_ue_ulsch_procedures hard_id %d %d.%d\n",harq_pid,frame,slot);
@@ -127,7 +126,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
   int      N_PRB_oh = 0; // higher layer (RRC) parameter xOverhead in PUSCH-ServingCellConfig
   uint16_t number_dmrs_symbols = 0;
 
-  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[thread_id][gNB_id];
+  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[gNB_id];
   NR_UL_UE_HARQ_t *harq_process_ul_ue = ulsch_ue->harq_processes[harq_pid];
   nfapi_nr_ue_pusch_pdu_t *pusch_pdu = &harq_process_ul_ue->pusch_pdu;
 
@@ -573,7 +572,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
   }// port loop
 
   NR_UL_UE_HARQ_t *harq_process_ulsch=NULL;
-  harq_process_ulsch = UE->ulsch[thread_id][gNB_id]->harq_processes[harq_pid];
+  harq_process_ulsch = UE->ulsch[gNB_id]->harq_processes[harq_pid];
   harq_process_ulsch->status = SCH_IDLE;
 
   for (int nl = 0; nl < Nl; nl++) {
diff --git a/openair1/PHY/NR_UE_TRANSPORT/pss_nr.c b/openair1/PHY/NR_UE_TRANSPORT/pss_nr.c
index d2034f3abf45d567d1915be0ce40736227bd613f..e73671823704f40afabc7a18c42c59b212099f08 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/pss_nr.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/pss_nr.c
@@ -600,24 +600,6 @@ int pss_synchro_nr(PHY_VARS_NR_UE *PHY_vars_UE, int is, int rate_change)
 }
 
 
-static inline int abs32(int x)
-{
-  return (((int)((short*)&x)[0])*((int)((short*)&x)[0]) + ((int)((short*)&x)[1])*((int)((short*)&x)[1]));
-}
-
-static inline int64_t abs64(int64_t x)
-{
-  return (((int64_t)((int32_t*)&x)[0])*((int64_t)((int32_t*)&x)[0]) + ((int64_t)((int32_t*)&x)[1])*((int64_t)((int32_t*)&x)[1]));
-}
-
-static inline double angle64(int64_t x)
-{
-
-  double re=((int32_t*)&x)[0];
-  double im=((int32_t*)&x)[1];
-  return (atan2(im,re));
-}
-
 
 /*******************************************************************
 *
@@ -726,7 +708,7 @@ int pss_search_time_nr(int **rxdata, ///rx data in time domain
                                 (short*)&(rxdata[ar][n+is*frame_parms->samples_per_frame]),
                                 frame_parms->ofdm_symbol_size,
                                 shift);
-        pss_corr_ue += abs64(result);
+        pss_corr_ue += squaredMod(*(c32_t*)&result);
         //((short*)pss_corr_ue[pss_index])[2*n] += ((short*) &result)[0];   /* real part */
         //((short*)pss_corr_ue[pss_index])[2*n+1] += ((short*) &result)[1]; /* imaginary part */
         //((short*)&synchro_out)[0] += ((int*) &result)[0];               /* real part */
diff --git a/openair1/PHY/NR_UE_TRANSPORT/sss_nr.c b/openair1/PHY/NR_UE_TRANSPORT/sss_nr.c
index ff8d5d42ff803118d1b2650d2044bd1647e3af73..53191f09462d46d5d411f09a0409e60516ab685c 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/sss_nr.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/sss_nr.c
@@ -332,7 +332,7 @@ int do_pss_sss_extract_nr(PHY_VARS_NR_UE *ue,
     pss_symbol = 0;
     sss_symbol = SSS_SYMBOL_NB-PSS_SYMBOL_NB;
 
-    rxdataF  =  ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF;
+    rxdataF  =  ue->common_vars.rxdataF;
 
     unsigned int ofdm_symbol_size = frame_parms->ofdm_symbol_size;
 
@@ -444,8 +444,8 @@ int rx_sss_nr(PHY_VARS_NR_UE *ue, UE_nr_rxtx_proc_t *proc, int32_t *tot_metric,
 #ifdef DEBUG_PLOT_SSS
 
   write_output("rxsig0.m","rxs0",&ue->common_vars.rxdata[0][0],ue->frame_parms.samples_per_subframe,1,1);
-  write_output("rxdataF0_pss.m","rxF0_pss",&ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0][0],frame_parms->ofdm_symbol_size,1,1);
-  write_output("rxdataF0_sss.m","rxF0_sss",&ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0][(SSS_SYMBOL_NB-PSS_SYMBOL_NB)*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
+  write_output("rxdataF0_pss.m","rxF0_pss",&ue->common_vars.rxdataF[0][0],frame_parms->ofdm_symbol_size,1,1);
+  write_output("rxdataF0_sss.m","rxF0_sss",&ue->common_vars.rxdataF[0][(SSS_SYMBOL_NB-PSS_SYMBOL_NB)*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
   write_output("pss_ext.m","pss_ext",pss_ext,LENGTH_PSS_NR,1,1);
 
 #endif
diff --git a/openair1/PHY/TOOLS/Makefile b/openair1/PHY/TOOLS/Makefile
index a59700bb7c058bc159cfda8befbb62e3c9370c9f..d7b14d501063edca65f689d9d37de6af79f4b195 100644
--- a/openair1/PHY/TOOLS/Makefile
+++ b/openair1/PHY/TOOLS/Makefile
@@ -1,15 +1,15 @@
 oai_dfts_sse4: oai_dfts.c
-	gcc-7 -O3 -std=gnu99 -msse4.1 -o oai_dfts_sse4 oai_dfts.c time_meas.c  ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
+	gcc -O3 -std=gnu99 -msse4.1 -o oai_dfts_sse4 oai_dfts.c time_meas.c  ../../SIMULATION/TOOLS/taus.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_HOME/sdr/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
 
 oai_dfts_avx2: oai_dfts.c
-	gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o oai_dfts_avx2 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
+	gcc -O2 -std=gnu99 -mavx2 -g -ggdb -o oai_dfts_avx2 oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_HOME/sdr/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
 
 oai_dfts_avx2.s: oai_dfts.c
-	gcc -O2 -std=gnu99 -mavx2 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
+	gcc -O2 -std=gnu99 -mavx2 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_HOME/sdr/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
 
 
 oai_dfts_sse4.s: oai_dfts.c
-	gcc -O2 -std=gnu99 -msse4.1 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_TARGETS/ARCH/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
+	gcc -O2 -std=gnu99 -msse4.1 -S oai_dfts.c time_meas.c ../../SIMULATION/TOOLS/taus.c $$OPENAIR_HOME/common/utils/backtrace.c -I$$OPENAIR_HOME -I$$OPENAIR1_DIR -I$$OPENAIR_TARGETS -I$$OPENAIR_TARGETS/COMMON -I$$OPENAIR_HOME/sdr/COMMON -I$$OPENAIR2_DIR -I$$OPENAIR2_DIR/COMMON -I$$OPENAIR_HOME/common/utils -I$$OPENAIR_HOME/common/utils/T -I$$OPENAIR_HOME/common/utils/msc -I$$OPENAIR_HOME/nfapi/open-nFAPI/nfapi/public_inc -DMR_MAIN -DNB_ANTENNAS_RX=1 -lm -lpthread # -DD256STATS #-DD64STATS
 
 
 dft_cycles_avx2: oai_dfts_avx2
diff --git a/openair1/PHY/TOOLS/calibration_test.c b/openair1/PHY/TOOLS/calibration_test.c
index 650d5e420cacfe5d361f947922a6f2dc0b3de098..9e611b676690676191465a9fe34cf5fb29cafc5a 100644
--- a/openair1/PHY/TOOLS/calibration_test.c
+++ b/openair1/PHY/TOOLS/calibration_test.c
@@ -1,6 +1,6 @@
 #include <stdint.h>
 #include <openair1/PHY/impl_defs_top.h>
-#include <targets/ARCH/COMMON/common_lib.h>
+#include <sdr/COMMON/common_lib.h>
 #include <executables/softmodem-common.h>
 #include <openair1/PHY/TOOLS/calibration_scope.h>
 
diff --git a/openair1/PHY/TOOLS/cdot_prod.c b/openair1/PHY/TOOLS/cdot_prod.c
index d48517805317b7a279957560d0de5c8da4386252..2156d807f7cf58ba45ad8108065fbe219d3cfa29 100644
--- a/openair1/PHY/TOOLS/cdot_prod.c
+++ b/openair1/PHY/TOOLS/cdot_prod.c
@@ -113,7 +113,7 @@ int32_t dot_product(int16_t *x,
  
   return(result);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x4_t *x_128=(int16x4_t*)x;
   int16x4_t *y_128=(int16x4_t*)y;
   int32x4_t tmp_re,tmp_im;
@@ -233,7 +233,7 @@ int64_t dot_product64(int16_t *x,
   _m_empty();
  
   return(result);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x4_t *x_128=(int16x4_t*)x;
   int16x4_t *y_128=(int16x4_t*)y;
   int32x4_t tmp_re,tmp_im;
diff --git a/openair1/PHY/TOOLS/cmult_sv.c b/openair1/PHY/TOOLS/cmult_sv.c
index a596446a44ab2609afdda1ca05db823347892014..e699189ee8508eb45e4b0ce34c4bd46f8bab3184 100644
--- a/openair1/PHY/TOOLS/cmult_sv.c
+++ b/openair1/PHY/TOOLS/cmult_sv.c
@@ -22,7 +22,6 @@
 #include "PHY/sse_intrin.h"
 #include "tools_defs.h"
 
-
 void multadd_complex_vector_real_scalar(int16_t *x,
                                         int16_t alpha,
                                         int16_t *y,
@@ -77,7 +76,7 @@ void multadd_real_vector_complex_scalar(int16_t *x,
     _mm_storeu_si128(y_128++, _mm_adds_epi16(tmp, _mm_unpacklo_epi16(yr, yi)));
     const simd_q15_t tmp2 = _mm_loadu_si128(y_128);
     _mm_storeu_si128(y_128++, _mm_adds_epi16(tmp2, _mm_unpackhi_epi16(yr, yi)));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined (__aarch64__)
     int16x8x2_t yint;
     yint = vzipq_s16(yr,yi);
     *y_128 = adds_int16(*y_128, yint.val[0]);
@@ -89,7 +88,6 @@ void multadd_real_vector_complex_scalar(int16_t *x,
   }
 }
 
-
 void rotate_cpx_vector(c16_t *x,
                        c16_t *alpha,
                        c16_t *y,
@@ -101,27 +99,26 @@ void rotate_cpx_vector(c16_t *x,
   // N is the number of complex numbers
   // output_shift reduces the result of the multiplication by this number of bits
   //AssertFatal(N%8==0, "To be developped");
-#ifdef __AVX2__
   if ( (intptr_t)x%32 == 0  && !(intptr_t)y%32 == 0 && __builtin_cpu_supports("avx2")) {
     // output is 32 bytes aligned, but not the input
     
     const c16_t for_re={alpha->r, -alpha->i};
-    __m256i const alpha_for_real =  _mm256_set1_epi32(*(uint32_t*)&for_re);
+    __m256i const alpha_for_real =  simde_mm256_set1_epi32(*(uint32_t*)&for_re);
     const c16_t for_im={alpha->i, alpha->r};
-    __m256i const alpha_for_im= _mm256_set1_epi32(*(uint32_t*)&for_im);
+    __m256i const alpha_for_im= simde_mm256_set1_epi32(*(uint32_t*)&for_im);
     __m256i const perm_mask =
-      _mm256_set_epi8(31,30,23,22,29,28,21,20,27,26,19,18,25,24,17,16,
-                      15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0);
+      simde_mm256_set_epi8(31,30,23,22,29,28,21,20,27,26,19,18,25,24,17,16,
+			   15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0);
     __m256i* xd= (__m256i*)x;
     const __m256i *end=xd+N/8;
     for( __m256i* yd = (__m256i *)y; xd<end ; yd++, xd++) {
-      const __m256i xre = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_real),
-                                            output_shift);
-      const __m256i xim = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_im),
-                                            output_shift);
+      const __m256i xre = simde_mm256_srai_epi32(simde_mm256_madd_epi16(*xd,alpha_for_real),
+						 output_shift);
+      const __m256i xim = simde_mm256_srai_epi32(simde_mm256_madd_epi16(*xd,alpha_for_im),
+						 output_shift);
       // a bit faster than unpacklo+unpackhi+packs
-      const __m256i tmp=_mm256_packs_epi32(xre,xim);
-      *yd=_mm256_shuffle_epi8(tmp,perm_mask);
+      const __m256i tmp=simde_mm256_packs_epi32(xre,xim);
+      *yd=simde_mm256_shuffle_epi8(tmp,perm_mask);
     }
     c16_t* alpha16=(c16_t*) alpha, *yLast;
     yLast=((c16_t*)y)+(N/8)*8;
@@ -131,7 +128,6 @@ void rotate_cpx_vector(c16_t *x,
       *yLast=c16mulShift(*xTail,*alpha16,output_shift);
     }
   } else {
-#endif
     // Multiply elementwise two complex vectors of N elements
     // x        - input 1    in the format  |Re0  Im0 |,......,|Re(N-1) Im(N-1)|
     //            We assume x1 with a dynamic of 15 bit maximum
@@ -164,7 +160,7 @@ void rotate_cpx_vector(c16_t *x,
     ((int16_t *)&alpha_128)[5] = -alpha->i;
     ((int16_t *)&alpha_128)[6] = alpha->i;
     ((int16_t *)&alpha_128)[7] = alpha->r;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     int32x4_t shift;
     int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,re32,im32;
     int16_t reflip[8]  __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
@@ -196,7 +192,7 @@ void rotate_cpx_vector(c16_t *x,
 
       y_128[0] = _mm_packs_epi32(m2,m3);        // pack in 16bit integers with saturation [re im re im re im re im]
       //print_ints("y_128[0]=", &y_128[0]);
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
       ab_re0 = vmull_s16(((int16x4_t*)xd)[0],((int16x4_t*)&bconj)[0]);
       ab_re1 = vmull_s16(((int16x4_t*)xd)[1],((int16x4_t*)&bconj)[1]);
@@ -214,7 +210,6 @@ void rotate_cpx_vector(c16_t *x,
       y_128[0] = vcombine_s16(vmovn_s32(xtmp.val[0]),vmovn_s32(xtmp.val[1]));
 
 #endif
-
       xd+=4;
       y_128+=1;
     }
diff --git a/openair1/PHY/TOOLS/cmult_vv.c b/openair1/PHY/TOOLS/cmult_vv.c
index ce3ebec65fe0e29f4bd34e816f484902e7531112..41ed9c2e6c5b2f1500bcdd93713e7f977c718e4c 100644
--- a/openair1/PHY/TOOLS/cmult_vv.c
+++ b/openair1/PHY/TOOLS/cmult_vv.c
@@ -31,7 +31,7 @@ int16_t conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
 #define simdshort_q15_t __m64
 #define set1_int16(a) _mm_set1_epi16(a)
 #define setr_int16(a0, a1, a2, a3, a4, a5, a6, a7) _mm_setr_epi16(a0, a1, a2, a3, a4, a5, a6, a7 )
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 int16_t conjug[4]__attribute__((aligned(16))) = {-1,1,-1,1} ;
 #define simd_q15_t int16x8_t
 #define simdshort_q15_t int16x4_t
@@ -70,7 +70,7 @@ int mult_cpx_conj_vector(int16_t *x1,
   simd_q15_t tmp_re,tmp_im;
   simd_q15_t tmpy0,tmpy1;
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t tmp_re,tmp_im;
   int32x4_t tmp_re1,tmp_im1;
   int16x4x2_t tmpy;
@@ -99,7 +99,7 @@ int mult_cpx_conj_vector(int16_t *x1,
     else
       *y_128 += _mm_packs_epi32(tmpy0,tmpy1);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
     tmp_re  = vmull_s16(((simdshort_q15_t *)x1_128)[0], ((simdshort_q15_t*)x2_128)[0]);
     //tmp_re = [Re(x1[0])Re(x2[0]) Im(x1[0])Im(x2[0]) Re(x1[1])Re(x2[1]) Im(x1[1])Im(x2[1])]
@@ -227,7 +227,7 @@ int multadd_cpx_vector(int16_t *x1,
 #if defined(__x86_64__) || defined(__i386__)
   simd_q15_t tmp_re,tmp_im;
   simd_q15_t tmpy0,tmpy1;
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int32x4_t tmp_re,tmp_im;
   int32x4_t tmp_re1,tmp_im1;
   int16x4x2_t tmpy;
@@ -255,8 +255,8 @@ int multadd_cpx_vector(int16_t *x1,
     else
       *y_128 = _mm_adds_epi16(*y_128,_mm_packs_epi32(tmpy0,tmpy1));
     //print_shorts("*y_128:",&y_128[i]);
-#elif defined(__arm__)
-    msg("mult_cpx_vector not implemented for __arm__");
+#elif defined(__arm__) || defined(__aarch64__)
+    msg("mult_cpx_vector not implemented for __arm__ nor __aarch64__");
 #endif
     x1_128++;
     x2_128++;
diff --git a/openair1/PHY/TOOLS/nr_phy_scope.c b/openair1/PHY/TOOLS/nr_phy_scope.c
index 73f932457084e8025a28d69049aad426fe57124a..d1a47a299a85b5b0f0a5d78891a995678792fdd2 100644
--- a/openair1/PHY/TOOLS/nr_phy_scope.c
+++ b/openair1/PHY/TOOLS/nr_phy_scope.c
@@ -601,7 +601,7 @@ static void ueFreqWaterFall (scopeGraphData_t **data, OAIgraph_t *graph,PHY_VARS
   NR_DL_FRAME_PARMS *frame_parms=&phy_vars_ue->frame_parms;
   //use 1st antenna
   genericWaterFall(graph,
-                   (scopeSample_t *)phy_vars_ue->common_vars.common_vars_rx_data_per_thread[0].rxdataF[0],
+                   (scopeSample_t *)phy_vars_ue->common_vars.rxdataF[0],
                    frame_parms->samples_per_slot_wCP,
                    phy_vars_ue->frame_parms.slots_per_frame,
                    "X axis: one frame frequency" );
@@ -719,7 +719,7 @@ static void uePcchIQ  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR_U
 }
 static void uePdschLLR  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR_UE *phy_vars_ue, int eNB_id, int UE_id) {
   // PDSCH LLRs
-  if (!phy_vars_ue->pdsch_vars[0][eNB_id]->llr[0])
+  if (!phy_vars_ue->pdsch_vars[eNB_id]->llr[0])
     return;
 
   int num_re = 4500;
@@ -730,7 +730,7 @@ static void uePdschLLR  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR
   int base=0;
 
   for (int thr=0 ; thr < RX_NB_TH_MAX ; thr ++ ) {
-    int16_t *pdsch_llr = (int16_t *) phy_vars_ue->pdsch_vars[thr][eNB_id]->llr[0]; // stream 0
+    int16_t *pdsch_llr = (int16_t *) phy_vars_ue->pdsch_vars[eNB_id]->llr[0]; // stream 0
 
     for (int i=0; i<coded_bits_per_codeword; i++) {
       llr[base+i] = (float) pdsch_llr[i];
@@ -746,7 +746,7 @@ static void uePdschLLR  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR
 }
 static void uePdschIQ  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR_UE *phy_vars_ue, int eNB_id, int UE_id) {
   // PDSCH I/Q of MF Output
-  if (!phy_vars_ue->pdsch_vars[0][eNB_id]->rxdataF_comp0[0])
+  if (!phy_vars_ue->pdsch_vars[eNB_id]->rxdataF_comp0[0])
     return;
 
   NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms;
@@ -758,7 +758,7 @@ static void uePdschIQ  (scopeGraphData_t **data, OAIgraph_t *graph, PHY_VARS_NR_
   memset(Q+base, 0, sz*RX_NB_TH_MAX * sizeof(*Q));
 
   for (int thr=0 ; thr < RX_NB_TH_MAX ; thr ++ ) {
-    scopeSample_t *pdsch_comp = (scopeSample_t *) phy_vars_ue->pdsch_vars[thr][eNB_id]->rxdataF_comp0[0];
+    scopeSample_t *pdsch_comp = (scopeSample_t *) phy_vars_ue->pdsch_vars[eNB_id]->rxdataF_comp0[0];
 
     for (int s=0; s<sz; s++) {
       I[s+base] += pdsch_comp[s].r;
diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c
index cbedfd815a47e7001762c3377c17eb933a9e8dc5..b14b64b1a806b63a5adb01e82d80a2f3e6d5ea5d 100644
--- a/openair1/PHY/TOOLS/oai_dfts.c
+++ b/openair1/PHY/TOOLS/oai_dfts.c
@@ -108,22 +108,21 @@ static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
   *im32 = _mm_add_epi32(*im32,cmac_tmp_im32);
 }
 
-#ifdef __AVX2__
 static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
 static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
 {
 
   __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32;
-  __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  __m256i imshuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
-  cmac_tmp       = _mm256_sign_epi16(b,*(__m256i*)reflip);
-  cmac_tmp_re32  = _mm256_madd_epi16(a,cmac_tmp);
+  cmac_tmp       = simde_mm256_sign_epi16(b,*(__m256i*)reflip);
+  cmac_tmp_re32  = simde_mm256_madd_epi16(a,cmac_tmp);
 
-  cmac_tmp       = _mm256_shuffle_epi8(b,imshuffle);
-  cmac_tmp_im32  = _mm256_madd_epi16(cmac_tmp,a);
+  cmac_tmp       = simde_mm256_shuffle_epi8(b,imshuffle);
+  cmac_tmp_im32  = simde_mm256_madd_epi16(cmac_tmp,a);
 
-  *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32);
-  *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32);
+  *re32 = simde_mm256_add_epi32(*re32,cmac_tmp_re32);
+  *im32 = simde_mm256_add_epi32(*im32,cmac_tmp_im32);
 }
 
 static inline void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
@@ -131,21 +130,19 @@ static inline void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
 {
 
   __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32;
-  __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  __m256i imshuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
-  cmac_tmp_re32   = _mm256_madd_epi16(a,b);
+  cmac_tmp_re32   = simde_mm256_madd_epi16(a,b);
 
 
-  cmac_tmp        = _mm256_sign_epi16(b,*(__m256i*)reflip);
-  cmac_tmp        = _mm256_shuffle_epi8(b,imshuffle);
-  cmac_tmp_im32   = _mm256_madd_epi16(cmac_tmp,a);
+  cmac_tmp        = simde_mm256_sign_epi16(b,*(__m256i*)reflip);
+  cmac_tmp        = simde_mm256_shuffle_epi8(cmac_tmp,imshuffle); // shuffle the sign-flipped copy (not b) so the conjugation is kept, as cmultc_256 does
+  cmac_tmp_im32   = simde_mm256_madd_epi16(cmac_tmp,a);
 
-  *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32);
-  *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32);
+  *re32 = simde_mm256_add_epi32(*re32,cmac_tmp_re32);
+  *im32 = simde_mm256_add_epi32(*im32,cmac_tmp_im32);
 }
 
-#endif
-
 static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline));
 
 static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
@@ -162,24 +159,21 @@ static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
 
 }
 
-#ifdef __AVX2__
 static inline void cmult_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
 
 static inline void cmult_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
 {
 
   register __m256i mmtmpb;
-  __m256i const perm_mask = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  __m256i const perm_mask = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
-  mmtmpb    = _mm256_sign_epi16(b,*(__m256i*)reflip);
-  *re32     = _mm256_madd_epi16(a,mmtmpb);
-  mmtmpb    = _mm256_shuffle_epi8(b,perm_mask);
-  *im32     = _mm256_madd_epi16(a,mmtmpb);
+  mmtmpb    = simde_mm256_sign_epi16(b,*(__m256i*)reflip);
+  *re32     = simde_mm256_madd_epi16(a,mmtmpb);
+  mmtmpb    = simde_mm256_shuffle_epi8(b,perm_mask);
+  *im32     = simde_mm256_madd_epi16(a,mmtmpb);
 
 }
 
-#endif
-
 static inline void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline));
 
 static inline void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
@@ -194,24 +188,21 @@ static inline void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
 
 }
 
-#ifdef __AVX2__
 static inline void cmultc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
 
 static inline void cmultc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
 {
 
   register __m256i mmtmpb;
-  __m256i const perm_mask = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  __m256i const perm_mask = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
-  *re32     = _mm256_madd_epi16(a,b);
-  mmtmpb    = _mm256_sign_epi16(b,*(__m256i*)reflip);
-  mmtmpb    = _mm256_shuffle_epi8(mmtmpb,perm_mask);
-  *im32     = _mm256_madd_epi16(a,mmtmpb);
+  *re32     = simde_mm256_madd_epi16(a,b);
+  mmtmpb    = simde_mm256_sign_epi16(b,*(__m256i*)reflip);
+  mmtmpb    = simde_mm256_shuffle_epi8(mmtmpb,perm_mask);
+  *im32     = simde_mm256_madd_epi16(a,mmtmpb);
 
 }
 
-#endif
-
 static inline __m128i cpack(__m128i xre,__m128i xim) __attribute__((always_inline));
 
 static inline __m128i cpack(__m128i xre,__m128i xim)
@@ -225,7 +216,6 @@ static inline __m128i cpack(__m128i xre,__m128i xim)
 
 }
 
-#ifdef __AVX2__
 static inline __m256i cpack_256(__m256i xre,__m256i xim) __attribute__((always_inline));
 
 static inline __m256i cpack_256(__m256i xre,__m256i xim)
@@ -233,14 +223,12 @@ static inline __m256i cpack_256(__m256i xre,__m256i xim)
 
   register __m256i cpack_tmp1,cpack_tmp2;
 
-  cpack_tmp1 = _mm256_unpacklo_epi32(xre,xim);
-  cpack_tmp2 = _mm256_unpackhi_epi32(xre,xim);
-  return(_mm256_packs_epi32(_mm256_srai_epi32(cpack_tmp1,15),_mm256_srai_epi32(cpack_tmp2,15)));
+  cpack_tmp1 = simde_mm256_unpacklo_epi32(xre,xim);
+  cpack_tmp2 = simde_mm256_unpackhi_epi32(xre,xim);
+  return(simde_mm256_packs_epi32(simde_mm256_srai_epi32(cpack_tmp1,15),simde_mm256_srai_epi32(cpack_tmp2,15)));
 
 }
 
-#endif
-
 static inline void packed_cmult(__m128i a,__m128i b, __m128i *c) __attribute__((always_inline));
 
 static inline void packed_cmult(__m128i a,__m128i b, __m128i *c)
@@ -252,7 +240,6 @@ static inline void packed_cmult(__m128i a,__m128i b, __m128i *c)
 
 }
 
-#ifdef __AVX2__
 static inline void packed_cmult_256(__m256i a,__m256i b, __m256i *c) __attribute__((always_inline));
 
 static inline void packed_cmult_256(__m256i a,__m256i b, __m256i *c)
@@ -263,7 +250,6 @@ static inline void packed_cmult_256(__m256i a,__m256i b, __m256i *c)
   *c = cpack_256(cre,cim);
 
 }
-#endif
 
 static inline void packed_cmultc(__m128i a,__m128i b, __m128i *c) __attribute__((always_inline));
 
@@ -277,7 +263,6 @@ static inline void packed_cmultc(__m128i a,__m128i b, __m128i *c)
 
 }
 
-#ifdef __AVX2__
 static inline void packed_cmultc_256(__m256i a,__m256i b, __m256i *c) __attribute__((always_inline));
 
 static inline void packed_cmultc_256(__m256i a,__m256i b, __m256i *c)
@@ -289,7 +274,6 @@ static inline void packed_cmultc_256(__m256i a,__m256i b, __m256i *c)
   *c = cpack_256(cre,cim);
 
 }
-#endif
 
 static inline __m128i packed_cmult2(__m128i a,__m128i b,__m128i b2) __attribute__((always_inline));
 
@@ -306,7 +290,6 @@ static inline __m128i packed_cmult2(__m128i a,__m128i b,__m128i b2)
 
 }
 
-#ifdef __AVX2__
 static inline __m256i packed_cmult2_256(__m256i a,__m256i b,__m256i b2) __attribute__((always_inline));
 
 static inline __m256i packed_cmult2_256(__m256i a,__m256i b,__m256i b2)
@@ -315,15 +298,14 @@ static inline __m256i packed_cmult2_256(__m256i a,__m256i b,__m256i b2)
 
   register __m256i cre,cim;
 
-  cre       = _mm256_madd_epi16(a,b);
-  cim       = _mm256_madd_epi16(a,b2);
+  cre       = simde_mm256_madd_epi16(a,b);
+  cim       = simde_mm256_madd_epi16(a,b2);
 
   return(cpack_256(cre,cim));
 
 }
-#endif
 
-#elif defined (__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline));
 static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32)
 {
@@ -477,7 +459,7 @@ static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b,  int16x8_t b2)
 
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 const static int16_t W0s[16]__attribute__((aligned(32))) = {32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0};
 
@@ -498,7 +480,6 @@ const __m128i *W25 = (__m128i *)W25s;
 const __m128i *W35 = (__m128i *)W35s;
 const __m128i *W45 = (__m128i *)W45s;
 
-#ifdef __AVX2__
 const __m256i *W0_256 =  (__m256i *)W0s;
 const __m256i *W13_256 = (__m256i *)W13s;
 const __m256i *W23_256 = (__m256i *)W23s;
@@ -506,9 +487,8 @@ const __m256i *W15_256 = (__m256i *)W15s;
 const __m256i *W25_256 = (__m256i *)W25s;
 const __m256i *W35_256 = (__m256i *)W35s;
 const __m256i *W45_256 = (__m256i *)W45s;
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 int16x8_t *W0  = (int16x8_t *)W0s;
 int16x8_t *W13 = (int16x8_t *)W13s;
 int16x8_t *W23 = (int16x8_t *)W23s;
@@ -516,7 +496,8 @@ int16x8_t *W15 = (int16x8_t *)W15s;
 int16x8_t *W25 = (int16x8_t *)W25s;
 int16x8_t *W35 = (int16x8_t *)W35s;
 int16x8_t *W45 = (int16x8_t *)W45s;
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
+
 const static int16_t dft_norm_table[16] = {9459,  //12
 					   6689,//24
 					   5461,//36
@@ -563,8 +544,6 @@ static inline void bfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m12
   *y1 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 }
 
-#ifdef __AVX2__
-
 static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline));
 
 static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)
@@ -576,24 +555,22 @@ static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,_
   cmult_256(*(x0),*(W0_256),&x0r_2,&x0i_2);
   cmult_256(*(x1),*(tw),&x1r_2,&x1i_2);
 
-  dy0r = _mm256_srai_epi32(_mm256_add_epi32(x0r_2,x1r_2),15);
-  dy1r = _mm256_srai_epi32(_mm256_sub_epi32(x0r_2,x1r_2),15);
-  dy0i = _mm256_srai_epi32(_mm256_add_epi32(x0i_2,x1i_2),15);
+  dy0r = simde_mm256_srai_epi32(simde_mm256_add_epi32(x0r_2,x1r_2),15);
+  dy1r = simde_mm256_srai_epi32(simde_mm256_sub_epi32(x0r_2,x1r_2),15);
+  dy0i = simde_mm256_srai_epi32(simde_mm256_add_epi32(x0i_2,x1i_2),15);
   //  printf("y0i %d\n",((int16_t *)y0i)[0]);
-  dy1i = _mm256_srai_epi32(_mm256_sub_epi32(x0i_2,x1i_2),15);
+  dy1i = simde_mm256_srai_epi32(simde_mm256_sub_epi32(x0i_2,x1i_2),15);
 
-  bfly2_tmp1 = _mm256_unpacklo_epi32(dy0r,dy0i);
-  bfly2_tmp2 = _mm256_unpackhi_epi32(dy0r,dy0i);
-  *y0 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
+  bfly2_tmp1 = simde_mm256_unpacklo_epi32(dy0r,dy0i);
+  bfly2_tmp2 = simde_mm256_unpackhi_epi32(dy0r,dy0i);
+  *y0 = simde_mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 
-  bfly2_tmp1 = _mm256_unpacklo_epi32(dy1r,dy1i);
-  bfly2_tmp2 = _mm256_unpackhi_epi32(dy1r,dy1i);
-  *y1 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
+  bfly2_tmp1 = simde_mm256_unpacklo_epi32(dy1r,dy1i);
+  bfly2_tmp2 = simde_mm256_unpackhi_epi32(dy1r,dy1i);
+  *y1 = simde_mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 }
 
-#endif
-
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)__attribute__((always_inline));
 
@@ -615,7 +592,7 @@ static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *
 }
 
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1)__attribute__((always_inline));
@@ -628,7 +605,7 @@ static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1)
 
 }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1)__attribute__((always_inline));
 
@@ -639,12 +616,10 @@ static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x
   *y1  = vqsubq_s16(*x0,*x1);
 
 }
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
  
 #if defined(__x86_64__) || defined(__i386__)
 
-
-
 static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb)__attribute__((always_inline));
 
 static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb)
@@ -666,8 +641,6 @@ static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1,
       print_shorts("y1",(int16_t*)y1);*/
 }
 
-#ifdef __AVX2__
-
 static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb)__attribute__((always_inline));
 
 static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb)
@@ -683,16 +656,14 @@ static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *
   print_shorts256("tw",(int16_t*)tw);
   print_shorts256("twb",(int16_t*)twb);
   print_shorts256("x1t",(int16_t*)&x1t);*/
-  *y0  = _mm256_adds_epi16(*x0,x1t);
-  *y1  = _mm256_subs_epi16(*x0,x1t);
+  *y0  = simde_mm256_adds_epi16(*x0,x1t);
+  *y1  = simde_mm256_subs_epi16(*x0,x1t);
   
   /*print_shorts256("y0",(int16_t*)y0);
     print_shorts256("y1",(int16_t*)y1);*/
 }
-#endif
 
-
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb)__attribute__((always_inline));
 
@@ -703,7 +674,7 @@ static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8
   *y1  = vqsubq_s16(*x0,*x1);
 
 }
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)__attribute__((always_inline));
@@ -732,7 +703,6 @@ static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m1
   *y1 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 }
 
-#ifdef __AVX2__
 static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline));
 
 static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)
@@ -744,23 +714,22 @@ static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,
   cmultc_256(*(x0),*(W0_256),&x0r_2,&x0i_2);
   cmultc_256(*(x1),*(tw),&x1r_2,&x1i_2);
 
-  dy0r = _mm256_srai_epi32(_mm256_add_epi32(x0r_2,x1r_2),15);
-  dy1r = _mm256_srai_epi32(_mm256_sub_epi32(x0r_2,x1r_2),15);
-  dy0i = _mm256_srai_epi32(_mm256_add_epi32(x0i_2,x1i_2),15);
+  dy0r = simde_mm256_srai_epi32(simde_mm256_add_epi32(x0r_2,x1r_2),15);
+  dy1r = simde_mm256_srai_epi32(simde_mm256_sub_epi32(x0r_2,x1r_2),15);
+  dy0i = simde_mm256_srai_epi32(simde_mm256_add_epi32(x0i_2,x1i_2),15);
   //  printf("y0i %d\n",((int16_t *)y0i)[0]);
-  dy1i = _mm256_srai_epi32(_mm256_sub_epi32(x0i_2,x1i_2),15);
+  dy1i = simde_mm256_srai_epi32(simde_mm256_sub_epi32(x0i_2,x1i_2),15);
 
-  bfly2_tmp1 = _mm256_unpacklo_epi32(dy0r,dy0i);
-  bfly2_tmp2 = _mm256_unpackhi_epi32(dy0r,dy0i);
-  *y0 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
+  bfly2_tmp1 = simde_mm256_unpacklo_epi32(dy0r,dy0i);
+  bfly2_tmp2 = simde_mm256_unpackhi_epi32(dy0r,dy0i);
+  *y0 = simde_mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 
-  bfly2_tmp1 = _mm256_unpacklo_epi32(dy1r,dy1i);
-  bfly2_tmp2 = _mm256_unpackhi_epi32(dy1r,dy1i);
-  *y1 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
+  bfly2_tmp1 = simde_mm256_unpacklo_epi32(dy1r,dy1i);
+  bfly2_tmp2 = simde_mm256_unpackhi_epi32(dy1r,dy1i);
+  *y1 = simde_mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2);
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void ibfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)
 {
 
@@ -779,9 +748,7 @@ static inline void ibfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t
 
 }
 
-#endif
-
-
+#endif // defined(__x86_64__) || defined(__i386__)
 
 
 // This is the radix-3 butterfly (fft)
@@ -812,8 +779,6 @@ static inline void bfly3(__m128i *x0,__m128i *x1,__m128i *x2,
   *(y2) = _mm_adds_epi16(*(x0),*(y2));
 }
 
-#ifdef __AVX2__
-
 static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
 			     __m256i *y0,__m256i *y1,__m256i *y2,
 			     __m256i *tw1,__m256i *tw2) __attribute__((always_inline));
@@ -827,19 +792,18 @@ static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
 
   packed_cmult_256(*(x1),*(tw1),&x1_2);
   packed_cmult_256(*(x2),*(tw2),&x2_2);
-  *(y0)  = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2));
+  *(y0)  = simde_mm256_adds_epi16(*(x0),simde_mm256_adds_epi16(x1_2,x2_2));
   cmult_256(x1_2,*(W13_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W23_256),&tmpre,&tmpim);
   *(y1) = cpack_256(tmpre,tmpim);
-  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  *(y1) = simde_mm256_adds_epi16(*(x0),*(y1));
   cmult_256(x1_2,*(W23_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W13_256),&tmpre,&tmpim);
   *(y2) = cpack_256(tmpre,tmpim);
-  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+  *(y2) = simde_mm256_adds_epi16(*(x0),*(y2));
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
                          int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
                          int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline));
@@ -865,7 +829,7 @@ static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
   *(y2) = vqaddq_s16(*(x0),*(y2));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2,
@@ -892,8 +856,6 @@ static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2,
   *(y2) = _mm_adds_epi16(*(x0),*(y2));
 }
 
-#ifdef __AVX2__
-
 static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
 			      __m256i *y0,__m256i *y1,__m256i *y2,
 			      __m256i *tw1,__m256i *tw2) __attribute__((always_inline));
@@ -907,19 +869,18 @@ static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
 
   packed_cmultc_256(*(x1),*(tw1),&x1_2);
   packed_cmultc_256(*(x2),*(tw2),&x2_2);
-  *(y0)  = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2));
+  *(y0)  = simde_mm256_adds_epi16(*(x0),simde_mm256_adds_epi16(x1_2,x2_2));
   cmultc_256(x1_2,*(W13_256),&tmpre,&tmpim);
   cmacc_256(x2_2,*(W23_256),&tmpre,&tmpim);
   *(y1) = cpack_256(tmpre,tmpim);
-  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  *(y1) = simde_mm256_adds_epi16(*(x0),*(y1));
   cmultc_256(x1_2,*(W23_256),&tmpre,&tmpim);
   cmacc_256(x2_2,*(W13_256),&tmpre,&tmpim);
   *(y2) = cpack_256(tmpre,tmpim);
-  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+  *(y2) = simde_mm256_adds_epi16(*(x0),*(y2));
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
 			  int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
 			  int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline));
@@ -944,7 +905,7 @@ static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
   *(y2) = cpack(tmpre,tmpim);
   *(y2) = vqaddq_s16(*(x0),*(y2));
 }
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2,
@@ -967,8 +928,6 @@ static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2,
   *(y2) = _mm_adds_epi16(*(x0),*(y2));
 }
 
-#ifdef __AVX2__
-
 static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,
 				 __m256i *y0,__m256i *y1,__m256i *y2) __attribute__((always_inline));
 
@@ -978,19 +937,18 @@ static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,
 
   __m256i tmpre,tmpim;
 
-  *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(*(x1),*(x2)));
+  *(y0) = simde_mm256_adds_epi16(*(x0),simde_mm256_adds_epi16(*(x1),*(x2)));
   cmult_256(*(x1),*(W13_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W23_256),&tmpre,&tmpim);
   *(y1) = cpack_256(tmpre,tmpim);
-  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  *(y1) = simde_mm256_adds_epi16(*(x0),*(y1));
   cmult_256(*(x1),*(W23_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W13_256),&tmpre,&tmpim);
   *(y2) = cpack_256(tmpre,tmpim);
-  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+  *(y2) = simde_mm256_adds_epi16(*(x0),*(y2));
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
                              int16x8_t *y0,int16x8_t *y1,int16x8_t *y2) __attribute__((always_inline));
 
@@ -1012,7 +970,7 @@ static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
 
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
@@ -1056,7 +1014,6 @@ static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
   *(y3) = _mm_add_epi16(*(x0),cpack(dy3r,dy3i));
 }
 
-#ifdef __AVX2__
 static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 			     __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
 			     __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline));
@@ -1075,30 +1032,30 @@ static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
   //  dy0r = _mm_add_epi32(x0r_2,_mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2)));
   //  dy0i = _mm_add_epi32(x0i_2,_mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2)));
   //  *(y0)  = cpack(dy0r,dy0i);
-  dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2));
-  dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2));
-  *(y0)  = _mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i));
+  dy0r = simde_mm256_add_epi32(x1r_2,simde_mm256_add_epi32(x2r_2,x3r_2));
+  dy0i = simde_mm256_add_epi32(x1i_2,simde_mm256_add_epi32(x2i_2,x3i_2));
+  *(y0)  = simde_mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i));
   //  dy1r = _mm_add_epi32(x0r_2,_mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2)));
   //  dy1i = _mm_sub_epi32(x0i_2,_mm_add_epi32(x1r_2,_mm_sub_epi32(x2i_2,x3r_2)));
   //  *(y1)  = cpack(dy1r,dy1i);
-  dy1r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2));
-  dy1i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
-  *(y1)  = _mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i));
+  dy1r = simde_mm256_sub_epi32(x1i_2,simde_mm256_add_epi32(x2r_2,x3i_2));
+  dy1i = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y1)  = simde_mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i));
   //  dy2r = _mm_sub_epi32(x0r_2,_mm_sub_epi32(x1r_2,_mm_sub_epi32(x2r_2,x3r_2)));
   //  dy2i = _mm_sub_epi32(x0i_2,_mm_sub_epi32(x1i_2,_mm_sub_epi32(x2i_2,x3i_2)));
   //  *(y2)  = cpack(dy2r,dy2i);
-  dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
-  dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
-  *(y2)  = _mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i));
+  dy2r = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = simde_mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i));
   //  dy3r = _mm_sub_epi32(x0r_2,_mm_add_epi32(x1i_2,_mm_sub_epi32(x2r_2,x3i_2)));
   //  dy3i = _mm_add_epi32(x0i_2,_mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2)));
   //  *(y3) = cpack(dy3r,dy3i);
-  dy3r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
-  dy3i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2));
-  *(y3) = _mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i));
+  dy3r = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy3i = simde_mm256_sub_epi32(x1r_2,simde_mm256_add_epi32(x2i_2,x3r_2));
+  *(y3) = simde_mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i));
 }
-#endif
-#elif defined(__arm__)
+
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
                          int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
                          int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)__attribute__((always_inline));
@@ -1140,7 +1097,7 @@ static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3
   *(y3) = vqaddq_s16(*(x0),cpack(dy3r,dy3i));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
@@ -1173,8 +1130,6 @@ static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
   *(y1) = _mm_add_epi16(*(x0),cpack(dy1r,dy1i));
 }
 
-#ifdef __AVX2__
-
 static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 			      __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
 			      __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline));
@@ -1191,22 +1146,21 @@ static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
   cmultc_256(*(x2),*(tw2),&x2r_2,&x2i_2);
   cmultc_256(*(x3),*(tw3),&x3r_2,&x3i_2);
 
-  dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2));
-  dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2));
-  *(y0)  = _mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i));
-  dy3r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2));
-  dy3i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
-  *(y3)  = _mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i));
-  dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
-  dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
-  *(y2)  = _mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i));
-  dy1r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
-  dy1i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2));
-  *(y1) = _mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i));
+  dy0r = simde_mm256_add_epi32(x1r_2,simde_mm256_add_epi32(x2r_2,x3r_2));
+  dy0i = simde_mm256_add_epi32(x1i_2,simde_mm256_add_epi32(x2i_2,x3i_2));
+  *(y0)  = simde_mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i));
+  dy3r = simde_mm256_sub_epi32(x1i_2,simde_mm256_add_epi32(x2r_2,x3i_2));
+  dy3i = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y3)  = simde_mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i));
+  dy2r = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = simde_mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i));
+  dy1r = simde_mm256_sub_epi32(simde_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy1i = simde_mm256_sub_epi32(x1r_2,simde_mm256_add_epi32(x2i_2,x3r_2));
+  *(y1) = simde_mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i));
 }
 
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
                           int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
@@ -1238,7 +1192,7 @@ static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x
   *(y1) = vqaddq_s16(*(x0),cpack(dy1r,dy1i));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 
@@ -1276,8 +1230,6 @@ static inline void bfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
   */
 }
 
-#ifdef __AVX2__
-
 static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 				 __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3)__attribute__((always_inline));
 
@@ -1285,24 +1237,23 @@ static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3
 				 __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3)
 {
   register __m256i x1_flip,x3_flip,x02t,x13t;
-  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
-
-  x02t    = _mm256_adds_epi16(*(x0),*(x2));
-  x13t    = _mm256_adds_epi16(*(x1),*(x3));
-  *(y0)   = _mm256_adds_epi16(x02t,x13t);
-  *(y2)   = _mm256_subs_epi16(x02t,x13t);
-  x1_flip = _mm256_sign_epi16(*(x1),*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(*(x3),*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t    = _mm256_subs_epi16(*(x0),*(x2));
-  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
-  *(y1)   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  *(y3)   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  register __m256i complex_shuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  x02t    = simde_mm256_adds_epi16(*(x0),*(x2));
+  x13t    = simde_mm256_adds_epi16(*(x1),*(x3));
+  *(y0)   = simde_mm256_adds_epi16(x02t,x13t);
+  *(y2)   = simde_mm256_subs_epi16(x02t,x13t);
+  x1_flip = simde_mm256_sign_epi16(*(x1),*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(*(x3),*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = simde_mm256_subs_epi16(*(x0),*(x2));
+  x13t    = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  *(y1)   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  *(y3)   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
                              int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)__attribute__((always_inline));
@@ -1321,7 +1272,7 @@ static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t
   *(y3)   = vqsubq_s16(*(x0),vqaddq_s16(x1_flip,vqsubq_s16(*(x2),x3_flip)));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 
@@ -1350,7 +1301,7 @@ static inline void ibfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
 }
 
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
 			      int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)__attribute__((always_inline));
 
@@ -1368,7 +1319,7 @@ static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_
   *(y3)   = vqaddq_s16(*(x0),vqsubq_s16(x1_flip,vqaddq_s16(*(x2),x3_flip)));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
@@ -1419,7 +1370,6 @@ static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
 
 }
 
-#ifdef __AVX2__
 static inline void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 				__m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
 				__m256i *tw1,__m256i *tw2,__m256i *tw3,
@@ -1433,7 +1383,7 @@ static inline void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 
   register __m256i x1t,x2t,x3t,x02t,x13t;
   register __m256i x1_flip,x3_flip;
-  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  register __m256i complex_shuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
   // each input xi is assumed to be to consecutive vectors xi0 xi1 on which to perform the 8 butterflies
   // [xi00 xi01 xi02 xi03 xi10 xi20 xi30 xi40]
@@ -1443,25 +1393,23 @@ static inline void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
   x2t = packed_cmult2_256(*(x2),*(tw2),*(tw2b));
   x3t = packed_cmult2_256(*(x3),*(tw3),*(tw3b));
 
-  x02t  = _mm256_adds_epi16(*(x0),x2t);
-  x13t  = _mm256_adds_epi16(x1t,x3t);
-  *(y0)   = _mm256_adds_epi16(x02t,x13t);
-  *(y2)   = _mm256_subs_epi16(x02t,x13t);
+  x02t  = simde_mm256_adds_epi16(*(x0),x2t);
+  x13t  = simde_mm256_adds_epi16(x1t,x3t);
+  *(y0)   = simde_mm256_adds_epi16(x02t,x13t);
+  *(y2)   = simde_mm256_subs_epi16(x02t,x13t);
 
-  x1_flip = _mm256_sign_epi16(x1t,*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(x3t,*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t  = _mm256_subs_epi16(*(x0),x2t);
-  x13t  = _mm256_subs_epi16(x1_flip,x3_flip);
-  *(y1)   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  *(y3)   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  x1_flip = simde_mm256_sign_epi16(x1t,*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(x3t,*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t  = simde_mm256_subs_epi16(*(x0),x2t);
+  x13t  = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  *(y1)   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  *(y3)   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
 }
 
-#endif
-
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void bfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
                             int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
@@ -1494,7 +1442,7 @@ static inline void bfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t
   *(y1)   = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
   *(y3)   = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
 }
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void ibfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
@@ -1545,7 +1493,6 @@ static inline void ibfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
 
 }
 
-#ifdef __AVX2__
 static inline void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
 				 __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
 				 __m256i *tw1,__m256i *tw2,__m256i *tw3,
@@ -1559,7 +1506,7 @@ static inline void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3
 
   register __m256i x1t,x2t,x3t,x02t,x13t;
   register __m256i x1_flip,x3_flip;
-  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  register __m256i complex_shuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
   // each input xi is assumed to be to consecutive vectors xi0 xi1 on which to perform the 8 butterflies
   // [xi00 xi01 xi02 xi03 xi10 xi20 xi30 xi40]
@@ -1569,24 +1516,23 @@ static inline void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3
   x2t = packed_cmult2_256(*(x2),*(tw2),*(tw2b));
   x3t = packed_cmult2_256(*(x3),*(tw3),*(tw3b));
 
-  x02t  = _mm256_adds_epi16(*(x0),x2t);
-  x13t  = _mm256_adds_epi16(x1t,x3t);
-  *(y0)   = _mm256_adds_epi16(x02t,x13t);
-  *(y2)   = _mm256_subs_epi16(x02t,x13t);
+  x02t  = simde_mm256_adds_epi16(*(x0),x2t);
+  x13t  = simde_mm256_adds_epi16(x1t,x3t);
+  *(y0)   = simde_mm256_adds_epi16(x02t,x13t);
+  *(y2)   = simde_mm256_subs_epi16(x02t,x13t);
 
-  x1_flip = _mm256_sign_epi16(x1t,*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(x3t,*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t  = _mm256_subs_epi16(*(x0),x2t);
-  x13t  = _mm256_subs_epi16(x1_flip,x3_flip);
-  *(y3)   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  *(y1)   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  x1_flip = simde_mm256_sign_epi16(x1t,*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(x3t,*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t  = simde_mm256_subs_epi16(*(x0),x2t);
+  x13t  = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  *(y3)   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  *(y1)   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void ibfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
 			     int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
 			     int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,
@@ -1617,7 +1563,7 @@ static inline void ibfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t
   *(y1)   = vqsubq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly5(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4,
@@ -1670,8 +1616,6 @@ static inline void bfly5(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m1
 
 }
 
-#ifdef __AVX2__
-
 static inline void bfly5_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4,
 			     __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4,
 			     __m256i *tw1,__m256i *tw2,__m256i *tw3,__m256i *tw4)__attribute__((always_inline));
@@ -1690,40 +1634,39 @@ static inline void bfly5_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,
   packed_cmult_256(*(x3),*(tw3),&x3_2);
   packed_cmult_256(*(x4),*(tw4),&x4_2);
 
-  *(y0)  = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,_mm256_adds_epi16(x2_2,_mm256_adds_epi16(x3_2,x4_2))));
+  *(y0)  = simde_mm256_adds_epi16(*(x0),simde_mm256_adds_epi16(x1_2,simde_mm256_adds_epi16(x2_2,simde_mm256_adds_epi16(x3_2,x4_2))));
   cmult_256(x1_2,*(W15_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W25_256),&tmpre,&tmpim);
   cmac_256(x3_2,*(W35_256),&tmpre,&tmpim);
   cmac_256(x4_2,*(W45_256),&tmpre,&tmpim);
   *(y1) = cpack_256(tmpre,tmpim);
-  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  *(y1) = simde_mm256_adds_epi16(*(x0),*(y1));
 
   cmult_256(x1_2,*(W25_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W45_256),&tmpre,&tmpim);
   cmac_256(x3_2,*(W15_256),&tmpre,&tmpim);
   cmac_256(x4_2,*(W35_256),&tmpre,&tmpim);
   *(y2) = cpack_256(tmpre,tmpim);
-  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+  *(y2) = simde_mm256_adds_epi16(*(x0),*(y2));
 
   cmult_256(x1_2,*(W35_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W15_256),&tmpre,&tmpim);
   cmac_256(x3_2,*(W45_256),&tmpre,&tmpim);
   cmac_256(x4_2,*(W25_256),&tmpre,&tmpim);
   *(y3) = cpack_256(tmpre,tmpim);
-  *(y3) = _mm256_adds_epi16(*(x0),*(y3));
+  *(y3) = simde_mm256_adds_epi16(*(x0),*(y3));
 
   cmult_256(x1_2,*(W45_256),&tmpre,&tmpim);
   cmac_256(x2_2,*(W35_256),&tmpre,&tmpim);
   cmac_256(x3_2,*(W25_256),&tmpre,&tmpim);
   cmac_256(x4_2,*(W15_256),&tmpre,&tmpim);
   *(y4) = cpack_256(tmpre,tmpim);
-  *(y4) = _mm256_adds_epi16(*(x0),*(y4));
+  *(y4) = simde_mm256_adds_epi16(*(x0),*(y4));
 
 
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void bfly5(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4,
                          int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4,
                          int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,int16x8_t *tw4)__attribute__((always_inline));
@@ -1776,7 +1719,7 @@ static inline void bfly5(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t
 }
 
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 static inline void bfly5_tw1(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4,
@@ -1815,7 +1758,6 @@ static inline void bfly5_tw1(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,
   *(y4) = _mm_adds_epi16(*(x0),*(y4));
 }
 
-#ifdef __AVX2__
 static inline void bfly5_tw1_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4,
 				 __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4) __attribute__((always_inline));
 
@@ -1825,34 +1767,34 @@ static inline void bfly5_tw1_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i
 
   __m256i tmpre,tmpim;
 
-  *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(*(x1),_mm256_adds_epi16(*(x2),_mm256_adds_epi16(*(x3),*(x4)))));
+  *(y0) = simde_mm256_adds_epi16(*(x0),simde_mm256_adds_epi16(*(x1),simde_mm256_adds_epi16(*(x2),simde_mm256_adds_epi16(*(x3),*(x4)))));
   cmult_256(*(x1),*(W15_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W25_256),&tmpre,&tmpim);
   cmac_256(*(x3),*(W35_256),&tmpre,&tmpim);
   cmac_256(*(x4),*(W45_256),&tmpre,&tmpim);
   *(y1) = cpack_256(tmpre,tmpim);
-  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  *(y1) = simde_mm256_adds_epi16(*(x0),*(y1));
   cmult_256(*(x1),*(W25_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W45_256),&tmpre,&tmpim);
   cmac_256(*(x3),*(W15_256),&tmpre,&tmpim);
   cmac_256(*(x4),*(W35_256),&tmpre,&tmpim);
   *(y2) = cpack_256(tmpre,tmpim);
-  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+  *(y2) = simde_mm256_adds_epi16(*(x0),*(y2));
   cmult_256(*(x1),*(W35_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W15_256),&tmpre,&tmpim);
   cmac_256(*(x3),*(W45_256),&tmpre,&tmpim);
   cmac_256(*(x4),*(W25_256),&tmpre,&tmpim);
   *(y3) = cpack_256(tmpre,tmpim);
-  *(y3) = _mm256_adds_epi16(*(x0),*(y3));
+  *(y3) = simde_mm256_adds_epi16(*(x0),*(y3));
   cmult_256(*(x1),*(W45_256),&tmpre,&tmpim);
   cmac_256(*(x2),*(W35_256),&tmpre,&tmpim);
   cmac_256(*(x3),*(W25_256),&tmpre,&tmpim);
   cmac_256(*(x4),*(W15_256),&tmpre,&tmpim);
   *(y4) = cpack_256(tmpre,tmpim);
-  *(y4) = _mm256_adds_epi16(*(x0),*(y4));
+  *(y4) = simde_mm256_adds_epi16(*(x0),*(y4));
 }
-#endif
-#elif defined(__arm__)
+
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void bfly5_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4,
                              int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4) __attribute__((always_inline));
 
@@ -1889,7 +1831,8 @@ static inline void bfly5_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x
   *(y4) = vqaddq_s16(*(x0),*(y4));
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
+
 // performs 4x4 transpose of input x (complex interleaved) using 128bit SIMD intrinsics
 // i.e. x = [x0r x0i x1r x1i ... x15r x15i], y = [x0r x0i x4r x4i x8r x8i x12r x12i x1r x1i x5r x5i x9r x9i x13r x13i x2r x2i ... x15r x15i]
 
@@ -1909,7 +1852,7 @@ static inline void transpose16(__m128i *x,__m128i *y)
   y[3]    = _mm_unpackhi_epi64(ytmp1,ytmp3);
 }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void transpose16(int16x8_t *x,int16x8_t *y) __attribute__((always_inline));
 static inline void transpose16(int16x8_t *x,int16x8_t *y)
 {
@@ -1924,7 +1867,8 @@ static inline void transpose16(int16x8_t *x,int16x8_t *y)
   y[3]  = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1]));
 }
 
-# endif
+#endif // defined(__x86_64__) || defined(__i386__)
+
 // same as above but output is offset by off
 #if defined(__x86_64__) || defined(__i386__)
 static inline void transpose16_ooff(__m128i *x,__m128i *y,int off) __attribute__((always_inline));
@@ -1947,35 +1891,32 @@ static inline void transpose16_ooff(__m128i *x,__m128i *y,int off)
   *y2     = _mm_unpackhi_epi64(ytmp1,ytmp3); // x03 x13 x23 x33
 }
 
-#ifdef __AVX2__
-
 static inline void transpose16_ooff_simd256(__m256i *x,__m256i *y,int off) __attribute__((always_inline));
 static inline void transpose16_ooff_simd256(__m256i *x,__m256i *y,int off)
 {
   register __m256i ytmp0,ytmp1,ytmp2,ytmp3,ytmp4,ytmp5,ytmp6,ytmp7;
   __m256i *y2=y;
-  __m256i const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
-
-  ytmp0 = _mm256_permutevar8x32_epi32(x[0],perm_mask);  // x00 x10 x01 x11 x02 x12 x03 x13
-  ytmp1 = _mm256_permutevar8x32_epi32(x[1],perm_mask);  // x20 x30 x21 x31 x22 x32 x23 x33
-  ytmp2 = _mm256_permutevar8x32_epi32(x[2],perm_mask);  // x40 x50 x41 x51 x42 x52 x43 x53
-  ytmp3 = _mm256_permutevar8x32_epi32(x[3],perm_mask);  // x60 x70 x61 x71 x62 x72 x63 x73
-  ytmp4 = _mm256_unpacklo_epi64(ytmp0,ytmp1);           // x00 x10 x20 x30 x01 x11 x21 x31
-  ytmp5 = _mm256_unpackhi_epi64(ytmp0,ytmp1);           // x02 x12 x22 x32 x03 x13 x23 x33
-  ytmp6 = _mm256_unpacklo_epi64(ytmp2,ytmp3);           // x40 x50 x60 x70 x41 x51 x61 x71
-  ytmp7 = _mm256_unpackhi_epi64(ytmp2,ytmp3);           // x42 x52 x62 x72 x43 x53 x63 x73
-
-  *y2    = _mm256_insertf128_si256(ytmp4,_mm256_extracti128_si256(ytmp6,0),1);  //x00 x10 x20 x30 x40 x50 x60 x70
+  __m256i const perm_mask = simde_mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+
+  ytmp0 = simde_mm256_permutevar8x32_epi32(x[0],perm_mask);  // x00 x10 x01 x11 x02 x12 x03 x13
+  ytmp1 = simde_mm256_permutevar8x32_epi32(x[1],perm_mask);  // x20 x30 x21 x31 x22 x32 x23 x33
+  ytmp2 = simde_mm256_permutevar8x32_epi32(x[2],perm_mask);  // x40 x50 x41 x51 x42 x52 x43 x53
+  ytmp3 = simde_mm256_permutevar8x32_epi32(x[3],perm_mask);  // x60 x70 x61 x71 x62 x72 x63 x73
+  ytmp4 = simde_mm256_unpacklo_epi64(ytmp0,ytmp1);           // x00 x10 x20 x30 x01 x11 x21 x31
+  ytmp5 = simde_mm256_unpackhi_epi64(ytmp0,ytmp1);           // x02 x12 x22 x32 x03 x13 x23 x33
+  ytmp6 = simde_mm256_unpacklo_epi64(ytmp2,ytmp3);           // x40 x50 x60 x70 x41 x51 x61 x71
+  ytmp7 = simde_mm256_unpackhi_epi64(ytmp2,ytmp3);           // x42 x52 x62 x72 x43 x53 x63 x73
+
+  *y2    = simde_mm256_insertf128_si256(ytmp4,simde_mm256_extracti128_si256(ytmp6,0),1);  //x00 x10 x20 x30 x40 x50 x60 x70
   y2+=off;  
-  *y2    = _mm256_insertf128_si256(ytmp6,_mm256_extracti128_si256(ytmp4,1),0);  //x01 x11 x21 x31 x41 x51 x61 x71
+  *y2    = simde_mm256_insertf128_si256(ytmp6,simde_mm256_extracti128_si256(ytmp4,1),0);  //x01 x11 x21 x31 x41 x51 x61 x71
   y2+=off;  
-  *y2    = _mm256_insertf128_si256(ytmp5,_mm256_extracti128_si256(ytmp7,0),1);  //x00 x10 x20 x30 x40 x50 x60 x70
+  *y2    = simde_mm256_insertf128_si256(ytmp5,simde_mm256_extracti128_si256(ytmp7,0),1);  //x02 x12 x22 x32 x42 x52 x62 x72 (low lane of ytmp5, low lane of ytmp7)
   y2+=off;  
-  *y2    = _mm256_insertf128_si256(ytmp7,_mm256_extracti128_si256(ytmp5,1),0);  //x01 x11 x21 x31 x41 x51 x61 x71
+  *y2    = simde_mm256_insertf128_si256(ytmp7,simde_mm256_extracti128_si256(ytmp5,1),0);  //x03 x13 x23 x33 x43 x53 x63 x73 (high lane of ytmp5, high lane of ytmp7)
 }
-#endif
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 static inline void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off) __attribute__((always_inline));
 
 static inline void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off)
@@ -1994,7 +1935,7 @@ static inline void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off)
 
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 #if defined(__x86_64__) || defined(__i386__)
 
@@ -2009,24 +1950,24 @@ static inline void transpose4_ooff(__m64 *x,__m64 *y,int off)
   // y[0] = [x0 x2]
   // y[off] = [x1 x3]
 }
-#ifdef __AVX2__
+
 static inline void transpose4_ooff_simd256(__m256i *x,__m256i *y,int off)__attribute__((always_inline));
 static inline void transpose4_ooff_simd256(__m256i *x,__m256i *y,int off)
 {
-  __m256i const perm_mask = _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0);
+  __m256i const perm_mask = simde_mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0);  // permuted dword order: 0 2 4 6 | 1 3 5 7 (even indices in low 128 bits, odd in high)
   __m256i perm_tmp0,perm_tmp1;
 
   // x[0] = [x0 x1 x2 x3 x4 x5 x6 x7]
   // x[1] = [x8 x9 x10 x11 x12 x13 x14]
   // y[0] = [x0 x2 x4 x6 x8 x10 x12 x14]
   // y[off] = [x1 x3 x5 x7 x9 x11 x13 x15]
-  perm_tmp0 = _mm256_permutevar8x32_epi32(x[0],perm_mask);
-  perm_tmp1 = _mm256_permutevar8x32_epi32(x[1],perm_mask);
-  y[0]   = _mm256_insertf128_si256(perm_tmp0,_mm256_extracti128_si256(perm_tmp1,0),1);
-  y[off] = _mm256_insertf128_si256(perm_tmp1,_mm256_extracti128_si256(perm_tmp0,1),0);
+  perm_tmp0 = simde_mm256_permutevar8x32_epi32(x[0],perm_mask);  // x0 x2 x4 x6 | x1 x3 x5 x7
+  perm_tmp1 = simde_mm256_permutevar8x32_epi32(x[1],perm_mask);  // x8 x10 x12 x14 | x9 x11 x13 x15
+  y[0]   = simde_mm256_insertf128_si256(perm_tmp0,simde_mm256_extracti128_si256(perm_tmp1,0),1);  // low half of perm_tmp0, then low half of perm_tmp1
+  y[off] = simde_mm256_insertf128_si256(perm_tmp1,simde_mm256_extracti128_si256(perm_tmp0,1),0);  // high half of perm_tmp0, then high half of perm_tmp1
 }
-#endif
-#elif (__arm__)
+
+#elif defined(__arm__) || defined(__aarch64__)
 
 static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)__attribute__((always_inline));
 static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)
@@ -2037,7 +1978,7 @@ static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)
   y[off] = (int16x4_t)ytmp.val[1];
 }
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 // 16-point optimized DFT kernel
 
@@ -2061,8 +2002,6 @@ const static int16_t tw16c[24] __attribute__((aligned(32))) = { 0,32767,12540,30
                                                    0,32767,30273,12539,23170,-23170,-12539,-30273
                                                  };
 
-#ifdef __AVX2__
-
 const static int16_t tw16rep[48] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,
 						     32767,0,23169,-23170,0     ,-32767,-23170,-23170,32767,0,23169,-23170,0     ,-32767,-23170,-23170,
 						     32767,0,12539,-30273,-23170,-23170,-30273,12539,32767,0,12539,-30273,-23170,-23170,-30273,12539
@@ -2083,10 +2022,6 @@ const static int16_t tw16crep[48] __attribute__((aligned(32))) = { 0,32767,12540
 						      0,32767,30273,12539,23170,-23170,-12539,-30273,0,32767,30273,12539,23170,-23170,-12539,-30273
                                                     };
 
-#endif /* __AVX2__ */
-
-
-
 static inline void dft16(int16_t *x,int16_t *y) __attribute__((always_inline));
 
 static inline void dft16(int16_t *x,int16_t *y)
@@ -2156,7 +2091,7 @@ static inline void dft16(int16_t *x,int16_t *y)
   y128[1] = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
   y128[3] = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
   int16x8_t *tw16a_128=(int16x8_t *)tw16a,*tw16b_128=(int16x8_t *)tw16b,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y;
 
@@ -2223,11 +2158,11 @@ static inline void dft16(int16_t *x,int16_t *y)
   y128[3] = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 }
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
+
 // Does two 16-point DFTS (x[0 .. 15] is 128 LSBs of input vector, x[16..31] is in 128 MSBs) 
 static inline void dft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline));
 static inline void dft16_simd256(int16_t *x,int16_t *y)
@@ -2237,36 +2172,36 @@ static inline void dft16_simd256(int16_t *x,int16_t *y)
 
   __m256i x1_flip,x3_flip,x02t,x13t;
   __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
-  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  register __m256i complex_shuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
   // First stage : 4 Radix-4 butterflies without input twiddles
 
-  x02t    = _mm256_adds_epi16(x256[0],x256[2]);
-  x13t    = _mm256_adds_epi16(x256[1],x256[3]);
-  xtmp0   = _mm256_adds_epi16(x02t,x13t);
-  xtmp2   = _mm256_subs_epi16(x02t,x13t);
-  x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t    = _mm256_subs_epi16(x256[0],x256[2]);
-  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
-  xtmp1   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  xtmp3   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  x02t    = simde_mm256_adds_epi16(x256[0],x256[2]);
+  x13t    = simde_mm256_adds_epi16(x256[1],x256[3]);
+  xtmp0   = simde_mm256_adds_epi16(x02t,x13t);
+  xtmp2   = simde_mm256_subs_epi16(x02t,x13t);
+  x1_flip = simde_mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = simde_mm256_subs_epi16(x256[0],x256[2]);
+  x13t    = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  xtmp1   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp3   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
   /*  print_shorts256("xtmp0",(int16_t*)&xtmp0);
       print_shorts256("xtmp1",(int16_t*)&xtmp1);
   print_shorts256("xtmp2",(int16_t*)&xtmp2);
   print_shorts256("xtmp3",(int16_t*)&xtmp3);*/
 
-  ytmp0   = _mm256_unpacklo_epi32(xtmp0,xtmp1);  
-  ytmp1   = _mm256_unpackhi_epi32(xtmp0,xtmp1);
-  ytmp2   = _mm256_unpacklo_epi32(xtmp2,xtmp3);
-  ytmp3   = _mm256_unpackhi_epi32(xtmp2,xtmp3);
-  xtmp0   = _mm256_unpacklo_epi64(ytmp0,ytmp2);
-  xtmp1   = _mm256_unpackhi_epi64(ytmp0,ytmp2);
-  xtmp2   = _mm256_unpacklo_epi64(ytmp1,ytmp3);
-  xtmp3   = _mm256_unpackhi_epi64(ytmp1,ytmp3);
+  ytmp0   = simde_mm256_unpacklo_epi32(xtmp0,xtmp1);  
+  ytmp1   = simde_mm256_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = simde_mm256_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = simde_mm256_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = simde_mm256_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = simde_mm256_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = simde_mm256_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = simde_mm256_unpackhi_epi64(ytmp1,ytmp3);
 
   // Second stage : 4 Radix-4 butterflies with input twiddles
   xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]);
@@ -2278,18 +2213,18 @@ static inline void dft16_simd256(int16_t *x,int16_t *y)
   print_shorts256("xtmp2",(int16_t*)&xtmp2);
   print_shorts256("xtmp3",(int16_t*)&xtmp3);*/
 
-  x02t    = _mm256_adds_epi16(xtmp0,xtmp2);
-  x13t    = _mm256_adds_epi16(xtmp1,xtmp3);
-  ytmp0   = _mm256_adds_epi16(x02t,x13t);
-  ytmp2   = _mm256_subs_epi16(x02t,x13t);
-  x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t    = _mm256_subs_epi16(xtmp0,xtmp2);
-  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
-  ytmp1   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  ytmp3   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  x02t    = simde_mm256_adds_epi16(xtmp0,xtmp2);
+  x13t    = simde_mm256_adds_epi16(xtmp1,xtmp3);
+  ytmp0   = simde_mm256_adds_epi16(x02t,x13t);
+  ytmp2   = simde_mm256_subs_epi16(x02t,x13t);
+  x1_flip = simde_mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = simde_mm256_subs_epi16(xtmp0,xtmp2);
+  x13t    = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  ytmp1   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  ytmp3   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
  
 
   // [y0  y1  y2  y3  y16 y17 y18 y19]
@@ -2297,10 +2232,10 @@ static inline void dft16_simd256(int16_t *x,int16_t *y)
   // [y8  y9  y10 y11 y24 y25 y26 y27]
   // [y12 y13 y14 y15 y28 y29 y30 y31]
 
-  y256[0] = _mm256_insertf128_si256(ytmp0,_mm256_extracti128_si256(ytmp1,0),1);
-  y256[1] = _mm256_insertf128_si256(ytmp2,_mm256_extracti128_si256(ytmp3,0),1);
-  y256[2] = _mm256_insertf128_si256(ytmp1,_mm256_extracti128_si256(ytmp0,1),0);
-  y256[3] = _mm256_insertf128_si256(ytmp3,_mm256_extracti128_si256(ytmp2,1),0);
+  y256[0] = simde_mm256_insertf128_si256(ytmp0,simde_mm256_extracti128_si256(ytmp1,0),1);
+  y256[1] = simde_mm256_insertf128_si256(ytmp2,simde_mm256_extracti128_si256(ytmp3,0),1);
+  y256[2] = simde_mm256_insertf128_si256(ytmp1,simde_mm256_extracti128_si256(ytmp0,1),0);
+  y256[3] = simde_mm256_insertf128_si256(ytmp3,simde_mm256_extracti128_si256(ytmp2,1),0);
 
   // [y0  y1  y2  y3  y4  y5  y6  y7]
   // [y8  y9  y10 y11 y12 y13 y14 y15]
@@ -2308,8 +2243,8 @@ static inline void dft16_simd256(int16_t *x,int16_t *y)
   // [y24 y25 y26 y27 y28 y29 y30 y31]
 }
 
-#endif  
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
+
 static inline void idft16(int16_t *x,int16_t *y) __attribute__((always_inline));
 
 static inline void idft16(int16_t *x,int16_t *y)
@@ -2374,7 +2309,7 @@ static inline void idft16(int16_t *x,int16_t *y)
   y128[3] = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
   y128[1] = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   int16x8_t *tw16a_128=(int16x8_t *)tw16,*tw16b_128=(int16x8_t *)tw16c,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y;
 
   /*  This is the original version before unrolling
@@ -2438,7 +2373,7 @@ static inline void idft16(int16_t *x,int16_t *y)
   y128[3] = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
   y128[1] = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 }
 
 void idft16f(int16_t *x,int16_t *y) {
@@ -2446,7 +2381,7 @@ void idft16f(int16_t *x,int16_t *y) {
 }
 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
+
 // Does two 16-point IDFTS (x[0 .. 15] is 128 LSBs of input vector, x[16..31] is in 128 MSBs) 
 static inline void idft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline));
 static inline void idft16_simd256(int16_t *x,int16_t *y)
@@ -2455,63 +2390,62 @@ static inline void idft16_simd256(int16_t *x,int16_t *y)
   __m256i *tw16a_256=(__m256i *)tw16rep,*tw16b_256=(__m256i *)tw16crep,*x256=(__m256i *)x,*y256=(__m256i *)y;
   register __m256i x1_flip,x3_flip,x02t,x13t;
   register __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
-  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  register __m256i complex_shuffle = simde_mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
 
   // First stage : 4 Radix-4 butterflies without input twiddles
 
-  x02t    = _mm256_adds_epi16(x256[0],x256[2]);
-  x13t    = _mm256_adds_epi16(x256[1],x256[3]);
-  xtmp0   = _mm256_adds_epi16(x02t,x13t);
-  xtmp2   = _mm256_subs_epi16(x02t,x13t);
-  x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t    = _mm256_subs_epi16(x256[0],x256[2]);
-  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
-  xtmp3   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  xtmp1   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
-
-  ytmp0   = _mm256_unpacklo_epi32(xtmp0,xtmp1);  
-  ytmp1   = _mm256_unpackhi_epi32(xtmp0,xtmp1);
-  ytmp2   = _mm256_unpacklo_epi32(xtmp2,xtmp3);
-  ytmp3   = _mm256_unpackhi_epi32(xtmp2,xtmp3);
-  xtmp0   = _mm256_unpacklo_epi64(ytmp0,ytmp2);
-  xtmp1   = _mm256_unpackhi_epi64(ytmp0,ytmp2);
-  xtmp2   = _mm256_unpacklo_epi64(ytmp1,ytmp3);
-  xtmp3   = _mm256_unpackhi_epi64(ytmp1,ytmp3);
+  x02t    = simde_mm256_adds_epi16(x256[0],x256[2]);
+  x13t    = simde_mm256_adds_epi16(x256[1],x256[3]);
+  xtmp0   = simde_mm256_adds_epi16(x02t,x13t);
+  xtmp2   = simde_mm256_subs_epi16(x02t,x13t);
+  x1_flip = simde_mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = simde_mm256_subs_epi16(x256[0],x256[2]);
+  x13t    = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  xtmp3   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp1   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  ytmp0   = simde_mm256_unpacklo_epi32(xtmp0,xtmp1);  
+  ytmp1   = simde_mm256_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = simde_mm256_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = simde_mm256_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = simde_mm256_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = simde_mm256_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = simde_mm256_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = simde_mm256_unpackhi_epi64(ytmp1,ytmp3);
 
   // Second stage : 4 Radix-4 butterflies with input twiddles
   xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]);
   xtmp2 = packed_cmult2_256(xtmp2,tw16a_256[1],tw16b_256[1]);
   xtmp3 = packed_cmult2_256(xtmp3,tw16a_256[2],tw16b_256[2]);
 
-  x02t    = _mm256_adds_epi16(xtmp0,xtmp2);
-  x13t    = _mm256_adds_epi16(xtmp1,xtmp3);
-  ytmp0   = _mm256_adds_epi16(x02t,x13t);
-  ytmp2   = _mm256_subs_epi16(x02t,x13t);
-  x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
-  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
-  x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
-  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
-  x02t    = _mm256_subs_epi16(xtmp0,xtmp2);
-  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
-  ytmp3   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
-  ytmp1   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+  x02t    = simde_mm256_adds_epi16(xtmp0,xtmp2);
+  x13t    = simde_mm256_adds_epi16(xtmp1,xtmp3);
+  ytmp0   = simde_mm256_adds_epi16(x02t,x13t);
+  ytmp2   = simde_mm256_subs_epi16(x02t,x13t);
+  x1_flip = simde_mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
+  x1_flip = simde_mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = simde_mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
+  x3_flip = simde_mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = simde_mm256_subs_epi16(xtmp0,xtmp2);
+  x13t    = simde_mm256_subs_epi16(x1_flip,x3_flip);
+  ytmp3   = simde_mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  ytmp1   = simde_mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
 
   // [y0  y1  y2  y3  y16 y17 y18 y19]
   // [y4  y5  y6  y7  y20 y21 y22 y23]
   // [y8  y9  y10 y11 y24 y25 y26 y27]
   // [y12 y13 y14 y15 y28 y29 y30 y31]
 
-  y256[0] = _mm256_insertf128_si256(ytmp0,_mm256_extracti128_si256(ytmp1,0),1);
-  y256[1] = _mm256_insertf128_si256(ytmp2,_mm256_extracti128_si256(ytmp3,0),1);
-  y256[2] = _mm256_insertf128_si256(ytmp1,_mm256_extracti128_si256(ytmp0,1),0);
-  y256[3] = _mm256_insertf128_si256(ytmp3,_mm256_extracti128_si256(ytmp2,1),0);
+  y256[0] = simde_mm256_insertf128_si256(ytmp0,simde_mm256_extracti128_si256(ytmp1,0),1);
+  y256[1] = simde_mm256_insertf128_si256(ytmp2,simde_mm256_extracti128_si256(ytmp3,0),1);
+  y256[2] = simde_mm256_insertf128_si256(ytmp1,simde_mm256_extracti128_si256(ytmp0,1),0);
+  y256[3] = simde_mm256_insertf128_si256(ytmp3,simde_mm256_extracti128_si256(ytmp2,1),0);
 
 }
-#endif  
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
 
 // 64-point optimized DFT
 
@@ -2576,14 +2510,12 @@ const static int16_t tw64c[96] __attribute__((aligned(32))) = {
 #define simdshort_q15_t __m64
 #define shiftright_int16(a,shift) _mm_srai_epi16(a,shift)
 #define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b)
-#ifdef __AVX2__
 #define simd256_q15_t __m256i
-#define shiftright_int16_simd256(a,shift) _mm256_srai_epi16(a,shift)
-#define set1_int16_simd256(a) _mm256_set1_epi16(a);
-#define mulhi_int16_simd256(a,b) _mm256_mulhrs_epi16(a,b); //_mm256_slli_epi16(_mm256_mulhi_epi16(a,b),1);
-#endif
+#define shiftright_int16_simd256(a,shift) simde_mm256_srai_epi16(a,shift)
+#define set1_int16_simd256(a) simde_mm256_set1_epi16(a);
+#define mulhi_int16_simd256(a,b) simde_mm256_mulhrs_epi16(a,b); //simde_mm256_slli_epi16(simde_mm256_mulhi_epi16(a,b),1);
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #define simd_q15_t int16x8_t
 #define simdshort_q15_t int16x4_t
 #define shiftright_int16(a,shift) vshrq_n_s16(a,shift)
@@ -2592,129 +2524,14 @@ const static int16_t tw64c[96] __attribute__((aligned(32))) = {
 #define _mm_empty() 
 #define _m_empty()
 
-#endif
-
-#ifndef __AVX2__
-void dft64(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[16],ytmp[16],*tw64a_128=(simd_q15_t *)tw64a,*tw64b_128=(simd_q15_t *)tw64b,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y;
-
-
-#ifdef D64STATS
-  time_stats_t ts_t,ts_d,ts_b;
-
-  reset_meas(&ts_t);
-  reset_meas(&ts_d);
-  reset_meas(&ts_b);
-  start_meas(&ts_t);
-#endif
-
-
-  transpose16_ooff(x128,xtmp,4);
-  // xtmp0  = x00 x10 x20 x30
-  // xtmp4  = x01 x11 x21 x31
-  // xtmp8  = x02 x12 x22 x32
-  // xtmp12 = x03 x13 x23 x33
-  transpose16_ooff(x128+4,xtmp+1,4);
-  // xtmp1  = x40 x50 x60 x70
-  // xtmp5  = x41 x51 x61 x71
-  // xtmp9  = x42 x52 x62 x72
-  // xtmp13 = x43 x53 x63 x73
-  transpose16_ooff(x128+8,xtmp+2,4);
-  // xtmp2  = x80 x90 xa0 xb0
-  // xtmp6  = x41 x51 x61 x71
-  // xtmp10 = x82 x92 xa2 xb2
-  // xtmp14 = x83 x93 xa3 xb3
-  transpose16_ooff(x128+12,xtmp+3,4);
-  // xtmp3  = xc0 xd0 xe0 xf0
-  // xtmp7  = xc1 xd1 xe1 xf1
-  // xtmp11 = xc2 xd2 xe2 xf2
-  // xtmp15 = xc3 xd3 xe3 xf3
-
-#ifdef D64STATS
-  stop_meas(&ts_t);
-  start_meas(&ts_d);
-#endif
-
-  // xtmp0  = x00 x10 x20 x30
-  // xtmp1  = x40 x50 x60 x70
-  // xtmp2  = x80 x90 xa0 xb0
-  // xtmp3  = xc0 xd0 xe0 xf0
-  dft16((int16_t*)(xtmp),(int16_t*)ytmp);
-
-  // xtmp4  = x01 x11 x21 x31
-  // xtmp5  = x41 x51 x61 x71
-  // xtmp6  = x81 x91 xa1 xb1
-  // xtmp7  = xc1 xd1 xe1 xf1
-  dft16((int16_t*)(xtmp+4),(int16_t*)(ytmp+4));
-  dft16((int16_t*)(xtmp+8),(int16_t*)(ytmp+8));
-  dft16((int16_t*)(xtmp+12),(int16_t*)(ytmp+12));
-
-
-#ifdef D64STATS
-  stop_meas(&ts_d);
-  start_meas(&ts_b);
-#endif
-
-
-  bfly4_16(ytmp,ytmp+4,ytmp+8,ytmp+12,
-           y128,y128+4,y128+8,y128+12,
-           tw64a_128,tw64a_128+4,tw64a_128+8,
-           tw64b_128,tw64b_128+4,tw64b_128+8);
-
-  bfly4_16(ytmp+1,ytmp+5,ytmp+9,ytmp+13,
-           y128+1,y128+5,y128+9,y128+13,
-           tw64a_128+1,tw64a_128+5,tw64a_128+9,
-           tw64b_128+1,tw64b_128+5,tw64b_128+9);
-
-  bfly4_16(ytmp+2,ytmp+6,ytmp+10,ytmp+14,
-           y128+2,y128+6,y128+10,y128+14,
-           tw64a_128+2,tw64a_128+6,tw64a_128+10,
-           tw64b_128+2,tw64b_128+6,tw64b_128+10);
-
-  bfly4_16(ytmp+3,ytmp+7,ytmp+11,ytmp+15,
-           y128+3,y128+7,y128+11,y128+15,
-           tw64a_128+3,tw64a_128+7,tw64a_128+11,
-           tw64b_128+3,tw64b_128+7,tw64b_128+11);
-
-#ifdef D64STATS
-  stop_meas(&ts_b);
-  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
-#endif
-
-
-  if (scale>0) {
-    y128[0]  = shiftright_int16(y128[0],3);
-    y128[1]  = shiftright_int16(y128[1],3);
-    y128[2]  = shiftright_int16(y128[2],3);
-    y128[3]  = shiftright_int16(y128[3],3);
-    y128[4]  = shiftright_int16(y128[4],3);
-    y128[5]  = shiftright_int16(y128[5],3);
-    y128[6]  = shiftright_int16(y128[6],3);
-    y128[7]  = shiftright_int16(y128[7],3);
-    y128[8]  = shiftright_int16(y128[8],3);
-    y128[9]  = shiftright_int16(y128[9],3);
-    y128[10] = shiftright_int16(y128[10],3);
-    y128[11] = shiftright_int16(y128[11],3);
-    y128[12] = shiftright_int16(y128[12],3);
-    y128[13] = shiftright_int16(y128[13],3);
-    y128[14] = shiftright_int16(y128[14],3);
-    y128[15] = shiftright_int16(y128[15],3);
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
+#endif // defined(__x86_64__) || defined(__i386__)
 
-#else // __AVX2__
 void dft64(int16_t *x,int16_t *y,unsigned char scale)
 {
 
   simd256_q15_t xtmp[16],ytmp[16],*tw64a_256=(simd256_q15_t *)tw64a,*tw64b_256=(simd256_q15_t *)tw64b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y;
   simd256_q15_t xintl0,xintl1,xintl2,xintl3,xintl4,xintl5,xintl6,xintl7;
-  simd256_q15_t const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+  simd256_q15_t const perm_mask = simde_mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
 
 
 #ifdef D64STATS
@@ -2740,14 +2557,14 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale)
   print_shorts256("x2566",(int16_t*)(x256+6));
   print_shorts256("x2567",(int16_t*)(x256+7));
   */
-  xintl0 = _mm256_permutevar8x32_epi32(x256[0],perm_mask);  // x0  x4  x1  x5  x2  x6  x3  x7
-  xintl1 = _mm256_permutevar8x32_epi32(x256[1],perm_mask);  // x8  x12 x9  x13 x10 x14 x11 x15
-  xintl2 = _mm256_permutevar8x32_epi32(x256[2],perm_mask);  // x16 x20 x17 x21 x18 x22 x19 x23
-  xintl3 = _mm256_permutevar8x32_epi32(x256[3],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-  xintl4 = _mm256_permutevar8x32_epi32(x256[4],perm_mask);  // x32 x28 x25 x29 x26 x30 x27 x31
-  xintl5 = _mm256_permutevar8x32_epi32(x256[5],perm_mask);  // x40 x28 x25 x29 x26 x30 x27 x31
-  xintl6 = _mm256_permutevar8x32_epi32(x256[6],perm_mask);  // x48 x28 x25 x29 x26 x30 x27 x31
-  xintl7 = _mm256_permutevar8x32_epi32(x256[7],perm_mask);  // x56 x28 x25 x29 x26 x30 x27 x31
+  xintl0 = simde_mm256_permutevar8x32_epi32(x256[0],perm_mask);  // x0  x4  x2  x6  x1  x5  x3  x7
+  xintl1 = simde_mm256_permutevar8x32_epi32(x256[1],perm_mask);  // x8  x12 x10 x14 x9  x13 x11 x15
+  xintl2 = simde_mm256_permutevar8x32_epi32(x256[2],perm_mask);  // x16 x20 x18 x22 x17 x21 x19 x23
+  xintl3 = simde_mm256_permutevar8x32_epi32(x256[3],perm_mask);  // x24 x28 x26 x30 x25 x29 x27 x31
+  xintl4 = simde_mm256_permutevar8x32_epi32(x256[4],perm_mask);  // x32 x36 x34 x38 x33 x37 x35 x39
+  xintl5 = simde_mm256_permutevar8x32_epi32(x256[5],perm_mask);  // x40 x44 x42 x46 x41 x45 x43 x47
+  xintl6 = simde_mm256_permutevar8x32_epi32(x256[6],perm_mask);  // x48 x52 x50 x54 x49 x53 x51 x55
+  xintl7 = simde_mm256_permutevar8x32_epi32(x256[7],perm_mask);  // x56 x60 x58 x62 x57 x61 x59 x63
   /*
   print_shorts256("xintl0",(int16_t*)&xintl0);
   print_shorts256("xintl1",(int16_t*)&xintl1);
@@ -2758,14 +2575,14 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale)
   print_shorts256("xintl6",(int16_t*)&xintl6);
   print_shorts256("xintl7",(int16_t*)&xintl7);
   */
-  xtmp[0] = _mm256_unpacklo_epi64(xintl0,xintl1);        // x0  x4  x8  x12 x1  x5  x9  x13
-  xtmp[4] = _mm256_unpackhi_epi64(xintl0,xintl1);        // x2  x6  x10 x14 x3  x7  x11 x15
-  xtmp[1] = _mm256_unpacklo_epi64(xintl2,xintl3);        // x16 x20 x24 x28 x17 x21 x25 x29
-  xtmp[5] = _mm256_unpackhi_epi64(xintl2,xintl3);        // x18 x22 x26 x30 x19 x23 x27 x31
-  xtmp[2] = _mm256_unpacklo_epi64(xintl4,xintl5);        // x32 x36 x40 x44 x33 x37 x41 x45
-  xtmp[6] = _mm256_unpackhi_epi64(xintl4,xintl5);        // x34 x38 x42 x46 x35 x39 x43 x47
-  xtmp[3] = _mm256_unpacklo_epi64(xintl6,xintl7);        // x48 x52 x56 x60 x49 x53 x57 x61
-  xtmp[7] = _mm256_unpackhi_epi64(xintl6,xintl7);        // x50 x54 x58 x62 x51 x55 x59 x63
+  xtmp[0] = simde_mm256_unpacklo_epi64(xintl0,xintl1);        // x0  x4  x8  x12 x1  x5  x9  x13
+  xtmp[4] = simde_mm256_unpackhi_epi64(xintl0,xintl1);        // x2  x6  x10 x14 x3  x7  x11 x15
+  xtmp[1] = simde_mm256_unpacklo_epi64(xintl2,xintl3);        // x16 x20 x24 x28 x17 x21 x25 x29
+  xtmp[5] = simde_mm256_unpackhi_epi64(xintl2,xintl3);        // x18 x22 x26 x30 x19 x23 x27 x31
+  xtmp[2] = simde_mm256_unpacklo_epi64(xintl4,xintl5);        // x32 x36 x40 x44 x33 x37 x41 x45
+  xtmp[6] = simde_mm256_unpackhi_epi64(xintl4,xintl5);        // x34 x38 x42 x46 x35 x39 x43 x47
+  xtmp[3] = simde_mm256_unpacklo_epi64(xintl6,xintl7);        // x48 x52 x56 x60 x49 x53 x57 x61
+  xtmp[7] = simde_mm256_unpackhi_epi64(xintl6,xintl7);        // x50 x54 x58 x62 x51 x55 x59 x63
   /*
   print_shorts256("xtmp0",(int16_t*)xtmp);
   print_shorts256("xtmp1",(int16_t*)(xtmp+1));
@@ -2853,108 +2670,14 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale)
   _m_empty();
 
 
-}
-#endif
-
-#ifndef __AVX2__
-void idft64(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[16],ytmp[16],*tw64a_128=(simd_q15_t *)tw64,*tw64b_128=(simd_q15_t *)tw64c,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y;
-
-
-#ifdef D64STATS
-  time_stats_t ts_t,ts_d,ts_b;
-
-  reset_meas(&ts_t);
-  reset_meas(&ts_d);
-  reset_meas(&ts_b);
-  start_meas(&ts_t);
-#endif
-
-
-  transpose16_ooff(x128,xtmp,4);
-  transpose16_ooff(x128+4,xtmp+1,4);
-  transpose16_ooff(x128+8,xtmp+2,4);
-  transpose16_ooff(x128+12,xtmp+3,4);
-
-
-#ifdef D64STATS
-  stop_meas(&ts_t);
-  start_meas(&ts_d);
-#endif
-
-
-  idft16((int16_t*)(xtmp),(int16_t*)ytmp);
-  idft16((int16_t*)(xtmp+4),(int16_t*)(ytmp+4));
-  idft16((int16_t*)(xtmp+8),(int16_t*)(ytmp+8));
-  idft16((int16_t*)(xtmp+12),(int16_t*)(ytmp+12));
-
-
-#ifdef D64STATS
-  stop_meas(&ts_d);
-  start_meas(&ts_b);
-#endif
-
-
-  ibfly4_16(ytmp,ytmp+4,ytmp+8,ytmp+12,
-            y128,y128+4,y128+8,y128+12,
-            tw64a_128,tw64a_128+4,tw64a_128+8,
-            tw64b_128,tw64b_128+4,tw64b_128+8);
-  ibfly4_16(ytmp+1,ytmp+5,ytmp+9,ytmp+13,
-            y128+1,y128+5,y128+9,y128+13,
-            tw64a_128+1,tw64a_128+5,tw64a_128+9,
-            tw64b_128+1,tw64b_128+5,tw64b_128+9);
-
-  ibfly4_16(ytmp+2,ytmp+6,ytmp+10,ytmp+14,
-            y128+2,y128+6,y128+10,y128+14,
-            tw64a_128+2,tw64a_128+6,tw64a_128+10,
-            tw64b_128+2,tw64b_128+6,tw64b_128+10);
-
-  ibfly4_16(ytmp+3,ytmp+7,ytmp+11,ytmp+15,
-            y128+3,y128+7,y128+11,y128+15,
-            tw64a_128+3,tw64a_128+7,tw64a_128+11,
-            tw64b_128+3,tw64b_128+7,tw64b_128+11);
-
-#ifdef D64STATS
-  stop_meas(&ts_b);
-  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
-#endif
-
-
-  if (scale>0) {
-
-    y128[0]  = shiftright_int16(y128[0],3);
-    y128[1]  = shiftright_int16(y128[1],3);
-    y128[2]  = shiftright_int16(y128[2],3);
-    y128[3]  = shiftright_int16(y128[3],3);
-    y128[4]  = shiftright_int16(y128[4],3);
-    y128[5]  = shiftright_int16(y128[5],3);
-    y128[6]  = shiftright_int16(y128[6],3);
-    y128[7]  = shiftright_int16(y128[7],3);
-    y128[8]  = shiftright_int16(y128[8],3);
-    y128[9]  = shiftright_int16(y128[9],3);
-    y128[10] = shiftright_int16(y128[10],3);
-    y128[11] = shiftright_int16(y128[11],3);
-    y128[12] = shiftright_int16(y128[12],3);
-    y128[13] = shiftright_int16(y128[13],3);
-    y128[14] = shiftright_int16(y128[14],3);
-    y128[15] = shiftright_int16(y128[15],3);
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
 }
 
-#else // __AVX2__
 void idft64(int16_t *x,int16_t *y,unsigned char scale)
 {
 
   simd256_q15_t xtmp[16],ytmp[16],*tw64a_256=(simd256_q15_t *)tw64,*tw64b_256=(simd256_q15_t *)tw64c,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y;
   register simd256_q15_t xintl0,xintl1,xintl2,xintl3,xintl4,xintl5,xintl6,xintl7;
-  simd256_q15_t const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+  simd256_q15_t const perm_mask = simde_mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
 
 
 #ifdef D64STATS
@@ -2971,23 +2694,23 @@ void idft64(int16_t *x,int16_t *y,unsigned char scale)
   start_meas(&ts_d);
 #endif
 
-  xintl0 = _mm256_permutevar8x32_epi32(x256[0],perm_mask);  // x0  x4  x1  x5  x2  x6  x3  x7
-  xintl1 = _mm256_permutevar8x32_epi32(x256[1],perm_mask);  // x8  x12 x9  x13 x10 x14 x11 x15
-  xintl2 = _mm256_permutevar8x32_epi32(x256[2],perm_mask);  // x16 x20 x17 x21 x18 x22 x19 x23
-  xintl3 = _mm256_permutevar8x32_epi32(x256[3],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-  xintl4 = _mm256_permutevar8x32_epi32(x256[4],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-  xintl5 = _mm256_permutevar8x32_epi32(x256[5],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-  xintl6 = _mm256_permutevar8x32_epi32(x256[6],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-  xintl7 = _mm256_permutevar8x32_epi32(x256[7],perm_mask);  // x24 x28 x25 x29 x26 x30 x27 x31
-
-  xtmp[0] = _mm256_unpacklo_epi64(xintl0,xintl1);        // x0  x4  x8  x12 x1  x5  x9  x13
-  xtmp[4] = _mm256_unpackhi_epi64(xintl0,xintl1);        // x2  x6  x10 x14 x3  x7  x11 x15
-  xtmp[1] = _mm256_unpacklo_epi64(xintl2,xintl3);        // x16 x20 x24 x28 x17 x21 x25 x29
-  xtmp[5] = _mm256_unpackhi_epi64(xintl2,xintl3);        // x18 x22 x26 x30 x19 x23 x27 x31
-  xtmp[2] = _mm256_unpacklo_epi64(xintl4,xintl5);        // x32 x36 x40 x44 x33 x37 x41 x45
-  xtmp[6] = _mm256_unpackhi_epi64(xintl4,xintl5);        // x34 x38 x42 x46 x35 x39 x43 x47
-  xtmp[3] = _mm256_unpacklo_epi64(xintl6,xintl7);        // x48 x52 x56 x60 x49 x53 x57 x61
-  xtmp[7] = _mm256_unpackhi_epi64(xintl6,xintl7);        // x50 x54 x58 x62 x51 x55 x59 x63
+  xintl0 = simde_mm256_permutevar8x32_epi32(x256[0],perm_mask);  // x0  x4  x2  x6  x1  x5  x3  x7
+  xintl1 = simde_mm256_permutevar8x32_epi32(x256[1],perm_mask);  // x8  x12 x10 x14 x9  x13 x11 x15
+  xintl2 = simde_mm256_permutevar8x32_epi32(x256[2],perm_mask);  // x16 x20 x18 x22 x17 x21 x19 x23
+  xintl3 = simde_mm256_permutevar8x32_epi32(x256[3],perm_mask);  // x24 x28 x26 x30 x25 x29 x27 x31
+  xintl4 = simde_mm256_permutevar8x32_epi32(x256[4],perm_mask);  // x32 x36 x34 x38 x33 x37 x35 x39
+  xintl5 = simde_mm256_permutevar8x32_epi32(x256[5],perm_mask);  // x40 x44 x42 x46 x41 x45 x43 x47
+  xintl6 = simde_mm256_permutevar8x32_epi32(x256[6],perm_mask);  // x48 x52 x50 x54 x49 x53 x51 x55
+  xintl7 = simde_mm256_permutevar8x32_epi32(x256[7],perm_mask);  // x56 x60 x58 x62 x57 x61 x59 x63
+
+  xtmp[0] = simde_mm256_unpacklo_epi64(xintl0,xintl1);        // x0  x4  x8  x12 x1  x5  x9  x13
+  xtmp[4] = simde_mm256_unpackhi_epi64(xintl0,xintl1);        // x2  x6  x10 x14 x3  x7  x11 x15
+  xtmp[1] = simde_mm256_unpacklo_epi64(xintl2,xintl3);        // x16 x20 x24 x28 x17 x21 x25 x29
+  xtmp[5] = simde_mm256_unpackhi_epi64(xintl2,xintl3);        // x18 x22 x26 x30 x19 x23 x27 x31
+  xtmp[2] = simde_mm256_unpacklo_epi64(xintl4,xintl5);        // x32 x36 x40 x44 x33 x37 x41 x45
+  xtmp[6] = simde_mm256_unpackhi_epi64(xintl4,xintl5);        // x34 x38 x42 x46 x35 x39 x43 x47
+  xtmp[3] = simde_mm256_unpacklo_epi64(xintl6,xintl7);        // x48 x52 x56 x60 x49 x53 x57 x61
+  xtmp[7] = simde_mm256_unpackhi_epi64(xintl6,xintl7);        // x50 x54 x58 x62 x51 x55 x59 x63
 
 
   idft16_simd256((int16_t*)(xtmp),(int16_t*)ytmp);
@@ -3048,7 +2771,6 @@ void idft64(int16_t *x,int16_t *y,unsigned char scale)
   _m_empty();
 
 }
-#endif
 
 int16_t tw128[128] __attribute__((aligned(32))) = {  32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608};
 
@@ -3058,118 +2780,6 @@ int16_t tw128b[128] __attribute__((aligned(32))) = {0,32767,-1608,32727,-3212,32
 
 int16_t tw128c[128] __attribute__((aligned(32))) = {0,32767,1608,32727,3212,32609,4808,32412,6393,32137,7962,31785,9512,31356,11039,30851,12540,30272,14010,29621,15447,28897,16846,28105,18205,27244,19520,26318,20788,25329,22005,24278,23170,23169,24279,22004,25330,20787,26319,19519,27245,18204,28106,16845,28898,15446,29622,14009,30273,12539,30852,11038,31357,9511,31786,7961,32138,6392,32413,4807,32610,3211,32728,1607,32767,0,32728,-1608,32610,-3212,32413,-4808,32138,-6393,31786,-7962,31357,-9512,30852,-11039,30273,-12540,29622,-14010,28898,-15447,28106,-16846,27245,-18205,26319,-19520,25330,-20788,24279,-22005,23170,-23170,22005,-24279,20788,-25330,19520,-26319,18205,-27245,16846,-28106,15447,-28898,14010,-29622,12540,-30273,11039,-30852,9512,-31357,7962,-31786,6393,-32138,4808,-32413,3212,-32610,1608,-32728};
 
-#ifndef __AVX2__
-void dft128(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[64],*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[32],*tw128a_128p=(simd_q15_t *)tw128a,*tw128b_128p=(simd_q15_t *)tw128b,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-
-  transpose4_ooff(x64  ,xtmp,32);
-  transpose4_ooff(x64+2,xtmp+1,32);
-  transpose4_ooff(x64+4,xtmp+2,32);
-  transpose4_ooff(x64+6,xtmp+3,32);
-  transpose4_ooff(x64+8,xtmp+4,32);
-  transpose4_ooff(x64+10,xtmp+5,32);
-  transpose4_ooff(x64+12,xtmp+6,32);
-  transpose4_ooff(x64+14,xtmp+7,32);
-  transpose4_ooff(x64+16,xtmp+8,32);
-  transpose4_ooff(x64+18,xtmp+9,32);
-  transpose4_ooff(x64+20,xtmp+10,32);
-  transpose4_ooff(x64+22,xtmp+11,32);
-  transpose4_ooff(x64+24,xtmp+12,32);
-  transpose4_ooff(x64+26,xtmp+13,32);
-  transpose4_ooff(x64+28,xtmp+14,32);
-  transpose4_ooff(x64+30,xtmp+15,32);
-  transpose4_ooff(x64+32,xtmp+16,32);
-  transpose4_ooff(x64+34,xtmp+17,32);
-  transpose4_ooff(x64+36,xtmp+18,32);
-  transpose4_ooff(x64+38,xtmp+19,32);
-  transpose4_ooff(x64+40,xtmp+20,32);
-  transpose4_ooff(x64+42,xtmp+21,32);
-  transpose4_ooff(x64+44,xtmp+22,32);
-  transpose4_ooff(x64+46,xtmp+23,32);
-  transpose4_ooff(x64+48,xtmp+24,32);
-  transpose4_ooff(x64+50,xtmp+25,32);
-  transpose4_ooff(x64+52,xtmp+26,32);
-  transpose4_ooff(x64+54,xtmp+27,32);
-  transpose4_ooff(x64+56,xtmp+28,32);
-  transpose4_ooff(x64+58,xtmp+29,32);
-  transpose4_ooff(x64+60,xtmp+30,32);
-  transpose4_ooff(x64+62,xtmp+31,32);
-
-  dft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  dft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1);
-#ifndef MR_MAIN
-  if (LOG_DUMPFLAG(DEBUG_DFT)) {
-    LOG_M("dft128a.m","dfta",ytmp,64,1,1);
-    LOG_M("dft128b.m","dftb",ytmp+16,64,1,1);
-  }
-#endif
-  for (i=0; i<16; i++) {
-    bfly2_16(ytmpp,ytmpp+16,
-             y128p,y128p+16,
-             tw128a_128p,
-             tw128b_128p);
-    tw128a_128p++;
-    tw128b_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    y128[0] = mulhi_int16(y128[0],ONE_OVER_SQRT2_Q15_128);
-    y128[1] = mulhi_int16(y128[1],ONE_OVER_SQRT2_Q15_128);
-    y128[2] = mulhi_int16(y128[2],ONE_OVER_SQRT2_Q15_128);
-    y128[3] = mulhi_int16(y128[3],ONE_OVER_SQRT2_Q15_128);
-    y128[4] = mulhi_int16(y128[4],ONE_OVER_SQRT2_Q15_128);
-    y128[5] = mulhi_int16(y128[5],ONE_OVER_SQRT2_Q15_128);
-    y128[6] = mulhi_int16(y128[6],ONE_OVER_SQRT2_Q15_128);
-    y128[7] = mulhi_int16(y128[7],ONE_OVER_SQRT2_Q15_128);
-    y128[8] = mulhi_int16(y128[8],ONE_OVER_SQRT2_Q15_128);
-    y128[9] = mulhi_int16(y128[9],ONE_OVER_SQRT2_Q15_128);
-    y128[10] = mulhi_int16(y128[10],ONE_OVER_SQRT2_Q15_128);
-    y128[11] = mulhi_int16(y128[11],ONE_OVER_SQRT2_Q15_128);
-    y128[12] = mulhi_int16(y128[12],ONE_OVER_SQRT2_Q15_128);
-    y128[13] = mulhi_int16(y128[13],ONE_OVER_SQRT2_Q15_128);
-    y128[14] = mulhi_int16(y128[14],ONE_OVER_SQRT2_Q15_128);
-    y128[15] = mulhi_int16(y128[15],ONE_OVER_SQRT2_Q15_128);
-    y128[16] = mulhi_int16(y128[16],ONE_OVER_SQRT2_Q15_128);
-    y128[17] = mulhi_int16(y128[17],ONE_OVER_SQRT2_Q15_128);
-    y128[18] = mulhi_int16(y128[18],ONE_OVER_SQRT2_Q15_128);
-    y128[19] = mulhi_int16(y128[19],ONE_OVER_SQRT2_Q15_128);
-    y128[20] = mulhi_int16(y128[20],ONE_OVER_SQRT2_Q15_128);
-    y128[21] = mulhi_int16(y128[21],ONE_OVER_SQRT2_Q15_128);
-    y128[22] = mulhi_int16(y128[22],ONE_OVER_SQRT2_Q15_128);
-    y128[23] = mulhi_int16(y128[23],ONE_OVER_SQRT2_Q15_128);
-    y128[24] = mulhi_int16(y128[24],ONE_OVER_SQRT2_Q15_128);
-    y128[25] = mulhi_int16(y128[25],ONE_OVER_SQRT2_Q15_128);
-    y128[26] = mulhi_int16(y128[26],ONE_OVER_SQRT2_Q15_128);
-    y128[27] = mulhi_int16(y128[27],ONE_OVER_SQRT2_Q15_128);
-    y128[28] = mulhi_int16(y128[28],ONE_OVER_SQRT2_Q15_128);
-    y128[29] = mulhi_int16(y128[29],ONE_OVER_SQRT2_Q15_128);
-    y128[30] = mulhi_int16(y128[30],ONE_OVER_SQRT2_Q15_128);
-    y128[31] = mulhi_int16(y128[31],ONE_OVER_SQRT2_Q15_128);
-
-
-  }
-#ifndef MR_MAIN
-  if (LOG_DUMPFLAG(DEBUG_DFT)) {
-     LOG_M("dft128out.m","dft128",y,128,1,1);
-     exit(-1);
-  }
-#endif
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else // __AVX2__
 void dft128(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -3241,108 +2851,6 @@ void dft128(int16_t *x,int16_t *y,unsigned char scale)
 #endif
 }
 
-#endif
-
-#ifndef __AVX2__
-void idft128(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[64],*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[32],*tw128_128p=(simd_q15_t *)tw128,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-
-  transpose4_ooff(x64  ,xtmp,32);
-  transpose4_ooff(x64+2,xtmp+1,32);
-  transpose4_ooff(x64+4,xtmp+2,32);
-  transpose4_ooff(x64+6,xtmp+3,32);
-  transpose4_ooff(x64+8,xtmp+4,32);
-  transpose4_ooff(x64+10,xtmp+5,32);
-  transpose4_ooff(x64+12,xtmp+6,32);
-  transpose4_ooff(x64+14,xtmp+7,32);
-  transpose4_ooff(x64+16,xtmp+8,32);
-  transpose4_ooff(x64+18,xtmp+9,32);
-  transpose4_ooff(x64+20,xtmp+10,32);
-  transpose4_ooff(x64+22,xtmp+11,32);
-  transpose4_ooff(x64+24,xtmp+12,32);
-  transpose4_ooff(x64+26,xtmp+13,32);
-  transpose4_ooff(x64+28,xtmp+14,32);
-  transpose4_ooff(x64+30,xtmp+15,32);
-  transpose4_ooff(x64+32,xtmp+16,32);
-  transpose4_ooff(x64+34,xtmp+17,32);
-  transpose4_ooff(x64+36,xtmp+18,32);
-  transpose4_ooff(x64+38,xtmp+19,32);
-  transpose4_ooff(x64+40,xtmp+20,32);
-  transpose4_ooff(x64+42,xtmp+21,32);
-  transpose4_ooff(x64+44,xtmp+22,32);
-  transpose4_ooff(x64+46,xtmp+23,32);
-  transpose4_ooff(x64+48,xtmp+24,32);
-  transpose4_ooff(x64+50,xtmp+25,32);
-  transpose4_ooff(x64+52,xtmp+26,32);
-  transpose4_ooff(x64+54,xtmp+27,32);
-  transpose4_ooff(x64+56,xtmp+28,32);
-  transpose4_ooff(x64+58,xtmp+29,32);
-  transpose4_ooff(x64+60,xtmp+30,32);
-  transpose4_ooff(x64+62,xtmp+31,32);
-
-  idft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1);
-
-
-  for (i=0; i<16; i++) {
-    ibfly2(ytmpp,ytmpp+16,
-           y128p,y128p+16,
-           tw128_128p);
-    tw128_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    y128[0]  = mulhi_int16(y128[0],ONE_OVER_SQRT2_Q15_128);
-    y128[1]  = mulhi_int16(y128[1],ONE_OVER_SQRT2_Q15_128);
-    y128[2]  = mulhi_int16(y128[2],ONE_OVER_SQRT2_Q15_128);
-    y128[3]  = mulhi_int16(y128[3],ONE_OVER_SQRT2_Q15_128);
-    y128[4]  = mulhi_int16(y128[4],ONE_OVER_SQRT2_Q15_128);
-    y128[5]  = mulhi_int16(y128[5],ONE_OVER_SQRT2_Q15_128);
-    y128[6]  = mulhi_int16(y128[6],ONE_OVER_SQRT2_Q15_128);
-    y128[7]  = mulhi_int16(y128[7],ONE_OVER_SQRT2_Q15_128);
-    y128[8]  = mulhi_int16(y128[8],ONE_OVER_SQRT2_Q15_128);
-    y128[9]  = mulhi_int16(y128[9],ONE_OVER_SQRT2_Q15_128);
-    y128[10] = mulhi_int16(y128[10],ONE_OVER_SQRT2_Q15_128);
-    y128[11] = mulhi_int16(y128[11],ONE_OVER_SQRT2_Q15_128);
-    y128[12] = mulhi_int16(y128[12],ONE_OVER_SQRT2_Q15_128);
-    y128[13] = mulhi_int16(y128[13],ONE_OVER_SQRT2_Q15_128);
-    y128[14] = mulhi_int16(y128[14],ONE_OVER_SQRT2_Q15_128);
-    y128[15] = mulhi_int16(y128[15],ONE_OVER_SQRT2_Q15_128);
-    y128[16] = mulhi_int16(y128[16],ONE_OVER_SQRT2_Q15_128);
-    y128[17] = mulhi_int16(y128[17],ONE_OVER_SQRT2_Q15_128);
-    y128[18] = mulhi_int16(y128[18],ONE_OVER_SQRT2_Q15_128);
-    y128[19] = mulhi_int16(y128[19],ONE_OVER_SQRT2_Q15_128);
-    y128[20] = mulhi_int16(y128[20],ONE_OVER_SQRT2_Q15_128);
-    y128[21] = mulhi_int16(y128[21],ONE_OVER_SQRT2_Q15_128);
-    y128[22] = mulhi_int16(y128[22],ONE_OVER_SQRT2_Q15_128);
-    y128[23] = mulhi_int16(y128[23],ONE_OVER_SQRT2_Q15_128);
-    y128[24] = mulhi_int16(y128[24],ONE_OVER_SQRT2_Q15_128);
-    y128[25] = mulhi_int16(y128[25],ONE_OVER_SQRT2_Q15_128);
-    y128[26] = mulhi_int16(y128[26],ONE_OVER_SQRT2_Q15_128);
-    y128[27] = mulhi_int16(y128[27],ONE_OVER_SQRT2_Q15_128);
-    y128[28] = mulhi_int16(y128[28],ONE_OVER_SQRT2_Q15_128);
-    y128[29] = mulhi_int16(y128[29],ONE_OVER_SQRT2_Q15_128);
-    y128[30] = mulhi_int16(y128[30],ONE_OVER_SQRT2_Q15_128);
-    y128[31] = mulhi_int16(y128[31],ONE_OVER_SQRT2_Q15_128);
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else // __AVX2__
 void idft128(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -3399,8 +2907,6 @@ void idft128(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif
-
 int16_t tw256[384] __attribute__((aligned(32))) = {  32767,0,32757,-805,32727,-1608,32678,-2411,32609,-3212,32520,-4012,32412,-4808,32284,-5602,32137,-6393,31970,-7180,31785,-7962,31580,-8740,31356,-9512,31113,-10279,30851,-11039,30571,-11793,30272,-12540,29955,-13279,29621,-14010,29268,-14733,28897,-15447,28510,-16151,28105,-16846,27683,-17531,27244,-18205,26789,-18868,26318,-19520,25831,-20160,25329,-20788,24811,-21403,24278,-22005,23731,-22595,23169,-23170,22594,-23732,22004,-24279,21402,-24812,20787,-25330,20159,-25832,19519,-26319,18867,-26790,18204,-27245,17530,-27684,16845,-28106,16150,-28511,15446,-28898,14732,-29269,14009,-29622,13278,-29956,12539,-30273,11792,-30572,11038,-30852,10278,-31114,9511,-31357,8739,-31581,7961,-31786,7179,-31971,6392,-32138,5601,-32285,4807,-32413,4011,-32521,3211,-32610,2410,-32679,1607,-32728,804,-32758,
                                                      32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608,
                                                      32767,0,32678,-2411,32412,-4808,31970,-7180,31356,-9512,30571,-11793,29621,-14010,28510,-16151,27244,-18205,25831,-20160,24278,-22005,22594,-23732,20787,-25330,18867,-26790,16845,-28106,14732,-29269,12539,-30273,10278,-31114,7961,-31786,5601,-32285,3211,-32610,804,-32758,-1608,-32728,-4012,-32521,-6393,-32138,-8740,-31581,-11039,-30852,-13279,-29956,-15447,-28898,-17531,-27684,-19520,-26319,-21403,-24812,-23170,-23170,-24812,-21403,-26319,-19520,-27684,-17531,-28898,-15447,-29956,-13279,-30852,-11039,-31581,-8740,-32138,-6393,-32521,-4012,-32728,-1608,-32758,804,-32610,3211,-32285,5601,-31786,7961,-31114,10278,-30273,12539,-29269,14732,-28106,16845,-26790,18867,-25330,20787,-23732,22594,-22005,24278,-20160,25831,-18205,27244,-16151,28510,-14010,29621,-11793,30571,-9512,31356,-7180,31970,-4808,32412,-2411,32678
@@ -3415,192 +2921,21 @@ int16_t tw256b[384] __attribute__((aligned(32))) = {0,32767,-805,32757,-1608,327
                                                     0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,-32767,0,-32728,-1608,-32610,-3212,-32413,-4808,-32138,-6393,-31786,-7962,-31357,-9512,-30852,-11039,-30273,-12540,-29622,-14010,-28898,-15447,-28106,-16846,-27245,-18205,-26319,-19520,-25330,-20788,-24279,-22005,-23170,-23170,-22005,-24279,-20788,-25330,-19520,-26319,-18205,-27245,-16846,-28106,-15447,-28898,-14010,-29622,-12540,-30273,-11039,-30852,-9512,-31357,-7962,-31786,-6393,-32138,-4808,-32413,-3212,-32610,-1608,-32728,
                                                     0,32767,-2411,32678,-4808,32412,-7180,31970,-9512,31356,-11793,30571,-14010,29621,-16151,28510,-18205,27244,-20160,25831,-22005,24278,-23732,22594,-25330,20787,-26790,18867,-28106,16845,-29269,14732,-30273,12539,-31114,10278,-31786,7961,-32285,5601,-32610,3211,-32758,804,-32728,-1608,-32521,-4012,-32138,-6393,-31581,-8740,-30852,-11039,-29956,-13279,-28898,-15447,-27684,-17531,-26319,-19520,-24812,-21403,-23170,-23170,-21403,-24812,-19520,-26319,-17531,-27684,-15447,-28898,-13279,-29956,-11039,-30852,-8740,-31581,-6393,-32138,-4012,-32521,-1608,-32728,804,-32758,3211,-32610,5601,-32285,7961,-31786,10278,-31114,12539,-30273,14732,-29269,16845,-28106,18867,-26790,20787,-25330,22594,-23732,24278,-22005,25831,-20160,27244,-18205,28510,-16151,29621,-14010,30571,-11793,31356,-9512,31970,-7180,32412,-4808,32678,-2411
                                                    };
-#ifndef __AVX2__
 void dft256(int16_t *x,int16_t *y,unsigned char scale)
 {
 
-  simd_q15_t xtmp[64],ytmp[64],*tw256a_128p=(simd_q15_t *)tw256a,*tw256b_128p=(simd_q15_t *)tw256b,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
+  simd256_q15_t xtmp[32],ytmp[32],*tw256a_256p=(simd256_q15_t *)tw256a,*tw256b_256p=(simd256_q15_t *)tw256b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
   int i;
 
-#ifdef D256STATS
-  time_stats_t ts_t,ts_d,ts_b;
-
-  reset_meas(&ts_t);
-  reset_meas(&ts_d);
-  reset_meas(&ts_b);
-  start_meas(&ts_t);
-#endif
-  /*
-  for (i=0,j=0;i<64;i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,16);
-  }
-  */
-  transpose16_ooff(x128+0,xtmp+0,16);
-  transpose16_ooff(x128+4,xtmp+1,16);
-  transpose16_ooff(x128+8,xtmp+2,16);
-  transpose16_ooff(x128+12,xtmp+3,16);
-  transpose16_ooff(x128+16,xtmp+4,16);
-  transpose16_ooff(x128+20,xtmp+5,16);
-  transpose16_ooff(x128+24,xtmp+6,16);
-  transpose16_ooff(x128+28,xtmp+7,16);
-  transpose16_ooff(x128+32,xtmp+8,16);
-  transpose16_ooff(x128+36,xtmp+9,16);
-  transpose16_ooff(x128+40,xtmp+10,16);
-  transpose16_ooff(x128+44,xtmp+11,16);
-  transpose16_ooff(x128+48,xtmp+12,16);
-  transpose16_ooff(x128+52,xtmp+13,16);
-  transpose16_ooff(x128+56,xtmp+14,16);
-  transpose16_ooff(x128+60,xtmp+15,16);
-
-#ifdef D256STATS
-  stop_meas(&ts_t);
-  start_meas(&ts_d);
-#endif
-
-  dft64((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  dft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1);
-  dft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1);
-  dft64((int16_t*)(xtmp+48),(int16_t*)(ytmp+48),1);
-
-#ifdef D256STATS
-  stop_meas(&ts_d);
-  start_meas(&ts_b);
-#endif
-
-  for (i=0; i<16; i+=4) {
-    bfly4_16(ytmpp,ytmpp+16,ytmpp+32,ytmpp+48,
-             y128p,y128p+16,y128p+32,y128p+48,
-             tw256a_128p,tw256a_128p+16,tw256a_128p+32,
-             tw256b_128p,tw256b_128p+16,tw256b_128p+32);
-    bfly4_16(ytmpp+1,ytmpp+17,ytmpp+33,ytmpp+49,
-             y128p+1,y128p+17,y128p+33,y128p+49,
-             tw256a_128p+1,tw256a_128p+17,tw256a_128p+33,
-             tw256b_128p+1,tw256b_128p+17,tw256b_128p+33);
-    bfly4_16(ytmpp+2,ytmpp+18,ytmpp+34,ytmpp+50,
-             y128p+2,y128p+18,y128p+34,y128p+50,
-             tw256a_128p+2,tw256a_128p+18,tw256a_128p+34,
-             tw256b_128p+2,tw256b_128p+18,tw256b_128p+34);
-    bfly4_16(ytmpp+3,ytmpp+19,ytmpp+35,ytmpp+51,
-             y128p+3,y128p+19,y128p+35,y128p+51,
-             tw256a_128p+3,tw256a_128p+19,tw256a_128p+35,
-             tw256b_128p+3,tw256b_128p+19,tw256b_128p+35);
-    tw256a_128p+=4;
-    tw256b_128p+=4;
-    y128p+=4;
-    ytmpp+=4;
-  }
-
-#ifdef D256STATS
-  stop_meas(&ts_b);
-  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
-#endif
-
-  if (scale>0) {
-
-    for (i=0; i<4; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-
-
-void idft256(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[64],ytmp[64],*tw256_128p=(simd_q15_t *)tw256,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<64; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,16);
-  }
-
-
-  idft64((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  idft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1);
-  idft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1);
-  idft64((int16_t*)(xtmp+48),(int16_t*)(ytmp+48),1);
-
-  for (i=0; i<16; i++) {
-    ibfly4(ytmpp,ytmpp+16,ytmpp+32,ytmpp+48,
-           y128p,y128p+16,y128p+32,y128p+48,
-           tw256_128p,tw256_128p+16,tw256_128p+32);
-    tw256_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<4; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else //__AVX2__
-
-void dft256(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd256_q15_t xtmp[32],ytmp[32],*tw256a_256p=(simd256_q15_t *)tw256a,*tw256b_256p=(simd256_q15_t *)tw256b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
-  simd256_q15_t *ytmpp = &ytmp[0];
-  int i;
-
-  transpose16_ooff_simd256(x256+0,xtmp+0,8);
-  transpose16_ooff_simd256(x256+4,xtmp+1,8);
-  transpose16_ooff_simd256(x256+8,xtmp+2,8);
-  transpose16_ooff_simd256(x256+12,xtmp+3,8);
-  transpose16_ooff_simd256(x256+16,xtmp+4,8);
-  transpose16_ooff_simd256(x256+20,xtmp+5,8);
-  transpose16_ooff_simd256(x256+24,xtmp+6,8);
-  transpose16_ooff_simd256(x256+28,xtmp+7,8);
+  transpose16_ooff_simd256(x256+0,xtmp+0,8);
+  transpose16_ooff_simd256(x256+4,xtmp+1,8);
+  transpose16_ooff_simd256(x256+8,xtmp+2,8);
+  transpose16_ooff_simd256(x256+12,xtmp+3,8);
+  transpose16_ooff_simd256(x256+16,xtmp+4,8);
+  transpose16_ooff_simd256(x256+20,xtmp+5,8);
+  transpose16_ooff_simd256(x256+24,xtmp+6,8);
+  transpose16_ooff_simd256(x256+28,xtmp+7,8);
   /*
   char vname[10];
   for (i=0;i<32;i++) {
@@ -3763,7 +3098,6 @@ void idft256(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif
 int16_t tw512[512] __attribute__((aligned(32))) = {
   32767,0,32764,-403,32757,-805,32744,-1207,32727,-1608,32705,-2010,32678,-2411,32646,-2812,32609,-3212,32567,-3612,32520,-4012,32468,-4410,32412,-4808,32350,-5206,32284,-5602,32213,-5998,32137,-6393,32056,-6787,31970,-7180,31880,-7572,31785,-7962,31684,-8352,31580,-8740,31470,-9127,31356,-9512,31236,-9896,31113,-10279,30984,-10660,30851,-11039,30713,-11417,30571,-11793,30424,-12167,30272,-12540,30116,-12910,29955,-13279,29790,-13646,29621,-14010,29446,-14373,29268,-14733,29085,-15091,28897,-15447,28706,-15800,28510,-16151,28309,-16500,28105,-16846,27896,-17190,27683,-17531,27466,-17869,27244,-18205,27019,-18538,26789,-18868,26556,-19195,26318,-19520,26077,-19841,25831,-20160,25582,-20475,25329,-20788,25072,-21097,24811,-21403,24546,-21706,24278,-22005,24006,-22302,23731,-22595,23452,-22884,23169,-23170,22883,-23453,22594,-23732,22301,-24007,22004,-24279,21705,-24547,21402,-24812,21096,-25073,20787,-25330,20474,-25583,20159,-25832,19840,-26078,19519,-26319,19194,-26557,18867,-26790,18537,-27020,18204,-27245,17868,-27467,17530,-27684,17189,-27897,16845,-28106,16499,-28310,16150,-28511,15799,-28707,15446,-28898,15090,-29086,14732,-29269,14372,-29447,14009,-29622,13645,-29791,13278,-29956,12909,-30117,12539,-30273,12166,-30425,11792,-30572,11416,-30714,11038,-30852,10659,-30985,10278,-31114,9895,-31237,9511,-31357,9126,-31471,8739,-31581,8351,-31685,7961,-31786,7571,-31881,7179,-31971,6786,-32057,6392,-32138,5997,-32214,5601,-32285,5205,-32351,4807,-32413,4409,-32469,4011,-32521,3611,-32568,3211,-32610,2811,-32647,2410,-32679,2009,-32706,1607,-32728,1206,-32745,804,-32758,402,-32765,0,-32767,-403,-32765,-805,-32758,-1207,-32745,-1608,-32728,-2010,-32706,-2411,-32679,-2812,-32647,-3212,-32610,-3612,-32568,-4012,-32521,-4410,-32469,-4808,-32413,-5206,-32351,-5602,-32285,-5998,-32214,-6393,-32138,-6787,-32057,-7180,-31971,-7572,-31881,-7962,-31786,-8352,-31685,-8740,-31581,-9127,-31471,-9512,-31357,-9896,-31237,-10279,-31114,-10660,-30985,-11039,-30852,-11417,-30714,-117
93,-30572,-12167,-30425,-12540,-30273,-12910,-30117,-13279,-29956,-13646,-29791,-14010,-29622,-14373,-29447,-14733,-29269,-15091,-29086,-15447,-28898,-15800,-28707,-16151,-28511,-16500,-28310,-16846,-28106,-17190,-27897,-17531,-27684,-17869,-27467,-18205,-27245,-18538,-27020,-18868,-26790,-19195,-26557,-19520,-26319,-19841,-26078,-20160,-25832,-20475,-25583,-20788,-25330,-21097,-25073,-21403,-24812,-21706,-24547,-22005,-24279,-22302,-24007,-22595,-23732,-22884,-23453,-23170,-23170,-23453,-22884,-23732,-22595,-24007,-22302,-24279,-22005,-24547,-21706,-24812,-21403,-25073,-21097,-25330,-20788,-25583,-20475,-25832,-20160,-26078,-19841,-26319,-19520,-26557,-19195,-26790,-18868,-27020,-18538,-27245,-18205,-27467,-17869,-27684,-17531,-27897,-17190,-28106,-16846,-28310,-16500,-28511,-16151,-28707,-15800,-28898,-15447,-29086,-15091,-29269,-14733,-29447,-14373,-29622,-14010,-29791,-13646,-29956,-13279,-30117,-12910,-30273,-12540,-30425,-12167,-30572,-11793,-30714,-11417,-30852,-11039,-30985,-10660,-31114,-10279,-31237,-9896,-31357,-9512,-31471,-9127,-31581,-8740,-31685,-8352,-31786,-7962,-31881,-7572,-31971,-7180,-32057,-6787,-32138,-6393,-32214,-5998,-32285,-5602,-32351,-5206,-32413,-4808,-32469,-4410,-32521,-4012,-32568,-3612,-32610,-3212,-32647,-2812,-32679,-2411,-32706,-2010,-32728,-1608,-32745,-1207,-32758,-805,-32765,-403
 };
@@ -3782,219 +3116,6 @@ int16_t tw512c[512] __attribute__((aligned(32))) = {
   0,32767,403,32764,805,32757,1207,32744,1608,32727,2010,32705,2411,32678,2812,32646,3212,32609,3612,32567,4012,32520,4410,32468,4808,32412,5206,32350,5602,32284,5998,32213,6393,32137,6787,32056,7180,31970,7572,31880,7962,31785,8352,31684,8740,31580,9127,31470,9512,31356,9896,31236,10279,31113,10660,30984,11039,30851,11417,30713,11793,30571,12167,30424,12540,30272,12910,30116,13279,29955,13646,29790,14010,29621,14373,29446,14733,29268,15091,29085,15447,28897,15800,28706,16151,28510,16500,28309,16846,28105,17190,27896,17531,27683,17869,27466,18205,27244,18538,27019,18868,26789,19195,26556,19520,26318,19841,26077,20160,25831,20475,25582,20788,25329,21097,25072,21403,24811,21706,24546,22005,24278,22302,24006,22595,23731,22884,23452,23170,23169,23453,22883,23732,22594,24007,22301,24279,22004,24547,21705,24812,21402,25073,21096,25330,20787,25583,20474,25832,20159,26078,19840,26319,19519,26557,19194,26790,18867,27020,18537,27245,18204,27467,17868,27684,17530,27897,17189,28106,16845,28310,16499,28511,16150,28707,15799,28898,15446,29086,15090,29269,14732,29447,14372,29622,14009,29791,13645,29956,13278,30117,12909,30273,12539,30425,12166,30572,11792,30714,11416,30852,11038,30985,10659,31114,10278,31237,9895,31357,9511,31471,9126,31581,8739,31685,8351,31786,7961,31881,7571,31971,7179,32057,6786,32138,6392,32214,5997,32285,5601,32351,5205,32413,4807,32469,4409,32521,4011,32568,3611,32610,3211,32647,2811,32679,2410,32706,2009,32728,1607,32745,1206,32758,804,32765,402,32767,0,32765,-403,32758,-805,32745,-1207,32728,-1608,32706,-2010,32679,-2411,32647,-2812,32610,-3212,32568,-3612,32521,-4012,32469,-4410,32413,-4808,32351,-5206,32285,-5602,32214,-5998,32138,-6393,32057,-6787,31971,-7180,31881,-7572,31786,-7962,31685,-8352,31581,-8740,31471,-9127,31357,-9512,31237,-9896,31114,-10279,30985,-10660,30852,-11039,30714,-11417,30572,-11793,30425,-12167,30273,-12540,30117,-12910,29956,-13279,29791,-13646,29622,-14010,29447,-14373,29269,-14733,29086,-15091,28898,-15447,28707,-15800,28511
,-16151,28310,-16500,28106,-16846,27897,-17190,27684,-17531,27467,-17869,27245,-18205,27020,-18538,26790,-18868,26557,-19195,26319,-19520,26078,-19841,25832,-20160,25583,-20475,25330,-20788,25073,-21097,24812,-21403,24547,-21706,24279,-22005,24007,-22302,23732,-22595,23453,-22884,23170,-23170,22884,-23453,22595,-23732,22302,-24007,22005,-24279,21706,-24547,21403,-24812,21097,-25073,20788,-25330,20475,-25583,20160,-25832,19841,-26078,19520,-26319,19195,-26557,18868,-26790,18538,-27020,18205,-27245,17869,-27467,17531,-27684,17190,-27897,16846,-28106,16500,-28310,16151,-28511,15800,-28707,15447,-28898,15091,-29086,14733,-29269,14373,-29447,14010,-29622,13646,-29791,13279,-29956,12910,-30117,12540,-30273,12167,-30425,11793,-30572,11417,-30714,11039,-30852,10660,-30985,10279,-31114,9896,-31237,9512,-31357,9127,-31471,8740,-31581,8352,-31685,7962,-31786,7572,-31881,7180,-31971,6787,-32057,6393,-32138,5998,-32214,5602,-32285,5206,-32351,4808,-32413,4410,-32469,4012,-32521,3612,-32568,3212,-32610,2812,-32647,2411,-32679,2010,-32706,1608,-32728,1207,-32745,805,-32758,403,-32765
 };
 
-#ifndef __AVX2__
-void dft512(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[256],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[128],*tw512a_128p=(simd_q15_t *)tw512a,*tw512b_128p=(simd_q15_t *)tw512b,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-  xtmpp = xtmp;
-
-  for (i=0; i<4; i++) {
-    transpose4_ooff(x64  ,xtmpp,128);
-    transpose4_ooff(x64+2,xtmpp+1,128);
-    transpose4_ooff(x64+4,xtmpp+2,128);
-    transpose4_ooff(x64+6,xtmpp+3,128);
-    transpose4_ooff(x64+8,xtmpp+4,128);
-    transpose4_ooff(x64+10,xtmpp+5,128);
-    transpose4_ooff(x64+12,xtmpp+6,128);
-    transpose4_ooff(x64+14,xtmpp+7,128);
-    transpose4_ooff(x64+16,xtmpp+8,128);
-    transpose4_ooff(x64+18,xtmpp+9,128);
-    transpose4_ooff(x64+20,xtmpp+10,128);
-    transpose4_ooff(x64+22,xtmpp+11,128);
-    transpose4_ooff(x64+24,xtmpp+12,128);
-    transpose4_ooff(x64+26,xtmpp+13,128);
-    transpose4_ooff(x64+28,xtmpp+14,128);
-    transpose4_ooff(x64+30,xtmpp+15,128);
-    transpose4_ooff(x64+32,xtmpp+16,128);
-    transpose4_ooff(x64+34,xtmpp+17,128);
-    transpose4_ooff(x64+36,xtmpp+18,128);
-    transpose4_ooff(x64+38,xtmpp+19,128);
-    transpose4_ooff(x64+40,xtmpp+20,128);
-    transpose4_ooff(x64+42,xtmpp+21,128);
-    transpose4_ooff(x64+44,xtmpp+22,128);
-    transpose4_ooff(x64+46,xtmpp+23,128);
-    transpose4_ooff(x64+48,xtmpp+24,128);
-    transpose4_ooff(x64+50,xtmpp+25,128);
-    transpose4_ooff(x64+52,xtmpp+26,128);
-    transpose4_ooff(x64+54,xtmpp+27,128);
-    transpose4_ooff(x64+56,xtmpp+28,128);
-    transpose4_ooff(x64+58,xtmpp+29,128);
-    transpose4_ooff(x64+60,xtmpp+30,128);
-    transpose4_ooff(x64+62,xtmpp+31,128);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  dft256((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  dft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+64),1);
-
-
-  for (i=0; i<64; i+=8) {
-    bfly2_16(ytmpp,ytmpp+64,
-             y128p,y128p+64,
-             tw512a_128p,
-             tw512b_128p);
-    bfly2_16(ytmpp+1,ytmpp+65,
-             y128p+1,y128p+65,
-             tw512a_128p+1,
-             tw512b_128p+1);
-    bfly2_16(ytmpp+2,ytmpp+66,
-             y128p+2,y128p+66,
-             tw512a_128p+2,
-             tw512b_128p+2);
-    bfly2_16(ytmpp+3,ytmpp+67,
-             y128p+3,y128p+67,
-             tw512a_128p+3,
-             tw512b_128p+3);
-    bfly2_16(ytmpp+4,ytmpp+68,
-             y128p+4,y128p+68,
-             tw512a_128p+4,
-             tw512b_128p+4);
-    bfly2_16(ytmpp+5,ytmpp+69,
-             y128p+5,y128p+69,
-             tw512a_128p+5,
-             tw512b_128p+5);
-    bfly2_16(ytmpp+6,ytmpp+70,
-             y128p+6,y128p+70,
-             tw512a_128p+6,
-             tw512b_128p+6);
-    bfly2_16(ytmpp+7,ytmpp+71,
-             y128p+7,y128p+71,
-             tw512a_128p+7,
-             tw512b_128p+7);
-    tw512a_128p+=8;
-    tw512b_128p+=8;
-    y128p+=8;
-    ytmpp+=8;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<8; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-void idft512(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[256],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[128],*tw512_128p=(simd_q15_t *)tw512,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-  xtmpp = xtmp;
-
-  for (i=0; i<4; i++) {
-    transpose4_ooff(x64  ,xtmpp,128);
-    transpose4_ooff(x64+2,xtmpp+1,128);
-    transpose4_ooff(x64+4,xtmpp+2,128);
-    transpose4_ooff(x64+6,xtmpp+3,128);
-    transpose4_ooff(x64+8,xtmpp+4,128);
-    transpose4_ooff(x64+10,xtmpp+5,128);
-    transpose4_ooff(x64+12,xtmpp+6,128);
-    transpose4_ooff(x64+14,xtmpp+7,128);
-    transpose4_ooff(x64+16,xtmpp+8,128);
-    transpose4_ooff(x64+18,xtmpp+9,128);
-    transpose4_ooff(x64+20,xtmpp+10,128);
-    transpose4_ooff(x64+22,xtmpp+11,128);
-    transpose4_ooff(x64+24,xtmpp+12,128);
-    transpose4_ooff(x64+26,xtmpp+13,128);
-    transpose4_ooff(x64+28,xtmpp+14,128);
-    transpose4_ooff(x64+30,xtmpp+15,128);
-    transpose4_ooff(x64+32,xtmpp+16,128);
-    transpose4_ooff(x64+34,xtmpp+17,128);
-    transpose4_ooff(x64+36,xtmpp+18,128);
-    transpose4_ooff(x64+38,xtmpp+19,128);
-    transpose4_ooff(x64+40,xtmpp+20,128);
-    transpose4_ooff(x64+42,xtmpp+21,128);
-    transpose4_ooff(x64+44,xtmpp+22,128);
-    transpose4_ooff(x64+46,xtmpp+23,128);
-    transpose4_ooff(x64+48,xtmpp+24,128);
-    transpose4_ooff(x64+50,xtmpp+25,128);
-    transpose4_ooff(x64+52,xtmpp+26,128);
-    transpose4_ooff(x64+54,xtmpp+27,128);
-    transpose4_ooff(x64+56,xtmpp+28,128);
-    transpose4_ooff(x64+58,xtmpp+29,128);
-    transpose4_ooff(x64+60,xtmpp+30,128);
-    transpose4_ooff(x64+62,xtmpp+31,128);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  idft256((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+64),1);
-
-
-  for (i=0; i<64; i++) {
-    ibfly2(ytmpp,ytmpp+64,
-           y128p,y128p+64,
-           tw512_128p);
-    tw512_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<8; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else //__AVX2__
-
 void dft512(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -4159,124 +3280,8 @@ void idft512(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif
-
 int16_t tw1024[1536] __attribute__((aligned(32)));
 
-#ifndef __AVX2__
-void dft1024(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<256; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,64);
-  }
-
-
-  dft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  dft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
-  dft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
-  dft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1);
-
-  for (i=0; i<64; i++) {
-    bfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192,
-          y128p,y128p+64,y128p+128,y128p+192,
-          tw1024_128p,tw1024_128p+64,tw1024_128p+128);
-    tw1024_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<16; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-void idft1024(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<256; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,64);
-  }
-
-
-  idft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  idft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
-  idft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
-  idft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1);
-
-  for (i=0; i<64; i++) {
-    ibfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192,
-           y128p,y128p+64,y128p+128,y128p+192,
-           tw1024_128p,tw1024_128p+64,tw1024_128p+128);
-    tw1024_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<16; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else //__AVX2__
 void dft1024(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -4388,194 +3393,9 @@ void idft1024(int16_t *x,int16_t *y,unsigned char scale)
   _m_empty();
 
 }
-#endif
 
 int16_t tw2048[2048] __attribute__((aligned(32)));
 
-#ifndef __AVX2__
-void dft2048(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-  xtmpp = xtmp;
-
-  for (i=0; i<16; i++) {
-    transpose4_ooff(x64  ,xtmpp,512);
-    transpose4_ooff(x64+2,xtmpp+1,512);
-    transpose4_ooff(x64+4,xtmpp+2,512);
-    transpose4_ooff(x64+6,xtmpp+3,512);
-    transpose4_ooff(x64+8,xtmpp+4,512);
-    transpose4_ooff(x64+10,xtmpp+5,512);
-    transpose4_ooff(x64+12,xtmpp+6,512);
-    transpose4_ooff(x64+14,xtmpp+7,512);
-    transpose4_ooff(x64+16,xtmpp+8,512);
-    transpose4_ooff(x64+18,xtmpp+9,512);
-    transpose4_ooff(x64+20,xtmpp+10,512);
-    transpose4_ooff(x64+22,xtmpp+11,512);
-    transpose4_ooff(x64+24,xtmpp+12,512);
-    transpose4_ooff(x64+26,xtmpp+13,512);
-    transpose4_ooff(x64+28,xtmpp+14,512);
-    transpose4_ooff(x64+30,xtmpp+15,512);
-    transpose4_ooff(x64+32,xtmpp+16,512);
-    transpose4_ooff(x64+34,xtmpp+17,512);
-    transpose4_ooff(x64+36,xtmpp+18,512);
-    transpose4_ooff(x64+38,xtmpp+19,512);
-    transpose4_ooff(x64+40,xtmpp+20,512);
-    transpose4_ooff(x64+42,xtmpp+21,512);
-    transpose4_ooff(x64+44,xtmpp+22,512);
-    transpose4_ooff(x64+46,xtmpp+23,512);
-    transpose4_ooff(x64+48,xtmpp+24,512);
-    transpose4_ooff(x64+50,xtmpp+25,512);
-    transpose4_ooff(x64+52,xtmpp+26,512);
-    transpose4_ooff(x64+54,xtmpp+27,512);
-    transpose4_ooff(x64+56,xtmpp+28,512);
-    transpose4_ooff(x64+58,xtmpp+29,512);
-    transpose4_ooff(x64+60,xtmpp+30,512);
-    transpose4_ooff(x64+62,xtmpp+31,512);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  dft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1);
-
-
-  for (i=0; i<256; i++) {
-    bfly2(ytmpp,ytmpp+256,
-          y128p,y128p+256,
-          tw2048_128p);
-    tw2048_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<32; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-
-}
-
-void idft2048(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-  xtmpp = xtmp;
-
-  for (i=0; i<16; i++) {
-    transpose4_ooff(x64  ,xtmpp,512);
-    transpose4_ooff(x64+2,xtmpp+1,512);
-    transpose4_ooff(x64+4,xtmpp+2,512);
-    transpose4_ooff(x64+6,xtmpp+3,512);
-    transpose4_ooff(x64+8,xtmpp+4,512);
-    transpose4_ooff(x64+10,xtmpp+5,512);
-    transpose4_ooff(x64+12,xtmpp+6,512);
-    transpose4_ooff(x64+14,xtmpp+7,512);
-    transpose4_ooff(x64+16,xtmpp+8,512);
-    transpose4_ooff(x64+18,xtmpp+9,512);
-    transpose4_ooff(x64+20,xtmpp+10,512);
-    transpose4_ooff(x64+22,xtmpp+11,512);
-    transpose4_ooff(x64+24,xtmpp+12,512);
-    transpose4_ooff(x64+26,xtmpp+13,512);
-    transpose4_ooff(x64+28,xtmpp+14,512);
-    transpose4_ooff(x64+30,xtmpp+15,512);
-    transpose4_ooff(x64+32,xtmpp+16,512);
-    transpose4_ooff(x64+34,xtmpp+17,512);
-    transpose4_ooff(x64+36,xtmpp+18,512);
-    transpose4_ooff(x64+38,xtmpp+19,512);
-    transpose4_ooff(x64+40,xtmpp+20,512);
-    transpose4_ooff(x64+42,xtmpp+21,512);
-    transpose4_ooff(x64+44,xtmpp+22,512);
-    transpose4_ooff(x64+46,xtmpp+23,512);
-    transpose4_ooff(x64+48,xtmpp+24,512);
-    transpose4_ooff(x64+50,xtmpp+25,512);
-    transpose4_ooff(x64+52,xtmpp+26,512);
-    transpose4_ooff(x64+54,xtmpp+27,512);
-    transpose4_ooff(x64+56,xtmpp+28,512);
-    transpose4_ooff(x64+58,xtmpp+29,512);
-    transpose4_ooff(x64+60,xtmpp+30,512);
-    transpose4_ooff(x64+62,xtmpp+31,512);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  idft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1);
-
-
-  for (i=0; i<256; i++) {
-    ibfly2(ytmpp,ytmpp+256,
-           y128p,y128p+256,
-           tw2048_128p);
-    tw2048_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<32; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else // __AVX2__
-
 void dft2048(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -4757,128 +3577,8 @@ void idft2048(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif
-
-
-
 int16_t tw4096[3*2*1024];
 
-#ifndef __AVX2__
-void dft4096(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<1024; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,256);
-  }
-
-
-  dft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  dft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
-  dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
-  dft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1);
-
-  for (i=0; i<256; i++) {
-    bfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768,
-          y128p,y128p+256,y128p+512,y128p+768,
-          tw4096_128p,tw4096_128p+256,tw4096_128p+512);
-    tw4096_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<64; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
- 
-
-void idft4096(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<1024; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,256);
-  }
-
-
-  idft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  idft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
-  idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
-  idft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1);
-
-  for (i=0; i<256; i++) {
-    ibfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768,
-           y128p,y128p+256,y128p+512,y128p+768,
-           tw4096_128p,tw4096_128p+256,tw4096_128p+512);
-    tw4096_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<64; i++) {
-      y128[0]  = shiftright_int16(y128[0],scale);
-      y128[1]  = shiftright_int16(y128[1],scale);
-      y128[2]  = shiftright_int16(y128[2],scale);
-      y128[3]  = shiftright_int16(y128[3],scale);
-      y128[4]  = shiftright_int16(y128[4],scale);
-      y128[5]  = shiftright_int16(y128[5],scale);
-      y128[6]  = shiftright_int16(y128[6],scale);
-      y128[7]  = shiftright_int16(y128[7],scale);
-      y128[8]  = shiftright_int16(y128[8],scale);
-      y128[9]  = shiftright_int16(y128[9],scale);
-      y128[10] = shiftright_int16(y128[10],scale);
-      y128[11] = shiftright_int16(y128[11],scale);
-      y128[12] = shiftright_int16(y128[12],scale);
-      y128[13] = shiftright_int16(y128[13],scale);
-      y128[14] = shiftright_int16(y128[14],scale);
-      y128[15] = shiftright_int16(y128[15],scale);
-
-      y128+=16;
-    }
- 
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else //__AVX2__
 void dft4096(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -4991,193 +3691,8 @@ void idft4096(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif //__AVX2__
-
-
 int16_t tw8192[2*4096] __attribute__((aligned(32)));
 
-#ifndef __AVX2__
-void dft8192(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[4096],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[1024],*tw8192_128p=(simd_q15_t *)tw8192,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-  
-  xtmpp = xtmp;
-
-  for (i=0; i<64; i++) {
-    transpose4_ooff(x64  ,xtmpp,2048);
-    transpose4_ooff(x64+2,xtmpp+1,2048);
-    transpose4_ooff(x64+4,xtmpp+2,2048);
-    transpose4_ooff(x64+6,xtmpp+3,2048);
-    transpose4_ooff(x64+8,xtmpp+4,2048);
-    transpose4_ooff(x64+10,xtmpp+5,2048);
-    transpose4_ooff(x64+12,xtmpp+6,2048);
-    transpose4_ooff(x64+14,xtmpp+7,2048);
-    transpose4_ooff(x64+16,xtmpp+8,2048);
-    transpose4_ooff(x64+18,xtmpp+9,2048);
-    transpose4_ooff(x64+20,xtmpp+10,2048);
-    transpose4_ooff(x64+22,xtmpp+11,2048);
-    transpose4_ooff(x64+24,xtmpp+12,2048);
-    transpose4_ooff(x64+26,xtmpp+13,2048);
-    transpose4_ooff(x64+28,xtmpp+14,2048);
-    transpose4_ooff(x64+30,xtmpp+15,2048);
-    transpose4_ooff(x64+32,xtmpp+16,2048);
-    transpose4_ooff(x64+34,xtmpp+17,2048);
-    transpose4_ooff(x64+36,xtmpp+18,2048);
-    transpose4_ooff(x64+38,xtmpp+19,2048);
-    transpose4_ooff(x64+40,xtmpp+20,2048);
-    transpose4_ooff(x64+42,xtmpp+21,2048);
-    transpose4_ooff(x64+44,xtmpp+22,2048);
-    transpose4_ooff(x64+46,xtmpp+23,2048);
-    transpose4_ooff(x64+48,xtmpp+24,2048);
-    transpose4_ooff(x64+50,xtmpp+25,2048);
-    transpose4_ooff(x64+52,xtmpp+26,2048);
-    transpose4_ooff(x64+54,xtmpp+27,2048);
-    transpose4_ooff(x64+56,xtmpp+28,2048);
-    transpose4_ooff(x64+58,xtmpp+29,2048);
-    transpose4_ooff(x64+60,xtmpp+30,2048);
-    transpose4_ooff(x64+62,xtmpp+31,2048);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  dft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1);
-
-
-  for (i=0; i<1024; i++) {
-    bfly2(ytmpp,ytmpp+1024,
-          y128p,y128p+1024,
-          tw8192_128p);
-    tw8192_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<128; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-void idft8192(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[4096],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[2048],*tw8192_128p=(simd_q15_t *)tw8192,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-  
-  xtmpp = xtmp;
-
-  for (i=0; i<64; i++) {
-    transpose4_ooff(x64  ,xtmpp,2048);
-    transpose4_ooff(x64+2,xtmpp+1,2048);
-    transpose4_ooff(x64+4,xtmpp+2,2048);
-    transpose4_ooff(x64+6,xtmpp+3,2048);
-    transpose4_ooff(x64+8,xtmpp+4,2048);
-    transpose4_ooff(x64+10,xtmpp+5,2048);
-    transpose4_ooff(x64+12,xtmpp+6,2048);
-    transpose4_ooff(x64+14,xtmpp+7,2048);
-    transpose4_ooff(x64+16,xtmpp+8,2048);
-    transpose4_ooff(x64+18,xtmpp+9,2048);
-    transpose4_ooff(x64+20,xtmpp+10,2048);
-    transpose4_ooff(x64+22,xtmpp+11,2048);
-    transpose4_ooff(x64+24,xtmpp+12,2048);
-    transpose4_ooff(x64+26,xtmpp+13,2048);
-    transpose4_ooff(x64+28,xtmpp+14,2048);
-    transpose4_ooff(x64+30,xtmpp+15,2048);
-    transpose4_ooff(x64+32,xtmpp+16,2048);
-    transpose4_ooff(x64+34,xtmpp+17,2048);
-    transpose4_ooff(x64+36,xtmpp+18,2048);
-    transpose4_ooff(x64+38,xtmpp+19,2048);
-    transpose4_ooff(x64+40,xtmpp+20,2048);
-    transpose4_ooff(x64+42,xtmpp+21,2048);
-    transpose4_ooff(x64+44,xtmpp+22,2048);
-    transpose4_ooff(x64+46,xtmpp+23,2048);
-    transpose4_ooff(x64+48,xtmpp+24,2048);
-    transpose4_ooff(x64+50,xtmpp+25,2048);
-    transpose4_ooff(x64+52,xtmpp+26,2048);
-    transpose4_ooff(x64+54,xtmpp+27,2048);
-    transpose4_ooff(x64+56,xtmpp+28,2048);
-    transpose4_ooff(x64+58,xtmpp+29,2048);
-    transpose4_ooff(x64+60,xtmpp+30,2048);
-    transpose4_ooff(x64+62,xtmpp+31,2048);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1);
-
-
-  for (i=0; i<1024; i++) {
-    ibfly2(ytmpp,ytmpp+1024,
-           y128p,y128p+1024,
-           tw8192_128p);
-    tw8192_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<128; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else // __AVX2__
 void dft8192(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -5317,161 +3832,41 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale)
     xtmpp+=32;
   }
 
-  idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
-
-
-  for (i=0; i<512; i++) {
-    ibfly2_256(ytmpp,ytmpp+512,
-	       y256p,y256p+512,
-	       tw8192_256p);
-    tw8192_256p++;
-    y256p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y256p = y256;
-
-    for (i=0; i<64; i++) {
-      y256p[0]  = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
-      y256p[1]  = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
-      y256p[2]  = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
-      y256p[3]  = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
-      y256p[4]  = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
-      y256p[5]  = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
-      y256p[6]  = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
-      y256p[7]  = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
-      y256p[8]  = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
-      y256p[9]  = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
-      y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
-      y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
-      y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
-      y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
-      y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
-      y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
-      y256p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-
-#endif
-
-int16_t tw16384[3*2*4096];
-
-#ifndef __AVX2__
-void dft16384(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[4096],ytmp[4096],*tw16384_128p=(simd_q15_t *)tw16384,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<4096; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,1024);
-  }
-
-
-  dft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  dft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
-  dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
-  dft4096((int16_t*)(xtmp+3072),(int16_t*)(ytmp+3072),1);
-
-  for (i=0; i<1024; i++) {
-    bfly4(ytmpp,ytmpp+1024,ytmpp+2048,ytmpp+3072,
-          y128p,y128p+1024,y128p+2048,y128p+3072,
-          tw16384_128p,tw16384_128p+1024,tw16384_128p+2048);
-    tw16384_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-
-    for (i=0; i<256; i++) {
-      y128[0]  = shiftright_int16(y128[0],1);
-      y128[1]  = shiftright_int16(y128[1],1);
-      y128[2]  = shiftright_int16(y128[2],1);
-      y128[3]  = shiftright_int16(y128[3],1);
-      y128[4]  = shiftright_int16(y128[4],1);
-      y128[5]  = shiftright_int16(y128[5],1);
-      y128[6]  = shiftright_int16(y128[6],1);
-      y128[7]  = shiftright_int16(y128[7],1);
-      y128[8]  = shiftright_int16(y128[8],1);
-      y128[9]  = shiftright_int16(y128[9],1);
-      y128[10] = shiftright_int16(y128[10],1);
-      y128[11] = shiftright_int16(y128[11],1);
-      y128[12] = shiftright_int16(y128[12],1);
-      y128[13] = shiftright_int16(y128[13],1);
-      y128[14] = shiftright_int16(y128[14],1);
-      y128[15] = shiftright_int16(y128[15],1);
-
-      y128+=16;
-    }
-
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
- 
-
-void idft16384(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simd_q15_t xtmp[4096],ytmp[4096],*tw16384_128p=(simd_q15_t *)tw16384,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i,j;
-
-  for (i=0,j=0; i<4096; i+=4,j++) {
-    transpose16_ooff(x128+i,xtmp+j,1024);
-  }
+  idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
 
 
-  idft4096((int16_t*)(xtmp),(int16_t*)(ytmp),1);
-  idft4096((int16_t*)(xtmp+1024),(int16_t*)(ytmp+1024),1);
-  idft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
-  idft4096((int16_t*)(xtmp+3072),(int16_t*)(ytmp+3072),1);
-
-  for (i=0; i<1024; i++) {
-    ibfly4(ytmpp,ytmpp+1024,ytmpp+2048,ytmpp+3072,
-           y128p,y128p+1024,y128p+2048,y128p+3072,
-           tw16384_128p,tw16384_128p+1024,tw16384_128p+2048);
-    tw16384_128p++;
-    y128p++;
+  for (i=0; i<512; i++) {
+    ibfly2_256(ytmpp,ytmpp+512,
+	       y256p,y256p+512,
+	       tw8192_256p);
+    tw8192_256p++;
+    y256p++;
     ytmpp++;
   }
 
   if (scale>0) {
+    y256p = y256;
 
-    for (i=0; i<256; i++) {
-      y128[0]  = shiftright_int16(y128[0],scale);
-      y128[1]  = shiftright_int16(y128[1],scale);
-      y128[2]  = shiftright_int16(y128[2],scale);
-      y128[3]  = shiftright_int16(y128[3],scale);
-      y128[4]  = shiftright_int16(y128[4],scale);
-      y128[5]  = shiftright_int16(y128[5],scale);
-      y128[6]  = shiftright_int16(y128[6],scale);
-      y128[7]  = shiftright_int16(y128[7],scale);
-      y128[8]  = shiftright_int16(y128[8],scale);
-      y128[9]  = shiftright_int16(y128[9],scale);
-      y128[10] = shiftright_int16(y128[10],scale);
-      y128[11] = shiftright_int16(y128[11],scale);
-      y128[12] = shiftright_int16(y128[12],scale);
-      y128[13] = shiftright_int16(y128[13],scale);
-      y128[14] = shiftright_int16(y128[14],scale);
-      y128[15] = shiftright_int16(y128[15],scale);
-
-      y128+=16;
+    for (i=0; i<64; i++) {
+      y256p[0]  = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
+      y256p[1]  = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
+      y256p[2]  = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
+      y256p[3]  = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
+      y256p[4]  = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
+      y256p[5]  = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
+      y256p[6]  = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
+      y256p[7]  = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
+      y256p[8]  = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
+      y256p[9]  = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
+      y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
+      y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
+      y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
+      y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
+      y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
+      y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
+      y256p+=16;
     }
- 
   }
 
   _mm_empty();
@@ -5479,7 +3874,8 @@ void idft16384(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#else //__AVX2__
+int16_t tw16384[3*2*4096] __attribute__((aligned(32)));
+
 void dft16384(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -5592,192 +3988,8 @@ void idft16384(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
-#endif //__AVX2__
-
 int16_t tw32768[2*16384] __attribute__((aligned(32)));
 
-#ifndef __AVX2__
-void dft32768(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-
-  xtmpp = xtmp;
-
-  for (i=0; i<256; i++) {
-    transpose4_ooff(x64  ,xtmpp,8192);
-    transpose4_ooff(x64+2,xtmpp+1,8192);
-    transpose4_ooff(x64+4,xtmpp+2,8192);
-    transpose4_ooff(x64+6,xtmpp+3,8192);
-    transpose4_ooff(x64+8,xtmpp+4,8192);
-    transpose4_ooff(x64+10,xtmpp+5,8192);
-    transpose4_ooff(x64+12,xtmpp+6,8192);
-    transpose4_ooff(x64+14,xtmpp+7,8192);
-    transpose4_ooff(x64+16,xtmpp+8,8192);
-    transpose4_ooff(x64+18,xtmpp+9,8192);
-    transpose4_ooff(x64+20,xtmpp+10,8192);
-    transpose4_ooff(x64+22,xtmpp+11,8192);
-    transpose4_ooff(x64+24,xtmpp+12,8192);
-    transpose4_ooff(x64+26,xtmpp+13,8192);
-    transpose4_ooff(x64+28,xtmpp+14,8192);
-    transpose4_ooff(x64+30,xtmpp+15,8192);
-    transpose4_ooff(x64+32,xtmpp+16,8192);
-    transpose4_ooff(x64+34,xtmpp+17,8192);
-    transpose4_ooff(x64+36,xtmpp+18,8192);
-    transpose4_ooff(x64+38,xtmpp+19,8192);
-    transpose4_ooff(x64+40,xtmpp+20,8192);
-    transpose4_ooff(x64+42,xtmpp+21,8192);
-    transpose4_ooff(x64+44,xtmpp+22,8192);
-    transpose4_ooff(x64+46,xtmpp+23,8192);
-    transpose4_ooff(x64+48,xtmpp+24,8192);
-    transpose4_ooff(x64+50,xtmpp+25,8192);
-    transpose4_ooff(x64+52,xtmpp+26,8192);
-    transpose4_ooff(x64+54,xtmpp+27,8192);
-    transpose4_ooff(x64+56,xtmpp+28,8192);
-    transpose4_ooff(x64+58,xtmpp+29,8192);
-    transpose4_ooff(x64+60,xtmpp+30,8192);
-    transpose4_ooff(x64+62,xtmpp+31,8192);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  dft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  dft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
-
-
-  for (i=0; i<4096; i++) {
-    bfly2(ytmpp,ytmpp+4096,
-          y128p,y128p+4096,
-          tw32768_128p);
-    tw32768_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<512; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-void idft32768(int16_t *x,int16_t *y,unsigned char scale)
-{
-
-  simdshort_q15_t xtmp[16384],*xtmpp,*x64 = (simdshort_q15_t *)x;
-  simd_q15_t ytmp[8192],*tw32768_128p=(simd_q15_t *)tw32768,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
-  simd_q15_t *ytmpp = &ytmp[0];
-  int i;
-  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
-  
-  xtmpp = xtmp;
-
-  for (i=0; i<256; i++) {
-    transpose4_ooff(x64  ,xtmpp,8192);
-    transpose4_ooff(x64+2,xtmpp+1,8192);
-    transpose4_ooff(x64+4,xtmpp+2,8192);
-    transpose4_ooff(x64+6,xtmpp+3,8192);
-    transpose4_ooff(x64+8,xtmpp+4,8192);
-    transpose4_ooff(x64+10,xtmpp+5,8192);
-    transpose4_ooff(x64+12,xtmpp+6,8192);
-    transpose4_ooff(x64+14,xtmpp+7,8192);
-    transpose4_ooff(x64+16,xtmpp+8,8192);
-    transpose4_ooff(x64+18,xtmpp+9,8192);
-    transpose4_ooff(x64+20,xtmpp+10,8192);
-    transpose4_ooff(x64+22,xtmpp+11,8192);
-    transpose4_ooff(x64+24,xtmpp+12,8192);
-    transpose4_ooff(x64+26,xtmpp+13,8192);
-    transpose4_ooff(x64+28,xtmpp+14,8192);
-    transpose4_ooff(x64+30,xtmpp+15,8192);
-    transpose4_ooff(x64+32,xtmpp+16,8192);
-    transpose4_ooff(x64+34,xtmpp+17,8192);
-    transpose4_ooff(x64+36,xtmpp+18,8192);
-    transpose4_ooff(x64+38,xtmpp+19,8192);
-    transpose4_ooff(x64+40,xtmpp+20,8192);
-    transpose4_ooff(x64+42,xtmpp+21,8192);
-    transpose4_ooff(x64+44,xtmpp+22,8192);
-    transpose4_ooff(x64+46,xtmpp+23,8192);
-    transpose4_ooff(x64+48,xtmpp+24,8192);
-    transpose4_ooff(x64+50,xtmpp+25,8192);
-    transpose4_ooff(x64+52,xtmpp+26,8192);
-    transpose4_ooff(x64+54,xtmpp+27,8192);
-    transpose4_ooff(x64+56,xtmpp+28,8192);
-    transpose4_ooff(x64+58,xtmpp+29,8192);
-    transpose4_ooff(x64+60,xtmpp+30,8192);
-    transpose4_ooff(x64+62,xtmpp+31,8192);
-    x64+=64;
-    xtmpp+=32;
-  }
-
-  idft16384((int16_t*)(xtmp),(int16_t*)ytmp,1);
-  idft16384((int16_t*)(xtmp+8192),(int16_t*)(ytmp+4096),1);
-
-
-  for (i=0; i<4096; i++) {
-    ibfly2(ytmpp,ytmpp+4096,
-           y128p,y128p+4096,
-           tw32768_128p);
-    tw32768_128p++;
-    y128p++;
-    ytmpp++;
-  }
-
-  if (scale>0) {
-    y128p = y128;
-
-    for (i=0; i<512; i++) {
-      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
-      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
-      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
-      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
-      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
-      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
-      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
-      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
-      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
-      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
-      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
-      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
-      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
-      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
-      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
-      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
-      y128p+=16;
-    }
-  }
-
-  _mm_empty();
-  _m_empty();
-
-}
-
-#else // __AVX2__
 void dft32768(int16_t *x,int16_t *y,unsigned char scale)
 {
 
@@ -5959,8 +4171,124 @@ void idft32768(int16_t *x,int16_t *y,unsigned char scale)
 
 }
 
+int16_t twa768[512],twb768[512];
+
+// 256 x 3
+void idft768(int16_t *input, int16_t *output, unsigned char scale)
+{
+  int i,i2,j;
+  uint32_t tmp[3][256]__attribute__((aligned(32)));
+  uint32_t tmpo[3][256] __attribute__((aligned(32)));
+  simd_q15_t *y128p=(simd_q15_t*)output;
+  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
+
+  for (i=0,j=0; i<256; i++) {
+    tmp[0][i] = ((uint32_t *)input)[j++];
+    tmp[1][i] = ((uint32_t *)input)[j++];
+    tmp[2][i] = ((uint32_t *)input)[j++];
+  }
+
+  idft256((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
+  idft256((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
+  idft256((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
+
+  for (i=0,i2=0; i<512; i+=8,i2+=4)  {
+    ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]),
+          (simd_q15_t*)(output+i),(simd_q15_t*)(output+512+i),(simd_q15_t*)(output+1024+i),
+          (simd_q15_t*)(twa768+i),(simd_q15_t*)(twb768+i));
+  }
+
+
+  if (scale==1) {
+    for (i=0; i<12; i++) {
+      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
+      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
+      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
+      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
+      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
+      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
+      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
+      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
+      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
+      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
+      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
+      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
+      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
+      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
+      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
+      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
+      y128p+=16;
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+void dft768(int16_t *input, int16_t *output, unsigned char scale)
+{
+  int i,i2,j;
+  uint32_t tmp[3][256] __attribute__((aligned(32)));
+  uint32_t tmpo[3][256] __attribute__((aligned(32)));
+  simd_q15_t *y128p=(simd_q15_t*)output;
+  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
+
+  for (i=0,j=0; i<256; i++) {
+    tmp[0][i] = ((uint32_t *)input)[j++];
+    tmp[1][i] = ((uint32_t *)input)[j++];
+    tmp[2][i] = ((uint32_t *)input)[j++];
+  }
+
+  dft256((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
+  dft256((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
+  dft256((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
 
+  /*
+  for (i=1; i<512; i++) {
+    tmpo[0][i] = tmpo[0][i<<1];
+    tmpo[1][i] = tmpo[1][i<<1];
+    tmpo[2][i] = tmpo[2][i<<1];
+    }*/
+#ifndef MR_MAIN
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("dft768out0.m","o0",tmpo[0],1024,1,1);
+    LOG_M("dft768out1.m","o1",tmpo[1],1024,1,1);
+    LOG_M("dft768out2.m","o2",tmpo[2],1024,1,1);
+  }
 #endif
+  for (i=0,i2=0; i<512; i+=8,i2+=4)  {
+    bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]),
+          (simd_q15_t*)(output+i),(simd_q15_t*)(output+512+i),(simd_q15_t*)(output+1024+i),
+          (simd_q15_t*)(twa768+i),(simd_q15_t*)(twb768+i));
+  }
+
+  if (scale==1) {
+    for (i=0; i<12; i++) {
+      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
+      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
+      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
+      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
+      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
+      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
+      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
+      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
+      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
+      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
+      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
+      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
+      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
+      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
+      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
+      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
+      y128p+=16;
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
 int16_t twa1536[1024],twb1536[1024];
 
 // 512 x 3
@@ -6894,6 +5222,63 @@ void idft49152(int16_t *input, int16_t *output,uint8_t scale) {
   _m_empty();
 }
 
+int16_t tw65536[3*2*16384] __attribute__((aligned(32)));
+
+void idft65536(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // Radix-4 inverse DFT of size 65536: four idft16384 calls followed by an ibfly4_256 combine.
+  simd256_q15_t xtmp[8192],ytmp[8192],*tw65536_256p=(simd256_q15_t *)tw65536,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Rearrange the input into xtmp (2048 is presumably the output offset between quarters - confirm against the helper).
+  for (i=0,j=0; i<8192; i+=4,j++) {
+    transpose16_ooff_simd256(x256+i,xtmp+j,2048);
+  }
+
+  // One 16384-point inverse DFT per 2048-vector quarter of xtmp (scale argument fixed to 1).
+  idft16384((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  idft16384((int16_t*)(xtmp+2048),(int16_t*)(ytmp+2048),1);
+  idft16384((int16_t*)(xtmp+4096),(int16_t*)(ytmp+4096),1);
+  idft16384((int16_t*)(xtmp+6144),(int16_t*)(ytmp+6144),1);
+  // tw65536 has 3*2*16384 int16 (6144 vectors, assuming 16 int16 per simd256_q15_t): twiddle offsets must stay below 6144.
+  for (i=0; i<2048; i++) {
+    ibfly4_256(ytmpp,ytmpp+2048,ytmpp+4096,ytmpp+6144,
+           y256p,y256p+2048,y256p+4096,y256p+6144,
+           tw65536_256p,tw65536_256p+2048,tw65536_256p+4096);
+    tw65536_256p++;
+    y256p++;
+    ytmpp++;
+  }
+
+  if (scale>0) {
+    // Right-shift all 512*16 = 8192 output vectors by 'scale' bits.
+    for (i=0; i<512; i++) {
+      y256[0]  = shiftright_int16_simd256(y256[0],scale);
+      y256[1]  = shiftright_int16_simd256(y256[1],scale);
+      y256[2]  = shiftright_int16_simd256(y256[2],scale);
+      y256[3]  = shiftright_int16_simd256(y256[3],scale);
+      y256[4]  = shiftright_int16_simd256(y256[4],scale);
+      y256[5]  = shiftright_int16_simd256(y256[5],scale);
+      y256[6]  = shiftright_int16_simd256(y256[6],scale);
+      y256[7]  = shiftright_int16_simd256(y256[7],scale);
+      y256[8]  = shiftright_int16_simd256(y256[8],scale);
+      y256[9]  = shiftright_int16_simd256(y256[9],scale);
+      y256[10] = shiftright_int16_simd256(y256[10],scale);
+      y256[11] = shiftright_int16_simd256(y256[11],scale);
+      y256[12] = shiftright_int16_simd256(y256[12],scale);
+      y256[13] = shiftright_int16_simd256(y256[13],scale);
+      y256[14] = shiftright_int16_simd256(y256[14],scale);
+      y256[15] = shiftright_int16_simd256(y256[15],scale);
+
+      y256+=16;
+    }
+
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
 int16_t twa73728[49152] __attribute__((aligned(32)));
 int16_t twb73728[49152] __attribute__((aligned(32)));
 // 24576 x 3
@@ -7192,8 +5577,6 @@ void dft12(int16_t *x,int16_t *y ,unsigned char scale_flag)
 
 }
 
-#ifdef __AVX2__
-
 static int16_t W1_12s_256[16]__attribute__((aligned(32))) = {28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383};
 static int16_t W2_12s_256[16]__attribute__((aligned(32))) = {16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377};
 static int16_t W3_12s_256[16]__attribute__((aligned(32))) = {0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767};
@@ -7376,8 +5759,6 @@ void dft12_simd256(int16_t *x,int16_t *y)
 
 }
 
-#endif
-
 static int16_t tw24[88]__attribute__((aligned(32)));
 
 void dft24(int16_t *x,int16_t *y,unsigned char scale_flag)
@@ -9049,18 +7430,18 @@ void dft720(int16_t *x,int16_t *y,unsigned char scale_flag)  // 180 x 4
 
 };
 
-static int16_t twa768[191*2*4];
-static int16_t twb768[191*2*4];
-static int16_t twc768[191*2*4];
+static int16_t twa768p[191*2*4];
+static int16_t twb768p[191*2*4];
+static int16_t twc768p[191*2*4];
 
-void dft768(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4;
+void dft768p(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4;
 
   int i,j;
   simd_q15_t *x128=(simd_q15_t *)x;
   simd_q15_t *y128=(simd_q15_t *)y;
-  simd_q15_t *twa128=(simd_q15_t *)&twa768[0];
-  simd_q15_t *twb128=(simd_q15_t *)&twb768[0];
-  simd_q15_t *twc128=(simd_q15_t *)&twc768[0];
+  simd_q15_t *twa128=(simd_q15_t *)&twa768p[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb768p[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc768p[0];
   simd_q15_t x2128[768];// = (simd_q15_t *)&x2128array[0];
   simd_q15_t ytmp128[768];//=&ytmp128array2[0];
 
@@ -9105,6 +7486,60 @@ void dft768(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4;
   _mm_empty();
   _m_empty();
 
+}
+
+static int16_t twa384i[256];
+static int16_t twb384i[256];
+// 128 x 3: radix-3 inverse DFT of size 384 built from three idft128 calls
+void idft384(int16_t *input, int16_t *output, unsigned char scale)
+{
+  int i,i2,j;
+  uint32_t tmp[3][128]__attribute__((aligned(32)));
+  uint32_t tmpo[3][128] __attribute__((aligned(32)));
+  simd_q15_t *y128p=(simd_q15_t*)output;
+  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
+  // De-interleave: 32-bit input word j goes to sub-sequence j%3.
+  for (i=0,j=0; i<128; i++) {
+    tmp[0][i] = ((uint32_t *)input)[j++];
+    tmp[1][i] = ((uint32_t *)input)[j++];
+    tmp[2][i] = ((uint32_t *)input)[j++];
+  }
+  // 128-point inverse DFT of each sub-sequence (scale argument fixed to 1).
+  idft128((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1);
+  idft128((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1);
+  idft128((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1);
+  // Radix-3 combine; twa384i/twb384i are the tables filled by init_rad3(384,...) in dfts_autoinit.
+  for (i=0,i2=0; i<256; i+=8,i2+=4)  {
+    ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]),
+          (simd_q15_t*)(output+i),(simd_q15_t*)(output+256+i),(simd_q15_t*)(output+512+i),
+          (simd_q15_t*)(twa384i+i),(simd_q15_t*)(twb384i+i));
+  }
+
+  // Optional normalisation: multiply all 6*16 output vectors by ONE_OVER_SQRT3_Q15.
+  if (scale==1) {
+    for (i=0; i<6; i++) {
+      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128);
+      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128);
+      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128);
+      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128);
+      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128);
+      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128);
+      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128);
+      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128);
+      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128);
+      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128);
+      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128);
+      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128);
+      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128);
+      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128);
+      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128);
+      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128);
+      y128p+=16;
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
 
 }
 
@@ -10532,7 +8967,10 @@ int dfts_autoinit(void)
   init_rad2(8192,tw8192);
   init_rad4(16384,tw16384);
   init_rad2(32768,tw32768);
+  init_rad4(65536,tw65536);
 
+  init_rad3(384,twa384i,twb384i);
+  init_rad3(768,twa768,twb768);
   init_rad3(1536,twa1536,twb1536);
   init_rad3(3072,twa3072,twb3072);
   init_rad3(6144,twa6144,twb6144);
@@ -10569,7 +9007,7 @@ int dfts_autoinit(void)
   init_rad2_rep(600,twa600);
   init_rad3_rep(648,twa648,twb648);
   init_rad4_rep(720,twa720,twb720,twc720);
-  init_rad4_rep(768,twa768,twb768,twc768);
+  init_rad4_rep(768,twa768p,twb768p,twc768p);
   init_rad3_rep(864,twa864,twb864);
   init_rad3_rep(900,twa900,twb900);
   init_rad4_rep(960,twa960,twb960,twc960);
@@ -10605,10 +9043,8 @@ int dfts_autoinit(void)
 void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
 	AssertFatal((sizeidx >= 0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
         int algn=0xF;
-        #ifdef __AVX2__
         if ( (dft_ftab[sizeidx].size%3) != 0 ) // there is no AVX2 implementation for multiples of 3 DFTs
           algn=0x1F;
-        #endif 
         AssertFatal(((intptr_t)output&algn)==0,"Buffers should be aligned %p",output);
         if (((intptr_t)input)&algn) {
           LOG_D(PHY, "DFT called with input not aligned, add a memcpy, size %d\n", sizeidx);
@@ -10625,9 +9061,7 @@ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_fla
 void idft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){
 	AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
         int algn=0xF;
-        #ifdef __AVX2__
-          algn=0x1F;
-        #endif
+	algn=0x1F;
         AssertFatal( ((intptr_t)output&algn)==0,"Buffers should be 16 bytes aligned %p",output);
         if (((intptr_t)input)&algn ) {  
           LOG_D(PHY, "DFT called with input not aligned, add a memcpy\n");
@@ -10809,11 +9243,7 @@ int main(int argc, char**argv)
 
 
   time_stats_t ts;
-#ifdef __AVX2__
   simd256_q15_t x[16384],x2[16384],y[16384],tw0,tw1,tw2,tw3;
-#else
-  simd_q15_t x[32768],y[32768],tw0,tw1,tw2,tw3;
-#endif
   int i;
   simd_q15_t *x128=(simd_q15_t*)x,*y128=(simd_q15_t*)y;
 
@@ -10860,17 +9290,12 @@ int main(int argc, char**argv)
  */
     for (i=0;i<300;i++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifndef __AVX2__
-      x[i] = _mm_set1_epi32(taus());
-      x[i] = _mm_srai_epi16(x[i],4);
-#else
-      x[i] = _mm256_set1_epi32(taus());
-      x[i] = _mm256_srai_epi16(x[i],4);
-#endif
-#elif defined(__arm__)
+      x[i] = simde_mm256_set1_epi32(taus());
+      x[i] = simde_mm256_srai_epi16(x[i],4);
+#elif defined(__arm__) || defined(__aarch64__)
       x[i] = (int16x8_t)vdupq_n_s32(taus());
       x[i] = vshrq_n_s16(x[i],4);
-#endif
+#endif // defined(__x86_64__) || defined(__i386__)
     }
       /*
     bfly2_tw1(x,x+1,y,y+1);
diff --git a/openair1/PHY/TOOLS/signal_energy.c b/openair1/PHY/TOOLS/signal_energy.c
index 96ee211285bef6569a68737f2a409e4bcc0fcb4d..9eeb3d92a0424d1c932096129756a585699d2182 100644
--- a/openair1/PHY/TOOLS/signal_energy.c
+++ b/openair1/PHY/TOOLS/signal_energy.c
@@ -29,7 +29,7 @@
 
 #define shift 4
 //#define shift_DC 0
-#define SHRT_MIN -32768
+//#define SHRT_MIN -32768
 
 #if defined(__x86_64__) || defined(__i386__)
 #ifdef LOCALIZATION
@@ -174,7 +174,7 @@ int32_t signal_energy_nodc(int32_t *input,uint32_t length)
   return temp;
 }
 
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 
 int32_t signal_energy(int32_t *input,uint32_t length)
 {
@@ -318,7 +318,6 @@ main(int argc,char **argv)
 }
 #endif
 
-#define SHRT_MIN -32768
 int32_t signal_power(int32_t *input, uint32_t length)
 {
 
diff --git a/openair1/PHY/TOOLS/tools_defs.h b/openair1/PHY/TOOLS/tools_defs.h
index 6b81575feeb30458bf428c35e8b2b73d9ff9ad83..2d8a032570e303605a0145195071635ce282ed63 100644
--- a/openair1/PHY/TOOLS/tools_defs.h
+++ b/openair1/PHY/TOOLS/tools_defs.h
@@ -44,7 +44,7 @@
 #define mulhi_s1_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),2)
 #define adds_int16(a,b) _mm_adds_epi16(a,b)
 #define mullo_int16(a,b) _mm_mullo_epi16(a,b)
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
 #define simd_q15_t int16x8_t
 #define simdshort_q15_t int16x4_t
 #define shiftright_int16(a,shift) vshrq_n_s16(a,shift)
@@ -53,7 +53,7 @@
 #define mulhi_s1_int16(a,b) vshlq_n_s16(vqdmulhq_s16(a,b),1)
 #define adds_int16(a,b) vqaddq_s16(a,b)
 #define mullo_int16(a,b) vmulq_s16(a,b)
-#define _mm_empty() 
+#define _mm_empty()
 #define _m_empty()
 #endif
 
@@ -74,6 +74,11 @@ extern "C" {
     float i;
   } cf_t;
 
+  typedef struct complex8 {
+    int8_t r;
+    int8_t i;
+  } c8_t;
+
   typedef struct complex16 {
     int16_t r;
     int16_t i;
@@ -98,14 +103,14 @@ extern "C" {
       .i = (int16_t)((a.r * b.i + a.i * b.r) >> Shift)
     };
   }
-  
+
   __attribute__((always_inline)) inline c16_t c16divShift(const c16_t a, const c16_t b, const int Shift) {
     return (c16_t) {
       .r = (int16_t)((a.r * b.r + a.i * b.i) >> Shift),
       .i = (int16_t)((a.r * b.i - a.i * b.r) >> Shift)
     };
   }
-  
+
   __attribute__((always_inline)) inline c16_t c16maddShift(const c16_t a, const c16_t b, c16_t c, const int Shift) {
     return (c16_t) {
       .r = (int16_t)(((a.r * b.r - a.i * b.i ) >> Shift) + c.r),
@@ -136,14 +141,15 @@ extern "C" {
 
 
   // On N complex numbers
-  //   y.r += (x * alpha.r) >> 14 
-  //   y.i += (x * alpha.i) >> 14 
+  //   y.r += (x * alpha.r) >> 14
+  //   y.i += (x * alpha.i) >> 14
   // See regular C implementation at the end
-  __attribute__((always_inline)) inline void c16multaddVectRealComplex(const int16_t *x,
+  static __attribute__((always_inline)) inline void c16multaddVectRealComplex(const int16_t *x,
                                                                        const c16_t *alpha,
                                                                        c16_t *y,
                                                                        const int N) {
-#ifdef __AVX2__
+#if defined(__x86_64__) || defined(__i386__)
+    // Default implementation for x86
     const int8_t makePairs[32] __attribute__((aligned(32)))={
       0,1,0+16,1+16,
       2,3,2+16,3+16,
@@ -154,26 +160,27 @@ extern "C" {
       12,13,12+16,13+16,
       14,15,14+16,15+16};
     
-    __m256i alpha256= _mm256_set1_epi32(*(int32_t *)alpha);
+    __m256i alpha256= simde_mm256_set1_epi32(*(int32_t *)alpha);
     __m128i *x128=(__m128i *)x;
     __m128i *y128=(__m128i *)y;
     AssertFatal(N%8==0,"Not implemented\n");
     for (int i=0; i<N/8; i++) {
-      const __m256i xduplicate=_mm256_broadcastsi128_si256(*x128);
-      const __m256i x_duplicate_ordered=_mm256_shuffle_epi8(xduplicate,*(__m256i*)makePairs);
-      const __m256i x_mul_alpha_shift15 =_mm256_mulhrs_epi16(alpha256, x_duplicate_ordered);
+      const __m256i xduplicate=simde_mm256_broadcastsi128_si256(*x128);
+      const __m256i x_duplicate_ordered=simde_mm256_shuffle_epi8(xduplicate,*(__m256i*)makePairs);
+      const __m256i x_mul_alpha_shift15 =simde_mm256_mulhrs_epi16(alpha256, x_duplicate_ordered);
       // Existing multiplication normalization is weird, constant table in alpha need to be doubled
-      const __m256i x_mul_alpha_x2= _mm256_adds_epi16(x_mul_alpha_shift15,x_mul_alpha_shift15);
-      *y128= _mm_adds_epi16(_mm256_extracti128_si256(x_mul_alpha_x2,0),*y128);
+      const __m256i x_mul_alpha_x2= simde_mm256_adds_epi16(x_mul_alpha_shift15,x_mul_alpha_shift15);
+      *y128= _mm_adds_epi16(simde_mm256_extracti128_si256(x_mul_alpha_x2,0),*y128);
       y128++;
-      *y128= _mm_adds_epi16(_mm256_extracti128_si256(x_mul_alpha_x2,1),*y128);
+      *y128= _mm_adds_epi16(simde_mm256_extracti128_si256(x_mul_alpha_x2,1),*y128);
       y128++;
       x128++;
     } 
     
-#elif defined(__x86_64__) || defined(__i386__) ||  defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
+    // Default implementation for ARM
     uint32_t i;
-    
+
     // do 8 multiplications at a time
     simd_q15_t alpha_r_128,alpha_i_128,yr,yi,*x_128=(simd_q15_t*)x,*y_128=(simd_q15_t*)y;
     int j;
@@ -188,23 +195,16 @@ extern "C" {
 
       yr     = mulhi_s1_int16(alpha_r_128,x_128[i]);
       yi     = mulhi_s1_int16(alpha_i_128,x_128[i]);
-#if defined(__x86_64__) || defined(__i386__)
-      y_128[j]   = _mm_adds_epi16(y_128[j],_mm_unpacklo_epi16(yr,yi));
-      j++;
-      y_128[j]   = _mm_adds_epi16(y_128[j],_mm_unpackhi_epi16(yr,yi));
-      j++;
-#elif defined(__arm__)
       int16x8x2_t yint;
       yint = vzipq_s16(yr,yi);
       y_128[j]   = adds_int16(y_128[j],yint.val[0]);
       j++;
       y_128[j]   = adds_int16(y_128[j],yint.val[1]);
- 
+
       j++;
-#endif
     }
-
 #else
+    // Almost dead code (BMC)
     for (int i=0; i<N; i++) {
       int tmpr=y[i].r+((x[i]*alpha->r)>>14);
       if (tmpr>INT16_MAX)
@@ -219,7 +219,6 @@ extern "C" {
       y[i].r=(int16_t)tmpr;
       y[i].i=(int16_t)tmpi;
     }
-
 #endif
   }
 //cmult_sv.h
@@ -260,7 +259,7 @@ __attribute__((always_inline)) inline void multadd_real_four_symbols_vector_comp
   _mm_storeu_si128((simd_q15_t*)y, y_128);
 
 }
-  
+
 /*!\fn void multadd_complex_vector_real_scalar(int16_t *x,int16_t alpha,int16_t *y,uint8_t zero_flag,uint32_t N)
 This function performs componentwise multiplication and accumulation of a real scalar and a complex vector.
 @param x Vector input (Q1.15) in the format |Re0 Im0|Re1 Im 1| ...
@@ -431,6 +430,7 @@ This function performs optimized fixed-point radix-2 FFT/IFFT.
   SZ_DEF(128) \
   SZ_DEF(256) \
   SZ_DEF(512) \
+  SZ_DEF(768) \
   SZ_DEF(1024) \
   SZ_DEF(1536) \
   SZ_DEF(2048) \
@@ -440,10 +440,13 @@ This function performs optimized fixed-point radix-2 FFT/IFFT.
   SZ_DEF(8192) \
   SZ_DEF(9216) \
   SZ_DEF(12288) \
+  SZ_DEF(16384) \
   SZ_DEF(18432) \
   SZ_DEF(24576) \
+  SZ_DEF(32768) \
   SZ_DEF(36864) \
   SZ_DEF(49152) \
+  SZ_DEF(65536) \
   SZ_DEF(73728) \
   SZ_DEF(98304)
 
@@ -502,6 +505,8 @@ dft_size_idx_t get_dft(int ofdm_symbol_size)
       return DFT_256;
     case 512:
       return DFT_512;
+    case 768:
+      return DFT_768;
     case 1024:
       return DFT_1024;
     case 1536:
@@ -586,6 +591,8 @@ idft_size_idx_t get_idft(int ofdm_symbol_size)
       return IDFT_256;
     case 512:
       return IDFT_512;
+    case 768:
+      return IDFT_768;
     case 1024:
       return IDFT_1024;
     case 1536:
diff --git a/openair1/PHY/defs_L1_NB_IoT.h b/openair1/PHY/defs_L1_NB_IoT.h
index b4aee78310692e6bbb89a68dc24d7bb7e441e5c6..cdf696d2a62521f9cbef6934b652ce4c3fa4e45c 100644
--- a/openair1/PHY/defs_L1_NB_IoT.h
+++ b/openair1/PHY/defs_L1_NB_IoT.h
@@ -60,11 +60,7 @@
 #define msg_nrt printf
 //use msg_nrt in the non real-time context (for initialization, ...)
 #ifndef malloc16
-  #ifdef __AVX2__
     #define malloc16(x) memalign(32,x)
-  #else
-    #define malloc16(x) memalign(16,x)
-  #endif
 #endif
 #define free16(y,x) free(y)
 #define bigmalloc malloc
@@ -85,11 +81,7 @@
 /*
 static inline void* malloc16_clear( size_t size )
 {
-#ifdef __AVX2__
   void* ptr = memalign(32, size);
-#else
-  void* ptr = memalign(16, size);
-#endif
   DevAssert(ptr);
   memset( ptr, 0, size );
   return ptr;
@@ -136,7 +128,7 @@ static inline void* malloc16_clear( size_t size )
 #include "PHY/LTE_TRANSPORT/defs_NB_IoT.h"
 #include <pthread.h>
 
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 #include "openairinterface5g_limits.h"
 
 #define NUM_DCI_MAX_NB_IoT 32
diff --git a/openair1/PHY/defs_common.h b/openair1/PHY/defs_common.h
index 892fb7c8644344529aadb050d54fc8f569d2970b..13306d07d69c44ebfc0986d5b4c25db54559f990 100644
--- a/openair1/PHY/defs_common.h
+++ b/openair1/PHY/defs_common.h
@@ -98,6 +98,7 @@
 
 #define NB_RX_ANTENNAS_MAX 64
 
+#define TC_NSEC_x32768 16667
 
 typedef enum {TDD=1,FDD=0} frame_type_t;
 
diff --git a/openair1/PHY/defs_gNB.h b/openair1/PHY/defs_gNB.h
index 675c41c6bedd1a99ed1f08bf77b2e4047aaabd58..77b8548d83d5040b16dc3ada6608e70977c2484e 100644
--- a/openair1/PHY/defs_gNB.h
+++ b/openair1/PHY/defs_gNB.h
@@ -225,6 +225,11 @@ typedef struct {
   gNB_PRACH_list_t list[NUMBER_OF_NR_PRACH_MAX];
 } NR_gNB_PRACH;
 
+typedef struct {
+  uint8_t NumPRSResources;
+  prs_config_t prs_cfg[NR_MAX_PRS_RESOURCES_PER_SET];
+} NR_gNB_PRS;
+
 typedef struct {
   /// Nfapi ULSCH PDU
   nfapi_nr_pusch_pdu_t ulsch_pdu;
@@ -337,6 +342,9 @@ typedef struct {
   int16_t q_RI[MAX_RI_PAYLOAD];
   /// Temporary h sequence to flag PUSCH_x/PUSCH_y symbols which are not scrambled
   uint8_t h[MAX_NUM_CHANNEL_BITS];
+  /// Last index of LLR buffer that contains information.
+  /// Used for computing LDPC decoder R
+  int llrLen;
   //////////////////////////////////////////////////////////////
 } NR_UL_gNB_HARQ_t;
 
@@ -737,6 +745,7 @@ typedef struct PHY_VARS_gNB_s {
   NR_gNB_PBCH        pbch;
   NR_gNB_COMMON      common_vars;
   NR_gNB_PRACH       prach_vars;
+  NR_gNB_PRS         prs_vars;
   NR_gNB_PUSCH       *pusch_vars[NUMBER_OF_NR_ULSCH_MAX];
   NR_gNB_PUCCH_t     *pucch[NUMBER_OF_NR_PUCCH_MAX];
   NR_gNB_SRS_t       *srs[NUMBER_OF_NR_SRS_MAX];
@@ -790,6 +799,9 @@ typedef struct PHY_VARS_gNB_s {
   // Mask of occupied RBs, per symbol and PRB
   uint32_t rb_mask_ul[14][9];
 
+  /// PRS sequence
+  uint32_t ****nr_gold_prs;
+
   /// Indicator set to 0 after first SR
   uint8_t first_sr[NUMBER_OF_NR_SR_MAX];
 
diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h
index 951050349e1c313f31ef898af1de2361a4379494..6febce47066d9096934f835d284f0516fd447258 100644
--- a/openair1/PHY/defs_nr_UE.h
+++ b/openair1/PHY/defs_nr_UE.h
@@ -61,11 +61,7 @@
 #define msg_nrt printf
 //use msg_nrt in the non real-time context (for initialization, ...)
 #ifndef malloc16
-  #ifdef __AVX2__
     #define malloc16(x) memalign(32,x)
-  #else
-    #define malloc16(x) memalign(16,x)
-  #endif
 #endif
 #define free16(y,x) free(y)
 #define bigmalloc malloc
@@ -108,7 +104,8 @@
 
 #include "PHY/NR_UE_TRANSPORT/dci_nr.h"
 #include <pthread.h>
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
+#include "NR_IF_Module.h"
 
 /// Context data structure for gNB subframe processing
 typedef struct {
@@ -213,19 +210,10 @@ typedef struct {
 
 } PHY_NR_MEASUREMENTS;
 
-typedef struct {
-
-  /// \brief Holds the received data in the frequency domain.
-  /// - first index: rx antenna [0..nb_antennas_rx[
-  /// - second index: symbol [0..28*ofdm_symbol_size[
-  int32_t **rxdataF;
-
-} NR_UE_COMMON_PER_THREAD;
-
 typedef struct {
   bool active[2];
   fapi_nr_ul_config_pucch_pdu pucch_pdu[2];
-  } NR_UE_PUCCH;
+} NR_UE_PUCCH;
 
 typedef struct {
   /// \brief Holds the transmit data in time domain.
@@ -245,7 +233,10 @@ typedef struct {
   /// - second index: sample [0..2*FRAME_LENGTH_COMPLEX_SAMPLES+2048[
   int32_t **rxdata;
 
-  NR_UE_COMMON_PER_THREAD common_vars_rx_data_per_thread[RX_NB_TH_MAX];
+  /// \brief Holds the received data in the frequency domain.
+  /// - first index: rx antenna [0..nb_antennas_rx[
+  /// - second index: symbol [0..28*ofdm_symbol_size[
+  int32_t **rxdataF;
 
   /// holds output of the sync correlator
   int32_t *sync_corr;
@@ -346,6 +337,18 @@ typedef struct {
   int32_t **ptrs_re_per_slot;
 } NR_UE_PDSCH;
 
+#define NR_PRS_IDFT_OVERSAMP_FACTOR 1  // IDFT oversampling factor for NR PRS time-domain channel estimates; allowed values are 1 (default, i.e. IDFT size is frame_params->ofdm_symbol_size) and 16
+typedef struct {
+  prs_config_t prs_cfg;
+  int32_t reserved;
+  prs_meas_t **prs_meas;
+} NR_PRS_RESOURCE_t;
+
+typedef struct {
+  uint8_t NumPRSResources;
+  NR_PRS_RESOURCE_t prs_resource[NR_MAX_PRS_RESOURCES_PER_SET];
+} NR_UE_PRS;
+
 #define NR_PDCCH_DEFS_NR_UE
 #define NR_NBR_CORESET_ACT_BWP      3  // The number of CoreSets per BWP is limited to 3 (including initial CORESET: ControlResourceId 0)
 #define NR_NBR_SEARCHSPACE_ACT_BWP  10 // The number of SearchSpaces per BWP is limited to 10 (including initial SEARCHSPACE: SearchSpaceId 0)
@@ -586,8 +589,6 @@ typedef struct {
 
 typedef struct {
   int16_t amp;
-  int16_t *prachF;
-  int16_t *prach;
   fapi_nr_ul_config_prach_pdu prach_pdu;
 } NR_UE_PRACH;
 
@@ -644,8 +645,6 @@ typedef struct NR_UL_TIME_ALIGNMENT {
   uint8_t          tag_id;
 } NR_UL_TIME_ALIGNMENT_t;
 
-#include "NR_IF_Module.h"
-
 /// Top-level PHY Data Structure for UE
 typedef struct {
   /// \brief Module ID indicator for this instance
@@ -710,21 +709,21 @@ typedef struct {
 
   fapi_nr_config_request_t nrUE_config;
 
-  NR_UE_PDSCH     *pdsch_vars[RX_NB_TH_MAX][NUMBER_OF_CONNECTED_gNB_MAX+1]; // two RxTx Threads
+  NR_UE_PDSCH     *pdsch_vars[NUMBER_OF_CONNECTED_gNB_MAX+1];
   NR_UE_PBCH      *pbch_vars[NUMBER_OF_CONNECTED_gNB_MAX];
-  NR_UE_PDCCH     *pdcch_vars[RX_NB_TH_MAX][NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_PRACH     *prach_vars[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_CSI_IM    *csiim_vars[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_CSI_RS    *csirs_vars[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_SRS       *srs_vars[NUMBER_OF_CONNECTED_gNB_MAX];
-  NR_UE_PUCCH     *pucch_vars[RX_NB_TH_MAX][NUMBER_OF_CONNECTED_gNB_MAX];
-  NR_UE_DLSCH_t   *dlsch[RX_NB_TH_MAX][NUMBER_OF_CONNECTED_gNB_MAX][NR_MAX_NB_LAYERS>4 ? 2:1]; // two RxTx Threads
-  NR_UE_ULSCH_t   *ulsch[RX_NB_TH_MAX][NUMBER_OF_CONNECTED_gNB_MAX];
+  NR_UE_DLSCH_t   *dlsch[NUMBER_OF_CONNECTED_gNB_MAX][NR_MAX_NB_LAYERS>4 ? 2:1];
+  NR_UE_ULSCH_t   *ulsch[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_DLSCH_t   *dlsch_SI[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_DLSCH_t   *dlsch_ra[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_DLSCH_t   *dlsch_p[NUMBER_OF_CONNECTED_gNB_MAX];
   NR_UE_DLSCH_t   *dlsch_MCH[NUMBER_OF_CONNECTED_gNB_MAX];
-
+  NR_UE_PRS       *prs_vars[NR_MAX_PRS_COMB_SIZE];
+  uint8_t          prs_active_gNBs;
+  
   //Paging parameters
   uint32_t              IMSImod1024;
   uint32_t              PF;
@@ -768,6 +767,9 @@ typedef struct {
   /// PUSCH DMRS sequence
   uint32_t ****nr_gold_pusch_dmrs;
 
+  // PRS sequence per gNB, per resource
+  uint32_t *****nr_gold_prs;
+  
   uint32_t X_u[64][839];
 
   // flag to activate PRB based averaging of channel estimates
@@ -910,9 +912,9 @@ typedef struct {
   /// Transmission mode per gNB
   uint8_t transmission_mode[NUMBER_OF_CONNECTED_gNB_MAX];
 
-  time_stats_t phy_proc[RX_NB_TH];
+  time_stats_t phy_proc;
   time_stats_t phy_proc_tx;
-  time_stats_t phy_proc_rx[RX_NB_TH];
+  time_stats_t phy_proc_rx;
 
   time_stats_t ue_ul_indication_stats;
 
@@ -928,13 +930,13 @@ typedef struct {
   time_stats_t ulsch_multiplexing_stats;
 
   time_stats_t generic_stat;
-  time_stats_t generic_stat_bis[RX_NB_TH][LTE_SLOTS_PER_SUBFRAME];
-  time_stats_t ue_front_end_stat[RX_NB_TH];
-  time_stats_t ue_front_end_per_slot_stat[RX_NB_TH][LTE_SLOTS_PER_SUBFRAME];
-  time_stats_t pdcch_procedures_stat[RX_NB_TH];
-  time_stats_t pdsch_procedures_stat[RX_NB_TH];
-  time_stats_t pdsch_procedures_per_slot_stat[RX_NB_TH][LTE_SLOTS_PER_SUBFRAME];
-  time_stats_t dlsch_procedures_stat[RX_NB_TH];
+  time_stats_t generic_stat_bis[LTE_SLOTS_PER_SUBFRAME];
+  time_stats_t ue_front_end_stat;
+  time_stats_t ue_front_end_per_slot_stat[LTE_SLOTS_PER_SUBFRAME];
+  time_stats_t pdcch_procedures_stat;
+  time_stats_t pdsch_procedures_stat;
+  time_stats_t pdsch_procedures_per_slot_stat[LTE_SLOTS_PER_SUBFRAME];
+  time_stats_t dlsch_procedures_stat;
 
   time_stats_t rx_pdsch_stats;
   time_stats_t ofdm_demod_stats;
@@ -942,13 +944,13 @@ typedef struct {
   time_stats_t rx_dft_stats;
   time_stats_t dlsch_channel_estimation_stats;
   time_stats_t dlsch_freq_offset_estimation_stats;
-  time_stats_t dlsch_decoding_stats[2];
+  time_stats_t dlsch_decoding_stats;
   time_stats_t dlsch_demodulation_stats;
   time_stats_t dlsch_rate_unmatching_stats;
   time_stats_t dlsch_ldpc_decoding_stats;
   time_stats_t dlsch_deinterleaving_stats;
   time_stats_t dlsch_llr_stats;
-  time_stats_t dlsch_llr_stats_parallelization[RX_NB_TH][LTE_SLOTS_PER_SUBFRAME];
+  time_stats_t dlsch_llr_stats_parallelization[LTE_SLOTS_PER_SUBFRAME];
   time_stats_t dlsch_unscrambling_stats;
   time_stats_t dlsch_rate_matching_stats;
   time_stats_t dlsch_ldpc_encoding_stats;
@@ -980,6 +982,9 @@ typedef struct {
   void* scopeData;
 } PHY_VARS_NR_UE;
 
+typedef struct nr_phy_data_s {
+  NR_UE_PUCCH pucch_vars;
+} nr_phy_data_t;
 /* this structure is used to pass both UE phy vars and
  * proc to the function UE_thread_rxn_txnp4
  */
diff --git a/openair1/PHY/defs_nr_common.h b/openair1/PHY/defs_nr_common.h
index c8a063fe19d55d7b9f36f2573bd9ab7ebc7af44a..10c4c9df6db935a38aa448eb9db48503cebd7625 100644
--- a/openair1/PHY/defs_nr_common.h
+++ b/openair1/PHY/defs_nr_common.h
@@ -63,13 +63,19 @@
 #define NR_PSS_LENGTH 127
 #define NR_SSS_LENGTH 127
 
+#define NR_MAX_PRS_LENGTH 3264 //272*6(max allocation per RB)*2(QPSK)
+#define NR_MAX_PRS_INIT_LENGTH_DWORD 102 // ceil(NR_MAX_PRS_LENGTH/32)
+#define NR_MAX_NUM_PRS_SYMB 12
+#define NR_MAX_PRS_COMB_SIZE 12
+#define NR_MAX_PRS_RESOURCES_PER_SET 64
+#define NR_MAX_PRS_MUTING_PATTERN_LENGTH 32
+
 #define NR_PBCH_DMRS_LENGTH 144 // in mod symbols
 #define NR_PBCH_DMRS_LENGTH_DWORD 10 // ceil(2(QPSK)*NR_PBCH_DMRS_LENGTH/32)
 
 /*used for the resource mapping*/
 #define NR_MAX_PDCCH_DMRS_LENGTH 576 // 16(L)*2(QPSK)*3(3 DMRS symbs per REG)*6(REG per CCE)
-#define  NR_MAX_PDCCH_SIZE 8192 // It seems it is the max polar coded block size
-
+#define NR_MAX_PDCCH_SIZE 8192 // It seems it is the max polar coded block size
 #define NR_MAX_DCI_PAYLOAD_SIZE 64
 #define NR_MAX_DCI_SIZE 1728 //16(L)*2(QPSK)*9(12 RE per REG - 3(DMRS))*6(REG per CCE)
 #define NR_MAX_DCI_SIZE_DWORD 54 // ceil(NR_MAX_DCI_SIZE/32)
@@ -197,17 +203,6 @@ typedef struct NR_BWP_PARMS {
   nr_prg_parms_t prg_parms;
 } NR_BWP_PARMS;
 
-typedef struct {
-  uint8_t reg_idx;
-  uint16_t start_sc_idx;
-  uint8_t symb_idx;
-} nr_reg_t;
-
-typedef struct {
-  uint8_t cce_idx;
-  nr_reg_t reg_list[NR_NB_REG_PER_CCE];
-} nr_cce_t;
-
 typedef struct {
   /// PRACH format retrieved from prach_ConfigIndex
   uint16_t prach_format;
@@ -383,7 +378,42 @@ struct NR_DL_FRAME_PARMS {
   uint32_t ofdm_offset_divisor;
 };
 
+// PRS config structures
+typedef struct {
+    uint16_t PRSResourceSetPeriod[2];   // [slot period, slot offset] of a PRS resource set
+    uint16_t PRSResourceOffset;         // Slot offset of each PRS resource defined relative to the slot offset of the PRS resource set (0...511)
+    uint8_t  PRSResourceRepetition;     // Repetition factor for all PRS resources in resource set (1 /*default*/, 2, 4, 6, 8, 16, 32)
+    uint8_t  PRSResourceTimeGap;        // Slot offset between two consecutive repetition indices of all PRS resources in a PRS resource set (1 /*default*/, 2, 4, 6, 8, 16, 32)
+    uint16_t NumRB;                     // Number of PRBs allocated to all PRS resources in a PRS resource set (<= 272 and multiples of 4)
+    uint8_t  NumPRSSymbols;             // Number of OFDM symbols in a slot allocated to each PRS resource in a PRS resource set
+    uint8_t  SymbolStart;               // Starting OFDM symbol of each PRS resource in a PRS resource set
+    uint16_t RBOffset;                  // Starting PRB index of all PRS resources in a PRS resource set
+    uint8_t  CombSize;                  // RE density of all PRS resources in a PRS resource set (2, 4, 6, 12)
+    uint8_t  REOffset;                  // Starting RE offset in the first OFDM symbol of each PRS resource in a PRS resource set
+    uint32_t MutingPattern1[32];        // Muting bit pattern option-1, specified as [] or a binary-valued vector of length 2, 4, 6, 8, 16, or 32
+    uint32_t MutingPattern2[32];        // Muting bit pattern option-2, specified as [] or a binary-valued vector of length 2, 4, 6, 8, 16, or 32
+    uint8_t  MutingBitRepetition;       // Muting bit repetition factor, specified as 1, 2, 4, or 8
+    uint16_t NPRSID;                    // Sequence identity of each PRS resource in a PRS resource set, specified in the range [0, 4095]
+} prs_config_t;
 
+typedef struct {
+    int8_t  gNB_id;
+    int32_t sfn;
+    int8_t  slot;
+    int8_t  rxAnt_idx;
+    int32_t dl_toa;
+    int32_t dl_aoa;
+    int32_t snr;
+    int32_t reserved;
+} prs_meas_t;
+
+// rel16 prs k_prime table as per ts138.211 sec.7.4.1.7.2
+#define K_PRIME_TABLE_ROW_SIZE 4
+#define K_PRIME_TABLE_COL_SIZE 12
+#define PRS_K_PRIME_TABLE { {0,1,0,1,0,1,0,1,0,1,0,1}, \
+                            {0,2,1,3,0,2,1,3,0,2,1,3}, \
+                            {0,3,1,4,2,5,0,3,1,4,2,5}, \
+                            {0,6,3,9,1,7,4,10,2,8,5,11} };
 
 #define KHz (1000UL)
 #define MHz (1000*KHz)
diff --git a/openair1/PHY/impl_defs_nr.h b/openair1/PHY/impl_defs_nr.h
index 87d193b4a31aa0c692dfba9d268a249d6576a46f..742f77186dd8a9987515a4423036645b2e6ad634 100644
--- a/openair1/PHY/impl_defs_nr.h
+++ b/openair1/PHY/impl_defs_nr.h
@@ -344,11 +344,6 @@ typedef struct {
 #define MAX_NR_OF_DL_ALLOCATIONS             16
 #define MAX_NR_OF_UL_ALLOCATIONS            (16)
 
-typedef enum {
-  typeA = 0,
-  typeB = 1
-} mappingType_t;
-
 typedef enum {
   pdsch_dmrs_pos0 = 0,
   pdsch_dmrs_pos1 = 1,
@@ -368,12 +363,7 @@ typedef enum {
   pusch_dmrs_type1 = 0,
   pusch_dmrs_type2 = 1
 } pusch_dmrs_type_t;
-typedef enum {
-  pusch_dmrs_pos0 = 0,
-  pusch_dmrs_pos1 = 1,
-  pusch_dmrs_pos2 = 2,
-  pusch_dmrs_pos3 = 3,
-} pusch_dmrs_AdditionalPosition_t;
+
 typedef enum {
   pusch_len1 = 1,
   pusch_len2 = 2
diff --git a/openair1/PHY/sse_intrin.h b/openair1/PHY/sse_intrin.h
index 9995909accde60036099aec9da0d85e241f32682..fc6836fd30faeaa985cb025722b41cf1ce4f0f36 100644
--- a/openair1/PHY/sse_intrin.h
+++ b/openair1/PHY/sse_intrin.h
@@ -26,6 +26,18 @@
  * The host CPU needs to have support for SSE2 at least. SSE3 and SSE4.1 functions are emulated if the CPU lacks support for them.
  * This will slow down the softmodem, but may be valuable if only offline signal processing is required.
  *
+ * 
+ * Has been changed in August 2022 to rely on SIMD Everywhere (SIMDE) from MIT
+ * by bruno.mongazon-cazavet@nokia-bell-labs.com
+ *
+ * All AVX2 code is mapped to SIMDE which transparently relies on AVX2 HW (avx2-capable host) or SIMDE emulation
+ * (non-avx2-capable host).
+ * To force using SIMDE emulation on avx2-capable host use the --noavx2 flag. 
+ * avx512 code is not mapped to SIMDE. It depends on --noavx512 flag.
+ * If the --noavx512 is set the OAI AVX512 emulation using AVX2 is used.
+ * If the --noavx512 is not set, AVX512 HW is used on avx512-capable host while OAI AVX512 emulation using AVX2
+ * is used on non-avx512-capable host. 
+ *
  * \author S. Held, Laurent THOMAS
  * \email sebastian.held@imst.de, laurent.thomas@open-cells.com	
  * \company IMST GmbH, Open Cells Project
@@ -39,306 +51,37 @@
 
 #if defined(__x86_64) || defined(__i386__)
 
-#ifndef __SSE2__
-  #  error SSE2 processor intrinsics disabled
-#endif
+/* x86 processors */
 
-#include <emmintrin.h>
-#include <xmmintrin.h>
-
-#ifdef __SSE3__
-  #include <pmmintrin.h>
-  #include <tmmintrin.h>
-#endif
+#include <simde/x86/mmx.h>
+#include <simde/x86/sse.h>
+#include <simde/x86/sse2.h>
+#include <simde/x86/sse3.h>
+#include <simde/x86/ssse3.h>
+#include <simde/x86/sse4.1.h>
+#include <simde/x86/sse4.2.h>
+#include <simde/x86/avx2.h>
+#include <simde/x86/fma.h>
 
-#ifdef __SSE4_1__
-  #include <smmintrin.h>
+#if defined(__AVX512BW__) || defined(__AVX512F__)
+#include <immintrin.h>
 #endif
 
-#ifdef __AVX2__
-  #include <immintrin.h>
-#endif
-
-// ------------------------------------------------
-// compatibility functions if SSE3 or SSE4 is not available
-// ------------------------------------------------
-#if !defined(__SSE3__) || !defined(__SSE4_1__)
-/*! \brief SSE vector type.
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-typedef union {
-  __m128  f;
-  __m128d d;
-  __m128i i;
-  __m64       m64[ 2];
-  signed char  s8[16];
-} ssp_m128;
-#endif
-
-
-// ------------------------------------------------
-// compatibility functions if SSE3 is not available
-// ------------------------------------------------
-#ifndef __SSE3__
-#warning SSE3 instruction set not preset
-#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(_mm_setzero_si128(),(xmmx)))
-#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(_mm_setzero_si128(),(xmmy)))
-#define _mm_hadd_epi32(xmmx,xmmy) _mm_unpacklo_epi64(_mm_add_epi32(_mm_shuffle_epi32((xmmx),_MM_SHUFFLE(0,2,0,2)),_mm_shuffle_epi32((xmmx),_MM_SHUFFLE(1,3,1,3))),_mm_add_epi32(_mm_shuffle_epi32((xmmy),_MM_SHUFFLE(0,2,0,2)),_mm_shuffle_epi32((xmmy),_MM_SHUFFLE(1,3,1,3))))
-
-// variant from lte_ul_channel_estimation.c and dlsch_demodulation.c and pmch.c
-//#define _mm_abs_epi16(xmmx) _mm_add_epi16(_mm_xor_si128((xmmx),_mm_cmpgt_epi16(_mm_setzero_si128(),(xmmx))),_mm_srli_epi16(_mm_cmpgt_epi16(_mm_setzero_si128(),(xmmx)),15))
-
-// variant from cdot_prod.c
-//#define _mm_abs_epi16(xmmx) xmmx=_mm_xor_si128((xmmx),_mm_cmpgt_epi16(_mm_setzero_si128(),(xmmx)))
-
-#define _mm_shuffle_epi8(xmmx,xmmy) ssp_shuffle_epi8_SSE2(xmmx,xmmy)
-
-/*! \brief Helper function.
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b) {
-  __m128i c;
-  c = _mm_cmpgt_epi8( a, b );
-  a = _mm_cmpeq_epi8( a, b );
-  a = _mm_or_si128  ( a, c );
-  return a;
-}
-
-/*! \brief SSE2 emulation of SSE3 _mm_shuffle_epi8().
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask) {
-  ssp_m128 A,B, MASK, maskZero;
-  A.i        = a;
-  maskZero.i = ssp_comge_epi8_SSE2( mask, _mm_setzero_si128()        );
-  MASK.i     = _mm_and_si128      ( mask, _mm_set1_epi8( (char)0x0F) );
-  B.s8[ 0] = A.s8[ (MASK.s8[ 0]) ];
-  B.s8[ 1] = A.s8[ (MASK.s8[ 1]) ];
-  B.s8[ 2] = A.s8[ (MASK.s8[ 2]) ];
-  B.s8[ 3] = A.s8[ (MASK.s8[ 3]) ];
-  B.s8[ 4] = A.s8[ (MASK.s8[ 4]) ];
-  B.s8[ 5] = A.s8[ (MASK.s8[ 5]) ];
-  B.s8[ 6] = A.s8[ (MASK.s8[ 6]) ];
-  B.s8[ 7] = A.s8[ (MASK.s8[ 7]) ];
-  B.s8[ 8] = A.s8[ (MASK.s8[ 8]) ];
-  B.s8[ 9] = A.s8[ (MASK.s8[ 9]) ];
-  B.s8[10] = A.s8[ (MASK.s8[10]) ];
-  B.s8[11] = A.s8[ (MASK.s8[11]) ];
-  B.s8[12] = A.s8[ (MASK.s8[12]) ];
-  B.s8[13] = A.s8[ (MASK.s8[13]) ];
-  B.s8[14] = A.s8[ (MASK.s8[14]) ];
-  B.s8[15] = A.s8[ (MASK.s8[15]) ];
-  B.i = _mm_and_si128( B.i, maskZero.i );
-  return B.i;
-}
-#endif // __SSE3__
-
-
-
-
-
-// ------------------------------------------------
-// compatibility functions if SSE4 is not available
-// ------------------------------------------------
-#ifndef __SSE4_1__
-#warning SSE4_1 instruction set not preset
-
-// https://gitorious.org/vc/vc/commit/ee49857ffe5b74c74bc57d501b05519443fc609a license LGPL3
-#define _mm_extract_epi32(xmmx,index) _mm_cvtsi128_si32(_mm_srli_si128(xmmx, (index) * 4))
-
-#define _mm_insert_epi8(x,y,z) ssp_insert_epi8_SSE2(x,y,z)
-#define _mm_cvtepi8_epi16(x) ssp_cvtepi8_epi16_SSE2(x)
-#define _mm_max_epi8(x,y) ssp_max_epi8_SSE2(x,y)
-#define _mm_cvtepi16_epi32(x) ssp_cvtepi16_epi32_SSE2(x)
-
-/*! \brief SSE2 emulation of SSE4 _mm_insert_epi8().
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx ) {
-  ssp_m128 Ahi, Alo;
-  b = b & 0xFF;                                           /* Convert to 8-bit integer */
-  Ahi.i = _mm_unpackhi_epi8( a, _mm_setzero_si128() );    /* Ahi = a_8[8:15]  Simulate 8bit integers as 16-bit integers */
-  Alo.i = _mm_unpacklo_epi8( a, _mm_setzero_si128() );    /* Alo = a_8[0:7]   Simulate 8bit integers as 16-bit integers */
-
-  /* Insert b as a 16-bit integer to upper or lower half of a */
-  switch( ndx & 0xF ) {
-    case 0:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 0 );
-      break;
+#elif defined(__arm__) || defined(__aarch64__)
 
-    case 1:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 1 );
-      break;
+/* ARM processors */
 
-    case 2:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 2 );
-      break;
+#include <simde/arm/neon.h>
 
-    case 3:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 3 );
-      break;
-
-    case 4:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 4 );
-      break;
-
-    case 5:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 5 );
-      break;
-
-    case 6:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 6 );
-      break;
-
-    case 7:
-      Alo.i = _mm_insert_epi16( Alo.i, b, 7 );
-      break;
-
-    case 8:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 0 );
-      break;
-
-    case 9:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 1 );
-      break;
-
-    case 10:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 2 );
-      break;
-
-    case 11:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 3 );
-      break;
-
-    case 12:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 4 );
-      break;
-
-    case 13:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 5 );
-      break;
-
-    case 14:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 6 );
-      break;
-
-    default:
-      Ahi.i = _mm_insert_epi16( Ahi.i, b, 7 );
-  }
-
-  return _mm_packus_epi16( Alo.i, Ahi.i ); // Pack the 16-bit integers to 8bit again.
-  ///* Another implementation, but slower: */
-  //ssp_m128 A, B, mask;
-  //mask.i = _mm_setzero_si128();
-  //mask.s8[ ndx & 0x0F ] = (ssp_s8)0xFF;
-  //B.i    = _mm_set1_epi8( (ssp_s8)b );
-  //A.i    = _mm_andnot_si128( mask.i, a );
-  //mask.i = _mm_and_si128( mask.i, B.i );
-  //A.i = _mm_or_si128( A.i, mask.i );
-  //return A.i;
-}
-
-/*! \brief SSE2 emulation of SSE4 _mm_cvtepi8_epi16().
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a) {
-  __m128i b = _mm_setzero_si128 ();
-  __m128i c = _mm_unpacklo_epi8(a, b);
-  __m128i d = _mm_set1_epi16 (128);
-  b = _mm_and_si128(d, c);
-  d = _mm_set1_epi16(0x1FE);
-  b = _mm_mullo_epi16(b, d);
-  return _mm_add_epi16(c, b);
-}
-
-/*! \brief Helper function.
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask ) { // Bitwise (mask ? a : b)
-  a = _mm_and_si128   ( a,    mask );                                 // clear a where mask = 0
-  b = _mm_andnot_si128( mask, b    );                                 // clear b where mask = 1
-  a = _mm_or_si128    ( a,    b    );                                 // a = a OR b
-  return a;
-}
-
-/*! \brief SSE2 emulation of SSE4 _mm_max_epi8().
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
- */
-static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b ) {
-  __m128i mask  = _mm_cmpgt_epi8( a, b );                             // FFFFFFFF where a > b
-  a = ssp_logical_bitwise_select_SSE2( a, b, mask );
-  return a;
-}
+#endif // x86_64 || i386
 
-/*! \brief SSE2 emulation of SSE4 _mm_cvtepi16_epi32().
- *
- * this source code fragment is Copyright (c) 2006-2008 Advanced Micro Devices, Inc.
- * It is licensed under Apache License 2.0 (compatible to our GPL3).
- * see http://sourceforge.net/projects/sseplus
- * \author Advanced Micro Devices, Inc.
- * \date 2006-2008
- * \copyright Apache License 2.0
+/*
+ * OAI specific
  */
-static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a) {
-  __m128i b = _mm_set1_epi32 (-1);         //0xFFFFFFFF
-  __m128i c = _mm_unpacklo_epi16(a, b);    //FFFFa0**FFFFa1**....
-  __m128i d = _mm_set1_epi32 (0x8000);     //0x8000
-  b = _mm_andnot_si128(c, d);              // 0x80 for positive, 0x00 for negative
-  d = _mm_slli_epi32(b, 1);                // 0x100 for positive, 0x000 for negative
-  return _mm_add_epi32(c, d);
-}
-#endif // __SSE4_1__
-
-#elif defined(__arm__)
-#include <arm_neon.h>
-
-#endif // x86_64 || i386
 
 #if defined(__x86_64__) || defined(__i386__)
   #define vect128 __m128i
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   #define vect128 int16x8_t
 #endif
 
@@ -356,7 +99,7 @@ static inline vect128 mulByConjugate128(vect128 *a, vect128 *b, int8_t output_sh
   vect128 lowPart = _mm_unpacklo_epi32(realPart,imagPart);
   vect128 highPart = _mm_unpackhi_epi32(realPart,imagPart);
   return ( _mm_packs_epi32(lowPart,highPart));
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   AssertFatal(false, "not developped\n");
 #endif
 }
@@ -374,7 +117,7 @@ static inline vect128 mulByConjugate128(vect128 *a, vect128 *b, int8_t output_sh
            _mm_extract_epi16(x,6),\
            _mm_extract_epi16(x,7));\
   }
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   displaySamples128(vect) {}
 //TBD
 #endif
diff --git a/openair1/PHY/thread_NR_UE.h b/openair1/PHY/thread_NR_UE.h
index a3cc2d97820456d4e6b3d9fd83c2eb7c94caefde..83cb00862f0f177481e88d03c3b3293b8c4c0079 100644
--- a/openair1/PHY/thread_NR_UE.h
+++ b/openair1/PHY/thread_NR_UE.h
@@ -1,20 +1,21 @@
 #ifndef __thread_NR_UE__
 #define __thread_NR_UE__
 #include <pthread.h>
-#include <targets/ARCH/COMMON/common_lib.h>
+#include <sdr/COMMON/common_lib.h>
 /// Context data structure for RX/TX portion of subframe processing
 typedef struct {
-  /// index of the current UE RX/TX thread
-  int                  thread_id;
   /// Component Carrier index
   uint8_t              CC_id;
   /// timestamp transmitted to HW
   openair0_timestamp timestamp_tx;
   //#ifdef UE_NR_PHY_DEMO
+  int gNB_id;
   /// NR slot index within frame_tx [0 .. slots_per_frame - 1] to act upon for transmission
   int nr_slot_tx;
+  int rx_slot_type;
   /// NR slot index within frame_rx [0 .. slots_per_frame - 1] to act upon for transmission
   int nr_slot_rx;
+  int tx_slot_type;
   //#endif
   /// frame to act upon for transmission
   int frame_tx;
diff --git a/openair1/SCHED_NR/phy_procedures_nr_gNB.c b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
index d0270ba9a028dee81021a19f1219a1abbef2717d..68eb160c156b086987c8a05b4eed3f88eddd049a 100644
--- a/openair1/SCHED_NR/phy_procedures_nr_gNB.c
+++ b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
@@ -121,8 +121,9 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
   PHY_VARS_gNB *gNB = msgTx->gNB;
   NR_DL_FRAME_PARMS *fp=&gNB->frame_parms;
   nfapi_nr_config_request_scf_t *cfg = &gNB->gNB_config;
-  int offset = gNB->CC_id;
+  int offset = gNB->CC_id, slot_prs;
   int txdataF_offset = slot*fp->samples_per_slot_wCP;
+  prs_config_t *prs_config = NULL;
 
   if ((cfg->cell_config.frame_duplex_type.value == TDD) &&
       (nr_slot_select(cfg,frame,slot) == NR_UPLINK_SLOT)) return;
@@ -135,6 +136,21 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
     memset(&gNB->common_vars.beam_id[aa][slot*fp->symbols_per_slot],255,fp->symbols_per_slot*sizeof(uint8_t));
   }
 
+  // Check for PRS slot - section 7.4.1.7.4 in 3GPP rel16 38.211
+  for(int rsc_id = 0; rsc_id < gNB->prs_vars.NumPRSResources; rsc_id++)
+  {
+    prs_config = &gNB->prs_vars.prs_cfg[rsc_id];
+    for (int i = 0; i < prs_config->PRSResourceRepetition; i++)
+    {
+      if( (((frame*fp->slots_per_frame + slot) - (prs_config->PRSResourceSetPeriod[1] + prs_config->PRSResourceOffset)+prs_config->PRSResourceSetPeriod[0])%prs_config->PRSResourceSetPeriod[0]) == i*prs_config->PRSResourceTimeGap )
+      {
+        slot_prs = (slot - i*prs_config->PRSResourceTimeGap + fp->slots_per_frame)%fp->slots_per_frame;
+        LOG_D(PHY,"gNB_TX: frame %d, slot %d, slot_prs %d, PRS Resource ID %d\n",frame, slot, slot_prs, rsc_id);
+        nr_generate_prs(gNB->nr_gold_prs[rsc_id][slot_prs],&gNB->common_vars.txdataF[0][txdataF_offset], AMP, prs_config, cfg, fp);
+      }
+    }
+  }
+
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_gNB_COMMON_TX,1);
   for (int i=0; i<fp->Lmax; i++) {
     if (msgTx->ssb[i].active) {
@@ -182,6 +198,10 @@ void phy_procedures_gNB_TX(processingData_L1tx_t *msgTx,
   //apply the OFDM symbol rotation here
   for (aa=0; aa<cfg->carrier_config.num_tx_ant.value; aa++) {
     apply_nr_rotation(fp,(int16_t*) &gNB->common_vars.txdataF[aa][txdataF_offset],slot,0,fp->Ncp==EXTENDED?12:14);
+
+    T(T_GNB_PHY_DL_OUTPUT_SIGNAL, T_INT(0),
+      T_INT(frame), T_INT(slot),
+      T_INT(aa), T_BUFFER(&gNB->common_vars.txdataF[aa][txdataF_offset], fp->samples_per_slot_wCP*sizeof(int32_t)));
   }
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_gNB_TX+offset,0);
@@ -248,11 +268,7 @@ void nr_postDecode(PHY_VARS_gNB *gNB, notifiedFIFO_elt_t *req) {
     }
 /*
     if (ulsch_harq->ulsch_pdu.mcs_index == 0 && dumpsig==1) {
-#ifdef __AVX2__
       int off = ((ulsch_harq->ulsch_pdu.rb_size&1) == 1)? 4:0;
-#else
-      int off = 0;
-#endif
 
       LOG_M("rxsigF0.m","rxsF0",&gNB->common_vars.rxdataF[0][(ulsch_harq->slot&3)*gNB->frame_parms.ofdm_symbol_size*gNB->frame_parms.symbols_per_slot],gNB->frame_parms.ofdm_symbol_size*gNB->frame_parms.symbols_per_slot,1,1);
       LOG_M("rxsigF0_ext.m","rxsF0_ext",
@@ -422,11 +438,8 @@ void nr_fill_indication(PHY_VARS_gNB *gNB, int frame, int slot_rx, int ULSCH_id,
 
   if (0/*pusch_pdu->mcs_index == 9*/) {
       __attribute__((unused))
-#ifdef __AVX2__
       int off = ((pusch_pdu->rb_size&1) == 1)? 4:0;
-#else
-      int off = 0;
-#endif
+
       LOG_M("rxsigF0.m","rxsF0",&gNB->common_vars.rxdataF[0][(slot_rx&3)*gNB->frame_parms.ofdm_symbol_size*gNB->frame_parms.symbols_per_slot],gNB->frame_parms.ofdm_symbol_size*gNB->frame_parms.symbols_per_slot,1,1);
       LOG_M("rxsigF0_ext.m","rxsF0_ext",
              &gNB->pusch_vars[0]->rxdataF_ext[0][pusch_pdu->start_symbol_index*NR_NB_SC_PER_RB * pusch_pdu->rb_size],pusch_pdu->nr_of_symbols*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
@@ -584,27 +597,77 @@ void fill_ul_rb_mask(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
 
 }
 
-int fill_srs_reported_symbol_list(nfapi_nr_srs_indication_reported_symbol_t *reported_symbol_list,
+int fill_srs_reported_symbol_list(nfapi_nr_srs_reported_symbol_t *prgs,
                                   const nfapi_nr_srs_pdu_t *srs_pdu,
                                   const int N_RB_UL,
                                   const int8_t *snr_per_rb,
                                   const int srs_est) {
 
-  reported_symbol_list->num_rbs = srs_bandwidth_config[srs_pdu->config_index][srs_pdu->bandwidth_index][0];
+  prgs->num_prgs = srs_pdu->beamforming.num_prgs;
 
-  if (!reported_symbol_list->rb_list) {
-    reported_symbol_list->rb_list = (nfapi_nr_srs_indication_reported_symbol_resource_block_t*) calloc(1, N_RB_UL*sizeof(nfapi_nr_srs_indication_reported_symbol_resource_block_t));
+  if (!prgs->prg_list) {
+    prgs->prg_list = (nfapi_nr_srs_reported_symbol_prgs_t*) calloc(1, N_RB_UL*sizeof(nfapi_nr_srs_reported_symbol_prgs_t));
   }
 
-  for(int rb = 0; rb < reported_symbol_list->num_rbs; rb++) {
+  for(int prg_idx = 0; prg_idx < prgs->num_prgs; prg_idx++) {
     if (srs_est<0) {
-      reported_symbol_list->rb_list[rb].rb_snr = 0xFF;
-    } else if (snr_per_rb[rb] < -64) {
-      reported_symbol_list->rb_list[rb].rb_snr = 0;
-    } else if (snr_per_rb[rb] > 63) {
-      reported_symbol_list->rb_list[rb].rb_snr = 0xFE;
+      prgs->prg_list[prg_idx].rb_snr = 0xFF;
+    } else if (snr_per_rb[prg_idx] < -64) {
+      prgs->prg_list[prg_idx].rb_snr = 0;
+    } else if (snr_per_rb[prg_idx] > 63) {
+      prgs->prg_list[prg_idx].rb_snr = 0xFE;
     } else {
-      reported_symbol_list->rb_list[rb].rb_snr = (snr_per_rb[rb] + 64)<<1;
+      prgs->prg_list[prg_idx].rb_snr = (snr_per_rb[prg_idx] + 64) << 1;
+    }
+  }
+
+  return 0;
+}
+
+int fill_srs_channel_matrix(uint8_t *channel_matrix,
+                            const nfapi_nr_srs_pdu_t *srs_pdu,
+                            const nr_srs_info_t *nr_srs_info,
+                            const uint8_t normalized_iq_representation,
+                            const uint16_t num_gnb_antenna_elements,
+                            const uint16_t num_ue_srs_ports,
+                            const uint16_t prg_size,
+                            const uint16_t num_prgs,
+                            const NR_DL_FRAME_PARMS *frame_parms,
+                            const int32_t srs_estimated_channel_freq[][1<<srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size*(1<<srs_pdu->num_symbols)]) {
+  // Copies one frequency-domain channel estimate per (UE SRS port, gNB antenna element, PRG) into channel_matrix.
+  const uint64_t subcarrier_offset = frame_parms->first_carrier_offset + srs_pdu->bwp_start*NR_NB_SC_PER_RB;
+  const uint16_t step = prg_size*NR_NB_SC_PER_RB; // subcarrier distance between two consecutive PRG samples
+
+  c16_t *channel_matrix16 = (c16_t*)channel_matrix; // view used when normalized_iq_representation != 0
+  c8_t *channel_matrix8 = (c8_t*)channel_matrix; // view used when normalized_iq_representation == 0
+
+  for(int uI = 0; uI < num_ue_srs_ports; uI++) {
+    for(int gI = 0; gI < num_gnb_antenna_elements; gI++) {
+
+      uint16_t subcarrier = subcarrier_offset + nr_srs_info->k_0_p[uI][0];
+      if (subcarrier >= frame_parms->ofdm_symbol_size) { // '>=' (not '>'): index ofdm_symbol_size must wrap to 0, same rule as the per-PRG wrap below
+        subcarrier -= frame_parms->ofdm_symbol_size;
+      }
+
+      for(int pI = 0; pI < num_prgs; pI++) {
+
+        c16_t *srs_estimated_channel16 = (c16_t *)&srs_estimated_channel_freq[gI][uI][subcarrier];
+        uint16_t index = uI*num_gnb_antenna_elements*num_prgs + gI*num_prgs + pI; // output order: UE port, then gNB antenna, then PRG
+
+        if (normalized_iq_representation == 0) {
+          channel_matrix8[index].r = (int8_t)(srs_estimated_channel16->r>>8); // keep the upper byte of the 16-bit value
+          channel_matrix8[index].i = (int8_t)(srs_estimated_channel16->i>>8);
+        } else {
+          channel_matrix16[index].r = srs_estimated_channel16->r;
+          channel_matrix16[index].i = srs_estimated_channel16->i;
+        }
+
+        // Subcarrier increment
+        subcarrier += step;
+        if (subcarrier >= frame_parms->ofdm_symbol_size) {
+          subcarrier=subcarrier-frame_parms->ofdm_symbol_size;
+        }
+      }
     }
   }
 
@@ -796,29 +859,24 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
     }
   }
 
-  for (int i=0;i<NUMBER_OF_NR_SRS_MAX;i++) {
+  for (int i = 0; i < NUMBER_OF_NR_SRS_MAX; i++) {
     NR_gNB_SRS_t *srs = gNB->srs[i];
     if (srs) {
       if ((srs->active == 1) && (srs->frame == frame_rx) && (srs->slot == slot_rx)) {
-
         LOG_D(NR_PHY, "(%d.%d) gNB is waiting for SRS, id = %i\n", frame_rx, slot_rx, i);
 
         NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms;
         nfapi_nr_srs_pdu_t *srs_pdu = &srs->srs_pdu;
-        uint8_t N_symb_SRS = 1<<srs_pdu->num_symbols;
-        int32_t srs_received_signal[frame_parms->nb_antennas_rx][frame_parms->ofdm_symbol_size*N_symb_SRS];
-        int32_t srs_ls_estimated_channel[frame_parms->nb_antennas_rx][1<<srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size*N_symb_SRS];
-        int32_t srs_estimated_channel_freq[frame_parms->nb_antennas_rx][1<<srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size*N_symb_SRS] __attribute__ ((aligned(32)));
-        int32_t srs_estimated_channel_time[frame_parms->nb_antennas_rx][1<<srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size] __attribute__ ((aligned(32)));
-        int32_t srs_estimated_channel_time_shifted[frame_parms->nb_antennas_rx][1<<srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size];
-        uint32_t noise_power_per_rb[srs_pdu->bwp_size];
+        uint8_t N_symb_SRS = 1 << srs_pdu->num_symbols;
+        int32_t srs_received_signal[frame_parms->nb_antennas_rx][frame_parms->ofdm_symbol_size * N_symb_SRS];
+        int32_t srs_estimated_channel_freq[frame_parms->nb_antennas_rx][1 << srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size * N_symb_SRS] __attribute__((aligned(32)));
+        int32_t srs_estimated_channel_time[frame_parms->nb_antennas_rx][1 << srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size] __attribute__((aligned(32)));
+        int32_t srs_estimated_channel_time_shifted[frame_parms->nb_antennas_rx][1 << srs_pdu->num_ant_ports][frame_parms->ofdm_symbol_size];
         int8_t snr_per_rb[srs_pdu->bwp_size];
-        uint32_t signal_power;
-        uint32_t noise_power;
         int8_t snr;
 
         // At least currently, the configuration is constant, so it is enough to generate the sequence just once.
-        if(gNB->nr_srs_info[i]->srs_generated_signal_bits == 0) {
+        if (gNB->nr_srs_info[i]->srs_generated_signal_bits == 0) {
           generate_srs_nr(srs_pdu, frame_parms, gNB->nr_srs_info[i]->srs_generated_signal, 0, gNB->nr_srs_info[i], AMP, frame_rx, slot_rx);
         }
 
@@ -830,15 +888,11 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
                                     slot_rx,
                                     srs_pdu,
                                     gNB->nr_srs_info[i],
-                                    (const int32_t **) gNB->nr_srs_info[i]->srs_generated_signal,
+                                    (const int32_t **)gNB->nr_srs_info[i]->srs_generated_signal,
                                     srs_received_signal,
-                                    srs_ls_estimated_channel,
                                     srs_estimated_channel_freq,
                                     srs_estimated_channel_time,
                                     srs_estimated_channel_time_shifted,
-                                    &signal_power,
-                                    noise_power_per_rb,
-                                    &noise_power,
                                     snr_per_rb,
                                     &snr);
         }
@@ -870,31 +924,143 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
         nfapi_nr_srs_indication_pdu_t *srs_indication = &gNB->srs_pdu_list[gNB->UL_INFO.srs_ind.number_of_pdus];
         srs_indication->handle = srs_pdu->handle;
         srs_indication->rnti = srs_pdu->rnti;
-        srs_indication->timing_advance = srs_est >= 0 ? nr_est_timing_advance_srs(frame_parms, srs_estimated_channel_time[0]) : 0xFFFF;
-        srs_indication->num_symbols = 1 << srs_pdu->num_symbols;
-        srs_indication->wide_band_snr = srs_est >= 0 ? (snr + 64) << 1 : 0xFF; // 0xFF will be set if this field is invalid
-        srs_indication->num_reported_symbols = 1 << srs_pdu->num_symbols;
-        if (!srs_indication->reported_symbol_list) {
-          srs_indication->reported_symbol_list = (nfapi_nr_srs_indication_reported_symbol_t *)calloc(1, srs_indication->num_reported_symbols * sizeof(nfapi_nr_srs_indication_reported_symbol_t));
+        srs_indication->timing_advance_offset = srs_est >= 0 ? nr_est_timing_advance_srs(frame_parms, srs_estimated_channel_time[0]) : 0xFFFF;
+        srs_indication->timing_advance_offset_nsec = srs_est >= 0 ? (int16_t)((((int32_t)srs_indication->timing_advance_offset - 31) * ((int32_t)TC_NSEC_x32768)) >> 15) : 0xFFFF;
+        switch (srs_pdu->srs_parameters_v4.usage) {
+          case 0:
+            LOG_W(NR_PHY, "SRS report was not requested by MAC\n");
+            return 0;
+          case 1 << NR_SRS_ResourceSet__usage_beamManagement:
+            srs_indication->srs_usage = NR_SRS_ResourceSet__usage_beamManagement;
+            break;
+          case 1 << NR_SRS_ResourceSet__usage_codebook:
+            srs_indication->srs_usage = NR_SRS_ResourceSet__usage_codebook;
+            break;
+          case 1 << NR_SRS_ResourceSet__usage_nonCodebook:
+            srs_indication->srs_usage = NR_SRS_ResourceSet__usage_nonCodebook;
+            break;
+          case 1 << NR_SRS_ResourceSet__usage_antennaSwitching:
+            srs_indication->srs_usage = NR_SRS_ResourceSet__usage_antennaSwitching;
+            break;
+          default:
+            LOG_E(NR_PHY, "Invalid srs_pdu->srs_parameters_v4.usage %i\n", srs_pdu->srs_parameters_v4.usage);
         }
-        fill_srs_reported_symbol_list(&srs_indication->reported_symbol_list[0], srs_pdu, frame_parms->N_RB_UL, snr_per_rb, srs_est);
-
-        gNB->UL_INFO.srs_ind.number_of_pdus += 1;
+        srs_indication->report_type = srs_pdu->srs_parameters_v4.report_type[0];
 
 #ifdef SRS_IND_DEBUG
         LOG_I(NR_PHY, "gNB->UL_INFO.srs_ind.sfn = %i\n", gNB->UL_INFO.srs_ind.sfn);
         LOG_I(NR_PHY, "gNB->UL_INFO.srs_ind.slot = %i\n", gNB->UL_INFO.srs_ind.slot);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].rnti = 0x%04x\n", num_srs, gNB->srs_pdu_list[num_srs].rnti);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].timing_advance = %i\n", num_srs, gNB->srs_pdu_list[num_srs].timing_advance);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].num_symbols = %i\n", num_srs, gNB->srs_pdu_list[num_srs].num_symbols);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].wide_band_snr = %i\n", num_srs, gNB->srs_pdu_list[num_srs].wide_band_snr);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].num_reported_symbols = %i\n", num_srs, gNB->srs_pdu_list[num_srs].num_reported_symbols);
-        LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].reported_symbol_list[0].num_rbs = %i\n", num_srs, gNB->srs_pdu_list[num_srs].reported_symbol_list[0].num_rbs);
-        for (int rb = 0; rb < gNB->srs_pdu_list[num_srs].reported_symbol_list[0].num_rbs; rb++) {
-          LOG_I(NR_PHY, "gNB->srs_pdu_list[%i].reported_symbol_list[0].rb_list[%3i].rb_snr = %i\n", num_srs, rb, gNB->srs_pdu_list[num_srs].reported_symbol_list[0].rb_list[rb].rb_snr);
+        LOG_I(NR_PHY, "srs_indication->rnti = %04x\n", srs_indication->rnti);
+        LOG_I(NR_PHY, "srs_indication->timing_advance_offset = %i\n", srs_indication->timing_advance_offset);
+        LOG_I(NR_PHY, "srs_indication->timing_advance_offset_nsec = %i\n", srs_indication->timing_advance_offset_nsec);
+        LOG_I(NR_PHY, "srs_indication->srs_usage = %i\n", srs_indication->srs_usage);
+        LOG_I(NR_PHY, "srs_indication->report_type = %i\n", srs_indication->report_type);
+#endif
+
+        if (!srs_indication->report_tlv) {
+          srs_indication->report_tlv = (nfapi_srs_report_tlv_t *)calloc(1, sizeof(nfapi_srs_report_tlv_t));
+        }
+        srs_indication->report_tlv->tag = 0;
+        srs_indication->report_tlv->length = 0;
+
+        switch (srs_indication->srs_usage) {
+          case NR_SRS_ResourceSet__usage_beamManagement: {
+            nfapi_nr_srs_beamforming_report_t nr_srs_beamforming_report;
+            nr_srs_beamforming_report.prg_size = srs_pdu->beamforming.prg_size;
+            nr_srs_beamforming_report.num_symbols = 1 << srs_pdu->num_symbols;
+            nr_srs_beamforming_report.wide_band_snr = srs_est >= 0 ? (snr + 64) << 1 : 0xFF; // 0xFF will be set if this field is invalid
+            nr_srs_beamforming_report.num_reported_symbols = 1 << srs_pdu->num_symbols;
+            nr_srs_beamforming_report.prgs = (nfapi_nr_srs_reported_symbol_t *)calloc(1, nr_srs_beamforming_report.num_reported_symbols * sizeof(nfapi_nr_srs_reported_symbol_t));
+            fill_srs_reported_symbol_list(&nr_srs_beamforming_report.prgs[0], srs_pdu, frame_parms->N_RB_UL, snr_per_rb, srs_est);
+
+#ifdef SRS_IND_DEBUG
+            LOG_I(NR_PHY, "nr_srs_beamforming_report.prg_size = %i\n", nr_srs_beamforming_report.prg_size);
+            LOG_I(NR_PHY, "nr_srs_beamforming_report.num_symbols = %i\n", nr_srs_beamforming_report.num_symbols);
+            LOG_I(NR_PHY, "nr_srs_beamforming_report.wide_band_snr = %i (%i dB)\n", nr_srs_beamforming_report.wide_band_snr, (nr_srs_beamforming_report.wide_band_snr >> 1) - 64);
+            LOG_I(NR_PHY, "nr_srs_beamforming_report.num_reported_symbols = %i\n", nr_srs_beamforming_report.num_reported_symbols);
+            LOG_I(NR_PHY, "nr_srs_beamforming_report.prgs[0].num_prgs = %i\n", nr_srs_beamforming_report.prgs[0].num_prgs);
+            for (int prg_idx = 0; prg_idx < nr_srs_beamforming_report.prgs[0].num_prgs; prg_idx++) {
+              LOG_I(NR_PHY,
+                    "nr_srs_beamforming_report.prgs[0].prg_list[%3i].rb_snr = %i (%i dB)\n",
+                    prg_idx,
+                    nr_srs_beamforming_report.prgs[0].prg_list[prg_idx].rb_snr,
+                    (nr_srs_beamforming_report.prgs[0].prg_list[prg_idx].rb_snr >> 1) - 64);
+            }
+#endif
+
+            srs_indication->report_tlv->length = pack_nr_srs_beamforming_report(&nr_srs_beamforming_report, srs_indication->report_tlv->value, 16384 * sizeof(uint32_t));
+            break;
+          }
+
+          case NR_SRS_ResourceSet__usage_codebook: {
+            nfapi_nr_srs_normalized_channel_iq_matrix_t nr_srs_normalized_channel_iq_matrix;
+            nr_srs_normalized_channel_iq_matrix.normalized_iq_representation = srs_pdu->srs_parameters_v4.iq_representation;
+            nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements = gNB->frame_parms.nb_antennas_rx;
+            nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports = srs_pdu->srs_parameters_v4.num_total_ue_antennas;
+            nr_srs_normalized_channel_iq_matrix.prg_size = srs_pdu->srs_parameters_v4.prg_size;
+            nr_srs_normalized_channel_iq_matrix.num_prgs = srs_pdu->srs_parameters_v4.srs_bandwidth_size / srs_pdu->srs_parameters_v4.prg_size;
+            fill_srs_channel_matrix(nr_srs_normalized_channel_iq_matrix.channel_matrix,
+                                    srs_pdu,
+                                    gNB->nr_srs_info[i],
+                                    nr_srs_normalized_channel_iq_matrix.normalized_iq_representation,
+                                    nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements,
+                                    nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports,
+                                    nr_srs_normalized_channel_iq_matrix.prg_size,
+                                    nr_srs_normalized_channel_iq_matrix.num_prgs,
+                                    &gNB->frame_parms,
+                                    srs_estimated_channel_freq);
+
+#ifdef SRS_IND_DEBUG
+            LOG_I(NR_PHY, "nr_srs_normalized_channel_iq_matrix.normalized_iq_representation = %i\n", nr_srs_normalized_channel_iq_matrix.normalized_iq_representation);
+            LOG_I(NR_PHY, "nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements = %i\n", nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements);
+            LOG_I(NR_PHY, "nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports = %i\n", nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports);
+            LOG_I(NR_PHY, "nr_srs_normalized_channel_iq_matrix.prg_size = %i\n", nr_srs_normalized_channel_iq_matrix.prg_size);
+            LOG_I(NR_PHY, "nr_srs_normalized_channel_iq_matrix.num_prgs = %i\n", nr_srs_normalized_channel_iq_matrix.num_prgs);
+            c16_t *channel_matrix16 = (c16_t *)nr_srs_normalized_channel_iq_matrix.channel_matrix;
+            c8_t *channel_matrix8 = (c8_t *)nr_srs_normalized_channel_iq_matrix.channel_matrix;
+            for (int uI = 0; uI < nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports; uI++) {
+              for (int gI = 0; gI < nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements; gI++) {
+                for (int pI = 0; pI < nr_srs_normalized_channel_iq_matrix.num_prgs; pI++) {
+                  uint16_t index =
+                      uI * nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements * nr_srs_normalized_channel_iq_matrix.num_prgs + gI * nr_srs_normalized_channel_iq_matrix.num_prgs + pI;
+                  LOG_I(NR_PHY,
+                        "(uI %i, gI %i, pI %i) channel_matrix --> real %i, imag %i\n",
+                        uI,
+                        gI,
+                        pI,
+                        nr_srs_normalized_channel_iq_matrix.normalized_iq_representation == 0 ? channel_matrix8[index].r : channel_matrix16[index].r,
+                        nr_srs_normalized_channel_iq_matrix.normalized_iq_representation == 0 ? channel_matrix8[index].i : channel_matrix16[index].i);
+                }
+              }
+            }
+#endif
+
+            srs_indication->report_tlv->length = pack_nr_srs_normalized_channel_iq_matrix(&nr_srs_normalized_channel_iq_matrix,
+                                                                                          srs_indication->report_tlv->value,
+                                                                                          16384 * sizeof(uint32_t));
+
+            break;
+          }
+
+          case NR_SRS_ResourceSet__usage_nonCodebook:
+          case NR_SRS_ResourceSet__usage_antennaSwitching:
+            LOG_W(NR_PHY, "PHY procedures for this SRS usage are not implemented yet!\n");
+            break;
+
+          default:
+            AssertFatal(1 == 0, "Invalid SRS usage\n");
+        }
+
+#ifdef SRS_IND_DEBUG
+        LOG_I(NR_PHY, "srs_indication->report_tlv->tag = %i\n", srs_indication->report_tlv->tag);
+        LOG_I(NR_PHY, "srs_indication->report_tlv->length = %i\n", srs_indication->report_tlv->length);
+        char *value = (char *)srs_indication->report_tlv->value;
+        for (int b = 0; b < srs_indication->report_tlv->length; b++) {
+          LOG_I(NR_PHY, "value[%i] = 0x%02x\n", b, value[b] & 0xFF);
         }
 #endif
 
+        gNB->UL_INFO.srs_ind.number_of_pdus += 1;
         srs->active = 0;
       }
     }
diff --git a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
index f4c863bb408c417ae5728986f229a3880fc5b9f9..ae80536abf86892d9813145fcdb7e486760c69dd 100644
--- a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
+++ b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
@@ -352,14 +352,13 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
   if(scheduled_response != NULL){
 
     module_id_t module_id = scheduled_response->module_id;
-    uint8_t cc_id = scheduled_response->CC_id, thread_id;
+    uint8_t cc_id = scheduled_response->CC_id;
     int slot = scheduled_response->slot;
 
     // Note: we have to handle the thread IDs for this. To be revisited completely.
-    thread_id = scheduled_response->thread_id;
     NR_UE_DLSCH_t *dlsch0 = NULL;
-    NR_UE_ULSCH_t *ulsch = PHY_vars_UE_g[module_id][cc_id]->ulsch[thread_id][0];
-    NR_UE_PUCCH *pucch_vars = PHY_vars_UE_g[module_id][cc_id]->pucch_vars[thread_id][0];
+    NR_UE_ULSCH_t *ulsch = PHY_vars_UE_g[module_id][cc_id]->ulsch[0];
+    NR_UE_PUCCH *pucch_vars = &((nr_phy_data_t *)scheduled_response->phy_data)->pucch_vars;
     NR_UE_CSI_IM *csiim_vars = PHY_vars_UE_g[module_id][cc_id]->csiim_vars[0];
     NR_UE_CSI_RS *csirs_vars = PHY_vars_UE_g[module_id][cc_id]->csirs_vars[0];
     NR_UE_PDCCH_CONFIG *phy_pdcch_config = NULL;
@@ -418,7 +417,7 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
             break;
           case FAPI_NR_DL_CONFIG_TYPE_DLSCH:
             dlsch_config_pdu = &dl_config->dl_config_list[i].dlsch_config_pdu.dlsch_config_rel15;
-            dlsch0 = PHY_vars_UE_g[module_id][cc_id]->dlsch[thread_id][0][0];
+            dlsch0 = PHY_vars_UE_g[module_id][cc_id]->dlsch[0][0];
             configure_dlsch(dlsch0, dlsch_config_pdu, module_id,
                             dl_config->dl_config_list[i].dlsch_config_pdu.rnti);
             break;
diff --git a/openair1/SCHED_NR_UE/harq_nr.c b/openair1/SCHED_NR_UE/harq_nr.c
index dc62ea646c86ca36e2118631b0d28a8a8af26cdd..25e23551e0220fbbae04be1c111e3e5ddf81519e 100644
--- a/openair1/SCHED_NR_UE/harq_nr.c
+++ b/openair1/SCHED_NR_UE/harq_nr.c
@@ -113,7 +113,7 @@
 *
 *********************************************************************/
 
-void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, int code_word_idx, uint8_t number_harq_processes_pusch)
+void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int code_word_idx, uint8_t number_harq_processes_pusch)
 {
   NR_UE_ULSCH_t *ulsch;
 
@@ -123,7 +123,7 @@ void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, i
 
     memset(ulsch,0,sizeof(NR_UE_ULSCH_t));
 
-    ue->ulsch[thread_id][gNB_id] = ulsch;
+    ue->ulsch[gNB_id] = ulsch;
   }
   else {
     LOG_E(PHY, "Fatal memory allocation problem at line %d in function %s of file %s \n", __LINE__ , __func__, __FILE__);
@@ -148,7 +148,7 @@ void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, i
   }
 
   for (int slot_tx = 0; slot_tx < NR_MAX_SLOTS_PER_FRAME; slot_tx++) {
-    ue->ulsch[thread_id][gNB_id]->harq_process_id[slot_tx] = NR_MAX_HARQ_PROCESSES;
+    ue->ulsch[gNB_id]->harq_process_id[slot_tx] = NR_MAX_HARQ_PROCESSES;
   }
 }
 
@@ -165,9 +165,9 @@ void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, i
 *
 *********************************************************************/
 
-void release_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, int code_word_idx)
+void release_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int code_word_idx)
 {
-  NR_UE_ULSCH_t *ulsch = ue->ulsch[thread_id][gNB_id];
+  NR_UE_ULSCH_t *ulsch = ue->ulsch[gNB_id];
 
   for (int process_id = 0; process_id < ulsch->number_harq_processes_for_pusch; process_id++) {
 
@@ -178,7 +178,7 @@ void release_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id,
 
   free16(ulsch, sizeof(NR_UE_ULSCH_t));
 
-  ue->ulsch[thread_id][gNB_id] = NULL;
+  ue->ulsch[gNB_id] = NULL;
 }
 
 /*******************************************************************
diff --git a/openair1/SCHED_NR_UE/harq_nr.h b/openair1/SCHED_NR_UE/harq_nr.h
index 5586518d7c7646e598b89b7369c74cf82edf3e8c..b707e2d9831283e3c03f16c8c5efb3663d4c90a1 100644
--- a/openair1/SCHED_NR_UE/harq_nr.h
+++ b/openair1/SCHED_NR_UE/harq_nr.h
@@ -69,21 +69,19 @@
 /** \brief This function configures uplink HARQ context
     @param PHY_VARS_NR_UE ue context
     @param gNB_id gNodeB identifier
-    @param thread_id RXTX thread index
     @param code_word_idx code word index
     @param number_harq_processes_pusch maximum number of uplink HARQ processes
     @returns none */
 
-void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, int code_word_idx, uint8_t number_harq_processes_pusch);
+void config_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int code_word_idx, uint8_t number_harq_processes_pusch);
 
 /** \brief This function releases uplink HARQ context
     @param PHY_VARS_NR_UE ue context
     @param gNB_id gNodeB identifier
-    @param thread_id RXTX thread index
     @param code_word_idx code word index
     @returns none */
 
-void release_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int thread_id, int code_word_idx);
+void release_uplink_harq_process(PHY_VARS_NR_UE *ue, int gNB_id, int code_word_idx);
 
 /** \brief This function stores slot for transmission in HARQ context
     @param ulsch uplink context
diff --git a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
index c68702fa3fa90cfba5378461118b4f84a32728e4..d74e5ee0e8daa14e0e06313d305fc56598d9d827 100644
--- a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+++ b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
@@ -99,7 +99,6 @@ void nr_fill_dl_indication(nr_downlink_indication_t *dl_ind,
   dl_ind->cc_id     = ue->CC_id;
   dl_ind->frame     = proc->frame_rx;
   dl_ind->slot      = proc->nr_slot_rx;
-  dl_ind->thread_id = proc->thread_id;
   dl_ind->phy_data  = phy_data;
 
   if (dci_ind) {
@@ -298,20 +297,19 @@ void phy_procedures_nrUE_TX(PHY_VARS_NR_UE *ue,
 
   if (ue->UE_mode[gNB_id] <= PUSCH){
 
-    for (uint8_t harq_pid = 0; harq_pid < ue->ulsch[proc->thread_id][gNB_id]->number_harq_processes_for_pusch; harq_pid++) {
-      if (ue->ulsch[proc->thread_id][gNB_id]->harq_processes[harq_pid]->status == ACTIVE)
-        nr_ue_ulsch_procedures(ue, harq_pid, frame_tx, slot_tx, proc->thread_id, gNB_id);
+    for (uint8_t harq_pid = 0; harq_pid < ue->ulsch[gNB_id]->number_harq_processes_for_pusch; harq_pid++) {
+      if (ue->ulsch[gNB_id]->harq_processes[harq_pid]->status == ACTIVE)
+        nr_ue_ulsch_procedures(ue, harq_pid, frame_tx, slot_tx, gNB_id);
     }
   }
 
   if (ue->UE_mode[gNB_id] == PUSCH) {
     ue_srs_procedures_nr(ue, proc, gNB_id);
   }
+  LOG_D(PHY,"****** end TX-Chain for AbsSubframe %d.%d ******\n", proc->frame_tx, proc->nr_slot_tx);
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX, VCD_FUNCTION_OUT);
   stop_meas(&ue->phy_proc_tx);
-
-
 }
 
 void nr_ue_measurement_procedures(uint16_t l,
@@ -545,7 +543,6 @@ int nr_ue_pdcch_procedures(uint8_t gNB_id,
       dci_ind->dci_list[i].rnti,
       dci_ind->dci_list[i].dci_format);
   }
-  ue->pdcch_vars[proc->thread_id][gNB_id]->dci_received += dci_cnt;
 
   dci_ind->number_of_dcis = dci_cnt;
 
@@ -611,7 +608,7 @@ int nr_ue_pdsch_procedures(PHY_VARS_NR_UE *ue, UE_nr_rxtx_proc_t *proc, int gNB_
           char filename[100];
           for (uint8_t aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) {
             sprintf(filename,"PDSCH_CHANNEL_frame%d_slot%d_sym%d_port%d_rx%d.m", nr_frame_rx, nr_slot_rx, m, aatx,aarx);
-            int **dl_ch_estimates = ue->pdsch_vars[proc->thread_id][gNB_id]->dl_ch_estimates;
+            int **dl_ch_estimates = ue->pdsch_vars[gNB_id]->dl_ch_estimates;
             LOG_M(filename,"channel_F",&dl_ch_estimates[aatx*ue->frame_parms.nb_antennas_rx+aarx][ue->frame_parms.ofdm_symbol_size*m],ue->frame_parms.ofdm_symbol_size, 1, 1);
           }
 #endif
@@ -621,7 +618,7 @@ int nr_ue_pdsch_procedures(PHY_VARS_NR_UE *ue, UE_nr_rxtx_proc_t *proc, int gNB_
 
     if (ue->chest_time == 1) { // averaging time domain channel estimates
       nr_chest_time_domain_avg(&ue->frame_parms,
-                               ue->pdsch_vars[proc->thread_id][gNB_id]->dl_ch_estimates,
+                               ue->pdsch_vars[gNB_id]->dl_ch_estimates,
                                dlsch0_harq->nb_symbols,
                                dlsch0_harq->start_symbol,
                                dlsch0_harq->dlDmrsSymbPos,
@@ -654,7 +651,7 @@ int nr_ue_pdsch_procedures(PHY_VARS_NR_UE *ue, UE_nr_rxtx_proc_t *proc, int gNB_
       uint8_t slot = 0;
       if(m >= ue->frame_parms.symbols_per_slot>>1)
         slot = 1;
-      start_meas(&ue->dlsch_llr_stats_parallelization[proc->thread_id][slot]);
+      start_meas(&ue->dlsch_llr_stats_parallelization[slot]);
       // process DLSCH received symbols in the slot
       // symbol by symbol processing (if data/DMRS are multiplexed is checked inside the function)
       if (pdsch == PDSCH || pdsch == SI_PDSCH || pdsch == RA_PDSCH) {
@@ -673,9 +670,9 @@ int nr_ue_pdsch_procedures(PHY_VARS_NR_UE *ue, UE_nr_rxtx_proc_t *proc, int gNB_
           return -1;
       } else AssertFatal(1==0,"Not RA_PDSCH, SI_PDSCH or PDSCH\n");
 
-      stop_meas(&ue->dlsch_llr_stats_parallelization[proc->thread_id][slot]);
+      stop_meas(&ue->dlsch_llr_stats_parallelization[slot]);
       if (cpumeas(CPUMEAS_GETSTATE))
-        LOG_D(PHY, "[AbsSFN %d.%d] LLR Computation Symbol %d %5.2f \n",frame_rx,nr_slot_rx,m,ue->dlsch_llr_stats_parallelization[proc->thread_id][slot].p_time/(cpuf*1000.0));
+        LOG_D(PHY, "[AbsSFN %d.%d] LLR Computation Symbol %d %5.2f \n",frame_rx,nr_slot_rx,m,ue->dlsch_llr_stats_parallelization[slot].p_time/(cpuf*1000.0));
       if(first_symbol_flag) {
         proc->first_symbol_available = 1;
       }
@@ -743,7 +740,7 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
     case RA_PDSCH:
     case P_PDSCH:
     case PDSCH:
-      pdsch_vars = ue->pdsch_vars[proc->thread_id][gNB_id];
+      pdsch_vars = ue->pdsch_vars[gNB_id];
       break;
     case PMCH:
     case PDSCH1:
@@ -760,16 +757,6 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
     if (frame_rx < *dlsch_errors)
       *dlsch_errors=0;
 
-    if (pdsch == RA_PDSCH) {
-      if (ue->prach_resources[gNB_id]!=NULL)
-        dlsch0->rnti = ue->prach_resources[gNB_id]->ra_RNTI;
-      else {
-        LOG_E(PHY,"[UE %d] Frame %d, nr_slot_rx %d: FATAL, prach_resources is NULL\n", ue->Mod_id, frame_rx, nr_slot_rx);
-        //mac_xface->macphy_exit("prach_resources is NULL");
-        return false;
-      }
-    }
-
     // exit dlsch procedures as there are no active dlsch
     if (is_cw0_active != ACTIVE && is_cw1_active != ACTIVE)
       return false;
@@ -782,15 +769,15 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
                                                    dlsch0->harq_processes[harq_pid]->Qm,
                                                    dlsch0->harq_processes[harq_pid]->Nl);
 
-      start_meas(&ue->dlsch_unscrambling_stats);
-      nr_dlsch_unscrambling(pdsch_vars->llr[0],
-                            dlsch0->harq_processes[harq_pid]->G,
-                            0,
-                            ue->frame_parms.Nid_cell,
-                            dlsch0->rnti);
+    start_meas(&ue->dlsch_unscrambling_stats);
+    nr_dlsch_unscrambling(pdsch_vars->llr[0],
+                          dlsch0->harq_processes[harq_pid]->G,
+                          0,
+                          ue->frame_parms.Nid_cell,
+                          dlsch0->rnti);
       
 
-      stop_meas(&ue->dlsch_unscrambling_stats);
+    stop_meas(&ue->dlsch_unscrambling_stats);
 
 
 #if 0
@@ -801,11 +788,11 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
       LOG_I(PHY,"start ldpc decode for CW 0 for AbsSubframe %d.%d / %d  --> Nl %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch0->harq_processes[harq_pid]->Nl);
       LOG_I(PHY,"start ldpc decode for CW 0 for AbsSubframe %d.%d / %d  --> G  %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch0->harq_processes[harq_pid]->G);
       LOG_I(PHY,"start ldpc decode for CW 0 for AbsSubframe %d.%d / %d  --> Kmimo  %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch0->Kmimo);
-      LOG_I(PHY,"start ldpc decode for CW 0 for AbsSubframe %d.%d / %d  --> Pdcch Sym  %d \n", frame_rx, nr_slot_rx, harq_pid, ue->pdcch_vars[proc->thread_id][gNB_id]->num_pdcch_symbols);
+      LOG_I(PHY,"start ldpc decode for CW 0 for AbsSubframe %d.%d / %d  --> Pdcch Sym  %d \n", frame_rx, nr_slot_rx, harq_pid, ue->pdcch_vars[gNB_id]->num_pdcch_symbols);
 #endif
 
 
-   start_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
+   start_meas(&ue->dlsch_decoding_stats);
 
     ret = nr_dlsch_decoding(ue,
                             proc,
@@ -846,12 +833,12 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
 
     LOG_D(PHY, "In %s DL PDU length in bits: %d, in bytes: %d \n", __FUNCTION__, dlsch0->harq_processes[harq_pid]->TBS, dlsch0->harq_processes[harq_pid]->TBS / 8);
 
-    stop_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
+    stop_meas(&ue->dlsch_decoding_stats);
     if (cpumeas(CPUMEAS_GETSTATE))  {
       LOG_D(PHY, " --> Unscrambling for CW0 %5.3f\n",
             (ue->dlsch_unscrambling_stats.p_time)/(cpuf*1000.0));
       LOG_D(PHY, "AbsSubframe %d.%d --> LDPC Decoding for CW0 %5.3f\n",
-            frame_rx%1024, nr_slot_rx,(ue->dlsch_decoding_stats[proc->thread_id].p_time)/(cpuf*1000.0));
+            frame_rx%1024, nr_slot_rx,(ue->dlsch_decoding_stats.p_time)/(cpuf*1000.0));
     }
 
     if(is_cw1_active) {
@@ -877,10 +864,10 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
           LOG_I(PHY,"start ldpc decode for CW 1 for AbsSubframe %d.%d / %d  --> Nl %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch1->harq_processes[harq_pid]->Nl);
           LOG_I(PHY,"start ldpc decode for CW 1 for AbsSubframe %d.%d / %d  --> G  %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch1->harq_processes[harq_pid]->G);
           LOG_I(PHY,"start ldpc decode for CW 1 for AbsSubframe %d.%d / %d  --> Kmimo  %d \n", frame_rx, nr_slot_rx, harq_pid, dlsch1->Kmimo);
-          LOG_I(PHY,"start ldpc decode for CW 1 for AbsSubframe %d.%d / %d  --> Pdcch Sym  %d \n", frame_rx, nr_slot_rx, harq_pid, ue->pdcch_vars[proc->thread_id][gNB_id]->num_pdcch_symbols);
+          LOG_I(PHY,"start ldpc decode for CW 1 for AbsSubframe %d.%d / %d  --> Pdcch Sym  %d \n", frame_rx, nr_slot_rx, harq_pid, ue->pdcch_vars[gNB_id]->num_pdcch_symbols);
 #endif
 
-      start_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
+      start_meas(&ue->dlsch_decoding_stats);
 
 
       ret1 = nr_dlsch_decoding(ue,
@@ -897,12 +884,12 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
                                pdsch==PDSCH);//proc->decoder_switch
       LOG_T(PHY,"CW dlsch decoding, ret1 = %d\n", ret1);
 
-      stop_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
+      stop_meas(&ue->dlsch_decoding_stats);
       if (cpumeas(CPUMEAS_GETSTATE)) {
         LOG_D(PHY, " --> Unscrambling for CW1 %5.3f\n",
               (ue->dlsch_unscrambling_stats.p_time)/(cpuf*1000.0));
         LOG_D(PHY, "AbsSubframe %d.%d --> ldpc Decoding for CW1 %5.3f\n",
-              frame_rx%1024, nr_slot_rx,(ue->dlsch_decoding_stats[proc->thread_id].p_time)/(cpuf*1000.0));
+              frame_rx%1024, nr_slot_rx,(ue->dlsch_decoding_stats.p_time)/(cpuf*1000.0));
         }
     LOG_D(PHY, "harq_pid: %d, TBS expected dlsch1: %d \n", harq_pid, dlsch1->harq_processes[harq_pid]->TBS);
     }
@@ -1331,15 +1318,13 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
   int frame_rx = proc->frame_rx;
   int nr_slot_rx = proc->nr_slot_rx;
   fapi_nr_config_request_t *cfg = &ue->nrUE_config;
-
   NR_DL_FRAME_PARMS *fp = &ue->frame_parms;
   
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_RX, VCD_FUNCTION_IN);
-  start_meas(&ue->phy_proc_rx[proc->thread_id]);
+  start_meas(&ue->phy_proc_rx);
 
-  LOG_D(PHY," ****** start RX-Chain for Frame.Slot %d.%d (energy %d dB)******  \n",
-        frame_rx%1024, nr_slot_rx,
-        dB_fixed(signal_energy(ue->common_vars.common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0],2048*14)));
+  LOG_D(PHY," ****** start RX-Chain for Frame.Slot %d.%d ******  \n",
+        frame_rx%1024, nr_slot_rx);
 
   // checking if current frame is compatible with SSB periodicity
   if (cfg->ssb_table.ssb_period == 0 ||
@@ -1410,6 +1395,29 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
     }
   }
 
+  // Check for PRS slot - section 7.4.1.7.4 in 3GPP rel16 38.211
+  for(int gNB_id = 0; gNB_id < ue->prs_active_gNBs; gNB_id++)
+  {
+    for(int rsc_id = 0; rsc_id < ue->prs_vars[gNB_id]->NumPRSResources; rsc_id++)
+    {
+      prs_config_t *prs_config = &ue->prs_vars[gNB_id]->prs_resource[rsc_id].prs_cfg;
+      for (int i = 0; i < prs_config->PRSResourceRepetition; i++)
+      {
+        if( (((frame_rx*fp->slots_per_frame + nr_slot_rx) - (prs_config->PRSResourceSetPeriod[1] + prs_config->PRSResourceOffset) + prs_config->PRSResourceSetPeriod[0])%prs_config->PRSResourceSetPeriod[0]) == i*prs_config->PRSResourceTimeGap)
+        {
+          for(int j = prs_config->SymbolStart; j < (prs_config->SymbolStart+prs_config->NumPRSSymbols); j++)
+          {
+            nr_slot_fep(ue,
+                        proc,
+                        (j%fp->symbols_per_slot),
+                        nr_slot_rx);
+          }
+          nr_prs_channel_estimation(gNB_id,rsc_id,i,ue,proc,fp);
+        }
+      } // for i
+    } // for rsc_id
+  } // for gNB_id
+
   if ((frame_rx%64 == 0) && (nr_slot_rx==0)) {
     LOG_I(NR_PHY,"============================================\n");
     // fixed text + 8 HARQs rounds à 10 ("999999999/") + NULL
@@ -1445,31 +1453,22 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
   int32_t pdcch_est_size = ((((fp->symbols_per_slot*(fp->ofdm_symbol_size+LTE_CE_FILTER_LENGTH))+15)/16)*16);
   __attribute__ ((aligned(16))) int32_t pdcch_dl_ch_estimates[4*fp->nb_antennas_rx][pdcch_est_size];
 
-  int coreset_nb_rb=0;
-  int coreset_start_rb=0;
-
-  if (phy_pdcch_config->nb_search_space > 0)
-    get_coreset_rballoc(phy_pdcch_config->pdcch_config[0].coreset.frequency_domain_resource,&coreset_nb_rb,&coreset_start_rb);
-
   uint8_t dci_cnt = 0;
   for(int n_ss = 0; n_ss<phy_pdcch_config->nb_search_space; n_ss++) {
     for (uint16_t l=0; l<nb_symb_pdcch; l++) {
 
       // note: this only works if RBs for PDCCH are contigous!
-      LOG_D(PHY, "pdcch_channel_estimation: first_carrier_offset %d, BWPStart %d, coreset_start_rb %d, coreset_nb_rb %d\n",
-            fp->first_carrier_offset, phy_pdcch_config->pdcch_config[n_ss].BWPStart, coreset_start_rb, coreset_nb_rb);
-
-      if (coreset_nb_rb > 0)
-        nr_pdcch_channel_estimation(ue,
-                                    proc,
-                                    gNB_id,
-                                    nr_slot_rx,
-                                    l,
-                                    phy_pdcch_config->pdcch_config[n_ss].coreset.pdcch_dmrs_scrambling_id,
-                                    fp->first_carrier_offset+(phy_pdcch_config->pdcch_config[n_ss].BWPStart + coreset_start_rb)*12,
-                                    coreset_nb_rb,
-                                    pdcch_est_size,
-                                    pdcch_dl_ch_estimates);
+
+      nr_pdcch_channel_estimation(ue,
+                                  proc,
+                                  gNB_id,
+                                  nr_slot_rx,
+                                  l,
+                                  &phy_pdcch_config->pdcch_config[n_ss].coreset,
+                                  fp->first_carrier_offset,
+                                  phy_pdcch_config->pdcch_config[n_ss].BWPStart,
+                                  pdcch_est_size,
+                                  pdcch_dl_ch_estimates);
 
       stop_meas(&ue->ofdm_demod_stats);
 
@@ -1483,8 +1482,8 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
     LOG_D(PHY,"[UE %d] Frame %d, nr_slot_rx %d: found %d DCIs\n", ue->Mod_id, frame_rx, nr_slot_rx, dci_cnt);
 
     NR_UE_DLSCH_t *dlsch = NULL;
-    if (ue->dlsch[proc->thread_id][gNB_id][0]->active == 1){
-      dlsch = ue->dlsch[proc->thread_id][gNB_id][0];
+    if (ue->dlsch[gNB_id][0]->active == 1){
+      dlsch = ue->dlsch[gNB_id][0];
     } else if (ue->dlsch_SI[0]->active == 1){
       dlsch = ue->dlsch_SI[0];
     } else if (ue->dlsch_ra[0]->active == 1){
@@ -1520,18 +1519,18 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
   nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(newElt);
   curMsg->proc = *proc;
   curMsg->UE = ue;
-  curMsg->ue_sched_mode = ONLY_PUSCH;
+  curMsg->ue_sched_mode = SCHED_PUSCH;
   pushTpool(&(get_nrUE_params()->Tpool), newElt);
   start_meas(&ue->generic_stat);
   // do procedures for C-RNTI
   int ret_pdsch = 0;
-  if (ue->dlsch[proc->thread_id][gNB_id][0]->active == 1) {
+  if (ue->dlsch[gNB_id][0]->active == 1) {
     VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC_C, VCD_FUNCTION_IN);
     ret_pdsch = nr_ue_pdsch_procedures(ue,
                                        proc,
                                        gNB_id,
                                        PDSCH,
-                                       ue->dlsch[proc->thread_id][gNB_id][0],
+                                       ue->dlsch[gNB_id][0],
                                        NULL);
 
     nr_ue_measurement_procedures(2, ue, proc, gNB_id, nr_slot_rx);
@@ -1609,34 +1608,34 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
     VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC_RA, VCD_FUNCTION_OUT);
   }
   // do procedures for C-RNTI
-  if (ue->dlsch[proc->thread_id][gNB_id][0]->active == 1) {
+  if (ue->dlsch[gNB_id][0]->active == 1) {
 
     LOG_D(PHY, "DLSCH data reception at nr_slot_rx: %d\n", nr_slot_rx);
     VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC, VCD_FUNCTION_IN);
 
-    start_meas(&ue->dlsch_procedures_stat[proc->thread_id]);
+    start_meas(&ue->dlsch_procedures_stat);
 
     NR_UE_DLSCH_t *dlsch1 = NULL;
     if (NR_MAX_NB_LAYERS>4)
-      dlsch1 = ue->dlsch[proc->thread_id][gNB_id][1];
+      dlsch1 = ue->dlsch[gNB_id][1];
 
     if (ret_pdsch >= 0)
       nr_ue_dlsch_procedures(ue,
 			   proc,
 			   gNB_id,
 			   PDSCH,
-			   ue->dlsch[proc->thread_id][gNB_id][0],
+			   ue->dlsch[gNB_id][0],
 			   dlsch1,
 			   &ue->dlsch_errors[gNB_id]);
 
-  stop_meas(&ue->dlsch_procedures_stat[proc->thread_id]);
+  stop_meas(&ue->dlsch_procedures_stat);
   if (cpumeas(CPUMEAS_GETSTATE)) {
-    LOG_D(PHY, "[SFN %d] Slot1:       Pdsch Proc %5.2f\n",nr_slot_rx,ue->pdsch_procedures_stat[proc->thread_id].p_time/(cpuf*1000.0));
-    LOG_D(PHY, "[SFN %d] Slot0 Slot1: Dlsch Proc %5.2f\n",nr_slot_rx,ue->dlsch_procedures_stat[proc->thread_id].p_time/(cpuf*1000.0));
+    LOG_D(PHY, "[SFN %d] Slot1:       Pdsch Proc %5.2f\n",nr_slot_rx,ue->pdsch_procedures_stat.p_time/(cpuf*1000.0));
+    LOG_D(PHY, "[SFN %d] Slot0 Slot1: Dlsch Proc %5.2f\n",nr_slot_rx,ue->dlsch_procedures_stat.p_time/(cpuf*1000.0));
   }
 
   // deactivate dlsch once dlsch proc is done
-  ue->dlsch[proc->thread_id][gNB_id][0]->active = 0;
+  ue->dlsch[gNB_id][0]->active = 0;
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PDSCH_PROC, VCD_FUNCTION_OUT);
 
@@ -1709,9 +1708,9 @@ int phy_procedures_nrUE_RX(PHY_VARS_NR_UE *ue,
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_RX, VCD_FUNCTION_OUT);
 
-  stop_meas(&ue->phy_proc_rx[proc->thread_id]);
+  stop_meas(&ue->phy_proc_rx);
   if (cpumeas(CPUMEAS_GETSTATE))
-    LOG_D(PHY, "------FULL RX PROC [SFN %d]: %5.2f ------\n",nr_slot_rx,ue->phy_proc_rx[proc->thread_id].p_time/(cpuf*1000.0));
+    LOG_D(PHY, "------FULL RX PROC [SFN %d]: %5.2f ------\n",nr_slot_rx,ue->phy_proc_rx.p_time/(cpuf*1000.0));
 
   LOG_D(PHY," ****** end RX-Chain  for AbsSubframe %d.%d ******  \n", frame_rx%1024, nr_slot_rx);
   return (0);
diff --git a/openair1/SCHED_NR_UE/pucch_uci_ue_nr.c b/openair1/SCHED_NR_UE/pucch_uci_ue_nr.c
index 77ac105e140f3776b26e6ecf290c235c20370862..4e1f8a338a9738250d0ffd9c1196570f0911e984 100644
--- a/openair1/SCHED_NR_UE/pucch_uci_ue_nr.c
+++ b/openair1/SCHED_NR_UE/pucch_uci_ue_nr.c
@@ -203,11 +203,12 @@ void nr_generate_pucch3_4(int32_t **txdataF,
 
 void pucch_procedures_ue_nr(PHY_VARS_NR_UE *ue, 
                             uint8_t gNB_id,
-                            UE_nr_rxtx_proc_t *proc) {
+                            UE_nr_rxtx_proc_t *proc,
+                            nr_phy_data_t *phy_data) {
 
   int       nr_slot_tx = proc->nr_slot_tx;
   fapi_nr_ul_config_pucch_pdu *pucch_pdu;
-  NR_UE_PUCCH *pucch_vars = ue->pucch_vars[proc->thread_id][gNB_id];
+  NR_UE_PUCCH *pucch_vars = &phy_data->pucch_vars;
 
   for (int i=0; i<2; i++) {
     if(pucch_vars->active[i]) {
diff --git a/openair1/SCHED_NR_UE/pucch_uci_ue_nr.h b/openair1/SCHED_NR_UE/pucch_uci_ue_nr.h
index 37e24f43bcfa60bef4fcd794ee1771788606800e..1ff1b7cc5c7a11aece7ebb06582036973320030c 100644
--- a/openair1/SCHED_NR_UE/pucch_uci_ue_nr.h
+++ b/openair1/SCHED_NR_UE/pucch_uci_ue_nr.h
@@ -61,7 +61,8 @@
 
 void pucch_procedures_ue_nr(PHY_VARS_NR_UE *ue, 
                             uint8_t gNB_id,
-                            UE_nr_rxtx_proc_t *proc);
+                            UE_nr_rxtx_proc_t *proc,
+                            nr_phy_data_t *phy_data);
 
 
 void set_csi_nr(int csi_status, uint32_t csi_payload);
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim.c b/openair1/SIMULATION/LTE_PHY/dlsim.c
index fef67c3b920916eb2eed0666ad332a359cfa45fe..e2f78e90368231129771242f78ae8387a581d20a 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim.c
@@ -604,7 +604,7 @@ int main(int argc, char **argv) {
   nfapi_tx_request_t TX_req;
   Sched_Rsp_t sched_resp;
   int pa=dB0;
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
   FILE    *proc_fd = NULL;
   char buf[64];
   memset(buf,0,sizeof(buf));
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim_tm4.c b/openair1/SIMULATION/LTE_PHY/dlsim_tm4.c
index 57c06e3b1300c846abf4a32bd31f766ae357fa70..d6a248059c7852f97519c797fde975b265d1a87a 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim_tm4.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim_tm4.c
@@ -310,7 +310,7 @@ int main(int argc, char **argv)
 
   opp_enabled=1; // to enable the time meas
 
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
   FILE    *proc_fd = NULL;
   char buf[64];
 
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim_tm7.c b/openair1/SIMULATION/LTE_PHY/dlsim_tm7.c
index bd164150c598a260610f90a2f83ffe87a1ca7c2c..2f254ea3d220923822ec8d077a36a3e9711721d0 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim_tm7.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim_tm7.c
@@ -194,7 +194,7 @@ int main(int argc, char **argv) {
   int CCE_table[800];
   int threequarter_fs=0;
   opp_enabled=1; // to enable the time meas
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
   FILE    *proc_fd = NULL;
   char buf[64];
   proc_fd = fopen("/sys/devices/system/cpu/cpu4/cpufreq/cpuinfo_cur_freq", "r");
diff --git a/openair1/SIMULATION/LTE_PHY/framegen.c b/openair1/SIMULATION/LTE_PHY/framegen.c
index b9f0221b2d4ef2479e93452518d6b45bb37d4607..9203f0a0957f8f000ba07d4848faf89eb3d7b9bc 100644
--- a/openair1/SIMULATION/LTE_PHY/framegen.c
+++ b/openair1/SIMULATION/LTE_PHY/framegen.c
@@ -36,7 +36,6 @@
 
 #include "SCHED/defs.h"
 #include "SCHED/vars.h"
-#include "ARCH/CBMIMO1/DEVICE_DRIVER/vars.h"
 
 #include "LAYER2/MAC/defs.h"
 #include "PHY_INTERFACE/defs.h"
diff --git a/openair1/SIMULATION/LTE_PHY/mbmssim.c b/openair1/SIMULATION/LTE_PHY/mbmssim.c
index 5aad30405bf49f487566d9dbdd265b77498ff824..0f7a1375ff85395d4f216441e572c946393a0f49 100644
--- a/openair1/SIMULATION/LTE_PHY/mbmssim.c
+++ b/openair1/SIMULATION/LTE_PHY/mbmssim.c
@@ -679,7 +679,7 @@ int main(int argc, char **argv) {
   nfapi_tx_request_t TX_req;
   Sched_Rsp_t sched_resp;
   int pa=dB0;
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
   FILE    *proc_fd = NULL;
   char buf[64];
   memset(buf,0,sizeof(buf));
diff --git a/openair1/SIMULATION/LTE_PHY/syncsim.c b/openair1/SIMULATION/LTE_PHY/syncsim.c
index e8ee7d3d5e8b086a1804867772a2cffcf367bc94..dfcce8fd0ba40770095ac1ec3bdc17820353b6c4 100644
--- a/openair1/SIMULATION/LTE_PHY/syncsim.c
+++ b/openair1/SIMULATION/LTE_PHY/syncsim.c
@@ -41,9 +41,7 @@
 #endif
 #include "SCHED/defs.h"
 #include "SCHED/vars.h"
-#include "ARCH/CBMIMO1/DEVICE_DRIVER/vars.h"
-#include "ARCH/CBMIMO1/DEVICE_DRIVER/cbmimo1_device.h"
-#include "ARCH/COMMON/defs.h"
+#include "sdr/COMMON/defs.h"
 #include "LAYER2/MAC/vars.h"
 
 #ifdef XFORMS
diff --git a/openair1/SIMULATION/NR_PHY/dlschsim.c b/openair1/SIMULATION/NR_PHY/dlschsim.c
index f429409d5ab4a1993e0b03ca2acf4d1e060d2d3c..768501e99ab88fb610db35b8c6ef3c95c6271261 100644
--- a/openair1/SIMULATION/NR_PHY/dlschsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlschsim.c
@@ -85,31 +85,22 @@ nrUE_params_t *get_nrUE_params(void) {
 int main(int argc, char **argv)
 {
   char c;
-  int i; //,j,l,aa;
+  int i;
   double SNR, SNR_lin, snr0 = -2.0, snr1 = 2.0;
   double snr_step = 0.1;
   uint8_t snr1set = 0;
   int **txdata;
   double **s_re, **s_im, **r_re, **r_im;
-  //  int sync_pos, sync_pos_slot;
-  //  FILE *rx_frame_file;
   FILE *output_fd = NULL;
   //uint8_t write_output_file = 0;
-  //  int subframe_offset;
-  //  char fname[40], vname[40];
   int trial, n_trials = 1, n_errors = 0, n_false_positive = 0;
   uint8_t n_tx = 1, n_rx = 1;
-  //uint8_t transmission_mode = 1;
   uint16_t Nid_cell = 0;
   channel_desc_t *gNB2UE;
   uint8_t extended_prefix_flag = 0;
-  //int8_t interf1 = -21, interf2 = -21;
   FILE *input_fd = NULL, *pbch_file_fd = NULL;
-  //char input_val_str[50],input_val_str2[50];
-  //uint16_t NB_RB=25;
   SCM_t channel_model = AWGN;  //Rayleigh1_anticorr;
   uint16_t N_RB_DL = 106, mu = 1;
-  //unsigned char frame_type = 0;
   unsigned char pbch_phase = 0;
   int frame = 0, slot = 0;
   int frame_length_complex_samples;
@@ -120,7 +111,6 @@ int main(int argc, char **argv)
   double sigma;
   unsigned char qbits = 8;
   int ret;
-  //int run_initial_sync=0;
   int loglvl = OAILOG_WARNING;
   uint8_t dlsch_threads = 0;
   float target_error_rate = 0.01;
@@ -195,14 +185,6 @@ int main(int argc, char **argv)
 
 			break;
 
-		/*case 'i':
-			interf1 = atoi(optarg);
-			break;
-
-		case 'j':
-			interf2 = atoi(optarg);
-			break;*/
-
 		case 'n':
 			n_trials = atoi(optarg);
 			break;
@@ -312,13 +294,10 @@ int main(int argc, char **argv)
 		  gNBthreads[sizeof(gNBthreads)-1]=0;
 		  break;
 
-		/*case 'x':
-			transmission_mode = atoi(optarg);
-			break;*/
 
 		default:
 		case 'h':
-			printf("%s -h(elp) -p(extended_prefix) -N cell_id -f output_filename -F input_filename -g channel_model -n n_frames -t Delayspread -s snr0 -S snr1 -x transmission_mode -y TXant -z RXant -i Intefrence0 -j Interference1 -A interpolation_file -C(alibration offset dB) -N CellId\n", argv[0]);
+			printf("%s -h(elp) -p(extended_prefix) -N cell_id -f output_filename -F input_filename -g channel_model -n n_frames -t Delayspread -s snr0 -S snr1  -y TXant -z RXant -i Intefrence0 -j Interference1 -A interpolation_file -C(alibration offset dB) -N CellId\n", argv[0]);
 			printf("-h This message\n");
 			printf("-p Use extended prefix mode\n");
 			printf("-V Enable VCD dumb functions\n");
@@ -333,7 +312,7 @@ int main(int argc, char **argv)
 			printf("-z Number of RX antennas used in UE\n");
 			//printf("-i Relative strength of first intefering eNB (in dB) - cell_id mod 3 = 1\n");
 			//printf("-j Relative strength of second intefering eNB (in dB) - cell_id mod 3 = 2\n");
-  		    printf("-M Multiple SSB positions in burst\n");
+                        printf("-M Multiple SSB positions in burst\n");
 			printf("-N Nid_cell\n");
 			printf("-R N_RB_DL\n");
 			printf("-O oversampling factor (1,2,4,8,16)\n");
@@ -421,17 +400,15 @@ int main(int argc, char **argv)
 	//nr_init_frame_parms_ue(&UE->frame_parms);
 	//init_nr_ue_transport(UE, 0);
         int num_codeword = NR_MAX_NB_LAYERS > 4? 2:1;
-	for (int sf = 0; sf < 2; sf++) {
-		for (i = 0; i < num_codeword; i++) {
-			UE->dlsch[sf][0][i] = new_nr_ue_dlsch(Kmimo, 8, Nsoft, 5, N_RB_DL);
-			if (!UE->dlsch[sf][0][i]) {
-				printf("Can't get ue dlsch structures\n");
-				exit(-1);
-			}
+  for (i = 0; i < num_codeword; i++) {
+    UE->dlsch[0][i] = new_nr_ue_dlsch(Kmimo, 8, Nsoft, 5, N_RB_DL);
+    if (!UE->dlsch[0][i]) {
+      printf("Can't get ue dlsch structures\n");
+      exit(-1);
+    }
 
-			UE->dlsch[sf][0][i]->rnti = n_rnti;
-		}
-	}
+    UE->dlsch[0][i]->rnti = n_rnti;
+  }
 
 	unsigned char harq_pid = 0; //dlsch->harq_ids[subframe];
         processingData_L1tx_t msgDataTx;
@@ -476,7 +453,7 @@ int main(int argc, char **argv)
 	unsigned char test_input_bit[16 * 68 * 384];
 	//estimated_output = (unsigned char *) malloc16(sizeof(unsigned char) * 16 * 68 * 384);
 	unsigned char estimated_output_bit[16 * 68 * 384];
-	NR_UE_DLSCH_t *dlsch0_ue = UE->dlsch[0][0][0];
+	NR_UE_DLSCH_t *dlsch0_ue = UE->dlsch[0][0];
 	NR_DL_UE_HARQ_t *harq_process = dlsch0_ue->harq_processes[harq_pid];
 	harq_process->mcs = Imcs;
 	harq_process->mcs_table = mcs_table;
@@ -638,9 +615,8 @@ int main(int argc, char **argv)
   free(RC.gNB);
 
   int num_cw = NR_MAX_NB_LAYERS > 4? 2:1;
-  for (int sf = 0; sf < 2; sf++)
-    for (int i = 0; i < num_cw; i++)
-      free_nr_ue_dlsch(&UE->dlsch[sf][0][i], N_RB_DL);
+  for (int i = 0; i < num_cw; i++)
+    free_nr_ue_dlsch(&UE->dlsch[0][i], N_RB_DL);
   term_nr_ue_signal(UE, 1);
   free(UE);
 
diff --git a/openair1/SIMULATION/NR_PHY/dlsim.c b/openair1/SIMULATION/NR_PHY/dlsim.c
index 4836a570f39658d0c8faff1f2dc66a62e31d63c1..59a74c9e91829f4329be1c4920a80c6d9676f80b 100644
--- a/openair1/SIMULATION/NR_PHY/dlsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlsim.c
@@ -314,15 +314,6 @@ void nr_dlsim_preprocessor(module_id_t module_id,
   AssertFatal(CCEIndex>=0, "%4d.%2d could not find CCE for DL DCI UE %d/RNTI %04x\n", frame, slot, 0, UE_info->rnti);
   sched_ctrl->cce_index = CCEIndex;
 
-  NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
-
-  nr_set_pdsch_semi_static(current_BWP,
-                           scc,
-                           /* tda = */ 0,
-                           g_nrOfLayers,
-                           sched_ctrl,
-                           ps);
-
   NR_sched_pdsch_t *sched_pdsch = &sched_ctrl->sched_pdsch;
   sched_pdsch->rbStart = g_rbStart;
   sched_pdsch->rbSize = g_rbSize;
@@ -331,17 +322,26 @@ void nr_dlsim_preprocessor(module_id_t module_id,
   /* the following might override the table that is mandated by RRC
    * configuration */
   current_BWP->mcsTableIdx = g_mcsTableIdx;
+  sched_pdsch->time_domain_allocation = get_dl_tda(RC.nrmac[module_id], scc, slot);
+  AssertFatal(sched_pdsch->time_domain_allocation>=0,"Unable to find PDSCH time domain allocation in list\n");
+
+  sched_pdsch->tda_info = nr_get_pdsch_tda_info(current_BWP, sched_pdsch->time_domain_allocation);
+
+  sched_pdsch->dmrs_parms = get_dl_dmrs_params(scc,
+                                               current_BWP,
+                                               &sched_pdsch->tda_info,
+                                               sched_pdsch->nrOfLayers);
 
   sched_pdsch->Qm = nr_get_Qm_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
   sched_pdsch->R = nr_get_code_rate_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
   sched_pdsch->tb_size = nr_compute_tbs(sched_pdsch->Qm,
                                         sched_pdsch->R,
                                         sched_pdsch->rbSize,
-                                        ps->nrOfSymbols,
-                                        ps->N_PRB_DMRS * ps->N_DMRS_SLOT,
+                                        sched_pdsch->tda_info.nrOfSymbols,
+                                        sched_pdsch->dmrs_parms.N_PRB_DMRS * sched_pdsch->dmrs_parms.N_DMRS_SLOT,
                                         0 /* N_PRB_oh, 0 for initialBWP */,
                                         0 /* tb_scaling */,
-                                        ps->nrOfLayers) >> 3;
+                                        sched_pdsch->nrOfLayers) >> 3;
 
   /* the simulator assumes the HARQ PID is equal to the slot number */
   sched_pdsch->dl_harq_pid = slot;
@@ -1028,7 +1028,6 @@ int main(int argc, char **argv)
   scheduled_response.CC_id     = 0;
   scheduled_response.frame = frame;
   scheduled_response.slot  = slot;
-  scheduled_response.thread_id = 0;
   scheduled_response.phy_data = &phy_pdcch_config;
 
   nr_ue_phy_config_request(&UE_mac->phy_config);
@@ -1077,14 +1076,13 @@ int main(int argc, char **argv)
       //multipath_channel(gNB2UE,s_re,s_im,r_re,r_im,frame_length_complex_samples,0);
 
       UE->rx_offset=0;
-      UE_proc.thread_id  = 0;
       UE_proc.frame_rx   = frame;
       UE_proc.nr_slot_rx = slot;
       
       dcireq.frame     = frame;
       dcireq.slot      = slot;
 
-      NR_UE_DLSCH_t *dlsch0 = UE->dlsch[UE_proc.thread_id][0][0];
+      NR_UE_DLSCH_t *dlsch0 = UE->dlsch[0][0];
 
       int harq_pid = slot;
       NR_DL_UE_HARQ_t *UE_harq_process = dlsch0->harq_processes[harq_pid];
@@ -1269,11 +1267,11 @@ int main(int argc, char **argv)
         //---------------------- count errors ----------------------
         //----------------------------------------------------------
 
-        if (UE->dlsch[UE_proc.thread_id][0][0]->last_iteration_cnt >=
-          UE->dlsch[UE_proc.thread_id][0][0]->max_ldpc_iterations+1)
+        if (UE->dlsch[0][0]->last_iteration_cnt >=
+          UE->dlsch[0][0]->max_ldpc_iterations+1)
           n_errors[round][snrRun]++;
 
-        NR_UE_PDSCH **pdsch_vars = UE->pdsch_vars[UE_proc.thread_id];
+        NR_UE_PDSCH **pdsch_vars = UE->pdsch_vars;
         int16_t *UE_llr = pdsch_vars[0]->llr[0];
 
         TBS                  = UE_harq_process->TBS;//rel15->TBSize[0];
@@ -1407,9 +1405,9 @@ int main(int argc, char **argv)
       printStatIndent(&UE->dlsch_unscrambling_stats,"DLSCH unscrambling time");
       printStatIndent(&UE->dlsch_rate_unmatching_stats,"DLSCH Rate Unmatching");
       printf("|__ DLSCH Turbo Decoding(%d bits), avg iterations: %.1f       %.2f us (%d cycles, %d trials)\n",
-	     UE->dlsch[UE_proc.thread_id][0][0]->harq_processes[0]->Cminus ?
-	     UE->dlsch[UE_proc.thread_id][0][0]->harq_processes[0]->Kminus :
-	     UE->dlsch[UE_proc.thread_id][0][0]->harq_processes[0]->Kplus,
+	     UE->dlsch[0][0]->harq_processes[0]->Cminus ?
+	     UE->dlsch[0][0]->harq_processes[0]->Kminus :
+	     UE->dlsch[0][0]->harq_processes[0]->Kplus,
 	     UE->dlsch_tc_intl1_stats.trials/(double)UE->dlsch_tc_init_stats.trials,
 	     (double)UE->dlsch_turbo_decoding_stats.diff/UE->dlsch_turbo_decoding_stats.trials*timeBase,
 	     (int)((double)UE->dlsch_turbo_decoding_stats.diff/UE->dlsch_turbo_decoding_stats.trials),
@@ -1429,11 +1427,11 @@ int main(int argc, char **argv)
       LOG_M("rxsig0.m","rxs0", UE->common_vars.rxdata[0], frame_length_complex_samples, 1, 1);
       if (UE->frame_parms.nb_antennas_rx>1)
 	LOG_M("rxsig1.m","rxs1", UE->common_vars.rxdata[1], frame_length_complex_samples, 1, 1);
-      LOG_M("rxF0.m","rxF0", UE->common_vars.common_vars_rx_data_per_thread[UE_proc.thread_id].rxdataF[0], frame_parms->samples_per_slot_wCP, 1, 1);
-      LOG_M("rxF_ext.m","rxFe",&UE->pdsch_vars[0][0]->rxdataF_ext[0][0],g_rbSize*12*14,1,1);
-      LOG_M("chestF0.m","chF0",&UE->pdsch_vars[0][0]->dl_ch_estimates_ext[0][0],g_rbSize*12*14,1,1);
-      write_output("rxF_comp.m","rxFc",&UE->pdsch_vars[0][0]->rxdataF_comp0[0][0],N_RB_DL*12*14,1,1);
-      LOG_M("rxF_llr.m","rxFllr",UE->pdsch_vars[UE_proc.thread_id][0]->llr[0],available_bits,1,0);
+      LOG_M("rxF0.m","rxF0", UE->common_vars.rxdataF[0], frame_parms->samples_per_slot_wCP, 1, 1);
+      LOG_M("rxF_ext.m","rxFe",&UE->pdsch_vars[0]->rxdataF_ext[0][0],g_rbSize*12*14,1,1);
+      LOG_M("chestF0.m","chF0",&UE->pdsch_vars[0]->dl_ch_estimates_ext[0][0],g_rbSize*12*14,1,1);
+      write_output("rxF_comp.m","rxFc",&UE->pdsch_vars[0]->rxdataF_comp0[0][0],N_RB_DL*12*14,1,1);
+      LOG_M("rxF_llr.m","rxFllr",UE->pdsch_vars[0]->llr[0],available_bits,1,0);
       break;
     }
 
diff --git a/openair1/SIMULATION/NR_PHY/nr_dummy_functions.c b/openair1/SIMULATION/NR_PHY/nr_dummy_functions.c
index 75e3d998ff9a1fb5771e99f86d30a4f6cdffb181..ca9cf6111519e5fe08f80c6627abc47f836d78bd 100644
--- a/openair1/SIMULATION/NR_PHY/nr_dummy_functions.c
+++ b/openair1/SIMULATION/NR_PHY/nr_dummy_functions.c
@@ -5,25 +5,73 @@ int oai_nfapi_hi_dci0_req(nfapi_hi_dci0_request_t *hi_dci0_req)             { re
 int oai_nfapi_tx_req(nfapi_tx_request_t *tx_req)                            { return(0);  }
 int oai_nfapi_dl_config_req(nfapi_dl_config_request_t *dl_config_req)       { return(0);  }
 //int oai_nfapi_ul_config_req(nfapi_ul_config_request_t *ul_config_req)       { return(0);  }
-int oai_nfapi_dl_tti_req(nfapi_nr_dl_tti_request_t *dl_config_req) { return(0);  }
-int oai_nfapi_tx_data_req(nfapi_nr_tx_data_request_t *tx_data_req){ return(0);  }
-int oai_nfapi_ul_dci_req(nfapi_nr_ul_dci_request_t *ul_dci_req){ return(0);  }
-int oai_nfapi_ul_tti_req(nfapi_nr_ul_tti_request_t *ul_tti_req){ return(0);  }
-int oai_nfapi_nr_rx_data_indication(nfapi_nr_rx_data_indication_t *ind) { return(0);  }
-int oai_nfapi_nr_crc_indication(nfapi_nr_crc_indication_t *ind) { return(0);  }
-int oai_nfapi_nr_srs_indication(nfapi_nr_srs_indication_t *ind) { return(0);  }
-int oai_nfapi_nr_uci_indication(nfapi_nr_uci_indication_t *ind) { return(0);  }
-int oai_nfapi_nr_rach_indication(nfapi_nr_rach_indication_t *ind) { return(0);  }
+int oai_nfapi_dl_tti_req(nfapi_nr_dl_tti_request_t *dl_config_req)
+{
+  return (0);
+}
+int oai_nfapi_tx_data_req(nfapi_nr_tx_data_request_t *tx_data_req)
+{
+  return (0);
+}
+int oai_nfapi_ul_dci_req(nfapi_nr_ul_dci_request_t *ul_dci_req)
+{
+  return (0);
+}
+int oai_nfapi_ul_tti_req(nfapi_nr_ul_tti_request_t *ul_tti_req)
+{
+  return (0);
+}
+int oai_nfapi_nr_rx_data_indication(nfapi_nr_rx_data_indication_t *ind)
+{
+  return (0);
+}
+int oai_nfapi_nr_crc_indication(nfapi_nr_crc_indication_t *ind)
+{
+  return (0);
+}
+int oai_nfapi_nr_srs_indication(nfapi_nr_srs_indication_t *ind)
+{
+  return (0);
+}
+int oai_nfapi_nr_uci_indication(nfapi_nr_uci_indication_t *ind)
+{
+  return (0);
+}
+int oai_nfapi_nr_rach_indication(nfapi_nr_rach_indication_t *ind)
+{
+  return (0);
+}
+
+int pack_nr_srs_beamforming_report(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen)
+{
+  return 0;
+}
+int pack_nr_srs_normalized_channel_iq_matrix(void *pMessageBuf, void *pPackedBuf, uint32_t packedBufLen)
+{
+  return 0;
+}
 
-int32_t get_uldl_offset(int nr_bandP)                                       { return(0);  }
-NR_IF_Module_t *NR_IF_Module_init(int Mod_id)                               {return(NULL);}
+int32_t get_uldl_offset(int nr_bandP)
+{
+  return (0);
+}
+NR_IF_Module_t *NR_IF_Module_init(int Mod_id)
+{
+  return (NULL);
+}
 nfapi_mode_t nfapi_mod;
-nfapi_mode_t nfapi_getmode(void) {
+nfapi_mode_t nfapi_getmode(void)
+{
   return nfapi_mod;
 }
-void nfapi_setmode(nfapi_mode_t nfapi_mode) {}
+void nfapi_setmode(nfapi_mode_t nfapi_mode)
+{
+}
 
-int dummy_nr_ue_dl_indication(nr_downlink_indication_t *dl_info)            { return(0);  }
+int dummy_nr_ue_dl_indication(nr_downlink_indication_t *dl_info)
+{
+  return (0);
+}
 int dummy_nr_ue_ul_indication(nr_uplink_indication_t *ul_info)              { return(0);  }
 void nr_fill_dl_indication(nr_downlink_indication_t *dl_ind,
                            fapi_nr_dci_indication_t *dci_ind,
diff --git a/openair1/SIMULATION/NR_PHY/nr_unitary_defs.h b/openair1/SIMULATION/NR_PHY/nr_unitary_defs.h
index d3af2acd77f14041ed64cb44593eb5582588cf28..c556f6636191b633844e87a2095012578504f174 100644
--- a/openair1/SIMULATION/NR_PHY/nr_unitary_defs.h
+++ b/openair1/SIMULATION/NR_PHY/nr_unitary_defs.h
@@ -63,7 +63,6 @@ void fill_scc_sim(NR_ServingCellConfigCommon_t *scc,uint64_t *ssb_bitmap,int N_R
 void fix_scc(NR_ServingCellConfigCommon_t *scc,uint64_t ssbmap);
 void prepare_scc(NR_ServingCellConfigCommon_t *scc);
 void prepare_scd(NR_ServingCellConfig_t *scd);
-ngap_gNB_config_t ngap_config;
 uint32_t ngap_generate_gNB_id(void) {return 0;}
 void configure_nfapi_pnf(char *vnf_ip_addr, int vnf_p5_port, char *pnf_ip_addr, int pnf_p7_port, int vnf_p7_port) { return;}
 void configure_nfapi_vnf(char *vnf_addr, int vnf_p5_port) { return;}
diff --git a/openair1/SIMULATION/NR_PHY/ulschsim.c b/openair1/SIMULATION/NR_PHY/ulschsim.c
index 6ef8bff116bf3b09b33afa9e620eba8e46262ff0..91377e3d31c0ff195c76813465ad4e326548c4a8 100644
--- a/openair1/SIMULATION/NR_PHY/ulschsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulschsim.c
@@ -121,7 +121,7 @@ nrUE_params_t *get_nrUE_params(void) {
 int main(int argc, char **argv)
 {
   char c;
-  int i,sf;
+  int i;
   double SNR, snr0 = -2.0, snr1 = 2.0, SNR_lin;
   double snr_step = 0.1;
   uint8_t snr1set = 0;
@@ -428,12 +428,10 @@ int main(int argc, char **argv)
     exit(-1);
   }
 
-  for (sf = 0; sf < 2; sf++) {
-    UE->ulsch[sf][0] = new_nr_ue_ulsch(N_RB_UL, 8, frame_parms);
-    if (!UE->ulsch[sf][0]) {
-      printf("Can't get ue ulsch structures.\n");
-      exit(-1);
-    }
+  UE->ulsch[0] = new_nr_ue_ulsch(N_RB_UL, 8, frame_parms);
+  if (!UE->ulsch[0]) {
+    printf("Can't get ue ulsch structures.\n");
+    exit(-1);
   }
 
   unsigned char harq_pid = 0;
@@ -451,7 +449,7 @@ int main(int argc, char **argv)
   NR_UL_gNB_HARQ_t *harq_process_gNB = ulsch_gNB->harq_processes[harq_pid];
   nfapi_nr_pusch_pdu_t *rel15_ul = &harq_process_gNB->ulsch_pdu;
 
-  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0][0];
+  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0];
 
   if ((Nl==4)||(Nl==3))
     nb_re_dmrs = nb_re_dmrs*2;
@@ -636,8 +634,7 @@ int main(int argc, char **argv)
     printf("\n");
   }
 
-  for (sf = 0; sf < 2; sf++)
-    free_nr_ue_ulsch(&UE->ulsch[sf][0], N_RB_UL, frame_parms);
+  free_nr_ue_ulsch(&UE->ulsch[0], N_RB_UL, frame_parms);
 
   term_nr_ue_signal(UE, 1);
   free(UE);
diff --git a/openair1/SIMULATION/NR_PHY/ulsim.c b/openair1/SIMULATION/NR_PHY/ulsim.c
index fb15cc1c878dfb326ca47d4eca8623a6a3ec4c50..dd6e30302defdbec32c8606bd33830d011372bb4 100644
--- a/openair1/SIMULATION/NR_PHY/ulsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulsim.c
@@ -868,7 +868,7 @@ int main(int argc, char **argv)
 
   nfapi_nr_pusch_pdu_t  *pusch_pdu = &UL_tti_req->pdus_list[0].pusch_pdu;
 
-  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0][0];
+  NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0];
 
   unsigned char *estimated_output_bit;
   unsigned char *test_input_bit;
@@ -1104,7 +1104,6 @@ int main(int argc, char **argv)
       gNB->ulsch[0]->harq_processes[harq_pid]->round = round;
       rv_index = nr_rv_round_map[round];
 
-      UE_proc.thread_id = 0;
       UE_proc.nr_slot_tx = slot;
       UE_proc.frame_tx = frame;
 
@@ -1187,7 +1186,6 @@ int main(int argc, char **argv)
       scheduled_response.CC_id = 0;
       scheduled_response.frame = frame;
       scheduled_response.slot = slot;
-      scheduled_response.thread_id = UE_proc.thread_id;
       scheduled_response.dl_config = NULL;
       scheduled_response.ul_config = &ul_config;
       scheduled_response.tx_request = &tx_req;
@@ -1409,13 +1407,8 @@ int main(int argc, char **argv)
 
 
     if (n_trials == 1  && round==0) {
-#ifdef __AVX2__
       __attribute__((unused))
       int off = ((nb_rb&1) == 1)? 4:0;
-#else
-      __attribute__((unused))
-      int off = 0;
-#endif
 
       LOG_M("rxsigF0_ext.m","rxsF0_ext",
             &gNB->pusch_vars[0]->rxdataF_ext[0][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
diff --git a/openair1/SIMULATION/TOOLS/DOC/rtusage.md b/openair1/SIMULATION/TOOLS/DOC/rtusage.md
index 37fc62b496e9aa9769de4e2285f481c5f7ebe2d1..13a54c258c8113150f5f9fcf201ce412d6e6fb5b 100644
--- a/openair1/SIMULATION/TOOLS/DOC/rtusage.md
+++ b/openair1/SIMULATION/TOOLS/DOC/rtusage.md
@@ -63,7 +63,7 @@ softmodem_enb>
 
 
 
-The [rfsimulator documentation](../../../../targets/ARCH/rfsimulator/README.md ) has also some specific information when using the channel simulation via this tool. 
+The [rfsimulator documentation](../../../../sdr/rfsimulator/README.md) also has some specific information when using the channel simulation via this tool.
 
 [channel simulation main page](channel_simulation.md)
-[oai Wikis home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
\ No newline at end of file
+[oai Wikis home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
diff --git a/openair2/COMMON/ngap_messages_types.h b/openair2/COMMON/ngap_messages_types.h
index ca7dd0b0bb2c08ef0227c9a53db8a59863fc6650..961599a9ede79eccab633e0e981b9f80602a8d34 100644
--- a/openair2/COMMON/ngap_messages_types.h
+++ b/openair2/COMMON/ngap_messages_types.h
@@ -133,6 +133,18 @@ typedef struct ngap_ambr_s {
 typedef enum ngap_priority_level_s {
   NGAP_PRIORITY_LEVEL_SPARE       = 0,
   NGAP_PRIORITY_LEVEL_HIGHEST     = 1,
+  NGAP_PRIORITY_LEVEL_2           = 2,
+  NGAP_PRIORITY_LEVEL_3           = 3,
+  NGAP_PRIORITY_LEVEL_4           = 4,
+  NGAP_PRIORITY_LEVEL_5           = 5,
+  NGAP_PRIORITY_LEVEL_6           = 6,
+  NGAP_PRIORITY_LEVEL_7           = 7,
+  NGAP_PRIORITY_LEVEL_8           = 8,
+  NGAP_PRIORITY_LEVEL_9           = 9,
+  NGAP_PRIORITY_LEVEL_10          = 10,
+  NGAP_PRIORITY_LEVEL_11          = 11,
+  NGAP_PRIORITY_LEVEL_12          = 12,
+  NGAP_PRIORITY_LEVEL_13          = 13,
   NGAP_PRIORITY_LEVEL_LOWEST      = 14,
   NGAP_PRIORITY_LEVEL_NO_PRIORITY = 15
 } ngap_priority_level_t;
@@ -281,6 +293,8 @@ typedef struct pdusession_s {
   ngap_transport_layer_addr_t      upf_addr;
   /* S-GW Tunnel endpoint identifier */
   uint32_t                         gtp_teid;
+  /* Stores the IDs of the DRBs used by this PDU Session */
+  uint8_t                          used_drbs[NGAP_MAX_DRBS_PER_UE];
 } pdusession_t;
 
 
diff --git a/openair2/COMMON/platform_constants.h b/openair2/COMMON/platform_constants.h
index d40965052899ee87e6c1fe15e40764372f25f555..f053fe7f1e5f69eed557cc92bb54b636be123653 100644
--- a/openair2/COMMON/platform_constants.h
+++ b/openair2/COMMON/platform_constants.h
@@ -102,6 +102,9 @@
 #define NB_RB_MAX      (LTE_maxDRB + 3) /* was 11, now 14, maxDRB comes from asn1_constants.h, + 3 because of 3 SRB, one invisible id 0, then id 1 and 2 */
 #define NR_NB_RB_MAX   (NR_maxDRB + 3)
 
+#define NGAP_MAX_PDU_SESSION (256)  /* As defined in TS 38.413 9.2.1.1 Range Bound for PDU Sessions. */
+#define NGAP_MAX_DRBS_PER_UE (32)   /* As defined in TS 38.413 9.2.1.1 - maxnoofDRBs */
+
 #define NB_RB_MBMS_MAX (LTE_maxSessionPerPMCH*LTE_maxServiceCount)
 
 #define NB_RAB_MAX     LTE_maxDRB       /* was 8, now 11 */
diff --git a/openair2/COMMON/prs_nr_paramdef.h b/openair2/COMMON/prs_nr_paramdef.h
new file mode 100644
index 0000000000000000000000000000000000000000..fe16b01fb22e7fc0cc7e4ef21e91309b32f43710
--- /dev/null
+++ b/openair2/COMMON/prs_nr_paramdef.h
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file openair2/COMMON/prs_nr_paramdef.h
+ * \brief definition of configuration parameters for PRS 
+ * \author
+ * \date 2022
+ * \version 0.1
+ * \company EURECOM
+ * \email:
+ * \note
+ * \warning
+ */
+
+#ifndef __PRS_NR_PARAMDEF__H__
+#define __PRS_NR_PARAMDEF__H__
+
+/*-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------*/
+
+/* PRS configuration section names */
+#define CONFIG_STRING_PRS_LIST                              "PRSs"
+#define CONFIG_STRING_PRS_CONFIG                            "prs_config"
+
+
+/* Global parameters */
+#define CONFIG_STRING_ACTIVE_GNBs                           "Active_gNBs"
+#define HELP_STRING_ACTIVE_GNBs                             "Number of active gNBs simultaneously transmitting PRS signal to a UE\n"
+/*----------------------------------------------------------------------------------------------------------------------------------------------------*/
+/*                                            PRS configuration          parameters                                                                   */
+/*   optname                                         helpstr            paramflags    XXXptr              defXXXval                type        numelt */
+/*----------------------------------------------------------------------------------------------------------------------------------------------------*/
+#define PRS_GLOBAL_PARAMS_DESC { \
+{CONFIG_STRING_ACTIVE_GNBs,                   HELP_STRING_ACTIVE_GNBs,      0,        uptr:NULL,         defuintval:0,           TYPE_UINT,     0}    \
+}
+
+#define PRS_ACTIVE_GNBS_IDX                          0
+/*----------------------------------------------------------------------------------------------------------------------------------------------------*/
+
+/* PRS configuration parameters names */
+#define CONFIG_STRING_GNB_ID                                "gNB_id"
+#define CONFIG_STRING_NUM_PRS_RESOURCES                     "NumPRSResources"
+#define CONFIG_STRING_PRS_RESOURCE_SET_PERIOD_LIST          "PRSResourceSetPeriod"
+#define CONFIG_STRING_PRS_SYMBOL_START_LIST                 "SymbolStart"
+#define CONFIG_STRING_PRS_NUM_SYMBOLS_LIST                  "NumPRSSymbols"
+#define CONFIG_STRING_PRS_NUM_RB                            "NumRB"
+#define CONFIG_STRING_PRS_RB_OFFSET                         "RBOffset"
+#define CONFIG_STRING_PRS_COMB_SIZE                         "CombSize"
+#define CONFIG_STRING_PRS_RE_OFFSET_LIST                    "REOffset"
+#define CONFIG_STRING_PRS_RESOURCE_OFFSET_LIST              "PRSResourceOffset"
+#define CONFIG_STRING_PRS_RESOURCE_REPETITION               "PRSResourceRepetition"
+#define CONFIG_STRING_PRS_RESOURCE_TIME_GAP                 "PRSResourceTimeGap"
+#define CONFIG_STRING_PRS_ID_LIST                           "NPRS_ID"
+#define CONFIG_STRING_PRS_MUTING_PATTERN1_LIST              "MutingPattern1"
+#define CONFIG_STRING_PRS_MUTING_PATTERN2_LIST              "MutingPattern2"
+#define CONFIG_STRING_PRS_MUTING_BIT_REPETITION             "MutingBitRepetition"
+
+/* Help string for PRS parameters */
+#define HELP_STRING_GNB_ID                                  "gNB index for UE (<= CombSize)\n"
+#define HELP_STRING_NUM_PRS_RESOURCES                       "Number of PRS resources in a PRS resource set\n"
+#define HELP_STRING_PRS_RESOURCE_SET_PERIOD_LIST            "[slot period, slot offset] of a PRS resource set\n"
+#define HELP_STRING_PRS_SYMBOL_START_LIST                   "Starting OFDM symbol of each PRS resource in a PRS resource set\n"
+#define HELP_STRING_PRS_NUM_SYMBOLS_LIST                    "Number of OFDM symbols in a slot for each PRS resource in a PRS resource set\n"
+#define HELP_STRING_PRS_NUM_RB                              "Number of PRBs allocated to all PRS resources in a PRS resource set (<= 272 and multiples of 4)\n"
+#define HELP_STRING_PRS_RB_OFFSET                           "Starting PRB index of all PRS resources in a PRS resource set\n"
+#define HELP_STRING_PRS_COMB_SIZE                           "RE density of all PRS resources in a PRS resource set (2, 4, 6, 12)\n"
+#define HELP_STRING_PRS_RE_OFFSET_LIST                      "Starting RE offset in the first OFDM symbol of each PRS resource in a PRS resource set\n"
+#define HELP_STRING_PRS_RESOURCE_OFFSET_LIST                "Slot offset of each PRS resource defined relative to the slot offset of the PRS resource set (0...511)\n"
+#define HELP_STRING_PRS_RESOURCE_REPETITION                 "Repetition factor for all PRS resources in resource set (1 /*default*/, 2, 4, 6, 8, 16, 32)\n"
+#define HELP_STRING_PRS_RESOURCE_TIME_GAP                   "Slot offset between two consecutive repetition indices of all PRS resources in a PRS resource set (1 /*default*/, 2, 4, 6, 8, 16, 32)\n"
+#define HELP_STRING_PRS_ID_LIST                             "Sequence identity of each PRS resource in a PRS resource set, specified in the range [0, 4095]\n"
+#define HELP_STRING_PRS_MUTING_PATTERN1_LIST                "Muting bit pattern option-1, specified as [] or a binary-valued vector of length 2, 4, 6, 8, 16, or 32\n"
+#define HELP_STRING_PRS_MUTING_PATTERN2_LIST                "Muting bit pattern option-2, specified as [] or a binary-valued vector of length 2, 4, 6, 8, 16, or 32\n"
+#define HELP_STRING_PRS_MUTING_BIT_REPETITION               "Muting bit repetition factor, specified as 1, 2, 4, or 8\n"
+
+
+/*----------------------------------------------------------------------------------------------------------------------------------------------------------------*/
+/*                                            PRS configuration                parameters                                                                         */
+/*   optname                                         helpstr                  paramflags    XXXptr              defXXXval                  type           numelt  */
+/*----------------------------------------------------------------------------------------------------------------------------------------------------------------*/
+#define PRS_PARAMS_DESC { \
+{CONFIG_STRING_GNB_ID,                        HELP_STRING_GNB_ID,                    0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_NUM_PRS_RESOURCES,             HELP_STRING_NUM_PRS_RESOURCES,         0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_RESOURCE_SET_PERIOD_LIST,  HELP_STRING_PRS_RESOURCE_SET_PERIOD_LIST, 0,   uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_SYMBOL_START_LIST,         HELP_STRING_PRS_SYMBOL_START_LIST,     0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_NUM_SYMBOLS_LIST,          HELP_STRING_PRS_NUM_SYMBOLS_LIST,      0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_NUM_RB,                    HELP_STRING_PRS_NUM_RB,                0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_RB_OFFSET,                 HELP_STRING_PRS_RB_OFFSET,             0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_COMB_SIZE,                 HELP_STRING_PRS_COMB_SIZE,             0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_RE_OFFSET_LIST,            HELP_STRING_PRS_RE_OFFSET_LIST,        0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_RESOURCE_OFFSET_LIST,      HELP_STRING_PRS_RESOURCE_OFFSET_LIST,  0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_RESOURCE_REPETITION,       HELP_STRING_PRS_RESOURCE_REPETITION,   0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_RESOURCE_TIME_GAP,         HELP_STRING_PRS_RESOURCE_TIME_GAP,     0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0},  \
+{CONFIG_STRING_PRS_ID_LIST,                   HELP_STRING_PRS_ID_LIST,               0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_MUTING_PATTERN1_LIST,      HELP_STRING_PRS_MUTING_PATTERN1_LIST,  0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_MUTING_PATTERN2_LIST,      HELP_STRING_PRS_MUTING_PATTERN2_LIST,  0,      uptr:NULL,         defintarrayval:0,          TYPE_UINTARRAY,  0},  \
+{CONFIG_STRING_PRS_MUTING_BIT_REPETITION,     HELP_STRING_PRS_MUTING_BIT_REPETITION, 0,      uptr:NULL,         defuintval:0,              TYPE_UINT,       0}   \
+}
+
+#define PRS_GNB_ID                                   0
+#define NUM_PRS_RESOURCES                            1
+#define PRS_RESOURCE_SET_PERIOD_LIST                 2
+#define PRS_SYMBOL_START_LIST                        3
+#define PRS_NUM_SYMBOLS_LIST                         4
+#define PRS_NUM_RB                                   5
+#define PRS_RB_OFFSET                                6
+#define PRS_COMB_SIZE                                7
+#define PRS_RE_OFFSET_LIST                           8
+#define PRS_RESOURCE_OFFSET_LIST                     9
+#define PRS_RESOURCE_REPETITION                      10
+#define PRS_RESOURCE_TIME_GAP                        11
+#define PRS_ID_LIST                                  12
+#define PRS_MUTING_PATTERN1_LIST                     13
+#define PRS_MUTING_PATTERN2_LIST                     14
+#define PRS_MUTING_BIT_REPETITION                    15
+
+/*----------------------------------------------------------------------------------------------------------------------------------------------------*/
+
+#endif
diff --git a/openair2/COMMON/rrc_messages_types.h b/openair2/COMMON/rrc_messages_types.h
index 18e7c4bd18b08296e2d567eb214e03f3b0d81374..64f3b68bfb2c89f692f6d179d8ea304ecb89bdf7 100644
--- a/openair2/COMMON/rrc_messages_types.h
+++ b/openair2/COMMON/rrc_messages_types.h
@@ -427,6 +427,7 @@ typedef struct NRRrcConfigurationReq_s {
   int                     pusch_TargetSNRx10;
   int                     pucch_TargetSNRx10;
   bool                    enable_sdap;
+  int                     drbs;
 } gNB_RrcConfigurationReq;
 
 typedef struct NRDuDlReq_s {
diff --git a/openair2/ENB_APP/NB_IoT_config.c b/openair2/ENB_APP/NB_IoT_config.c
index 26ba8989e17ae258a29d8490c6ad0e956e5cb03b..617954bf3a04909eddbb113ad7f9b244b54b20ea 100644
--- a/openair2/ENB_APP/NB_IoT_config.c
+++ b/openair2/ENB_APP/NB_IoT_config.c
@@ -39,7 +39,7 @@
 #include "SystemInformationBlockType2.h"
 
 #include "PHY/phy_extern.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "common/config/config_userapi.h"
 #include "RRC_config_tools.h"
 #include "RRC_paramsvalues.h"
diff --git a/openair2/ENB_APP/enb_config.c b/openair2/ENB_APP/enb_config.c
index bce15f9050b25e46a9b32cf52f1f616b6156739b..5e2ce872dbfcb57885783ce28b03b7ed2773adf6 100644
--- a/openair2/ENB_APP/enb_config.c
+++ b/openair2/ENB_APP/enb_config.c
@@ -46,7 +46,7 @@
 #include "LAYER2/MAC/mac_proto.h"
 #include "PHY/phy_extern.h"
 #include "PHY/INIT/phy_init.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "nfapi_vnf.h"
 #include "nfapi_pnf.h"
 #include "targets/RT/USER/lte-softmodem.h"
diff --git a/openair2/ENB_APP/enb_paramdef.h b/openair2/ENB_APP/enb_paramdef.h
index 0f7e762810d252abb5d7f36e6454defe921a51e2..20a5807616d84c3f03e474e3022b3432a98ef7b9 100644
--- a/openair2/ENB_APP/enb_paramdef.h
+++ b/openair2/ENB_APP/enb_paramdef.h
@@ -103,6 +103,8 @@ typedef enum {
 #define CONFIG_STRING_RU_SL_AHEAD                 "sl_ahead"
 #define CONFIG_STRING_RU_NR_FLAG                  "nr_flag"
 #define CONFIG_STRING_RU_NR_SCS_FOR_RASTER        "nr_scs_for_raster"
+#define CONFIG_STRING_RU_TX_SUBDEV                "tx_subdev"
+#define CONFIG_STRING_RU_RX_SUBDEV                "rx_subdev"
 #define CONFIG_STRING_RU_RXFH_CORE_ID             "rxfh_core_id"
 #define CONFIG_STRING_RU_TXFH_CORE_ID             "txfh_core_id"
 #define CONFIG_STRING_RU_TP_CORES                 "tp_cores"
@@ -154,12 +156,14 @@ typedef enum {
 #define RU_SL_AHEAD                   30 
 #define RU_NR_FLAG                    31 
 #define RU_NR_SCS_FOR_RASTER          32
-#define RU_RXFH_CORE_ID               33
-#define RU_TXFH_CORE_ID               34
-#define RU_TP_CORES                   35
-#define RU_NUM_TP_CORES               36
-#define RU_NUM_INTERFACES             37
-#define RU_HALF_SLOT_PARALLELIZATION  38
+#define RU_TX_SUBDEV                  33
+#define RU_RX_SUBDEV                  34
+#define RU_RXFH_CORE_ID               35
+#define RU_TXFH_CORE_ID               36
+#define RU_TP_CORES                   37
+#define RU_NUM_TP_CORES               38
+#define RU_NUM_INTERFACES             39
+#define RU_HALF_SLOT_PARALLELIZATION  40
 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
 /*                                            RU configuration parameters                                                                  */
 /*   optname                                   helpstr   paramflags    XXXptr          defXXXval                   type      numelt        */
@@ -198,6 +202,8 @@ typedef enum {
     {CONFIG_STRING_RU_SL_AHEAD,          HLP_RU_SL_AHEAD,      0,       iptr:NULL,       defintval:6,             TYPE_INT,         0}, \
     {CONFIG_STRING_RU_NR_FLAG,           HLP_RU_NR_FLAG,       0,       iptr:NULL,       defintval:0,             TYPE_INT,         0}, \
     {CONFIG_STRING_RU_NR_SCS_FOR_RASTER, HLP_RU_NR_SCS_FOR_RASTER, 0,   iptr:NULL,       defintval:1,             TYPE_INT,         0}, \
+    {CONFIG_STRING_RU_TX_SUBDEV,                   NULL,       0,       strptr:NULL,     defstrval:"",            TYPE_STRING,      0}, \
+    {CONFIG_STRING_RU_RX_SUBDEV,                   NULL,       0,       strptr:NULL,     defstrval:"",            TYPE_STRING,      0}, \
     {CONFIG_STRING_RU_RXFH_CORE_ID, HLP_RU_RXFH_CORE_ID,       0,       uptr:NULL,       defintval:0,             TYPE_UINT,         0}, \
     {CONFIG_STRING_RU_TXFH_CORE_ID, HLP_RU_TXFH_CORE_ID,       0,       uptr:NULL,       defintval:0,             TYPE_UINT,         0}, \
     {CONFIG_STRING_RU_TP_CORES, HLP_RU_TP_CORES,               0,       uptr:NULL,       defintarrayval:DEFRUTPCORES,  TYPE_INTARRAY,    8}, \
diff --git a/openair2/GNB_APP/gnb_app.c b/openair2/GNB_APP/gnb_app.c
index 821e2def84afca4c56a60feffaef75f987423c9c..9263525b318a527ceff50ede286429e4940dc807 100644
--- a/openair2/GNB_APP/gnb_app.c
+++ b/openair2/GNB_APP/gnb_app.c
@@ -88,7 +88,7 @@ static uint32_t gNB_app_register(uint32_t gnb_id_start, uint32_t gnb_id_end)//,
 
   for (gnb_id = gnb_id_start; (gnb_id < gnb_id_end) ; gnb_id++) {
     {
-      if(NGAP_CONF_MODE){
+      if(get_softmodem_params()->sa){
         ngap_register_gnb_req_t *ngap_register_gNB; //Type Temporarily reuse
           
         // note:  there is an implicit relationship between the data structure and the message name
@@ -155,6 +155,7 @@ void *gNB_app_task(void *args_p)
   LOG_I(PHY, "%s() Task ready initialize structures\n", __FUNCTION__);
 
   RCconfig_NR_L1();
+  RCconfig_nr_prs();
 
   if (RC.nb_nr_macrlc_inst>0) RCconfig_nr_macrlc();
 
@@ -187,7 +188,7 @@ void *gNB_app_task(void *args_p)
 
   /* For the CU case the gNB registration with the AMF might have to take place after the F1 setup, as the PLMN info
      * can originate from the DU. Add check on whether x2ap is enabled to account for ENDC NSA scenario.*/
-  if ((AMF_MODE_ENABLED || is_x2ap_enabled()) && !NODE_IS_DU(RC.nrrrc[0]->node_type) ) { //&& !NODE_IS_CU(RC.nrrrc[0]->node_type)) {
+  if ((get_softmodem_params()->sa || is_x2ap_enabled()) && !NODE_IS_DU(RC.nrrrc[0]->node_type) ) { //&& !NODE_IS_CU(RC.nrrrc[0]->node_type)) {
     /* Try to register each gNB */
     //registered_gnb = 0;
     __attribute__((unused)) uint32_t register_gnb_pending = gNB_app_register (gnb_id_start, gnb_id_end);
diff --git a/openair2/GNB_APP/gnb_config.c b/openair2/GNB_APP/gnb_config.c
index ef610f6f64168495fd709118eb6a24143ba2aee2..bbd1098887c63576ca142b14f107e92bc95f9c00 100644
--- a/openair2/GNB_APP/gnb_config.c
+++ b/openair2/GNB_APP/gnb_config.c
@@ -34,6 +34,7 @@
 #include "common/utils/nr/nr_common.h"
 #include "common/utils/LOG/log_extern.h"
 #include "assertions.h"
+#include "executables/softmodem-common.h"
 #include "gnb_config.h"
 #include "gnb_paramdef.h"
 #include "enb_paramdef.h"
@@ -49,11 +50,12 @@
 // #include "LAYER2/MAC/extern.h"
 // #include "LAYER2/MAC/proto.h"
 #include "PHY/INIT/phy_init.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "nfapi_vnf.h"
 #include "nfapi_pnf.h"
 
 //#include "L1_paramdef.h"
+#include "prs_nr_paramdef.h"
 #include "L1_nr_paramdef.h"
 #include "MACRLC_nr_paramdef.h"
 #include "common/config/config_userapi.h"
@@ -639,7 +641,7 @@ void RCconfig_nr_flexran()
     /* gNB ID from configuration, as read in by RCconfig_RRC() */
     if (!GNBParamList.paramarray[i][GNB_GNB_ID_IDX].uptr) {
       // Calculate a default gNB ID
-    if (AMF_MODE_ENABLED) 
+    if (get_softmodem_params()->sa) 
       gnb_id = i + (ngap_generate_gNB_id () & 0xFFFFFF8);
     else
       gnb_id = i;
@@ -670,6 +672,100 @@ void RCconfig_nr_flexran()
   }
 }
 
+/* Append val to the comma-separated list held in buf (capacity size); snprintf truncates once buf is full. */
+static void prs_append_val(char *buf, size_t size, unsigned int val)
+{
+  snprintf(buf + strlen(buf), size - strlen(buf), "%s%u", buf[0] != '\0' ? ", " : "", val);
+}
+
+/* Read the "prs_config" list and fill prs_vars of each gNB L1 instance (list entry j configures
+ * instance j), allocating RC.gNB / RC.gNB[j] when they are still unset, then log the settings.
+ * Only an error is logged when the configuration holds no "prs_config" entry. */
+void RCconfig_nr_prs(void)
+{
+  paramdef_t PRS_Params[] = PRS_PARAMS_DESC;
+  paramlist_def_t PRS_ParamList = {CONFIG_STRING_PRS_CONFIG,NULL,0};
+  if (RC.gNB == NULL) {
+    RC.gNB = (PHY_VARS_gNB **)calloc(1+NUMBER_OF_gNB_MAX, sizeof(PHY_VARS_gNB*));
+    AssertFatal(RC.gNB != NULL, "out of memory allocating RC.gNB\n");
+    LOG_I(NR_PHY,"RC.gNB = %p\n",RC.gNB);
+  }
+  config_getlist( &PRS_ParamList,PRS_Params,sizeof(PRS_Params)/sizeof(paramdef_t), NULL);
+  if (PRS_ParamList.numelt <= 0) {
+    LOG_E(PHY,"No " CONFIG_STRING_PRS_CONFIG " configuration found..!!\n");
+    return;
+  }
+  // paramarray only holds numelt entries: never index it past that, whatever the L1 instance count
+  for (int j = 0; j < RC.nb_nr_L1_inst && j < PRS_ParamList.numelt; j++) {
+    paramdef_t *p = PRS_ParamList.paramarray[j];
+    char str[7][100] = {0}; // log buffers, cleared per gNB so one instance's lists do not leak into the next
+    if (RC.gNB[j] == NULL) {
+      RC.gNB[j] = (PHY_VARS_gNB *)calloc(1, sizeof(PHY_VARS_gNB));
+      AssertFatal(RC.gNB[j] != NULL, "out of memory allocating RC.gNB[%d]\n", j);
+      LOG_I(NR_PHY,"RC.gNB[%d] = %p\n",j,RC.gNB[j]);
+      RC.gNB[j]->Mod_id  = j;
+    }
+    RC.gNB[j]->prs_vars.NumPRSResources = *(p[NUM_PRS_RESOURCES].uptr);
+    const int nres = RC.gNB[j]->prs_vars.NumPRSResources; // read back so the loop uses the stored value
+    // the set period is [period, offset] and every per-resource list needs one value per resource
+    AssertFatal(nres == 0 || (p[PRS_RESOURCE_SET_PERIOD_LIST].numelt >= 2 && p[PRS_SYMBOL_START_LIST].numelt >= nres &&
+                p[PRS_NUM_SYMBOLS_LIST].numelt >= nres && p[PRS_RE_OFFSET_LIST].numelt >= nres &&
+                p[PRS_RESOURCE_OFFSET_LIST].numelt >= nres && p[PRS_ID_LIST].numelt >= nres),
+                "%s[%d]: parameter lists too short for %d PRS resources\n", CONFIG_STRING_PRS_CONFIG, j, nres);
+    for (int k = 0; k < nres; k++) { // NOTE(review): nres is not checked against the capacity of prs_cfg - confirm
+      prs_config_t *prs_config = &RC.gNB[j]->prs_vars.prs_cfg[k];
+      prs_config->PRSResourceSetPeriod[0]  = p[PRS_RESOURCE_SET_PERIOD_LIST].uptr[0];
+      prs_config->PRSResourceSetPeriod[1]  = p[PRS_RESOURCE_SET_PERIOD_LIST].uptr[1];
+      // per PRS resources parameters
+      prs_config->SymbolStart              = p[PRS_SYMBOL_START_LIST].uptr[k];
+      prs_config->NumPRSSymbols            = p[PRS_NUM_SYMBOLS_LIST].uptr[k];
+      prs_config->REOffset                 = p[PRS_RE_OFFSET_LIST].uptr[k];
+      prs_config->PRSResourceOffset        = p[PRS_RESOURCE_OFFSET_LIST].uptr[k];
+      prs_config->NPRSID                   = p[PRS_ID_LIST].uptr[k];
+      // Common parameters to all PRS resources
+      prs_config->NumRB                    = *(p[PRS_NUM_RB].uptr);
+      prs_config->RBOffset                 = *(p[PRS_RB_OFFSET].uptr);
+      prs_config->CombSize                 = *(p[PRS_COMB_SIZE].uptr);
+      prs_config->PRSResourceRepetition    = *(p[PRS_RESOURCE_REPETITION].uptr);
+      prs_config->PRSResourceTimeGap       = *(p[PRS_RESOURCE_TIME_GAP].uptr);
+      prs_config->MutingBitRepetition      = *(p[PRS_MUTING_BIT_REPETITION].uptr);
+      for (int l = 0; l < p[PRS_MUTING_PATTERN1_LIST].numelt; l++) { // NOTE(review): bounded by the config list only - confirm capacity
+        prs_config->MutingPattern1[l]      = p[PRS_MUTING_PATTERN1_LIST].uptr[l];
+        if (k == 0) prs_append_val(str[5], sizeof(str[5]), prs_config->MutingPattern1[l]); // log 0th resource only
+      }
+      for (int l = 0; l < p[PRS_MUTING_PATTERN2_LIST].numelt; l++) { // NOTE(review): bounded by the config list only - confirm capacity
+        prs_config->MutingPattern2[l]      = p[PRS_MUTING_PATTERN2_LIST].uptr[l];
+        if (k == 0) prs_append_val(str[6], sizeof(str[6]), prs_config->MutingPattern2[l]); // log 0th resource only
+      }
+      prs_append_val(str[0], sizeof(str[0]), prs_config->SymbolStart); // per-resource values for the log below
+      prs_append_val(str[1], sizeof(str[1]), prs_config->NumPRSSymbols);
+      prs_append_val(str[2], sizeof(str[2]), prs_config->REOffset);
+      prs_append_val(str[3], sizeof(str[3]), prs_config->PRSResourceOffset);
+      prs_append_val(str[4], sizeof(str[4]), prs_config->NPRSID);
+    } // for k
+    prs_config_t *prs_config = &RC.gNB[j]->prs_vars.prs_cfg[0]; // set-wide values are logged from resource 0
+    LOG_I(PHY, "-----------------------------------------\n");
+    LOG_I(PHY, "PRS Config for gNB_id %d @ %p\n", j, prs_config);
+    LOG_I(PHY, "-----------------------------------------\n");
+    LOG_I(PHY, "NumPRSResources \t%d\n", RC.gNB[j]->prs_vars.NumPRSResources);
+    LOG_I(PHY, "PRSResourceSetPeriod \t[%d, %d]\n", prs_config->PRSResourceSetPeriod[0], prs_config->PRSResourceSetPeriod[1]);
+    LOG_I(PHY, "NumRB \t\t\t%d\n", prs_config->NumRB);
+    LOG_I(PHY, "RBOffset \t\t%d\n", prs_config->RBOffset);
+    LOG_I(PHY, "CombSize \t\t%d\n", prs_config->CombSize);
+    LOG_I(PHY, "PRSResourceRepetition \t%d\n", prs_config->PRSResourceRepetition);
+    LOG_I(PHY, "PRSResourceTimeGap \t%d\n", prs_config->PRSResourceTimeGap);
+    LOG_I(PHY, "MutingBitRepetition \t%d\n", prs_config->MutingBitRepetition);
+    LOG_I(PHY, "SymbolStart \t\t[%s]\n", str[0]);
+    LOG_I(PHY, "NumPRSSymbols \t\t[%s]\n", str[1]);
+    LOG_I(PHY, "REOffset \t\t[%s]\n", str[2]);
+    LOG_I(PHY, "PRSResourceOffset \t[%s]\n", str[3]);
+    LOG_I(PHY, "NPRS_ID \t\t[%s]\n", str[4]);
+    LOG_I(PHY, "MutingPattern1 \t\t[%s]\n", str[5]);
+    LOG_I(PHY, "MutingPattern2 \t\t[%s]\n", str[6]);
+    LOG_I(PHY, "-----------------------------------------\n");
+  } // for j
+}
+
 void RCconfig_NR_L1(void) {
   int j;
   paramdef_t GNBSParams[] = GNBSPARAMS_DESC;
@@ -1065,24 +1161,6 @@ void RCconfig_NRRRC(MessageDef *msg_p, uint32_t i, gNB_RRC_INST *rrc) {
   num_gnbs = GNBSParams[GNB_ACTIVE_GNBS_IDX].numelt;
   AssertFatal (i<num_gnbs,"Failed to parse config file no %ith element in %s \n",i, GNB_CONFIG_STRING_ACTIVE_GNBS);
 
-  /*
-  if (AMF_MODE_ENABLED) {
-    if (strcasecmp( *(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr), GNB_CONFIG_STRING_ASN1_VERBOSITY_NONE) == 0) {
-      asn_debug      = 0;
-      asn1_xer_print = 0;
-    } else if (strcasecmp( *(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr), GNB_CONFIG_STRING_ASN1_VERBOSITY_INFO) == 0) {
-      asn_debug      = 1;
-      asn1_xer_print = 1;
-    } else if (strcasecmp(*(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr) , GNB_CONFIG_STRING_ASN1_VERBOSITY_ANNOYING) == 0) {
-      asn_debug      = 1;
-      asn1_xer_print = 2;
-    } else {
-      asn_debug      = 0;
-      asn1_xer_print = 0;
-    }
-  }
-  */
-
   if (num_gnbs>0) {
 
     // Output a list of all gNBs. ////////// Identification parameters
@@ -1090,7 +1168,7 @@ void RCconfig_NRRRC(MessageDef *msg_p, uint32_t i, gNB_RRC_INST *rrc) {
     
     if (GNBParamList.paramarray[i][GNB_GNB_ID_IDX].uptr == NULL) {
     // Calculate a default gNB ID
-      if (AMF_MODE_ENABLED) { 
+      if (get_softmodem_params()->sa) { 
         uint32_t hash;
         hash = ngap_generate_gNB_id ();
         gnb_id = i + (hash & 0xFFFFFF8);
@@ -1267,6 +1345,8 @@ void RCconfig_NRRRC(MessageDef *msg_p, uint32_t i, gNB_RRC_INST *rrc) {
         NRRRC_CONFIGURATION_REQ (msg_p).scd = scd;
         NRRRC_CONFIGURATION_REQ (msg_p).enable_sdap = *GNBParamList.paramarray[i][GNB_ENABLE_SDAP_IDX].iptr;
         LOG_I(GNB_APP, "SDAP layer is %s\n", NRRRC_CONFIGURATION_REQ (msg_p).enable_sdap ? "enabled" : "disabled");
+        NRRRC_CONFIGURATION_REQ (msg_p).drbs = *GNBParamList.paramarray[i][GNB_DRBS].iptr;
+        LOG_I(GNB_APP, "Data Radio Bearer count %d\n", NRRRC_CONFIGURATION_REQ (msg_p).drbs);
 
       }//
     }//End for (k=0; k <num_gnbs ; k++)
@@ -1350,26 +1430,8 @@ int RCconfig_NR_NG(MessageDef *msg_p, uint32_t i) {
 
   /* get global parameters, defined outside any section in the config file */
   config_get( GNBSParams,sizeof(GNBSParams)/sizeof(paramdef_t),NULL); 
-
-  /*
-  if (AMF_MODE_ENABLED) {
-    if (strcasecmp( *(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr), GNB_CONFIG_STRING_ASN1_VERBOSITY_NONE) == 0) {
-      asn_debug      = 0;
-      asn1_xer_print = 0;
-    } else if (strcasecmp( *(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr), GNB_CONFIG_STRING_ASN1_VERBOSITY_INFO) == 0) {
-      asn_debug      = 1;
-      asn1_xer_print = 1;
-    } else if (strcasecmp(*(GNBSParams[GNB_ASN1_VERBOSITY_IDX].strptr) , GNB_CONFIG_STRING_ASN1_VERBOSITY_ANNOYING) == 0) {
-      asn_debug      = 1;
-      asn1_xer_print = 2;
-    } else {
-      asn_debug      = 0;
-      asn1_xer_print = 0;
-    }
-  }
-  */
   
-    AssertFatal (i<GNBSParams[GNB_ACTIVE_GNBS_IDX].numelt,
+  AssertFatal (i<GNBSParams[GNB_ACTIVE_GNBS_IDX].numelt,
      "Failed to parse config file %s, %uth attribute %s \n",
      RC.config_file_name, i, GNB_CONFIG_STRING_ACTIVE_GNBS);
     
@@ -1381,7 +1443,7 @@ int RCconfig_NR_NG(MessageDef *msg_p, uint32_t i) {
       for (k = 0; k < GNBParamList.numelt; k++) {
         if (GNBParamList.paramarray[k][GNB_GNB_ID_IDX].uptr == NULL) {
           // Calculate a default gNB ID
-          if (AMF_MODE_ENABLED) {
+          if (get_softmodem_params()->sa) {
             uint32_t hash;
           
           hash = ngap_generate_gNB_id ();
@@ -1537,7 +1599,7 @@ int RCconfig_NR_NG(MessageDef *msg_p, uint32_t i) {
             // SCTP SETTING
             NGAP_REGISTER_GNB_REQ (msg_p).sctp_out_streams = SCTP_OUT_STREAMS;
             NGAP_REGISTER_GNB_REQ (msg_p).sctp_in_streams  = SCTP_IN_STREAMS;
-            if (AMF_MODE_ENABLED) {
+            if (get_softmodem_params()->sa) {
               sprintf(aprefix,"%s.[%i].%s",GNB_CONFIG_STRING_GNB_LIST,k,GNB_CONFIG_STRING_SCTP_CONFIG);
               config_get( SCTPParams,sizeof(SCTPParams)/sizeof(paramdef_t),aprefix); 
               NGAP_REGISTER_GNB_REQ (msg_p).sctp_in_streams = (uint16_t)*(SCTPParams[GNB_SCTP_INSTREAMS_IDX].uptr);
@@ -1661,7 +1723,7 @@ int RCconfig_NR_X2(MessageDef *msg_p, uint32_t i) {
       for (k = 0; k < GNBParamList.numelt; k++) {
         if (GNBParamList.paramarray[k][GNB_GNB_ID_IDX].uptr == NULL) {
           // Calculate a default eNB ID
-          if (AMF_MODE_ENABLED) {
+          if (get_softmodem_params()->sa) {
             uint32_t hash;
             hash = ngap_generate_gNB_id ();
             gnb_id = k + (hash & 0xFFFFFF8);
@@ -1804,7 +1866,7 @@ int RCconfig_NR_X2(MessageDef *msg_p, uint32_t i) {
             X2AP_REGISTER_ENB_REQ (msg_p).sctp_out_streams = SCTP_OUT_STREAMS;
             X2AP_REGISTER_ENB_REQ (msg_p).sctp_in_streams  = SCTP_IN_STREAMS;
 
-            if (AMF_MODE_ENABLED) {
+            if (get_softmodem_params()->sa) {
               sprintf(aprefix,"%s.[%i].%s",GNB_CONFIG_STRING_GNB_LIST,k,GNB_CONFIG_STRING_SCTP_CONFIG);
               config_get( SCTPParams,sizeof(SCTPParams)/sizeof(paramdef_t),aprefix);
               X2AP_REGISTER_ENB_REQ (msg_p).sctp_in_streams = (uint16_t)*(SCTPParams[GNB_SCTP_INSTREAMS_IDX].uptr);
@@ -2297,6 +2359,7 @@ void nr_read_config_and_init(void) {
   uint32_t    gnb_nb = RC.nb_nr_inst;
 
   RCconfig_NR_L1();
+  RCconfig_nr_prs();
   RCconfig_nr_macrlc();
 
   LOG_I(PHY, "%s() RC.nb_nr_L1_inst:%d\n", __FUNCTION__, RC.nb_nr_L1_inst);
diff --git a/openair2/GNB_APP/gnb_config.h b/openair2/GNB_APP/gnb_config.h
index 013997f4397e34bf748ccb59014308af4c3b9e53..b75277545c9e944122b41a70427a45308753d42f 100644
--- a/openair2/GNB_APP/gnb_config.h
+++ b/openair2/GNB_APP/gnb_config.h
@@ -92,6 +92,7 @@ typedef struct ru_config_s {
 */
 extern void NRRCconfig_RU(void);
 extern void RCconfig_nr_flexran(void);
+extern void RCconfig_nr_prs(void);
 extern void RCconfig_NR_L1(void);
 extern void RCconfig_nr_macrlc(void);
 extern int  RCconfig_nr_gtpu(void );
diff --git a/openair2/GNB_APP/gnb_paramdef.h b/openair2/GNB_APP/gnb_paramdef.h
index 9572d16d8f4c1fce65fee032bf6752b60d6bfdb4..43780429790be58e9fd0e8e34a69cb93af9eedde 100644
--- a/openair2/GNB_APP/gnb_paramdef.h
+++ b/openair2/GNB_APP/gnb_paramdef.h
@@ -128,6 +128,8 @@ typedef enum {
 #define GNB_CONFIG_STRING_ENABLE_SDAP                   "enable_sdap"
 #define GNB_CONFIG_HLP_STRING_ENABLE_SDAP               "enable the SDAP layer\n"
 #define GNB_CONFIG_HLP_FORCE256QAMOFF                   "suppress activation of 256 QAM despite UE support"
+#define GNB_CONFIG_STRING_DRBS                          "drbs"
+#define GNB_CONFIG_HLP_STRING_DRBS                      "Total number of DRBs to establish, including the mandatory one for the PDU Session (default=1)\n"
 
 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
 /*                                            cell configuration parameters                                                                */
@@ -161,6 +163,7 @@ typedef enum {
 {GNB_CONFIG_STRING_UMONDEFAULTDRB,               NULL,   0,            uptr:NULL,   defuintval:0,                TYPE_UINT,      0},  \
 {GNB_CONFIG_STRING_FORCE256QAMOFF, GNB_CONFIG_HLP_FORCE256QAMOFF, PARAMFLAG_BOOL, iptr:NULL, defintval:0,        TYPE_INT,       0},  \
 {GNB_CONFIG_STRING_ENABLE_SDAP, GNB_CONFIG_HLP_STRING_ENABLE_SDAP, PARAMFLAG_BOOL, iptr:NULL, defintval:0,       TYPE_INT,       0},  \
+{GNB_CONFIG_STRING_DRBS, GNB_CONFIG_HLP_STRING_DRBS,     0,            iptr:NULL,   defintval:1,                 TYPE_INT,       0},  \
 }
 
 #define GNB_GNB_ID_IDX                  0
@@ -190,6 +193,7 @@ typedef enum {
 #define GNB_UMONDEFAULTDRB_IDX          24
 #define GNB_FORCE256QAMOFF_IDX          25
 #define GNB_ENABLE_SDAP_IDX             26
+#define GNB_DRBS                        27
 
 #define TRACKING_AREA_CODE_OKRANGE {0x0001,0xFFFD}
 #define GNBPARAMS_CHECK {                                         \
diff --git a/openair2/LAYER2/MAC/mac.h b/openair2/LAYER2/MAC/mac.h
index 1fd7182c6f9d5ab0b4affc4eb50a71cea2044ceb..12ffaf4a5b782d1bdbf57eda7dafe35fe55ff827 100644
--- a/openair2/LAYER2/MAC/mac.h
+++ b/openair2/LAYER2/MAC/mac.h
@@ -74,7 +74,7 @@
 #include "PHY/defs_common.h" // for PRACH_RESOURCES_t
 #include "PHY/LTE_TRANSPORT/transport_common.h"
 
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 
 /** @defgroup _mac  MAC
  * @ingroup _oai2
diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac.h b/openair2/LAYER2/NR_MAC_COMMON/nr_mac.h
index 01c1d39de91bff49ec13574fff6ed77799fa4e26..ef9e6360b6ae9f3bfdcd6e68cef091d3314cc6d6 100644
--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac.h
+++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac.h
@@ -540,5 +540,29 @@ typedef struct nr_csi_report {
   int N2;
 } nr_csi_report_t;
 
+typedef enum { // SRI values: the digits in each name list the selected SRS resource indices (15 combinations of resources 0..3)
+  NR_SRS_SRI_0 = 0,
+  NR_SRS_SRI_1,
+  NR_SRS_SRI_2,
+  NR_SRS_SRI_3,
+  NR_SRS_SRI_0_1,
+  NR_SRS_SRI_0_2,
+  NR_SRS_SRI_0_3,
+  NR_SRS_SRI_1_2,
+  NR_SRS_SRI_1_3,
+  NR_SRS_SRI_2_3,
+  NR_SRS_SRI_0_1_2,
+  NR_SRS_SRI_0_1_3,
+  NR_SRS_SRI_0_2_3,
+  NR_SRS_SRI_1_2_3,
+  NR_SRS_SRI_0_1_2_3
+} nr_srs_sri_t; // NOTE(review): presumably the column order of table_7_3_1_1_2_28..31 in nr_mac_common.c - confirm
+
+typedef struct nr_srs_feedback { // SRS-derived feedback consumed when filling the SRI and precoding information DCI fields
+  uint8_t sri;   // SRS resource indicator: column index used by compute_srs_resource_indicator() in the TS 38.212 SRI tables
+  uint8_t ul_ri; // presumably the uplink rank indicator - TODO confirm against the code that fills and reads it
+  uint8_t tpmi;  // TPMI: range-checked and mapped to the DCI field value by compute_precoding_information()
+} nr_srs_feedback_t;
+
 #endif /*__LAYER2_MAC_H__ */
 
diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
index 4b4e1f34b1661f6a183ac453b0a427c53425e6eb..94bb084f89b1235f1c5a9cd2ea64a35ea927af02 100644
--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
+++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
@@ -312,6 +312,80 @@ const uint8_t table_5_1_2_1_1_5_time_dom_res_alloc_C_dmrs_typeA_pos3[16][4]={
     {1,0,2,6}   // row index 16
 };
 
+// TS 38.211 - Table 6.3.1.5-1: Precoding matrix W for single-layer transmission using two antenna ports, 'n' = -1 and 'o' = -j
+const char table_38211_6_3_1_5_1[6][2][1] = {
+    {{'1'}, {'0'}}, // tpmi 0
+    {{'0'}, {'1'}}, // tpmi 1
+    {{'1'}, {'1'}}, // tpmi 2
+    {{'1'}, {'n'}}, // tpmi 3
+    {{'1'}, {'j'}}, // tpmi 4
+    {{'1'}, {'o'}}  // tpmi 5
+};
+
+// TS 38.211 - Table 6.3.1.5-2: Precoding matrix W for single-layer transmission using four antenna ports with transform precoding enabled, 'n' = -1 and 'o' = -j
+const char table_38211_6_3_1_5_2[28][4][1] = {
+    {{'1'}, {'0'}, {'0'}, {'0'}}, // tpmi 0
+    {{'0'}, {'1'}, {'0'}, {'0'}}, // tpmi 1
+    {{'0'}, {'0'}, {'1'}, {'0'}}, // tpmi 2
+    {{'0'}, {'0'}, {'0'}, {'1'}}, // tpmi 3
+    {{'1'}, {'0'}, {'1'}, {'0'}}, // tpmi 4
+    {{'1'}, {'0'}, {'n'}, {'0'}}, // tpmi 5
+    {{'1'}, {'0'}, {'j'}, {'0'}}, // tpmi 6
+    {{'1'}, {'0'}, {'o'}, {'0'}}, // tpmi 7
+    {{'0'}, {'1'}, {'0'}, {'1'}}, // tpmi 8
+    {{'0'}, {'1'}, {'0'}, {'n'}}, // tpmi 9
+    {{'0'}, {'1'}, {'0'}, {'j'}}, // tpmi 10
+    {{'0'}, {'1'}, {'0'}, {'o'}}, // tpmi 11
+    {{'1'}, {'1'}, {'1'}, {'n'}}, // tpmi 12
+    {{'1'}, {'1'}, {'j'}, {'j'}}, // tpmi 13
+    {{'1'}, {'1'}, {'n'}, {'1'}}, // tpmi 14
+    {{'1'}, {'1'}, {'o'}, {'o'}}, // tpmi 15
+    {{'1'}, {'j'}, {'1'}, {'j'}}, // tpmi 16
+    {{'1'}, {'j'}, {'j'}, {'1'}}, // tpmi 17
+    {{'1'}, {'j'}, {'n'}, {'o'}}, // tpmi 18
+    {{'1'}, {'j'}, {'o'}, {'n'}}, // tpmi 19
+    {{'1'}, {'n'}, {'1'}, {'1'}}, // tpmi 20
+    {{'1'}, {'n'}, {'j'}, {'o'}}, // tpmi 21
+    {{'1'}, {'n'}, {'n'}, {'n'}}, // tpmi 22
+    {{'1'}, {'n'}, {'o'}, {'j'}}, // tpmi 23
+    {{'1'}, {'o'}, {'1'}, {'o'}}, // tpmi 24
+    {{'1'}, {'o'}, {'j'}, {'n'}}, // tpmi 25
+    {{'1'}, {'o'}, {'n'}, {'j'}}, // tpmi 26
+    {{'1'}, {'o'}, {'o'}, {'1'}}  // tpmi 27
+};
+
+// TS 38.211 - Table 6.3.1.5-3: Precoding matrix W for single-layer transmission using four antenna ports with transform precoding disabled, 'n' = -1 and 'o' = -j
+const char table_38211_6_3_1_5_3[28][4][1] = {
+    {{'1'}, {'0'}, {'0'}, {'0'}}, // tpmi 0
+    {{'0'}, {'1'}, {'0'}, {'0'}}, // tpmi 1
+    {{'0'}, {'0'}, {'1'}, {'0'}}, // tpmi 2
+    {{'0'}, {'0'}, {'0'}, {'1'}}, // tpmi 3
+    {{'1'}, {'0'}, {'1'}, {'0'}}, // tpmi 4
+    {{'1'}, {'0'}, {'n'}, {'0'}}, // tpmi 5
+    {{'1'}, {'0'}, {'j'}, {'0'}}, // tpmi 6
+    {{'1'}, {'0'}, {'o'}, {'0'}}, // tpmi 7
+    {{'0'}, {'1'}, {'0'}, {'1'}}, // tpmi 8
+    {{'0'}, {'1'}, {'0'}, {'n'}}, // tpmi 9
+    {{'0'}, {'1'}, {'0'}, {'j'}}, // tpmi 10
+    {{'0'}, {'1'}, {'0'}, {'o'}}, // tpmi 11
+    {{'1'}, {'1'}, {'1'}, {'1'}}, // tpmi 12
+    {{'1'}, {'1'}, {'j'}, {'j'}}, // tpmi 13
+    {{'1'}, {'1'}, {'n'}, {'n'}}, // tpmi 14
+    {{'1'}, {'1'}, {'o'}, {'o'}}, // tpmi 15
+    {{'1'}, {'j'}, {'1'}, {'j'}}, // tpmi 16
+    {{'1'}, {'j'}, {'j'}, {'n'}}, // tpmi 17
+    {{'1'}, {'j'}, {'n'}, {'o'}}, // tpmi 18
+    {{'1'}, {'j'}, {'o'}, {'1'}}, // tpmi 19
+    {{'1'}, {'n'}, {'1'}, {'n'}}, // tpmi 20
+    {{'1'}, {'n'}, {'j'}, {'o'}}, // tpmi 21
+    {{'1'}, {'n'}, {'n'}, {'1'}}, // tpmi 22
+    {{'1'}, {'n'}, {'o'}, {'j'}}, // tpmi 23
+    {{'1'}, {'o'}, {'1'}, {'o'}}, // tpmi 24
+    {{'1'}, {'o'}, {'j'}, {'1'}}, // tpmi 25
+    {{'1'}, {'o'}, {'n'}, {'j'}}, // tpmi 26
+    {{'1'}, {'o'}, {'o'}, {'n'}}  // tpmi 27
+};
+
 void get_info_from_tda_tables(int default_abc,
                               int tda,
                               int dmrs_TypeA_Position,
@@ -1956,6 +2030,46 @@ int32_t table_6_4_1_1_3_4_pusch_dmrs_positions_l [12][8] = {
 {0,         3072,          -1,         -1,          3,       1539,        -1,         -1},       //14              // (DMRS l' position)
 };
 
+// TS 38.212 - Section 7.3.1.1.2: DCI field values. Tables 2/2A/2B/3A/4/4A are indexed by TPMI (precoding information), tables 28 to 32 by [count - 2][sri] (SRS resource indicator)
+uint16_t table_7_3_1_1_2_2_1layer[28] = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47};
+uint16_t table_7_3_1_1_2_2_2layers[22] = {4, 5, 6, 7, 8, 9, 20, 21, 22, 23, 24, 25, 26, 27, 48, 49, 50, 51, 52, 53, 54, 55};
+uint16_t table_7_3_1_1_2_2_3layers[7] = {10, 28, 29, 56, 57, 58, 59};
+uint16_t table_7_3_1_1_2_2_4layers[5] = {11, 30, 31, 60, 61};
+uint16_t table_7_3_1_1_2_2B_1layer[16] = {0, 1, 2, 3, 15, 16, 17, 18, 19, 20, 21, 22, 23, 12, 24, 25};
+uint16_t table_7_3_1_1_2_2B_2layers[14] = {4, 5, 6, 7, 8, 9, 13, 26, 27, 28, 29, 30, 31, 32};
+uint16_t table_7_3_1_1_2_2B_3layers[3] = {10, 14, 33};
+uint16_t table_7_3_1_1_2_2B_4layers[3] = {11, 34, 35};
+uint16_t table_7_3_1_1_2_2A_1layer[16] = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 10, 21, 22};
+uint16_t table_7_3_1_1_2_2A_2layers[14] = {4, 5, 6, 7, 8, 9, 11, 23, 24, 25, 26, 27, 28, 29};
+uint16_t table_7_3_1_1_2_3A[16] = {0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 4, 14, 15};
+uint16_t table_7_3_1_1_2_4_1layer_fullyAndPartialAndNonCoherent[6] = {0, 1, 3, 4, 5, 6};
+uint16_t table_7_3_1_1_2_4_2layers_fullyAndPartialAndNonCoherent[3] = {2, 7, 8};
+uint16_t table_7_3_1_1_2_4A_1layer[3] = {0, 1, 3};
+uint16_t table_7_3_1_1_2_28[3][15] = {
+    {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+};
+uint16_t table_7_3_1_1_2_29[3][15] = {
+    {0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 0, 3, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0},
+};
+uint16_t table_7_3_1_1_2_30[3][15] = {
+    {0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 0, 3, 4, 0, 5, 0, 0, 6, 0, 0, 0, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0},
+};
+uint16_t table_7_3_1_1_2_31[3][15] = {
+    {0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 0, 3, 4, 0, 5, 0, 0, 6, 0, 0, 0, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+};
+uint16_t table_7_3_1_1_2_32[3][15] = {
+    {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+};
 
 void get_delta_arfcn(int i, uint32_t nrarfcn, uint64_t N_OFFs){
 
@@ -2599,13 +2713,18 @@ uint8_t get_pusch_nb_antenna_ports(NR_PUSCH_Config_t *pusch_Config,
     n_antenna_port = *pusch_Config->maxRank;
   }
   else {
-    if(srs_config != NULL && srs_resource_indicator.nbits > 0) {
+    uint8_t sri = srs_resource_indicator.nbits > 0 ? srs_resource_indicator.val : 0;
+    if(srs_config != NULL) {
       for(int rs = 0; rs < srs_config->srs_ResourceSetToAddModList->list.count; rs++) {
         NR_SRS_ResourceSet_t *srs_resource_set = srs_config->srs_ResourceSetToAddModList->list.array[rs];
-        if(srs_resource_set->usage == NR_SRS_ResourceSet__usage_codebook) {
-          NR_SRS_Resource_t *srs_resource = srs_config->srs_ResourceToAddModList->list.array[srs_resource_indicator.val];
-          AssertFatal(srs_resource != NULL,"SRS resource indicated by DCI does not exist\n");
-          n_antenna_port = 1<<srs_resource->nrofSRS_Ports;
+        // When multiple SRS resources are configured by SRS-ResourceSet with usage set to 'codebook',
+        // the UE shall expect that higher layer parameters nrofSRS-Ports in SRS-Resource in SRS-ResourceSet
+        // shall be configured with the same value for all these SRS resources.
+        if (srs_resource_set->usage == NR_SRS_ResourceSet__usage_codebook) {
+          NR_SRS_Resource_t *srs_resource = srs_config->srs_ResourceToAddModList->list.array[sri];
+          AssertFatal(srs_resource != NULL, "SRS resource indicated by DCI does not exist\n");
+          n_antenna_port = 1 << srs_resource->nrofSRS_Ports;
+          break;
         }
       }
     }
@@ -2613,6 +2732,372 @@ uint8_t get_pusch_nb_antenna_ports(NR_PUSCH_Config_t *pusch_Config,
   return n_antenna_port;
 }
 
+// #define DEBUG_SRS_RESOURCE_IND
+// Computes the size in bits of the DCI SRS resource indicator field (TS 38.212 - Section 7.3.1.1.2) and, when both 'val'
+// and 'srs_feedback' are given and the field is not empty, writes the value selected by srs_feedback->sri into '*val'
+// ('*val' is 0 otherwise). Returns 0 bits when srs_config, pusch_Config or pusch_Config->txConfig is NULL. When txConfig
+// is not 'codebook', a missing maxMIMO_Layers (NULL pusch_servingcellconfig, NULL ext1 or NULL field) is a fatal error.
+uint8_t compute_srs_resource_indicator(NR_PUSCH_ServingCellConfig_t *pusch_servingcellconfig,
+                                       NR_PUSCH_Config_t *pusch_Config,
+                                       NR_SRS_Config_t *srs_config,
+                                       nr_srs_feedback_t *srs_feedback,
+                                       uint16_t *val)
+{
+  uint8_t nbits = 0;
+
+  // SRI occupies a number of bits which is dependent upon the uplink transmission scheme, and it is used to determine
+  // the antenna ports and uplink transmission beam to use for PUSCH transmission. In the case of codebook based
+  // transmission, the SRI is used to select between SRS Resources belonging to different antenna panels
+  // (kind of directional antenna). There can be up to 2 SRS Resources (2 antenna panels). In the case of non-codebook
+  // based transmission, the SRI is used to select one or more SRS Resources from a set of N_SRS resources. The number
+  // of SRS Resources selected corresponds to the number of layers (rank) to be transmitted.
+  if (val) {
+    *val = 0;
+  }
+
+  if (srs_config && pusch_Config && pusch_Config->txConfig != NULL) {
+
+    if (*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook) {
+
+#ifdef DEBUG_SRS_RESOURCE_IND
+      LOG_I(NR_MAC, "*pusch_Config->txConfig = NR_PUSCH_Config__txConfig_codebook\n");
+#endif
+
+      // TS 38.212 - Section 7.3.1.1.2: SRS resource indicator has ceil(log2(N_SRS)) bits according to
+      // Tables 7.3.1.1.2-32, 7.3.1.1.2-32A and 7.3.1.1.2-32B if the higher layer parameter txConfig = codebook,
+      // where N_SRS is the number of configured SRS resources in the SRS resource set configured by higher layer
+      // parameter srs-ResourceSetToAddModList, and associated with the higher layer parameter usage of value codeBook.
+      int count = 0;
+      for (int i=0; i<srs_config->srs_ResourceSetToAddModList->list.count; i++) {
+        if (srs_config->srs_ResourceSetToAddModList->list.array[i]->usage == NR_SRS_ResourceSet__usage_codebook) {
+          count++;
+        }
+      }
+      if (count>0) {
+        nbits = ceil(log2(count));
+        if (val && srs_feedback && nbits > 0) {
+          *val = table_7_3_1_1_2_32[count-2][srs_feedback->sri];
+        }
+      }
+
+#ifdef DEBUG_SRS_RESOURCE_IND
+      LOG_I(NR_MAC, "srs_config->srs_ResourceSetToAddModList->list.count = %i\n", srs_config->srs_ResourceSetToAddModList->list.count);
+      LOG_I(NR_MAC, "count = %i\n", count);
+#endif
+
+    } else {
+
+#ifdef DEBUG_SRS_RESOURCE_IND
+      LOG_I(NR_MAC, "*pusch_Config->txConfig = NR_PUSCH_Config__txConfig_nonCodebook\n");
+#endif
+
+      // TS 38.212 - Section 7.3.1.1.2: SRS resource indicator has ceil(log2(sum(k = 1 until min(Lmax,N_SRS) of binomial(N_SRS,k))))
+      // bits according to Tables 7.3.1.1.2-28/29/30/31 if the higher layer parameter txConfig = nonCodebook, where
+      // N_SRS is the number of configured SRS resources in the SRS resource set configured by higher layer parameter
+      // srs-ResourceSetToAddModList, and associated with the higher layer parameter usage of value nonCodeBook and:
+      //
+      // - if UE supports operation with maxMIMO-Layers and the higher layer parameter maxMIMO-Layers of
+      // PUSCH-ServingCellConfig of the serving cell is configured, Lmax is given by that parameter;
+      //
+      // - otherwise, Lmax is given by the maximum number of layers for PUSCH supported by the UE for the serving cell
+      // for non-codebook based operation.
+      int Lmax = 0;
+      if (pusch_servingcellconfig != NULL && pusch_servingcellconfig->ext1 != NULL && pusch_servingcellconfig->ext1->maxMIMO_Layers != NULL) {
+        Lmax = *pusch_servingcellconfig->ext1->maxMIMO_Layers;
+      } else {
+        AssertFatal(1 == 0, "MIMO on PUSCH not supported, maxMIMO_Layers needs to be set to 1\n");
+      }
+      int lmin = 0;
+      int lsum = 0;
+      int count = 0;
+      for (int i = 0; i < srs_config->srs_ResourceSetToAddModList->list.count; i++) {
+        if (srs_config->srs_ResourceSetToAddModList->list.array[i]->usage == NR_SRS_ResourceSet__usage_nonCodebook) {
+          count++;
+        }
+      }
+      lmin = count < Lmax ? count : Lmax;
+      for (int k=1;k<=lmin;k++) {
+        lsum += binomial(count,k);
+      }
+      if (lsum>0) {
+        nbits = ceil(log2(lsum));
+        if (val && srs_feedback && nbits > 0) {
+          switch(Lmax) {
+            case 1:
+              *val = table_7_3_1_1_2_28[count-2][srs_feedback->sri];
+              break;
+            case 2:
+              *val = table_7_3_1_1_2_29[count-2][srs_feedback->sri];
+              break;
+            case 3:
+              *val = table_7_3_1_1_2_30[count-2][srs_feedback->sri];
+              break;
+            case 4:
+              *val = table_7_3_1_1_2_31[count-2][srs_feedback->sri];
+              break;
+            default:
+              LOG_E(NR_MAC, "%s (%d) - Invalid Lmax %d\n", __FUNCTION__, __LINE__, Lmax);
+          }
+        }
+      }
+
+#ifdef DEBUG_SRS_RESOURCE_IND
+      LOG_I(NR_MAC, "srs_config->srs_ResourceSetToAddModList->list.count = %i\n", srs_config->srs_ResourceSetToAddModList->list.count);
+      LOG_I(NR_MAC, "count = %i\n", count);
+      LOG_I(NR_MAC, "Lmax = %i\n", Lmax);
+      LOG_I(NR_MAC, "lsum = %i\n", lsum);
+#endif
+
+    }
+  }
+
+  return nbits;
+}
+
+// Computes the size in bits of the DCI precoding information field (TS 38.212 - Tables 7.3.1.1.2-2 to 7.3.1.1.2-5A) from
+// the number of PUSCH antenna ports, maxRank, codebookSubset and ul-FullPowerTransmission. When both 'val' and
+// 'srs_feedback' are given, srs_feedback->tpmi is checked against the TPMIs allowed for the selected codebook (fatal if
+// invalid) and, together with '*nrOfLayers' when maxRank != 1, mapped to the field value written into '*val'.
+uint8_t compute_precoding_information(NR_PUSCH_Config_t *pusch_Config,
+                                      NR_SRS_Config_t *srs_config,
+                                      dci_field_t srs_resource_indicator,
+                                      nr_srs_feedback_t *srs_feedback,
+                                      const uint8_t *nrOfLayers,
+                                      uint16_t *val) {
+  // It is only applicable to codebook based transmission. This field occupies 0 bits for non-codebook based
+  // transmission. It also occupies 0 bits for codebook based transmission using a single antenna port.
+  uint8_t nbits = 0;
+  if (val) {
+    *val = 0;
+  }
+
+  uint8_t pusch_antenna_ports = get_pusch_nb_antenna_ports(pusch_Config, srs_config, srs_resource_indicator);
+  if ((pusch_Config && pusch_Config->txConfig != NULL && *pusch_Config->txConfig == NR_PUSCH_Config__txConfig_nonCodebook) ||
+      pusch_antenna_ports == 1) {
+    return nbits;
+  }
+
+  long max_rank = *pusch_Config->maxRank;
+  long *ul_FullPowerTransmission = pusch_Config->ext1 ? pusch_Config->ext1->ul_FullPowerTransmission_r16 : NULL;
+  long *codebookSubset = pusch_Config->codebookSubset;
+
+  if (pusch_antenna_ports == 2) {
+    if (max_rank == 1) {
+      // - 1 or 3 bits according to Table 7.3.1.1.2-5 for 2 antenna ports, if txConfig = codebook, ul-FullPowerTransmission
+      //   is not configured or configured to fullpowerMode2 or configured to fullpower, and according to whether transform
+      //   precoder is enabled or disabled, and the values of higher layer parameters maxRank and codebookSubset;
+      // - 2 bits according to Table 7.3.1.1.2-5A for 2 antenna ports, if txConfig = codebook, ul-FullPowerTransmission =
+      //   fullpowerMode1, maxRank=1, and according to whether transform precoder is enabled or disabled, and the values
+      //   of higher layer parameter codebookSubset;
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        nbits = 2;
+        if (val && srs_feedback) {
+          AssertFatal(srs_feedback->tpmi <= 2,"TPMI %d is invalid!\n", srs_feedback->tpmi);
+          *val = srs_feedback->tpmi;
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          nbits = 1;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 1,"TPMI %d is invalid!\n", srs_feedback->tpmi);
+            *val = srs_feedback->tpmi;
+          }
+        } else {
+          nbits = 3;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 5,"TPMI %d is invalid!\n", srs_feedback->tpmi);
+            *val = srs_feedback->tpmi;
+          }
+        }
+      }
+    } else {
+      // - 2 or 4 bits according to Table 7.3.1.1.2-4 for 2 antenna ports, if txConfig = codebook, ul-FullPowerTransmission
+      //   is not configured or configured to fullpowerMode2 or configured to fullpower, and according to whether transform
+      //   precoder is enabled or disabled, and the values of higher layer parameters maxRank and codebookSubset;
+      // - 2 bits according to Table 7.3.1.1.2-4A for 2 antenna ports, if txConfig = codebook, ul-FullPowerTransmission =
+      //   fullpowerMode1, transform precoder is disabled, maxRank=2, and codebookSubset=nonCoherent;
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        nbits = 2;
+        if (val && srs_feedback) {
+          AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 2) || (*nrOfLayers==2 && srs_feedback->tpmi == 0),
+                      "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          *val = *nrOfLayers==1 ? table_7_3_1_1_2_4A_1layer[srs_feedback->tpmi] : 2;
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          nbits = 2;
+          if (val && srs_feedback) {
+            AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 1) || (*nrOfLayers==2 && srs_feedback->tpmi == 0),
+                        "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            *val = *nrOfLayers==1 ? srs_feedback->tpmi : 2;
+          }
+        } else {
+          nbits = 4;
+          if (val && srs_feedback) {
+            AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 5) || (*nrOfLayers==2 && srs_feedback->tpmi <= 2),
+                        "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            *val = *nrOfLayers==1 ? table_7_3_1_1_2_4_1layer_fullyAndPartialAndNonCoherent[srs_feedback->tpmi] :
+                                    table_7_3_1_1_2_4_2layers_fullyAndPartialAndNonCoherent[srs_feedback->tpmi];
+          }
+        }
+      }
+    }
+  } else if (pusch_antenna_ports == 4) {
+    if (max_rank == 1) {
+      // - 2, 4, or 5 bits according to Table 7.3.1.1.2-3 for 4 antenna ports, if txConfig = codebook, ul-FullPowerTransmission
+      //   is not configured or configured to fullpowerMode2 or configured to fullpower, and according to whether transform
+      //   precoder is enabled or disabled, and the values of higher layer parameters maxRank, and codebookSubset;
+      // - 3 or 4 bits according to Table 7.3.1.1.2-3A for 4 antenna ports, if txConfig = codebook, ul-FullPowerTransmission =
+      //   fullpowerMode1, maxRank=1, and according to whether transform precoder is enabled or disabled, and the values
+      //   of higher layer parameter codebookSubset;
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          nbits = 3;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 3 || srs_feedback->tpmi == 13, "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        } else {
+          nbits = 4;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 15, "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        }
+        if (val && srs_feedback) {
+          *val = table_7_3_1_1_2_3A[srs_feedback->tpmi];
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          nbits = 2;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 3, "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        } else if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent) {
+          nbits = 4;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 11, "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        } else {
+          nbits = 5;
+          if (val && srs_feedback) {
+            AssertFatal(srs_feedback->tpmi <= 27, "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        }
+        if (val && srs_feedback) {
+          *val = srs_feedback->tpmi;
+        }
+      }
+    } else {
+      // - 4, 5, or 6 bits according to Table 7.3.1.1.2-2 for 4 antenna ports, if txConfig = codebook, ul-FullPowerTransmission
+      //   is not configured or configured to fullpowerMode2 or configured to fullpower, and according to whether transform
+      //   precoder is enabled or disabled, and the values of higher layer parameters maxRank, and codebookSubset;
+      // - 4 or 5 bits according to Table 7.3.1.1.2-2A for 4 antenna ports, if txConfig = codebook, ul-FullPowerTransmission =
+      //   fullpowerMode1, maxRank=2, transform precoder is disabled, and according to the values of higher layer parameter
+      //   codebookSubset;
+      // - 4 or 6 bits according to Table 7.3.1.1.2-2B for 4 antenna ports, if txConfig = codebook, ul-FullPowerTransmission =
+      //   fullpowerMode1, maxRank=3 or 4, transform precoder is disabled, and according to the values of higher layer
+      //   parameter codebookSubset;
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        if (max_rank == 2) {
+          if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+            nbits = 4;
+            if (val && srs_feedback) {
+              AssertFatal((*nrOfLayers==1 && (srs_feedback->tpmi <= 3 || srs_feedback->tpmi==13)) || (*nrOfLayers==2 && srs_feedback->tpmi <= 6),
+                          "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            }
+          } else {
+            nbits = 5;
+            if (val && srs_feedback) {
+              AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 15) || (*nrOfLayers==2 && srs_feedback->tpmi <= 13),
+                          "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            }
+          }
+          if (val && srs_feedback) {
+            *val = *nrOfLayers==1 ? table_7_3_1_1_2_2A_1layer[srs_feedback->tpmi] : table_7_3_1_1_2_2A_2layers[srs_feedback->tpmi];
+          }
+        } else {
+          if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+            nbits = 4;
+            if (val && srs_feedback) {
+              AssertFatal((*nrOfLayers==1 && (srs_feedback->tpmi <= 3 || srs_feedback->tpmi == 13)) || (*nrOfLayers==2 && srs_feedback->tpmi <= 6) ||
+                          (*nrOfLayers==3 && srs_feedback->tpmi <= 1) || (*nrOfLayers==4 && srs_feedback->tpmi == 0),
+                          "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            }
+          } else {
+            nbits = 6;
+            if (val && srs_feedback) {
+              AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 15) || (*nrOfLayers==2 && srs_feedback->tpmi <= 13) ||
+                          (*nrOfLayers==3 && srs_feedback->tpmi <= 2) || (*nrOfLayers==4 && srs_feedback->tpmi <= 2),
+                          "TPMI %d is invalid!\n", srs_feedback->tpmi);
+            }
+          }
+          if (val && srs_feedback) {
+            switch (*nrOfLayers) {
+              case 1:
+                *val = table_7_3_1_1_2_2B_1layer[srs_feedback->tpmi];
+                break;
+              case 2:
+                *val = table_7_3_1_1_2_2B_2layers[srs_feedback->tpmi];
+                break;
+              case 3:
+                *val = table_7_3_1_1_2_2B_3layers[srs_feedback->tpmi];
+                break;
+              case 4:
+                *val = table_7_3_1_1_2_2B_4layers[srs_feedback->tpmi];
+                break;
+              default:
+                LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+            }
+          }
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          nbits = 4;
+          if (val && srs_feedback) {
+            AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 3) || (*nrOfLayers==2 && srs_feedback->tpmi <= 5) ||
+                        (*nrOfLayers==3 && srs_feedback->tpmi == 0) || (*nrOfLayers==4 && srs_feedback->tpmi == 0),
+                        "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        } else if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent) {
+          nbits = 5;
+          if (val && srs_feedback) {
+            AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 11) || (*nrOfLayers==2 && srs_feedback->tpmi <= 13) ||
+                        (*nrOfLayers==3 && srs_feedback->tpmi <= 2) || (*nrOfLayers==4 && srs_feedback->tpmi <= 2),
+                        "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        } else {
+          nbits = 6;
+          if (val && srs_feedback) {
+            // The highest valid TPMI is the last index of the matching table_7_3_1_1_2_2_<N>layer(s), i.e. its size minus one
+            AssertFatal((*nrOfLayers==1 && srs_feedback->tpmi <= 27) || (*nrOfLayers==2 && srs_feedback->tpmi <= 21) ||
+                        (*nrOfLayers==3 && srs_feedback->tpmi <= 6) || (*nrOfLayers==4 && srs_feedback->tpmi <= 4),
+                        "TPMI %d is invalid!\n", srs_feedback->tpmi);
+          }
+        }
+        if (val && srs_feedback) {
+          switch (*nrOfLayers) {
+            case 1:
+              *val = table_7_3_1_1_2_2_1layer[srs_feedback->tpmi];
+              break;
+            case 2:
+              *val = table_7_3_1_1_2_2_2layers[srs_feedback->tpmi];
+              break;
+            case 3:
+              *val = table_7_3_1_1_2_2_3layers[srs_feedback->tpmi];
+              break;
+            case 4:
+              *val = table_7_3_1_1_2_2_4layers[srs_feedback->tpmi];
+              break;
+            default:
+              LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+          }
+        }
+      }
+    }
+  }
+
+  return nbits;
+}
+
 uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
                      const NR_BWP_UplinkCommon_t *initialUplinkBWP,
                      const NR_CellGroupConfig_t *cg,
@@ -2629,6 +3114,11 @@ uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
   long rbg_size_config;
   int num_entries = 0;
 
+  NR_UplinkConfig_t	*uplinkConfig = NULL;
+  if (cg && cg->spCellConfig && cg->spCellConfig->spCellConfigDedicated) {
+    uplinkConfig = cg->spCellConfig->spCellConfigDedicated->uplinkConfig;
+  }
+
   const NR_BWP_DownlinkDedicated_t *bwpd = NULL;
   const NR_BWP_UplinkDedicated_t *ubwpd = NULL;
   const NR_BWP_DownlinkCommon_t *bwpc = NULL;
@@ -2640,23 +3130,21 @@ uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
   NR_SRS_Config_t *srs_config = NULL;
   if(bwp_id > 0) {
     AssertFatal(cg!=NULL,"Cellgroup is null and bwp_id!=0");
-    bwpd=cg->spCellConfig->spCellConfigDedicated->downlinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Dedicated;
-    bwpc=cg->spCellConfig->spCellConfigDedicated->downlinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Common;
-    ubwpd=cg->spCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Dedicated;
-    ubwpc=cg->spCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Common;
+    bwpd = cg->spCellConfig->spCellConfigDedicated->downlinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Dedicated;
+    bwpc = cg->spCellConfig->spCellConfigDedicated->downlinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Common;
+    ubwpd = uplinkConfig ? uplinkConfig->uplinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Dedicated : NULL;
+    ubwpc = uplinkConfig ? uplinkConfig->uplinkBWP_ToAddModList->list.array[bwp_id-1]->bwp_Common : NULL;
     pdsch_Config = (bwpd->pdsch_Config) ? bwpd->pdsch_Config->choice.setup : NULL;
     pdcch_Config = (bwpd->pdcch_Config) ? bwpd->pdcch_Config->choice.setup : NULL;
     pucch_Config = (ubwpd->pucch_Config) ? ubwpd->pucch_Config->choice.setup : NULL;
     pusch_Config = (ubwpd->pusch_Config) ? ubwpd->pusch_Config->choice.setup : NULL;
     srs_config = (ubwpd->srs_Config) ? ubwpd->srs_Config->choice.setup : NULL;
-  }
-  else if (cg){
+  } else if (cg) {
     bwpc = initialDownlinkBWP;
     ubwpc = initialUplinkBWP;
     bwpd = cg->spCellConfig && cg->spCellConfig->spCellConfigDedicated ?
            cg->spCellConfig->spCellConfigDedicated->initialDownlinkBWP : NULL;
-    ubwpd = cg->spCellConfig && cg->spCellConfig->spCellConfigDedicated && cg->spCellConfig->spCellConfigDedicated->uplinkConfig ?
-            cg->spCellConfig->spCellConfigDedicated->uplinkConfig->initialUplinkBWP : NULL;
+    ubwpd = uplinkConfig ? uplinkConfig->initialUplinkBWP : NULL;
     pdsch_Config = (bwpd && bwpd->pdsch_Config) ? bwpd->pdsch_Config->choice.setup : NULL;
     pdcch_Config = (bwpd && bwpd->pdcch_Config) ? bwpd->pdcch_Config->choice.setup : NULL;
     pucch_Config = (ubwpd && ubwpd->pucch_Config) ? ubwpd->pucch_Config->choice.setup : NULL;
@@ -2739,101 +3227,35 @@ uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
         size += 1;
       }
       // 1st DAI
-      if (cg->physicalCellGroupConfig &&
-          cg->physicalCellGroupConfig->pdsch_HARQ_ACK_Codebook==NR_PhysicalCellGroupConfig__pdsch_HARQ_ACK_Codebook_dynamic)
+      if (cg->physicalCellGroupConfig && cg->physicalCellGroupConfig->pdsch_HARQ_ACK_Codebook == NR_PhysicalCellGroupConfig__pdsch_HARQ_ACK_Codebook_dynamic)
         dci_pdu->dai[0].nbits = 2;
       else
         dci_pdu->dai[0].nbits = 1;
       size += dci_pdu->dai[0].nbits;
-      LOG_D(NR_MAC,"DAI1 nbits %d\n",dci_pdu->dai[0].nbits);
+      LOG_D(NR_MAC, "DAI1 nbits %d\n", dci_pdu->dai[0].nbits);
       // 2nd DAI
-      if (cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig && 
-          cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig->choice.setup &&
-          cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig->choice.setup->codeBlockGroupTransmission != NULL) { //TODO not sure about that
+      if (cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig && cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig->choice.setup
+          && cg->spCellConfig->spCellConfigDedicated->pdsch_ServingCellConfig->choice.setup->codeBlockGroupTransmission != NULL) { // TODO not sure about that
         dci_pdu->dai[1].nbits = 2;
         size += dci_pdu->dai[1].nbits;
       }
       // SRS resource indicator
-      if (srs_config &&
-          pusch_Config && 
-          pusch_Config->txConfig != NULL){
-        int count=0;
-        if (*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook){
-          for (int i=0; i<srs_config->srs_ResourceSetToAddModList->list.count; i++) {
-            if (srs_config->srs_ResourceSetToAddModList->list.array[i]->usage == NR_SRS_ResourceSet__usage_codebook)
-              count++;
-          }
-          if (count>1) {
-            dci_pdu->srs_resource_indicator.nbits = 1;
-            size += dci_pdu->srs_resource_indicator.nbits;
-          }
-        }
-        else {
-          int lmin,Lmax = 0;
-          int lsum = 0;
-          if ( cg->spCellConfig->spCellConfigDedicated->uplinkConfig &&
-               cg->spCellConfig->spCellConfigDedicated->uplinkConfig->pusch_ServingCellConfig != NULL) {
-            if ( cg->spCellConfig->spCellConfigDedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers != NULL)
-              Lmax = *cg->spCellConfig->spCellConfigDedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers;
-            else
-              AssertFatal(1==0,"MIMO on PUSCH not supported, maxMIMO_Layers needs to be set to 1\n");
-          }
-          else
-            AssertFatal(1==0,"MIMO on PUSCH not supported, maxMIMO_Layers needs to be set to 1\n");
-          for (int i=0; i<srs_config->srs_ResourceSetToAddModList->list.count; i++) {
-            if (srs_config->srs_ResourceSetToAddModList->list.array[i]->usage == NR_SRS_ResourceSet__usage_nonCodebook)
-              count++;
-            if (count < Lmax) lmin = count;
-            else lmin = Lmax;
-            for (int k=1;k<=lmin;k++) {
-              lsum += binomial(count,k);
-            }
-          }
-          dci_pdu->srs_resource_indicator.nbits = (int)ceil(log2(lsum));
-          size += dci_pdu->srs_resource_indicator.nbits;
-        }
-      } else dci_pdu->srs_resource_indicator.nbits = 0;
-      LOG_D(NR_MAC,"dci_pdu->srs_resource_indicator.nbits %d\n",dci_pdu->srs_resource_indicator.nbits);
+      NR_PUSCH_ServingCellConfig_t *pusch_servingcellconfig = uplinkConfig && uplinkConfig->pusch_ServingCellConfig ? uplinkConfig->pusch_ServingCellConfig->choice.setup : NULL;
+      dci_pdu->srs_resource_indicator.nbits = compute_srs_resource_indicator(pusch_servingcellconfig, pusch_Config, srs_config, NULL, NULL);
+      size += dci_pdu->srs_resource_indicator.nbits;
+      LOG_D(NR_MAC, "dci_pdu->srs_resource_indicator.nbits %d\n", dci_pdu->srs_resource_indicator.nbits);
       // Precoding info and number of layers
-      long transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, ubwpd, (uint8_t*)&format, rnti_type, 0);
-       
-      uint8_t pusch_antenna_ports = get_pusch_nb_antenna_ports(pusch_Config, srs_config, dci_pdu->srs_resource_indicator);
-	   
-      dci_pdu->precoding_information.nbits=0;
-      if (pusch_Config && 
-          pusch_Config->txConfig != NULL){
-        if (*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook){
-          if (pusch_antenna_ports > 1) {
-            if (pusch_antenna_ports == 4) {
-              if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank>1))
-                dci_pdu->precoding_information.nbits = 6-(*pusch_Config->codebookSubset);
-              else {
-                if(*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent)
-                  dci_pdu->precoding_information.nbits = 2;
-                else
-                  dci_pdu->precoding_information.nbits = 5-(*pusch_Config->codebookSubset);
-              }
-            }
-            else {
-              AssertFatal(pusch_antenna_ports==2,"Not valid number of antenna ports");
-              if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank==2))
-                dci_pdu->precoding_information.nbits = 4-(*pusch_Config->codebookSubset);
-              else
-                dci_pdu->precoding_information.nbits = 3-(*pusch_Config->codebookSubset);
-            }
-          }
-        }
-      }
+      dci_pdu->precoding_information.nbits = compute_precoding_information(pusch_Config, srs_config, dci_pdu->srs_resource_indicator, NULL, NULL, NULL);
       size += dci_pdu->precoding_information.nbits;
-      LOG_D(NR_MAC,"dci_pdu->precoding_informaiton.nbits=%d\n",dci_pdu->precoding_information.nbits);
+      LOG_D(NR_MAC, "dci_pdu->precoding_informaiton.nbits=%d\n", dci_pdu->precoding_information.nbits);
       // Antenna ports
+      long transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, ubwpd, (uint8_t *)&format, rnti_type, 0);
       NR_DMRS_UplinkConfig_t *NR_DMRS_UplinkConfig = NULL;
-      int xa=0;
-      int xb=0;
-      if(pusch_Config &&
-         pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA != NULL){
+      int xa = 0;
+      int xb = 0;
+      if (pusch_Config && pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA != NULL) {
         NR_DMRS_UplinkConfig = pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA->choice.setup;
-        xa = ul_ant_bits(NR_DMRS_UplinkConfig,transformPrecoder);
+        xa = ul_ant_bits(NR_DMRS_UplinkConfig, transformPrecoder);
       }
       if(pusch_Config &&
          pusch_Config->dmrs_UplinkForPUSCH_MappingTypeB != NULL){
@@ -3155,26 +3577,28 @@ int is_nr_UL_slot(NR_TDD_UL_DL_ConfigCommon_t	*tdd_UL_DL_ConfigurationCommon, sl
   else return(slot_in_period >= slots1+tdd_UL_DL_ConfigurationCommon->pattern2->nrofDownlinkSlots ? 1 : 0);    
 }
 
-int16_t fill_dmrs_mask(NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,int NrOfSymbols, int startSymbol, int mappingtype_fromDCI, int length) {
+int16_t fill_dmrs_mask(const NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,int NrOfSymbols, int startSymbol, mappingType_t mappingtype, int length) {
 
-  int l0;int dmrs_AdditionalPosition = 0;
+  int dmrs_AdditionalPosition = 0;
   NR_DMRS_DownlinkConfig_t *dmrs_config = NULL;
 
-  LOG_D(MAC, "NrofSymbols:%d, startSymbol:%d, mappingtype:%d, dmrs_TypeA_Position:%d\n", NrOfSymbols, startSymbol, mappingtype_fromDCI, dmrs_TypeA_Position);
-
-  if (dmrs_TypeA_Position == NR_ServingCellConfigCommon__dmrs_TypeA_Position_pos2) l0=2;
-  else if (dmrs_TypeA_Position == NR_ServingCellConfigCommon__dmrs_TypeA_Position_pos3) l0=3;
-  else AssertFatal(1==0,"Illegal dmrs_TypeA_Position %d\n",(int)dmrs_TypeA_Position);
+  LOG_D(MAC, "NrofSymbols:%d, startSymbol:%d, mappingtype:%d, dmrs_TypeA_Position:%d\n", NrOfSymbols, startSymbol, mappingtype, dmrs_TypeA_Position);
 
+  int l0 = 0; // type B
+  if (mappingtype == typeA) {
+    if (dmrs_TypeA_Position == NR_ServingCellConfigCommon__dmrs_TypeA_Position_pos2) l0=2;
+    else if (dmrs_TypeA_Position == NR_ServingCellConfigCommon__dmrs_TypeA_Position_pos3) l0=3;
+    else AssertFatal(1==0,"Illegal dmrs_TypeA_Position %d\n",(int)dmrs_TypeA_Position);
+  }
   // in case of DCI FORMAT 1_0 or dedicated pdsch config not received additionposition = pos2, len1 should be used
   // referred to section 5.1.6.2 in 38.214
   dmrs_AdditionalPosition = 2;
 
   if (pdsch_Config != NULL) {
-    if (mappingtype_fromDCI == typeA) { // Type A
+    if (mappingtype == typeA) { // Type A
       if (pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA && pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->present == NR_SetupRelease_DMRS_DownlinkConfig_PR_setup)
         dmrs_config = (NR_DMRS_DownlinkConfig_t *)pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup;
-    } else if (mappingtype_fromDCI == typeB) {
+    } else if (mappingtype == typeB) {
       if (pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeB && pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeB->present == NR_SetupRelease_DMRS_DownlinkConfig_PR_setup)
         dmrs_config = (NR_DMRS_DownlinkConfig_t *)pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeB->choice.setup;
     } else {
@@ -3191,17 +3615,17 @@ int16_t fill_dmrs_mask(NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,i
   int32_t l_prime = -1;
 
   // columns 0-3 for TypeA, 4-7 for TypeB
-  column = (mappingtype_fromDCI == typeA) ? dmrs_AdditionalPosition : (dmrs_AdditionalPosition + 4);
+  column = (mappingtype == typeA) ? dmrs_AdditionalPosition : (dmrs_AdditionalPosition + 4);
 
   // Section 7.4.1.1.2 in Spec 38.211
   // For PDSCH Mapping TypeA, ld is duration between first OFDM of the slot and last OFDM symbol of the scheduled PDSCH resources
   // For TypeB, ld is the duration of the scheduled PDSCH resources
-  ld = (mappingtype_fromDCI == typeA) ? (NrOfSymbols + startSymbol) : NrOfSymbols;
+  ld = (mappingtype == typeA) ? (NrOfSymbols + startSymbol) : NrOfSymbols;
 
   AssertFatal(ld > 2 && ld < 15,"Illegal NrOfSymbols according to Table 5.1.2.1-1 Spec 38.214 %d\n",ld);
   AssertFatal((NrOfSymbols + startSymbol) < 15,"Illegal S+L according to Table 5.1.2.1-1 Spec 38.214 S:%d L:%d\n",startSymbol, NrOfSymbols);
 
-  if (mappingtype_fromDCI == typeA) {
+  if (mappingtype == typeA) {
 
     // Section 7.4.1.1.2 in Spec 38.211
     AssertFatal((l0 == 2) || (l0 == 3 && dmrs_AdditionalPosition != 3),"Wrong config, If dmrs_TypeA_Position POS3, ADD POS cannot be POS3 \n");
@@ -3229,7 +3653,7 @@ int16_t fill_dmrs_mask(NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,i
   LOG_D(MAC, "l0:%d, ld:%d,row:%d, column:%d, addpos:%d, maxlen:%d\n", l0, ld, row, column, dmrs_AdditionalPosition, length);
   AssertFatal(l_prime>=0,"ERROR in configuration.Check Time Domain allocation of this Grant. l_prime < 1. row:%d, column:%d\n", row, column);
 
-  l_prime = (mappingtype_fromDCI == typeA) ? (l_prime | l0) : (l_prime << startSymbol);
+  l_prime = (mappingtype == typeA) ? (l_prime | l0) : (l_prime << startSymbol);
   LOG_D(MAC, " PDSCH DMRS MASK in HEX:%x\n", l_prime);
 
   return l_prime;
diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.h b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.h
index d45c01b4b05c2c00fe7fe75ca404aaa6b25005da..9a8750ae51160438c1f2e9e7df5376567b16a05d 100644
--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.h
+++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.h
@@ -38,6 +38,18 @@
 #include "openair1/PHY/impl_defs_nr.h"
 #include "common/utils/nr/nr_common.h"
 
+typedef enum {
+  pusch_dmrs_pos0 = 0,
+  pusch_dmrs_pos1 = 1,
+  pusch_dmrs_pos2 = 2,
+  pusch_dmrs_pos3 = 3,
+} pusch_dmrs_AdditionalPosition_t;
+
+typedef enum {
+  typeA = 0,
+  typeB = 1
+} mappingType_t;
+
 uint32_t get_Y(NR_SearchSpace_t *ss, int slot, rnti_t rnti);
 
 uint8_t get_BG(uint32_t A, uint16_t R);
@@ -46,12 +58,25 @@ uint64_t from_nrarfcn(int nr_bandP, uint8_t scs_index, uint32_t dl_nrarfcn);
 
 uint32_t to_nrarfcn(int nr_bandP, uint64_t dl_CarrierFreq, uint8_t scs_index, uint32_t bw);
 
-int16_t fill_dmrs_mask(NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,int NrOfSymbols,int startSymbol,int mappingtype_fromDCI,int length);
+int16_t fill_dmrs_mask(const NR_PDSCH_Config_t *pdsch_Config,int dmrs_TypeA_Position,int NrOfSymbols,int startSymbol,mappingType_t mappingtype,int length);
 
 int is_nr_DL_slot(NR_TDD_UL_DL_ConfigCommon_t *tdd_UL_DL_ConfigurationCommon,slot_t slotP);
 
 int is_nr_UL_slot(NR_TDD_UL_DL_ConfigCommon_t *tdd_UL_DL_ConfigurationCommon, slot_t slotP, frame_type_t frame_type);
 
+uint8_t compute_srs_resource_indicator(NR_PUSCH_ServingCellConfig_t *pusch_servingcellconfig,
+                                       NR_PUSCH_Config_t *pusch_Config,
+                                       NR_SRS_Config_t *srs_config,
+                                       nr_srs_feedback_t *srs_feedback,
+                                       uint16_t *val);
+
+uint8_t compute_precoding_information(NR_PUSCH_Config_t *pusch_Config,
+                                      NR_SRS_Config_t *srs_config,
+                                      dci_field_t srs_resource_indicator,
+                                      nr_srs_feedback_t *srs_feedback,
+                                      const uint8_t *nrOfLayers,
+                                      uint16_t *val);
+
 uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDLBWP,
                      const NR_BWP_UplinkCommon_t *initialULBWP,
                      const NR_CellGroupConfig_t *cg,
diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_extern.h b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_extern.h
index 7bbe8d68a751c4cb9ce36b10be00332bc53b14a3..01864f02c52d60a5894aec85bf618e7ba1a50aa8 100644
--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_extern.h
+++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_extern.h
@@ -143,4 +143,7 @@ extern const float   table_38213_13_12_c3[16];
 
 extern const int32_t table_38213_10_1_1_c2[5];
 
+extern const char table_38211_6_3_1_5_1[6][2][1];
+extern const char table_38211_6_3_1_5_2[28][4][1];
+extern const char table_38211_6_3_1_5_3[28][4][1];
 #endif //DEF_H
diff --git a/openair2/LAYER2/NR_MAC_UE/mac_proto.h b/openair2/LAYER2/NR_MAC_UE/mac_proto.h
index 161799fb6677c94a132cc6ac647683be26b0592e..7dae4e92793d65e521e5b692e2cf5207d86a54fb 100644
--- a/openair2/LAYER2/NR_MAC_UE/mac_proto.h
+++ b/openair2/LAYER2/NR_MAC_UE/mac_proto.h
@@ -141,7 +141,6 @@ void fill_scheduled_response(nr_scheduled_response_t *scheduled_response,
                              int cc_id,
                              frame_t frame,
                              int slot,
-                             int thread_id,
                              void *phy_data);
 
 /*! \fn int8_t nr_ue_get_SR(module_id_t module_idP, frame_t frameP, slot_t slotP);
@@ -353,11 +352,10 @@ and fills the PRACH PDU per each FD occasion.
 @param module_idP Index of UE instance
 @param frameP Frame index
 @param slotP Slot index
-@param thread_id RX/TX Thread ID
 @returns void
 */
-void nr_ue_prach_scheduler(module_id_t module_idP, frame_t frameP, sub_frame_t slotP, int thread_id);
-void nr_ue_pucch_scheduler(module_id_t module_idP, frame_t frameP, int slotP, int thread_id);
+void nr_ue_prach_scheduler(module_id_t module_idP, frame_t frameP, sub_frame_t slotP);
+void nr_ue_pucch_scheduler(module_id_t module_idP, frame_t frameP, int slotP, void *phy_data);
 void nr_schedule_csirs_reception(NR_UE_MAC_INST_t *mac, int frame, int slot);
 void nr_schedule_csi_for_im(NR_UE_MAC_INST_t *mac, int frame, int slot);
 
diff --git a/openair2/LAYER2/NR_MAC_UE/main_ue_nr.c b/openair2/LAYER2/NR_MAC_UE/main_ue_nr.c
index 4adee2bced396dae590bb5948aac0cccd36151a2..f8b313efd8c81a557b2fdd90ea7cc1cb588d86da 100644
--- a/openair2/LAYER2/NR_MAC_UE/main_ue_nr.c
+++ b/openair2/LAYER2/NR_MAC_UE/main_ue_nr.c
@@ -32,7 +32,7 @@
 
 //#include "defs.h"
 #include "mac_proto.h"
-#include "../../ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 //#undef MALLOC
 #include "assertions.h"
 #include "PHY/types.h"
diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ra_procedures.c b/openair2/LAYER2/NR_MAC_UE/nr_ra_procedures.c
index 507cdb2661df4f2598189be7611ab9e599e1aca0..00117ec4ad65edf98fc65ebc133d7d1077a933bc 100644
--- a/openair2/LAYER2/NR_MAC_UE/nr_ra_procedures.c
+++ b/openair2/LAYER2/NR_MAC_UE/nr_ra_procedures.c
@@ -503,28 +503,28 @@ void ra_preambles_config(NR_PRACH_RESOURCES_t *prach_resources, NR_UE_MAC_INST_t
     if(ra->ra_PreambleIndex < 0 || ra->ra_PreambleIndex > 63) {
       if (noGroupB) {
         // use Group A preamble
-        ra->ra_PreambleIndex = ra->starting_preamble_nb + (nrand48(seed) % ra->cb_preambles_per_ssb);
+        ra->ra_PreambleIndex = ra->starting_preamble_nb + (rand_r((unsigned int *)seed) % ra->cb_preambles_per_ssb);
         ra->RA_usedGroupA = 1;
       } else if ((ra->Msg3_size < messageSizeGroupA) && (dl_pathloss > PLThreshold)) {
         // Group B is configured and RA preamble Group A is used
         // - todo add condition on CCCH_sdu_size for initiation by CCCH
-        ra->ra_PreambleIndex = ra->starting_preamble_nb + (nrand48(seed) % sizeOfRA_PreamblesGroupA);
+        ra->ra_PreambleIndex = ra->starting_preamble_nb + (rand_r((unsigned int *)seed) % sizeOfRA_PreamblesGroupA);
         ra->RA_usedGroupA = 1;
       } else {
         // Group B preamble is configured and used
         // the first sizeOfRA_PreamblesGroupA RA preambles belong to RA Preambles Group A
         // the remaining belong to RA Preambles Group B
-        ra->ra_PreambleIndex = ra->starting_preamble_nb + sizeOfRA_PreamblesGroupA + (nrand48(seed) % (ra->cb_preambles_per_ssb - sizeOfRA_PreamblesGroupA));
+        ra->ra_PreambleIndex = ra->starting_preamble_nb + sizeOfRA_PreamblesGroupA + (rand_r((unsigned int *)seed) % (ra->cb_preambles_per_ssb - sizeOfRA_PreamblesGroupA));
         ra->RA_usedGroupA = 0;
       }
     }
   } else { // Msg3 is being retransmitted
     if (ra->RA_usedGroupA && noGroupB) {
-      ra->ra_PreambleIndex = ra->starting_preamble_nb + (nrand48(seed) % ra->cb_preambles_per_ssb);
+      ra->ra_PreambleIndex = ra->starting_preamble_nb + (rand_r((unsigned int *)seed) % ra->cb_preambles_per_ssb);
     } else if (ra->RA_usedGroupA && !noGroupB){
-      ra->ra_PreambleIndex = ra->starting_preamble_nb + (nrand48(seed) % sizeOfRA_PreamblesGroupA);
+      ra->ra_PreambleIndex = ra->starting_preamble_nb + (rand_r((unsigned int *)seed) % sizeOfRA_PreamblesGroupA);
     } else {
-      ra->ra_PreambleIndex = ra->starting_preamble_nb + sizeOfRA_PreamblesGroupA + (nrand48(seed) % (ra->cb_preambles_per_ssb - sizeOfRA_PreamblesGroupA));
+      ra->ra_PreambleIndex = ra->starting_preamble_nb + sizeOfRA_PreamblesGroupA + (rand_r((unsigned int *)seed) % (ra->cb_preambles_per_ssb - sizeOfRA_PreamblesGroupA));
     }
   }
   prach_resources->ra_PreambleIndex = ra->ra_PreambleIndex;
@@ -754,7 +754,7 @@ uint8_t nr_ue_get_rach(NR_PRACH_RESOURCES_t *prach_resources,
 
         // fill ulsch_buffer with random data
         for (int i = 0; i < TBS_bytes; i++){
-          mac_sdus[i] = (unsigned char) (lrand48()&0xff);
+          mac_sdus[i] = (unsigned char) (rand()&0xff);
         }
         //Sending SDUs with size 1
         //Initialize elements of sdu_lengths
@@ -1009,7 +1009,7 @@ void nr_ra_failed(uint8_t mod_id, uint8_t CC_id, NR_PRACH_RESOURCES_t *prach_res
     LOG_D(MAC, "In %s: [UE %d][%d.%d] Maximum number of RACH attempts (%d) reached, selecting backoff time...\n",
           __FUNCTION__, mod_id, frame, slot, ra->preambleTransMax);
 
-    ra->RA_backoff_cnt = nrand48(seed) % (prach_resources->RA_PREAMBLE_BACKOFF + 1);
+    ra->RA_backoff_cnt = rand_r((unsigned int *)seed) % (prach_resources->RA_PREAMBLE_BACKOFF + 1);
     prach_resources->RA_PREAMBLE_TRANSMISSION_COUNTER = 1;
     prach_resources->RA_PREAMBLE_POWER_RAMPING_STEP += 2; // 2 dB increment
     prach_resources->ra_PREAMBLE_RECEIVED_TARGET_POWER = nr_get_Po_NOMINAL_PUSCH(prach_resources, mod_id, CC_id);
diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ue_procedures.c b/openair2/LAYER2/NR_MAC_UE/nr_ue_procedures.c
index e525300d3655a276496ab0517e5435ea4799ab68..161cdb7079a82afe2a6b63829e1f09b375deacc0 100644
--- a/openair2/LAYER2/NR_MAC_UE/nr_ue_procedures.c
+++ b/openair2/LAYER2/NR_MAC_UE/nr_ue_procedures.c
@@ -1550,6 +1550,7 @@ void nr_ue_configure_pucch(NR_UE_MAC_INST_t *mac,
 
   LOG_D(NR_MAC,"initial_pucch_id %d, pucch_resource %p\n",pucch->initial_pucch_id,pucch->pucch_resource);
   // configure pucch from Table 9.2.1-1
+  // only for ack/nack
   if (pucch->initial_pucch_id > -1 &&
       pucch->pucch_resource == NULL) {
 
@@ -1582,25 +1583,12 @@ void nr_ue_configure_pucch(NR_UE_MAC_INST_t *mac,
     pucch_pdu->freq_hop_flag = 1;
     pucch_pdu->time_domain_occ_idx = 0;
 
-    if (O_SR == 0 || pucch->sr_payload == 0) {  /* only ack is transmitted TS 36.213 9.2.3 UE procedure for reporting HARQ-ACK */
-      if (O_ACK == 1)
-        pucch_pdu->mcs = sequence_cyclic_shift_1_harq_ack_bit[pucch->ack_payload & 0x1];   /* only harq of 1 bit */
-      else
-        pucch_pdu->mcs = sequence_cyclic_shift_2_harq_ack_bits[pucch->ack_payload & 0x3];  /* only harq with 2 bits */
-    }
-    else { /* SR + eventually ack are transmitted TS 36.213 9.2.5.1 UE procedure for multiplexing HARQ-ACK or CSI and SR */
-      if (pucch->sr_payload == 1) {                /* positive scheduling request */
-        if (O_ACK == 1)
-          pucch_pdu->mcs = sequence_cyclic_shift_1_harq_ack_bit_positive_sr[pucch->ack_payload & 0x1];   /* positive SR and harq of 1 bit */
-        else if (O_ACK == 2)
-          pucch_pdu->mcs = sequence_cyclic_shift_2_harq_ack_bits_positive_sr[pucch->ack_payload & 0x3];  /* positive SR and harq with 2 bits */
-        else
-          pucch_pdu->mcs = 0;  /* only positive SR */
-      }
-    }
+    if (O_ACK == 1)
+      pucch_pdu->mcs = sequence_cyclic_shift_1_harq_ack_bit[pucch->ack_payload & 0x1];   /* only harq of 1 bit */
+    else
+      pucch_pdu->mcs = sequence_cyclic_shift_2_harq_ack_bits[pucch->ack_payload & 0x3];  /* only harq with 2 bits */
 
-    // TODO verify if SR can be transmitted in this mode
-    pucch_pdu->payload = (pucch->sr_payload << O_ACK) | pucch->ack_payload;
+    pucch_pdu->payload = pucch->ack_payload;
   }
   else if (pucch->pucch_resource != NULL) {
 
diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
index aee3e369cc15910009adf5201565fcd651547d7a..cac302980c620c01a51dfe25570cfd5e05d9f8fd 100644
--- a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
+++ b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
@@ -94,7 +94,6 @@ void fill_scheduled_response(nr_scheduled_response_t *scheduled_response,
                              int cc_id,
                              frame_t frame,
                              int slot,
-                             int thread_id,
                              void *phy_data){
 
   scheduled_response->dl_config  = dl_config;
@@ -104,7 +103,6 @@ void fill_scheduled_response(nr_scheduled_response_t *scheduled_response,
   scheduled_response->CC_id      = cc_id;
   scheduled_response->frame      = frame;
   scheduled_response->slot       = slot;
-  scheduled_response->thread_id  = thread_id;
   scheduled_response->phy_data   = phy_data;
 
 }
@@ -1126,7 +1124,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
       nr_schedule_csi_for_im(mac, rx_frame, rx_slot);
       dcireq.dl_config_req = mac->dl_config_request;
 
-      fill_scheduled_response(&scheduled_response, &dcireq.dl_config_req, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->thread_id, dl_info->phy_data);
+      fill_scheduled_response(&scheduled_response, &dcireq.dl_config_req, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->phy_data);
       if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL) {
         LOG_D(NR_MAC,"1# scheduled_response transmitted, %d, %d\n", rx_frame, rx_slot);
         mac->if_module->scheduled_response(&scheduled_response);
@@ -1144,7 +1142,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
         fill_dci_search_candidates(mac->ra.ss, rel15, -1 , -1);
         dl_config->number_pdus = 1;
         LOG_D(MAC,"mac->cg %p: Calling fill_scheduled_response rnti %x, type0_pdcch, num_pdus %d\n",mac->cg,rel15->rnti,dl_config->number_pdus);
-        fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->thread_id, dl_info->phy_data);
+        fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->phy_data);
         if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
           mac->if_module->scheduled_response(&scheduled_response);
       }
@@ -1253,7 +1251,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
         }
         pthread_mutex_unlock(&ul_config->mutex_ul_config); // avoid double lock
 
-        fill_scheduled_response(&scheduled_response, NULL, ul_config, &tx_req, mod_id, cc_id, frame_tx, slot_tx, ul_info->thread_id, NULL);
+        fill_scheduled_response(&scheduled_response, NULL, ul_config, &tx_req, mod_id, cc_id, frame_tx, slot_tx, NULL);
         if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL){
           LOG_D(NR_MAC,"3# scheduled_response transmitted,%d, %d\n", frame_tx, slot_tx);
           mac->if_module->scheduled_response(&scheduled_response);
@@ -2318,7 +2316,7 @@ void build_ssb_to_ro_map(NR_UE_MAC_INST_t *mac) {
 }
 
 
-void nr_ue_pucch_scheduler(module_id_t module_idP, frame_t frameP, int slotP, int thread_id) {
+void nr_ue_pucch_scheduler(module_id_t module_idP, frame_t frameP, int slotP, void *phy_data) {
 
   NR_UE_MAC_INST_t *mac = get_mac_inst(module_idP);
   int O_SR = 0;
@@ -2410,7 +2408,7 @@ void nr_ue_pucch_scheduler(module_id_t module_idP, frame_t frameP, int slotP, in
                           O_SR, O_ACK, O_CSI);
     LOG_D(NR_MAC, "Configuring pucch, is_common = %d\n", pucch.is_common);
     nr_scheduled_response_t scheduled_response;
-    fill_scheduled_response(&scheduled_response, NULL, ul_config, NULL, module_idP, 0 /*TBR fix*/, frameP, slotP, thread_id, NULL);
+    fill_scheduled_response(&scheduled_response, NULL, ul_config, NULL, module_idP, 0 /*TBR fix*/, frameP, slotP, phy_data);
     if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
       mac->if_module->scheduled_response(&scheduled_response);
   }
@@ -2642,7 +2640,7 @@ void nr_schedule_csirs_reception(NR_UE_MAC_INST_t *mac, int frame, int slot) {
 // PRACH formats 9, 10, 11 are corresponding to dual PRACH format configurations A1/B1, A2/B2, A3/B3.
 // - todo:
 // - Partial configuration is actually already stored in (fapi_nr_prach_config_t) &mac->phy_config.config_req->prach_config
-void nr_ue_prach_scheduler(module_id_t module_idP, frame_t frameP, sub_frame_t slotP, int thread_id) {
+void nr_ue_prach_scheduler(module_id_t module_idP, frame_t frameP, sub_frame_t slotP) {
 
   uint16_t format, format0, format1, ncs;
   int is_nr_prach_slot;
@@ -2781,7 +2779,7 @@ void nr_ue_prach_scheduler(module_id_t module_idP, frame_t frameP, sub_frame_t s
         }
       } // if format1
 
-      fill_scheduled_response(&scheduled_response, NULL, ul_config, NULL, module_idP, 0 /*TBR fix*/, frameP, slotP, thread_id, NULL);
+      fill_scheduled_response(&scheduled_response, NULL, ul_config, NULL, module_idP, 0 /*TBR fix*/, frameP, slotP, NULL);
       if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
         mac->if_module->scheduled_response(&scheduled_response);
     } // is_nr_prach_slot
@@ -2845,7 +2843,7 @@ void nr_ue_sib1_scheduler(module_id_t module_idP,
     slot_s = mac->type0_PDCCH_CSS_config.n_c;
   }
   LOG_D(MAC,"Calling fill_scheduled_response, type0_pdcch, num_pdus %d\n",dl_config->number_pdus);
-  fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, module_idP, cc_id, frame_s, slot_s, 0, phy_data); // TODO fix thread_id, for now assumed 0
+  fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, module_idP, cc_id, frame_s, slot_s, phy_data);
 
   if (dl_config->number_pdus) {
     if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
index 85badb7e0a60a485a6e3660db0386babc8ed797e..4334525637e9d56b62782129f4a8d02ad951d4f7 100644
--- a/openair2/LAYER2/NR_MAC_gNB/config.c
+++ b/openair2/LAYER2/NR_MAC_gNB/config.c
@@ -512,9 +512,8 @@ int rrc_mac_config_req_gNB(module_id_t Mod_idP,
     int nr_dl_slots = n;
     int nr_ulstart_slot = 0;
     if (tdd) {
-      nr_ulstart_slot = tdd->nrofDownlinkSlots + (tdd->nrofUplinkSymbols == 0);
       nr_dl_slots = tdd->nrofDownlinkSlots + (tdd->nrofDownlinkSymbols != 0);
-      nr_ulstart_slot = tdd->nrofDownlinkSlots + (tdd->nrofUplinkSymbols == 0);
+      nr_ulstart_slot = tdd->nrofDownlinkSlots;
       nr_slots_period /= get_nb_periods_per_frame(tdd->dl_UL_TransmissionPeriodicity);
     }
     else
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler.c
index b51e16d0a80f2a4b61c4058c5341b3d1b533bbe0..5cf4bc295c16092d4bcdda6a111a7e429ce65d35 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler.c
@@ -73,7 +73,7 @@ void clear_nr_nfapi_information(gNB_MAC_INST * gNB,
   nfapi_nr_dl_tti_request_t    *DL_req = &gNB->DL_req[0];
   nfapi_nr_dl_tti_pdcch_pdu_rel15_t **pdcch = (nfapi_nr_dl_tti_pdcch_pdu_rel15_t **)gNB->pdcch_pdu_idx[CC_idP];
   nfapi_nr_ul_tti_request_t    *future_ul_tti_req =
-      &gNB->UL_tti_req_ahead[CC_idP][(slotP + num_slots - 1) % num_slots];
+    &gNB->UL_tti_req_ahead[CC_idP][(slotP + num_slots - 1) % num_slots];
   nfapi_nr_ul_dci_request_t    *UL_dci_req = &gNB->UL_dci_req[0];
   nfapi_nr_tx_data_request_t   *TX_req = &gNB->TX_req[0];
 
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
index 2d8dd574e3d6d8851e5cc4d752334612fc9cae0d..7155a633ee7484c31d8ea722da1e07779990538f 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
@@ -636,7 +636,7 @@ void nr_initiate_ra_proc(module_id_t module_idP,
 
     ra->RA_rnti = ra_rnti;
     ra->preamble_index = preamble_index;
-    ra->beam_id = beam_index;
+    ra->beam_id = cc->ssb_index[beam_index];
 
     LOG_I(NR_MAC,
           "[gNB %d][RAPROC] CC_id %d Frame %d Activating Msg2 generation in frame %d, slot %d using RA rnti %x SSB, new rnti %04x "
@@ -647,7 +647,7 @@ void nr_initiate_ra_proc(module_id_t module_idP,
           ra->Msg2_frame,
           ra->Msg2_slot,
           ra->RA_rnti,
-    ra->rnti,
+          ra->rnti,
           cc->ssb_index[beam_index],
           i);
 
@@ -830,6 +830,7 @@ void nr_generate_Msg3_retransmission(module_id_t module_idP, int CC_id, frame_t
                  scc,
                  pusch_pdu,
                  &uldci_payload,
+                 NULL,
                  ra->Msg3_tda_id,
                  ra->msg3_TPC,
                  &ra->UL_BWP);
@@ -885,7 +886,7 @@ void nr_get_Msg3alloc(module_id_t module_id,
   int StartSymbolIndex = 0;
   int NrOfSymbols = 0;
   int startSymbolAndLength = 0;
-  int temp_slot = 0;
+  int abs_slot = 0;
   ra->Msg3_tda_id = 16; // initialization to a value above limit
 
   NR_PUSCH_TimeDomainResourceAllocationList_t *pusch_TimeDomainAllocationList = ul_bwp->tdaList;
@@ -894,8 +895,11 @@ void nr_get_Msg3alloc(module_id_t module_id,
   const int n_slots_frame = nr_slots_per_frame[mu];
   uint8_t k2 = 0;
   if (frame_type == TDD) {
-    int nb_periods_per_frame = get_nb_periods_per_frame(scc->tdd_UL_DL_ConfigurationCommon->pattern1.dl_UL_TransmissionPeriodicity);
-    int nb_slots_per_period = ((1<<mu)*10)/nb_periods_per_frame;
+    int msg3_slot = tdd->nrofDownlinkSlots; // first uplink slot
+    if (tdd->nrofUplinkSymbols > 0 && tdd->nrofUplinkSymbols < 3)
+      msg3_slot++; // we can't transmit msg3 in mixed slot if there are fewer than 3 uplink symbols
+    const int nb_periods_per_frame = get_nb_periods_per_frame(tdd->dl_UL_TransmissionPeriodicity);
+    const int nb_slots_per_period = ((1<<mu)*10)/nb_periods_per_frame;
     for (int i=0; i<pusch_TimeDomainAllocationList->list.count; i++) {
       startSymbolAndLength = pusch_TimeDomainAllocationList->list.array[i]->startSymbolAndLength;
       SLIV2SL(startSymbolAndLength, &StartSymbolIndex, &NrOfSymbols);
@@ -903,19 +907,16 @@ void nr_get_Msg3alloc(module_id_t module_id,
       int start_symbol_index,nr_of_symbols;
       SLIV2SL(pusch_TimeDomainAllocationList->list.array[i]->startSymbolAndLength, &start_symbol_index, &nr_of_symbols);
       LOG_D(NR_MAC,"Checking Msg3 TDA %d : k2 %d, sliv %d,S %d L %d\n",i,(int)k2,(int)pusch_TimeDomainAllocationList->list.array[i]->startSymbolAndLength,start_symbol_index,nr_of_symbols);
-      // we want to transmit in the uplink symbols of mixed slot AND assuming Msg2 was in the mixed slot
-      if ((k2 + DELTA[mu])%nb_slots_per_period == 0) {
-        temp_slot = current_slot + k2 + DELTA[mu]; // msg3 slot according to 8.3 in 38.213
-        ra->Msg3_slot = temp_slot%nr_slots_per_frame[mu];
-
-        if (is_xlsch_in_slot(RC.nrmac[module_id]->ulsch_slot_bitmap[ra->Msg3_slot / 64], ra->Msg3_slot) &&
-            nr_of_symbols<=scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols&&
-            start_symbol_index>=(14-scc->tdd_UL_DL_ConfigurationCommon->pattern1.nrofUplinkSymbols)) {
-          ra->Msg3_tda_id = i;
-          ra->msg3_startsymb = StartSymbolIndex;
-          ra->msg3_nrsymb = NrOfSymbols;
-          break;
-        }
+      // we want to transmit in the uplink symbols of mixed slot or the first uplink slot
+      abs_slot = (current_slot + k2 + DELTA[mu]);
+      int temp_slot = abs_slot % nr_slots_per_frame[mu]; // msg3 slot according to 8.3 in 38.213
+      if ((temp_slot % nb_slots_per_period) == msg3_slot &&
+          is_xlsch_in_slot(RC.nrmac[module_id]->ulsch_slot_bitmap[temp_slot / 64], temp_slot)) {
+        ra->Msg3_tda_id = i;
+        ra->msg3_startsymb = StartSymbolIndex;
+        ra->msg3_nrsymb = NrOfSymbols;
+        ra->Msg3_slot = temp_slot;
+        break;
       }
     }
     AssertFatal(ra->Msg3_tda_id < 16, "Couldn't find an appropriate TD allocation for Msg3\n");
@@ -923,16 +924,16 @@ void nr_get_Msg3alloc(module_id_t module_id,
   else {
     ra->Msg3_tda_id = 0;
     k2 = *pusch_TimeDomainAllocationList->list.array[0]->k2;
-    temp_slot = current_slot + k2 + DELTA[mu]; // msg3 slot according to 8.3 in 38.213
-    ra->Msg3_slot = temp_slot%nr_slots_per_frame[mu];
+    abs_slot = current_slot + k2 + DELTA[mu]; // msg3 slot according to 8.3 in 38.213
+    ra->Msg3_slot = abs_slot % nr_slots_per_frame[mu];
   }
 
   AssertFatal(ra->Msg3_tda_id<16,"Unable to find Msg3 time domain allocation in list\n");
 
-  if (n_slots_frame > temp_slot)
+  if (n_slots_frame > abs_slot)
     ra->Msg3_frame = current_frame;
   else
-    ra->Msg3_frame = (current_frame + (temp_slot / n_slots_frame)) % 1024;
+    ra->Msg3_frame = (current_frame + (abs_slot / n_slots_frame)) % 1024;
 
   // beam association for FR2
   if (*scc->downlinkConfigCommon->frequencyInfoDL->frequencyBandList.list.array[0] >= 257) {
@@ -1040,10 +1041,10 @@ void fill_msg3_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu,
   int num_dmrs_symb = 0;
   for(int i = start_symbol_index; i < start_symbol_index+nr_of_symbols; i++)
     num_dmrs_symb += (pusch_pdu->ul_dmrs_symb_pos >> i) & 1;
-
   int TBS = 0;
   while(TBS<7) {  // TBS for msg3 is 7 bytes (except for RRCResumeRequest1 currently not implemented)
     mcsindex++;
+    AssertFatal(mcsindex<28,"Exceeding MCS limit for Msg3\n");
     int R = nr_get_code_rate_ul(mcsindex,pusch_pdu->mcs_table);
     pusch_pdu->target_code_rate = R;
     pusch_pdu->qam_mod_order = nr_get_Qm_ul(mcsindex,pusch_pdu->mcs_table);
@@ -1059,7 +1060,6 @@ void fill_msg3_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu,
     pusch_pdu->mcs_index = mcsindex;
     pusch_pdu->pusch_data.tb_size = TBS;
     pusch_pdu->maintenance_parms_v3.ldpcBaseGraph = get_BG(TBS<<3,R);
-
   }
 }
 
@@ -1145,8 +1145,6 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
     NR_ServingCellConfigCommon_t *scc = cc->ServingCellConfigCommon;
     NR_SearchSpace_t *ss = ra->ra_ss;
 
-    NR_PDSCH_TimeDomainResourceAllocationList_t *pdsch_TimeDomainAllocationList = dl_bwp->tdaList;
-
     long BWPStart = 0;
     long BWPSize = 0;
     NR_Type0_PDCCH_CSS_config_t *type0_PDCCH_CSS_config = NULL;
@@ -1161,10 +1159,8 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
 
     // Calculate number of symbols
     int time_domain_assignment = get_dl_tda(nr_mac, scc, slotP);
-    int startSymbolIndex, nrOfSymbols;
-    const int startSymbolAndLength = pdsch_TimeDomainAllocationList->list.array[time_domain_assignment]->startSymbolAndLength;
-    SLIV2SL(startSymbolAndLength, &startSymbolIndex, &nrOfSymbols);
-    AssertFatal(startSymbolIndex >= 0, "StartSymbolIndex is negative\n");
+    NR_tda_info_t tda_info = nr_get_pdsch_tda_info(dl_bwp,
+                                                   time_domain_assignment);
 
     NR_ControlResourceSet_t *coreset = ra->coreset;
 
@@ -1172,7 +1168,7 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
 
     uint16_t *vrb_map = cc[CC_id].vrb_map;
     for (int i = 0; (i < rbSize) && (rbStart <= (BWPSize - rbSize)); i++) {
-      if (vrb_map[BWPStart + rbStart + i]&SL_to_bitmap(startSymbolIndex, nrOfSymbols)) {
+      if (vrb_map[BWPStart + rbStart + i]&SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols)) {
         rbStart += i;
         i = 0;
       }
@@ -1212,9 +1208,7 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
       return;
     }
 
-    LOG_D(NR_MAC,"Msg2 startSymbolIndex.nrOfSymbols %d.%d\n",startSymbolIndex,nrOfSymbols);
-
-    int mappingtype = pdsch_TimeDomainAllocationList->list.array[time_domain_assignment]->mappingType;
+    LOG_D(NR_MAC,"Msg2 startSymbolIndex.nrOfSymbols %d.%d\n",tda_info.startSymbolIndex,tda_info.nrOfSymbols);
 
     // look up the PDCCH PDU for this CC, BWP, and CORESET. If it does not exist, create it. This is especially
     // important if we have multiple RAs, and the DLSCH has to reuse them, so we need to mark them
@@ -1244,16 +1238,12 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
     // SCF222: PDU index incremented for each PDSCH PDU sent in TX control message. This is used to associate control
     // information to data and is reset every slot.
     const int pduindex = nr_mac->pdu_index[CC_id]++;
-
-    NR_PDSCH_Config_t *pdsch_Config = dl_bwp->pdsch_Config;
     uint8_t mcsTableIdx = dl_bwp->mcsTableIdx;
 
-    int dmrsConfigType=0;
-    if (pdsch_Config &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup->dmrs_Type)
-      dmrsConfigType = 1;
+   NR_pdsch_dmrs_t dmrs_parms = get_dl_dmrs_params(scc,
+                                                   dl_bwp,
+                                                   &tda_info,
+                                                   1);
 
     pdsch_pdu_rel15->pduBitmap = 0;
     pdsch_pdu_rel15->rnti = ra->RA_rnti;
@@ -1269,37 +1259,22 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
     pdsch_pdu_rel15->nrOfLayers = 1;
     pdsch_pdu_rel15->transmissionScheme = 0;
     pdsch_pdu_rel15->refPoint = 0;
-    pdsch_pdu_rel15->dmrsConfigType = dmrsConfigType;
+    pdsch_pdu_rel15->dmrsConfigType = dmrs_parms.dmrsConfigType;
     pdsch_pdu_rel15->dlDmrsScramblingId = *scc->physCellId;
     pdsch_pdu_rel15->SCID = 0;
-    pdsch_pdu_rel15->numDmrsCdmGrpsNoData = nrOfSymbols <= 2 ? 1 : 2;
+    pdsch_pdu_rel15->numDmrsCdmGrpsNoData = dmrs_parms.numDmrsCdmGrpsNoData;
     pdsch_pdu_rel15->dmrsPorts = 1;
     pdsch_pdu_rel15->resourceAlloc = 1;
     pdsch_pdu_rel15->rbStart = rbStart;
     pdsch_pdu_rel15->rbSize = rbSize;
     pdsch_pdu_rel15->VRBtoPRBMapping = 0;
-    pdsch_pdu_rel15->StartSymbolIndex = startSymbolIndex;
-    pdsch_pdu_rel15->NrOfSymbols = nrOfSymbols;
-    pdsch_pdu_rel15->dlDmrsSymbPos = fill_dmrs_mask(pdsch_Config,
-                                                    nr_mac->common_channels->ServingCellConfigCommon->dmrs_TypeA_Position,
-                                                    nrOfSymbols,
-                                                    startSymbolIndex,
-                                                    mappingtype,
-                                                    1);
+    pdsch_pdu_rel15->StartSymbolIndex = tda_info.startSymbolIndex;
+    pdsch_pdu_rel15->NrOfSymbols = tda_info.nrOfSymbols;
+    pdsch_pdu_rel15->dlDmrsSymbPos = dmrs_parms.dl_dmrs_symb_pos;
 
     uint8_t tb_scaling = 0;
     int R, Qm;
-    uint8_t N_PRB_DMRS;
     uint32_t TBS=0;
-    if (dmrsConfigType == NFAPI_NR_DMRS_TYPE1) {
-      // if no data in dmrs cdm group is 1 only even REs have no data
-      // if no data in dmrs cdm group is 2 both odd and even REs have no data
-      N_PRB_DMRS = pdsch_pdu_rel15->numDmrsCdmGrpsNoData*6;
-    }
-    else {
-      N_PRB_DMRS = pdsch_pdu_rel15->numDmrsCdmGrpsNoData*4;
-    }
-    uint16_t dmrs_length = get_num_dmrs(pdsch_pdu_rel15->dlDmrsSymbPos);
 
     while(TBS<9) {  // min TBS for RAR is 9 bytes
       mcsIndex++;
@@ -1308,8 +1283,8 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
       TBS = nr_compute_tbs(Qm,
                            R,
                            rbSize,
-                           nrOfSymbols,
-                           N_PRB_DMRS*dmrs_length,
+                           tda_info.nrOfSymbols,
+                           dmrs_parms.N_PRB_DMRS*dmrs_parms.N_DMRS_SLOT,
                            0, // overhead
                            tb_scaling,  // tb scaling
 		           1)>>3;  // layers
@@ -1414,7 +1389,7 @@ void nr_generate_Msg2(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
                        CCEIndex,
                        aggregation_level);
     for (int rb = 0; rb < rbSize; rb++) {
-      vrb_map[BWPStart + rb + rbStart] |= SL_to_bitmap(startSymbolIndex, nrOfSymbols);
+      vrb_map[BWPStart + rb + rbStart] |= SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols);
     }
 
     ra->state = WAIT_Msg3;
@@ -1428,17 +1403,14 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
   NR_COMMON_channels_t *cc = &nr_mac->common_channels[CC_id];
   NR_UE_DL_BWP_t *dl_bwp = &ra->DL_BWP;
 
-  if (ra->Msg4_frame == frameP && ra->Msg4_slot == slotP ) {
-
-    uint8_t time_domain_assignment = 0;
-    uint8_t mcsIndex = 0;
+  // proceed only if this is a DL slot and the RA is in Msg4 state
+  if (is_xlsch_in_slot(nr_mac->dlsch_slot_bitmap[slotP / 64], slotP) &&
+      ra->state == Msg4) {
 
     NR_ServingCellConfigCommon_t *scc = cc->ServingCellConfigCommon;
     NR_SearchSpace_t *ss = ra->ra_ss;
 
     NR_ControlResourceSet_t *coreset = ra->coreset;
-    NR_PDSCH_TimeDomainResourceAllocationList_t *pdsch_TimeDomainAllocationList = dl_bwp->tdaList;
-
     AssertFatal(coreset!=NULL,"Coreset cannot be null for RA-Msg4\n");
 
     rnti_t tc_rnti = ra->rnti;
@@ -1449,11 +1421,12 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
 
     NR_UE_info_t * UE = find_nr_UE(&nr_mac->UE_info, ra->rnti);
     if (!UE) {
-        LOG_E(NR_MAC,"want to generate Msg4, but rnti %04x not in the table\n", ra->rnti);
-        return;
+      LOG_E(NR_MAC,"want to generate Msg4, but rnti %04x not in the table\n", ra->rnti);
+      return;
     }
 
-    LOG_I(NR_MAC,"Generate msg4, rnti: %04x\n", ra->rnti);
+    if(UE->enc_rval.encoded <= 0) return; // need to wait until RRCSetup is encoded
+
     NR_UE_sched_ctrl_t *sched_ctrl = &UE->UE_sched_ctrl;
 
     long BWPStart = 0;
@@ -1468,26 +1441,6 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
       BWPSize = type0_PDCCH_CSS_config->num_rbs;
     }
 
-    /* get the PID of a HARQ process awaiting retrnasmission, or -1 otherwise */
-    int current_harq_pid = sched_ctrl->retrans_dl_harq.head;
-    // HARQ management
-    if (current_harq_pid < 0) {
-      AssertFatal(sched_ctrl->available_dl_harq.head >= 0,
-                  "UE context not initialized: no HARQ processes found\n");
-      current_harq_pid = sched_ctrl->available_dl_harq.head;
-      remove_front_nr_list(&sched_ctrl->available_dl_harq);
-    }
-    NR_UE_harq_t *harq = &sched_ctrl->harq_processes[current_harq_pid];
-    DevAssert(!harq->is_waiting);
-    add_tail_nr_list(&sched_ctrl->feedback_dl_harq, current_harq_pid);
-    harq->is_waiting = true;
-    ra->harq_pid = current_harq_pid;
-
-    // Remove UE associated to TC-RNTI
-    if(harq->round==0 && ra->msg3_dcch_dtch) {
-      mac_remove_nr_ue(nr_mac, tc_rnti);
-    }
-
     // get CCEindex, needed also for PUCCH and then later for PDCCH
     uint8_t aggregation_level;
     uint8_t nr_of_candidates;
@@ -1517,7 +1470,33 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
     LOG_D(NR_MAC,"[RAPROC] Msg4 r_pucch %d (CCEIndex %d, nb_of_candidates %d, delta_PRI %d)\n", r_pucch, CCEIndex, nr_of_candidates, delta_PRI);
 
     int alloc = nr_acknack_scheduling(module_idP, UE, frameP, slotP, r_pucch, 1);
-    AssertFatal(alloc>=0,"Couldn't find a pucch allocation for ack nack (msg4)\n");
+    if (alloc<0) {
+      LOG_D(NR_MAC,"Couldn't find a pucch allocation for ack nack (msg4) in frame %d slot %d\n",frameP,slotP);
+      return;
+    }
+
+    LOG_I(NR_MAC,"Generate msg4, rnti: %04x\n", ra->rnti);
+
+    /* get the PID of a HARQ process awaiting retransmission, or -1 otherwise */
+    int current_harq_pid = sched_ctrl->retrans_dl_harq.head;
+    // HARQ management
+    if (current_harq_pid < 0) {
+      AssertFatal(sched_ctrl->available_dl_harq.head >= 0,
+                  "UE context not initialized: no HARQ processes found\n");
+      current_harq_pid = sched_ctrl->available_dl_harq.head;
+      remove_front_nr_list(&sched_ctrl->available_dl_harq);
+    }
+    NR_UE_harq_t *harq = &sched_ctrl->harq_processes[current_harq_pid];
+    DevAssert(!harq->is_waiting);
+    add_tail_nr_list(&sched_ctrl->feedback_dl_harq, current_harq_pid);
+    harq->is_waiting = true;
+    ra->harq_pid = current_harq_pid;
+
+    // Remove UE associated to TC-RNTI
+    if(harq->round==0 && ra->msg3_dcch_dtch) {
+      mac_remove_nr_ue(nr_mac, tc_rnti);
+    }
+
     NR_sched_pucch_t *pucch = &sched_ctrl->sched_pucch[alloc];
     harq->feedback_slot = pucch->ul_slot;
     harq->feedback_frame = pucch->frame;
@@ -1554,51 +1533,15 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
       }
     }
 
-    // Calculate number of symbols
-    int startSymbolIndex, nrOfSymbols;
-    const int startSymbolAndLength = pdsch_TimeDomainAllocationList->list.array[time_domain_assignment]->startSymbolAndLength;
-    SLIV2SL(startSymbolAndLength, &startSymbolIndex, &nrOfSymbols);
-    AssertFatal(startSymbolIndex >= 0, "StartSymbolIndex is negative\n");
-
-    int mappingtype = pdsch_TimeDomainAllocationList->list.array[time_domain_assignment]->mappingType;
-
-    uint16_t dlDmrsSymbPos = fill_dmrs_mask(NULL,
-                                            scc->dmrs_TypeA_Position,
-                                            nrOfSymbols,
-                                            startSymbolIndex,
-                                            mappingtype,
-                                            1);
-
-    uint16_t N_DMRS_SLOT = get_num_dmrs(dlDmrsSymbPos);
-
-    NR_PDSCH_Config_t *pdsch_Config = dl_bwp->pdsch_Config;
-    int dmrsConfigType=0;
-    if (pdsch_Config &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup &&
-        pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup->dmrs_Type)
-      dmrsConfigType = 1;
-
-    nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData = 2;
-    if (nrOfSymbols == 2) {
-      nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData = 1;
-    }
-
-    AssertFatal(nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData == 1
-                || nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData == 2,
-                "nr_mac->schedCtrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData %d is not possible",
-                nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData);
-
-    uint8_t N_PRB_DMRS = 0;
-    if (dmrsConfigType==NFAPI_NR_DMRS_TYPE1) {
-      N_PRB_DMRS = nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData * 6;
-    }
-    else {
-      N_PRB_DMRS = nr_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData * 4;
-    }
+    uint8_t time_domain_assignment = get_dl_tda(nr_mac, scc, slotP);
+    NR_tda_info_t msg4_tda = nr_get_pdsch_tda_info(dl_bwp, time_domain_assignment);
+    NR_pdsch_dmrs_t dmrs_info = get_dl_dmrs_params(scc,
+                                                   dl_bwp,
+                                                   &msg4_tda,
+                                                   1);
 
     uint8_t mcsTableIdx = dl_bwp->mcsTableIdx;
-
+    uint8_t mcsIndex = 0;
     int rbStart = 0;
     int rbSize = 0;
     uint8_t tb_scaling = 0;
@@ -1609,17 +1552,17 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
         rbSize++;
       else
         mcsIndex++;
-      LOG_D(NR_MAC,"Calling nr_compute_tbs with N_PRB_DMRS %d, N_DMRS_SLOT %d\n",N_PRB_DMRS,N_DMRS_SLOT);
+      LOG_D(NR_MAC,"Calling nr_compute_tbs with N_PRB_DMRS %d, N_DMRS_SLOT %d\n",dmrs_info.N_PRB_DMRS,dmrs_info.N_DMRS_SLOT);
       harq->tb_size = nr_compute_tbs(nr_get_Qm_dl(mcsIndex, mcsTableIdx),
                                      nr_get_code_rate_dl(mcsIndex, mcsTableIdx),
-                                     rbSize, nrOfSymbols, N_PRB_DMRS * N_DMRS_SLOT, 0, tb_scaling,1) >> 3;
+                                     rbSize, msg4_tda.nrOfSymbols, dmrs_info.N_PRB_DMRS * dmrs_info.N_DMRS_SLOT, 0, tb_scaling,1) >> 3;
     } while (harq->tb_size < ra->mac_pdu_length && mcsIndex<=28);
 
     AssertFatal(harq->tb_size >= ra->mac_pdu_length,"Cannot allocate Msg4\n");
 
     int i = 0;
     while ((i < rbSize) && (rbStart + rbSize <= BWPSize)) {
-      if (vrb_map[BWPStart + rbStart + i]&SL_to_bitmap(startSymbolIndex, nrOfSymbols)) {
+      if (vrb_map[BWPStart + rbStart + i]&SL_to_bitmap(msg4_tda.startSymbolIndex, msg4_tda.nrOfSymbols)) {
         rbStart += i+1;
         i = 0;
       } else {
@@ -1687,18 +1630,18 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
     pdsch_pdu_rel15->nrOfLayers = 1;
     pdsch_pdu_rel15->transmissionScheme = 0;
     pdsch_pdu_rel15->refPoint = 0;
-    pdsch_pdu_rel15->dmrsConfigType = dmrsConfigType;
+    pdsch_pdu_rel15->dmrsConfigType = dmrs_info.dmrsConfigType;
     pdsch_pdu_rel15->dlDmrsScramblingId = *scc->physCellId;
     pdsch_pdu_rel15->SCID = 0;
-    pdsch_pdu_rel15->numDmrsCdmGrpsNoData = nrOfSymbols <= 2 ? 1 : 2;
+    pdsch_pdu_rel15->numDmrsCdmGrpsNoData = dmrs_info.numDmrsCdmGrpsNoData;
     pdsch_pdu_rel15->dmrsPorts = 1;
     pdsch_pdu_rel15->resourceAlloc = 1;
     pdsch_pdu_rel15->rbStart = rbStart;
     pdsch_pdu_rel15->rbSize = rbSize;
     pdsch_pdu_rel15->VRBtoPRBMapping = 0;
-    pdsch_pdu_rel15->StartSymbolIndex = startSymbolIndex;
-    pdsch_pdu_rel15->NrOfSymbols = nrOfSymbols;
-    pdsch_pdu_rel15->dlDmrsSymbPos = dlDmrsSymbPos;
+    pdsch_pdu_rel15->StartSymbolIndex = msg4_tda.startSymbolIndex;
+    pdsch_pdu_rel15->NrOfSymbols = msg4_tda.nrOfSymbols;
+    pdsch_pdu_rel15->dlDmrsSymbPos = dmrs_info.dl_dmrs_symb_pos;
 
     int x_Overhead = 0;
     nr_get_tbs_dl(&dl_tti_pdsch_pdu->pdsch_pdu, x_Overhead, pdsch_pdu_rel15->numDmrsCdmGrpsNoData, tb_scaling);
@@ -1811,7 +1754,7 @@ void nr_generate_Msg4(module_id_t module_idP, int CC_id, frame_t frameP, sub_fra
                        CCEIndex,
                        aggregation_level);
     for (int rb = 0; rb < pdsch_pdu_rel15->rbSize; rb++) {
-      vrb_map[BWPStart + rb + pdsch_pdu_rel15->rbStart] |= SL_to_bitmap(startSymbolIndex, nrOfSymbols);
+      vrb_map[BWPStart + rb + pdsch_pdu_rel15->rbStart] |= SL_to_bitmap(msg4_tda.startSymbolIndex, msg4_tda.nrOfSymbols);
     }
 
     LOG_D(NR_MAC,"BWPSize: %i\n", pdcch_pdu_rel15->BWPSize);
@@ -1894,8 +1837,6 @@ void nr_check_Msg4_Ack(module_id_t module_id, int CC_id, frame_t frame, sub_fram
       }
     } else {
       LOG_I(NR_MAC, "(UE %04x) Received Nack of RA-Msg4. Preparing retransmission!\n", ra->rnti);
-      ra->Msg4_frame = (frame + 1) % 1024;
-      ra->Msg4_slot = 1;
       ra->state = Msg4;
     }
   }
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_bch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_bch.c
index 3895ffe42cff0cdca6619c6f383b57159cdb35a0..0438070da3987fa798c89eeb7ae0aa64f17f44eb 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_bch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_bch.c
@@ -281,9 +281,8 @@ uint32_t schedule_control_sib1(module_id_t module_id,
                                int CC_id,
                                NR_Type0_PDCCH_CSS_config_t *type0_PDCCH_CSS_config,
                                int time_domain_allocation,
-                               int startSymbolIndex,
-                               int nrOfSymbols,
-                               uint16_t dlDmrsSymbPos,
+                               NR_pdsch_dmrs_t *dmrs_parms,
+                               NR_tda_info_t *tda_info,
                                uint8_t candidate_idx,
                                uint16_t num_total_bytes) {
 
@@ -309,8 +308,11 @@ uint32_t schedule_control_sib1(module_id_t module_id,
                                                                  type0_PDCCH_CSS_config);
   }
 
-  gNB_mac->sched_ctrlCommon->pdsch_semi_static.time_domain_allocation = time_domain_allocation;
-  gNB_mac->sched_ctrlCommon->sched_pdsch.mcs = 0; // starting from mcs 0
+  NR_sched_pdsch_t *pdsch = &gNB_mac->sched_ctrlCommon->sched_pdsch;
+  pdsch->time_domain_allocation = time_domain_allocation;
+  pdsch->dmrs_parms = *dmrs_parms;
+  pdsch->tda_info = *tda_info;
+  pdsch->mcs = 0; // starting from mcs 0
   gNB_mac->sched_ctrlCommon->num_total_bytes = num_total_bytes;
 
   uint8_t nr_of_candidates;
@@ -333,58 +335,49 @@ uint32_t schedule_control_sib1(module_id_t module_id,
   const uint16_t bwpSize = type0_PDCCH_CSS_config->num_rbs;
   int rbStart = type0_PDCCH_CSS_config->cset_start_rb;
 
-  // TODO: There are exceptions to this in table 5.1.2.1.1-4,5 (Default time domain allocation tables B, C)
-  int mappingtype = (startSymbolIndex <= 3)? typeA: typeB;
-
-  if (nrOfSymbols == 2) {
-    gNB_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData = 1;
-  } else {
-    gNB_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData = 2;
-  }
-
   // Calculate number of PRB_DMRS
-  uint8_t N_PRB_DMRS = gNB_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData * 6;
-  uint16_t dmrs_length = get_num_dmrs(dlDmrsSymbPos);
-  LOG_D(MAC,"dlDmrsSymbPos %x\n",dlDmrsSymbPos);
+  uint8_t N_PRB_DMRS = pdsch->dmrs_parms.N_PRB_DMRS;
+  uint16_t dmrs_length = pdsch->dmrs_parms.N_DMRS_SLOT;
+  LOG_D(MAC,"dlDmrsSymbPos %x\n",pdsch->dmrs_parms.dl_dmrs_symb_pos);
   int mcsTableIdx = 0;
   int rbSize = 0;
   uint32_t TBS = 0;
   do {
-    if(rbSize < bwpSize && !(vrb_map[rbStart + rbSize]&SL_to_bitmap(startSymbolIndex, nrOfSymbols)))
+    if(rbSize < bwpSize && !(vrb_map[rbStart + rbSize]&SL_to_bitmap(tda_info->startSymbolIndex, tda_info->nrOfSymbols)))
       rbSize++;
     else{
-      if (gNB_mac->sched_ctrlCommon->sched_pdsch.mcs<10)
-        gNB_mac->sched_ctrlCommon->sched_pdsch.mcs++;
+      if (pdsch->mcs<10)
+        pdsch->mcs++;
       else
         break;
     }
-    TBS = nr_compute_tbs(nr_get_Qm_dl(gNB_mac->sched_ctrlCommon->sched_pdsch.mcs, mcsTableIdx),
-                         nr_get_code_rate_dl(gNB_mac->sched_ctrlCommon->sched_pdsch.mcs, mcsTableIdx),
-                         rbSize, nrOfSymbols, N_PRB_DMRS * dmrs_length,0, 0,1) >> 3;
+    TBS = nr_compute_tbs(nr_get_Qm_dl(pdsch->mcs, mcsTableIdx),
+                         nr_get_code_rate_dl(pdsch->mcs, mcsTableIdx),
+                         rbSize, tda_info->nrOfSymbols, N_PRB_DMRS * dmrs_length,0, 0,1) >> 3;
   } while (TBS < gNB_mac->sched_ctrlCommon->num_total_bytes);
 
   AssertFatal(TBS>=gNB_mac->sched_ctrlCommon->num_total_bytes,"Couldn't allocate enough resources for %d bytes in SIB1 PDSCH\n",
               gNB_mac->sched_ctrlCommon->num_total_bytes);
 
-  gNB_mac->sched_ctrlCommon->sched_pdsch.rbSize = rbSize;
-  gNB_mac->sched_ctrlCommon->sched_pdsch.rbStart = 0;
+  pdsch->rbSize = rbSize;
+  pdsch->rbStart = 0;
 
-  LOG_D(NR_MAC,"mcs = %i\n", gNB_mac->sched_ctrlCommon->sched_pdsch.mcs);
-  LOG_D(NR_MAC,"startSymbolIndex = %i\n", startSymbolIndex);
-  LOG_D(NR_MAC,"nrOfSymbols = %i\n", nrOfSymbols);
-  LOG_D(NR_MAC, "rbSize = %i\n", gNB_mac->sched_ctrlCommon->sched_pdsch.rbSize);
+  LOG_D(NR_MAC,"mcs = %i\n", pdsch->mcs);
+  LOG_D(NR_MAC,"startSymbolIndex = %i\n", tda_info->startSymbolIndex);
+  LOG_D(NR_MAC,"nrOfSymbols = %i\n", tda_info->nrOfSymbols);
+  LOG_D(NR_MAC, "rbSize = %i\n", pdsch->rbSize);
   LOG_D(NR_MAC,"TBS = %i\n", TBS);
   LOG_D(NR_MAC,"dmrs_length %d\n",dmrs_length);
   LOG_D(NR_MAC,"N_PRB_DMRS = %d\n",N_PRB_DMRS);
-  LOG_D(NR_MAC,"mappingtype = %d\n", mappingtype);
+  LOG_D(NR_MAC,"mappingtype = %d\n", tda_info->mapping_type);
   // Mark the corresponding RBs as used
   fill_pdcch_vrb_map(gNB_mac,
                      CC_id,
                      &gNB_mac->sched_ctrlCommon->sched_pdcch,
                      gNB_mac->sched_ctrlCommon->cce_index,
                      gNB_mac->sched_ctrlCommon->aggregation_level);
-  for (int rb = 0; rb < gNB_mac->sched_ctrlCommon->sched_pdsch.rbSize; rb++) {
-    vrb_map[rb + rbStart] |= SL_to_bitmap(startSymbolIndex, nrOfSymbols);
+  for (int rb = 0; rb < pdsch->rbSize; rb++) {
+    vrb_map[rb + rbStart] |= SL_to_bitmap(tda_info->startSymbolIndex, tda_info->nrOfSymbols);
   }
   return TBS;
 }
@@ -395,13 +388,13 @@ void nr_fill_nfapi_dl_sib1_pdu(int Mod_idP,
                                NR_Type0_PDCCH_CSS_config_t *type0_PDCCH_CSS_config,
                                uint32_t TBS,
                                int StartSymbolIndex,
-                               int NrOfSymbols,
-                               uint16_t dlDmrsSymbPos) {
+                               int NrOfSymbols) {
 
   gNB_MAC_INST *gNB_mac = RC.nrmac[Mod_idP];
   NR_COMMON_channels_t *cc = gNB_mac->common_channels;
   NR_ServingCellConfigCommon_t *scc = cc->ServingCellConfigCommon;
   int mcsTableIdx = 0;
+  NR_sched_pdsch_t *pdsch = &gNB_mac->sched_ctrlCommon->sched_pdsch;
   nfapi_nr_dl_tti_request_pdu_t *dl_tti_pdcch_pdu = &dl_req->dl_tti_pdu_list[dl_req->nPDUs];
   memset((void*)dl_tti_pdcch_pdu,0,sizeof(nfapi_nr_dl_tti_request_pdu_t));
   dl_tti_pdcch_pdu->PDUType = NFAPI_NR_DL_TTI_PDCCH_PDU_TYPE;
@@ -433,11 +426,11 @@ void nr_fill_nfapi_dl_sib1_pdu(int Mod_idP,
   pdsch_pdu_rel15->CyclicPrefix = 0;
 
   pdsch_pdu_rel15->NrOfCodewords = 1;
-  pdsch_pdu_rel15->targetCodeRate[0] = nr_get_code_rate_dl(gNB_mac->sched_ctrlCommon->sched_pdsch.mcs, mcsTableIdx);
-  pdsch_pdu_rel15->qamModOrder[0] = nr_get_Qm_dl(gNB_mac->sched_ctrlCommon->sched_pdsch.mcs, mcsTableIdx);
-  pdsch_pdu_rel15->mcsIndex[0] = gNB_mac->sched_ctrlCommon->sched_pdsch.mcs;
+  pdsch_pdu_rel15->targetCodeRate[0] = nr_get_code_rate_dl(pdsch->mcs, mcsTableIdx);
+  pdsch_pdu_rel15->qamModOrder[0] = nr_get_Qm_dl(pdsch->mcs, mcsTableIdx);
+  pdsch_pdu_rel15->mcsIndex[0] = pdsch->mcs;
   pdsch_pdu_rel15->mcsTable[0] = mcsTableIdx;
-  pdsch_pdu_rel15->rvIndex[0] = 0;
+  pdsch_pdu_rel15->rvIndex[0] = nr_rv_round_map[0];
   pdsch_pdu_rel15->dataScramblingId = *scc->physCellId;
   pdsch_pdu_rel15->nrOfLayers = 1;
   pdsch_pdu_rel15->transmissionScheme = 0;
@@ -445,16 +438,16 @@ void nr_fill_nfapi_dl_sib1_pdu(int Mod_idP,
   pdsch_pdu_rel15->dmrsConfigType = 0;
   pdsch_pdu_rel15->dlDmrsScramblingId = *scc->physCellId;
   pdsch_pdu_rel15->SCID = 0;
-  pdsch_pdu_rel15->numDmrsCdmGrpsNoData = gNB_mac->sched_ctrlCommon->pdsch_semi_static.numDmrsCdmGrpsNoData;
+  pdsch_pdu_rel15->numDmrsCdmGrpsNoData = pdsch->dmrs_parms.numDmrsCdmGrpsNoData;
   pdsch_pdu_rel15->dmrsPorts = 1;
   pdsch_pdu_rel15->resourceAlloc = 1;
-  pdsch_pdu_rel15->rbStart = gNB_mac->sched_ctrlCommon->sched_pdsch.rbStart;
-  pdsch_pdu_rel15->rbSize = gNB_mac->sched_ctrlCommon->sched_pdsch.rbSize;
+  pdsch_pdu_rel15->rbStart = pdsch->rbStart;
+  pdsch_pdu_rel15->rbSize = pdsch->rbSize;
   pdsch_pdu_rel15->VRBtoPRBMapping = 0;
   pdsch_pdu_rel15->TBSize[0] = TBS;
   pdsch_pdu_rel15->StartSymbolIndex = StartSymbolIndex;
   pdsch_pdu_rel15->NrOfSymbols = NrOfSymbols;
-  pdsch_pdu_rel15->dlDmrsSymbPos = dlDmrsSymbPos;
+  pdsch_pdu_rel15->dlDmrsSymbPos = pdsch->dmrs_parms.dl_dmrs_symb_pos;
   LOG_D(NR_MAC,"sib1:bwpStart %d, bwpSize %d\n",pdsch_pdu_rel15->BWPStart,pdsch_pdu_rel15->BWPSize);
   LOG_D(NR_MAC,"sib1:rbStart %d, rbSize %d\n",pdsch_pdu_rel15->rbStart,pdsch_pdu_rel15->rbSize);
   LOG_D(NR_MAC,"sib1:dlDmrsSymbPos = 0x%x\n", pdsch_pdu_rel15->dlDmrsSymbPos);
@@ -485,8 +478,8 @@ void nr_fill_nfapi_dl_sib1_pdu(int Mod_idP,
   dci_payload.frequency_domain_assignment.val = PRBalloc_to_locationandbandwidth0(
       pdsch_pdu_rel15->rbSize, pdsch_pdu_rel15->rbStart, type0_PDCCH_CSS_config->num_rbs);
 
-  dci_payload.time_domain_assignment.val = gNB_mac->sched_ctrlCommon->pdsch_semi_static.time_domain_allocation;
-  dci_payload.mcs = gNB_mac->sched_ctrlCommon->sched_pdsch.mcs;
+  dci_payload.time_domain_assignment.val = gNB_mac->sched_ctrlCommon->sched_pdsch.time_domain_allocation;
+  dci_payload.mcs = pdsch->mcs;
   dci_payload.rv = pdsch_pdu_rel15->rvIndex[0];
   dci_payload.harq_pid = 0;
   dci_payload.ndi = 0;
@@ -587,22 +580,29 @@ void schedule_nr_sib1(module_id_t module_idP, frame_t frameP, sub_frame_t slotP)
 
       AssertFatal((startSymbolIndex+nrOfSymbols)<14,"SIB1 TDA %d would cause overlap with CSI-RS. Please select a different SIB1 TDA.\n",time_domain_allocation);
 
-      int mappingtype = is_typeA? typeA: typeB;
-      uint16_t dlDmrsSymbPos = fill_dmrs_mask(NULL, gNB_mac->common_channels->ServingCellConfigCommon->dmrs_TypeA_Position, nrOfSymbols, startSymbolIndex, mappingtype, 1);
+      NR_tda_info_t tda_info = {
+        .mapping_type = is_typeA ? typeA : typeB,
+        .startSymbolIndex = startSymbolIndex,
+        .nrOfSymbols = nrOfSymbols
+      };
+
+      NR_pdsch_dmrs_t dmrs_parms = get_dl_dmrs_params(scc,
+                                                      NULL,
+                                                      &tda_info,
+                                                      1);
 
       // Configure sched_ctrlCommon for SIB1
       uint32_t TBS = schedule_control_sib1(module_idP, CC_id,
                                            type0_PDCCH_CSS_config,
                                            time_domain_allocation,
-                                           startSymbolIndex,
-                                           nrOfSymbols,
-                                           dlDmrsSymbPos,
+                                           &dmrs_parms,
+                                           &tda_info,
                                            candidate_idx,
                                            sib1_sdu_length);
 
       nfapi_nr_dl_tti_request_body_t *dl_req = &gNB_mac->DL_req[CC_id].dl_tti_request_body;
       int pdu_index = gNB_mac->pdu_index[0]++;
-      nr_fill_nfapi_dl_sib1_pdu(module_idP, dl_req, pdu_index, type0_PDCCH_CSS_config, TBS, startSymbolIndex, nrOfSymbols, dlDmrsSymbPos);
+      nr_fill_nfapi_dl_sib1_pdu(module_idP, dl_req, pdu_index, type0_PDCCH_CSS_config, TBS, startSymbolIndex, nrOfSymbols);
 
       const int ntx_req = gNB_mac->TX_req[CC_id].Number_of_PDUs;
       nfapi_nr_pdu_t *tx_req = &gNB_mac->TX_req[CC_id].pdu_list[ntx_req];
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
index 597477c3d346a37d57a53401a73ad8d8de138583..3215ab176458e1670e0b5c0bd9d2feb7b0044943 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
@@ -67,7 +67,7 @@ const int get_dl_tda(const gNB_MAC_INST *nrmac, const NR_ServingCellConfigCommon
     if ((slot%nr_slots_period) == tdd->nrofDownlinkSlots)
       return 2;
   }
-  return 0; // if FDD or not mixed slot in TDD, for now use default TDA (TODO handle CSI-RS slots)
+  return 0; // if FDD or not mixed slot in TDD, for now use default TDA
 }
 
 // Compute and write all MAC CEs and subheaders, and return number of written
@@ -391,17 +391,13 @@ bool allocate_dl_retransmission(module_id_t module_id,
   NR_UE_DL_BWP_t *dl_bwp = &UE->current_DL_BWP;
   NR_UE_UL_BWP_t *ul_bwp = &UE->current_UL_BWP;
   NR_sched_pdsch_t *retInfo = &sched_ctrl->harq_processes[current_harq_pid].sched_pdsch;
-  NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
-
-  //TODO remove this and handle retransmission with old nrOfLayers
-  //     once ps structure is removed
-  if(ps->nrOfLayers < retInfo->nrOfLayers) {
-    LOG_W(NR_MAC,"Cannot schedule retransmission. RI changed from %d to %d\n",
-          retInfo->nrOfLayers, ps->nrOfLayers);
-    abort_nr_dl_harq(UE, current_harq_pid);
-    remove_front_nr_list(&sched_ctrl->retrans_dl_harq);
-    return false;
-  }
+  NR_sched_pdsch_t *curInfo = &sched_ctrl->sched_pdsch;
+
+  // If the RI changed between the current retransmission and the previous transmission,
+  // we need to verify that it has not decreased,
+  // otherwise it wouldn't be possible to transmit the same TBS
+  int layers = (curInfo->nrOfLayers < retInfo->nrOfLayers) ? curInfo->nrOfLayers : retInfo->nrOfLayers;
+  int pm_index = (curInfo->nrOfLayers < retInfo->nrOfLayers) ? curInfo->pm_index : retInfo->pm_index;
 
   const int coresetid = sched_ctrl->coreset->controlResourceSetId;
   const uint16_t bwpSize = coresetid == 0 ? RC.nrmac[module_id]->cset0_bwp_size : dl_bwp->BWPSize;
@@ -411,13 +407,16 @@ bool allocate_dl_retransmission(module_id_t module_id,
   const int tda = get_dl_tda(RC.nrmac[module_id], scc, slot);
   AssertFatal(tda>=0,"Unable to find PDSCH time domain allocation in list\n");
 
-  if (tda == retInfo->time_domain_allocation) {
+  if (tda == retInfo->time_domain_allocation &&
+      layers == retInfo->nrOfLayers) {
+
+    NR_tda_info_t *tda_info = &retInfo->tda_info;
     /* Check that there are enough resources for retransmission */
     while (rbSize < retInfo->rbSize) {
       rbStart += rbSize; /* last iteration rbSize was not enough, skip it */
       rbSize = 0;
 
-      const int slbitmap = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+      const int slbitmap = SL_to_bitmap(tda_info->startSymbolIndex, tda_info->nrOfSymbols);
       while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
         rbStart++;
 
@@ -431,30 +430,16 @@ bool allocate_dl_retransmission(module_id_t module_id,
              rbSize < retInfo->rbSize)
         rbSize++;
     }
-
-    /* check whether we need to switch the TDA allocation since the last
-     * (re-)transmission */
-    if (ps->time_domain_allocation != tda) {
-      nr_set_pdsch_semi_static(dl_bwp,
-                               scc,
-                               tda,
-                               ps->nrOfLayers,
-                               sched_ctrl,
-                               ps);
-    }
   } else {
     /* the retransmission will use a different time domain allocation, check
      * that we have enough resources */
-    NR_pdsch_semi_static_t temp_ps = *ps;
+    NR_tda_info_t temp_tda = nr_get_pdsch_tda_info(dl_bwp, tda);
+    NR_pdsch_dmrs_t temp_dmrs = get_dl_dmrs_params(scc,
+                                                   dl_bwp,
+                                                   &temp_tda,
+                                                   layers);
 
-    nr_set_pdsch_semi_static(dl_bwp,
-                             scc,
-                             tda,
-                             ps->nrOfLayers,
-                             sched_ctrl,
-                             &temp_ps);
-
-    const uint16_t slbitmap = SL_to_bitmap(temp_ps.startSymbolIndex, temp_ps.nrOfSymbols);
+    const uint16_t slbitmap = SL_to_bitmap(temp_tda.startSymbolIndex, temp_tda.nrOfSymbols);
     while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
       rbStart++;
 
@@ -465,9 +450,9 @@ bool allocate_dl_retransmission(module_id_t module_id,
     uint16_t new_rbSize;
     bool success = nr_find_nb_rb(retInfo->Qm,
                                  retInfo->R,
-                                 temp_ps.nrOfLayers,
-                                 temp_ps.nrOfSymbols,
-                                 temp_ps.N_PRB_DMRS * temp_ps.N_DMRS_SLOT,
+                                 layers,
+                                 temp_tda.nrOfSymbols,
+                                 temp_dmrs.N_PRB_DMRS * temp_dmrs.N_DMRS_SLOT,
                                  retInfo->tb_size,
                                  1, /* minimum of 1RB: need to find exact TBS, don't preclude any number */
                                  rbSize,
@@ -484,7 +469,10 @@ bool allocate_dl_retransmission(module_id_t module_id,
     retInfo->tb_size = new_tbs;
     retInfo->rbSize = new_rbSize;
     retInfo->time_domain_allocation = tda;
-    sched_ctrl->pdsch_semi_static = temp_ps;
+    retInfo->nrOfLayers = layers;
+    retInfo->pm_index = pm_index;
+    retInfo->dmrs_parms = temp_dmrs;
+    retInfo->tda_info = temp_tda;
   }
 
   /* Find a free CCE */
@@ -544,7 +532,7 @@ bool allocate_dl_retransmission(module_id_t module_id,
   *n_rb_sched -= sched_ctrl->sched_pdsch.rbSize;
 
   for (int rb = 0; rb < sched_ctrl->sched_pdsch.rbSize; rb++)
-    rballoc_mask[rb + sched_ctrl->sched_pdsch.rbStart] ^= SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    rballoc_mask[rb + sched_ctrl->sched_pdsch.rbStart] ^= SL_to_bitmap(retInfo->tda_info.startSymbolIndex, retInfo->tda_info.nrOfSymbols);
 
   return true;
 }
@@ -585,10 +573,8 @@ void pf_dl(module_id_t module_id,
 
     const NR_mac_dir_stats_t *stats = &UE->mac_stats.dl;
     NR_sched_pdsch_t *sched_pdsch = &sched_ctrl->sched_pdsch;
-    NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
     /* get the PID of a HARQ process awaiting retrnasmission, or -1 otherwise */
     sched_pdsch->dl_harq_pid = sched_ctrl->retrans_dl_harq.head;
-    UE->layers = ps->nrOfLayers; // initialization of layers to the previous value in the structure
     /* Calculate Throughput */
     const float a = 0.0005f; // corresponds to 200ms window
     const uint32_t b = UE->mac_stats.dl.current_bytes;
@@ -633,7 +619,10 @@ void pf_dl(module_id_t module_id,
         sched_pdsch->mcs = max_mcs;
       else
         sched_pdsch->mcs = get_mcs_from_bler(bo, stats, &sched_ctrl->dl_bler_stats, max_mcs, frame);
-      UE->layers = set_dl_nrOfLayers(sched_ctrl);
+      sched_pdsch->nrOfLayers = get_dl_nrOfLayers(sched_ctrl, current_BWP->dci_format);
+      sched_pdsch->pm_index = get_pm_index(UE,
+                                           sched_pdsch->nrOfLayers,
+                                           mac->xp_pdsch_antenna_ports);
       const uint8_t Qm = nr_get_Qm_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
       const uint16_t R = nr_get_code_rate_dl(sched_pdsch->mcs, current_BWP->mcsTableIdx);
       uint32_t tbs = nr_compute_tbs(Qm,
@@ -643,7 +632,7 @@ void pf_dl(module_id_t module_id,
                                     0, /* N_PRB_DMRS * N_DMRS_SLOT */
                                     0 /* N_PRB_oh, 0 for initialBWP */,
                                     0 /* tb_scaling */,
-                                    UE->layers) >> 3;
+                                    sched_pdsch->nrOfLayers) >> 3;
       float coeff_ue = (float) tbs / UE->dl_thr_ue;
       LOG_D(NR_MAC,"UE %04x b %d, thr_ue %f, tbs %d, coeff_ue %f\n",
             UE->rnti, b, UE->dl_thr_ue, tbs, coeff_ue);
@@ -732,21 +721,14 @@ void pf_dl(module_id_t module_id,
                        sched_ctrl->aggregation_level);
 
     /* MCS has been set above */
-    const int tda = get_dl_tda(RC.nrmac[module_id], scc, slot);
-    AssertFatal(tda>=0,"Unable to find PDSCH time domain allocation in list\n");
     NR_sched_pdsch_t *sched_pdsch = &sched_ctrl->sched_pdsch;
-    NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
-
-    if (ps->nrOfLayers != iterator->UE->layers || ps->time_domain_allocation != tda ) {
-      nr_set_pdsch_semi_static(dl_bwp,
-                               scc,
-                               tda,
-                               iterator->UE->layers,
-                               sched_ctrl,
-                               ps);
-    }
+    sched_pdsch->time_domain_allocation = get_dl_tda(RC.nrmac[module_id], scc, slot);
+    AssertFatal(sched_pdsch->time_domain_allocation>=0,"Unable to find PDSCH time domain allocation in list\n");
+
+    sched_pdsch->tda_info = nr_get_pdsch_tda_info(dl_bwp, sched_pdsch->time_domain_allocation);
+    NR_tda_info_t *tda_info = &sched_pdsch->tda_info;
 
-    const uint16_t slbitmap = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    const uint16_t slbitmap = SL_to_bitmap(tda_info->startSymbolIndex, tda_info->nrOfSymbols);
 
     // Freq-demain allocation
     while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
@@ -757,6 +739,10 @@ void pf_dl(module_id_t module_id,
     while (rbStart + max_rbSize < bwpSize && (rballoc_mask[rbStart + max_rbSize] & slbitmap) == slbitmap)
       max_rbSize++;
 
+    sched_pdsch->dmrs_parms = get_dl_dmrs_params(scc,
+                                                 dl_bwp,
+                                                 tda_info,
+                                                 sched_pdsch->nrOfLayers);
     sched_pdsch->Qm = nr_get_Qm_dl(sched_pdsch->mcs, dl_bwp->mcsTableIdx);
     sched_pdsch->R = nr_get_code_rate_dl(sched_pdsch->mcs, dl_bwp->mcsTableIdx);
     sched_pdsch->pucch_allocation = alloc;
@@ -770,9 +756,9 @@ void pf_dl(module_id_t module_id,
     //const int oh = 3 * sched_ctrl->dl_pdus_total + 2 * (frame == (sched_ctrl->ta_frame + 10) % 1024);
     nr_find_nb_rb(sched_pdsch->Qm,
                   sched_pdsch->R,
-                  ps->nrOfLayers,
-                  ps->nrOfSymbols,
-                  ps->N_PRB_DMRS * ps->N_DMRS_SLOT,
+                  sched_pdsch->nrOfLayers,
+                  tda_info->nrOfSymbols,
+                  sched_pdsch->dmrs_parms.N_PRB_DMRS * sched_pdsch->dmrs_parms.N_DMRS_SLOT,
                   sched_ctrl->num_total_bytes + oh,
                   min_rbSize,
                   max_rbSize,
@@ -912,12 +898,9 @@ void nr_schedule_ue_spec(module_id_t module_id,
       continue;
 
     const rnti_t rnti = UE->rnti;
-    /* pre-computed PDSCH values that only change if time domain
-     * allocation/DMRS parameters change. Updated in the preprocessor through
-     * nr_set_pdsch_semi_static() */
-    NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
+
     /* POST processing */
-    const uint8_t nrOfLayers = ps->nrOfLayers;
+    const uint8_t nrOfLayers = sched_pdsch->nrOfLayers;
     const uint16_t R = sched_pdsch->R;
     const uint8_t Qm = sched_pdsch->Qm;
     const uint32_t TBS = sched_pdsch->tb_size;
@@ -941,6 +924,8 @@ void nr_schedule_ue_spec(module_id_t module_id,
         remove_nr_list(&sched_ctrl->retrans_dl_harq, current_harq_pid);
     }
 
+    NR_tda_info_t *tda_info = &sched_pdsch->tda_info;
+    NR_pdsch_dmrs_t *dmrs_parms = &sched_pdsch->dmrs_parms;
     NR_UE_harq_t *harq = &sched_ctrl->harq_processes[current_harq_pid];
     DevAssert(!harq->is_waiting);
     add_tail_nr_list(&sched_ctrl->feedback_dl_harq, current_harq_pid);
@@ -957,9 +942,9 @@ void nr_schedule_ue_spec(module_id_t module_id,
           sched_ctrl->aggregation_level,
           sched_pdsch->rbStart,
           sched_pdsch->rbSize,
-          ps->startSymbolIndex,
-          ps->nrOfSymbols,
-          ps->dl_dmrs_symb_pos,
+          tda_info->startSymbolIndex,
+          tda_info->nrOfSymbols,
+          dmrs_parms->dl_dmrs_symb_pos,
           sched_pdsch->mcs,
           nrOfLayers,
           TBS,
@@ -1033,11 +1018,11 @@ void nr_schedule_ue_spec(module_id_t module_id,
     pdsch_pdu->transmissionScheme = 0;
     pdsch_pdu->refPoint = 0; // Point A
     // DMRS
-    pdsch_pdu->dlDmrsSymbPos = ps->dl_dmrs_symb_pos;
-    pdsch_pdu->dmrsConfigType = ps->dmrsConfigType;
+    pdsch_pdu->dlDmrsSymbPos = dmrs_parms->dl_dmrs_symb_pos;
+    pdsch_pdu->dmrsConfigType = dmrs_parms->dmrsConfigType;
     pdsch_pdu->dlDmrsScramblingId = *scc->physCellId;
     pdsch_pdu->SCID = 0;
-    pdsch_pdu->numDmrsCdmGrpsNoData = ps->numDmrsCdmGrpsNoData;
+    pdsch_pdu->numDmrsCdmGrpsNoData = dmrs_parms->numDmrsCdmGrpsNoData;
     pdsch_pdu->dmrsPorts = (1<<nrOfLayers)-1;  // FIXME with a better implementation
     // Pdsch Allocation in frequency domain
     pdsch_pdu->resourceAlloc = 1;
@@ -1045,20 +1030,11 @@ void nr_schedule_ue_spec(module_id_t module_id,
     pdsch_pdu->rbSize = sched_pdsch->rbSize;
     pdsch_pdu->VRBtoPRBMapping = 1; // non-interleaved, check if this is ok for initialBWP
     // Resource Allocation in time domain
-    pdsch_pdu->StartSymbolIndex = ps->startSymbolIndex;
-    pdsch_pdu->NrOfSymbols = ps->nrOfSymbols;
+    pdsch_pdu->StartSymbolIndex = tda_info->startSymbolIndex;
+    pdsch_pdu->NrOfSymbols = tda_info->nrOfSymbols;
     // Precoding
-    if (sched_ctrl->set_pmi) {
-      const int report_id = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.csi_report_id;
-      nr_csi_report_t *csi_report = &UE->csi_report_template[report_id];
-      pdsch_pdu->precodingAndBeamforming.prg_size = pdsch_pdu->rbSize;
-      pdsch_pdu->precodingAndBeamforming.prgs_list[0].pm_idx = set_pm_index(sched_ctrl,
-                                                                            nrOfLayers,
-                                                                            csi_report->N1,
-                                                                            csi_report->N2,
-                                                                            gNB_mac->xp_pdsch_antenna_ports,
-                                                                            csi_report->codebook_mode);
-    }
+    pdsch_pdu->precodingAndBeamforming.prg_size = pdsch_pdu->rbSize;
+    pdsch_pdu->precodingAndBeamforming.prgs_list[0].pm_idx = sched_pdsch->pm_index;
     // TBS_LBRM according to section 5.4.2.1 of 38.212
     // TODO: verify the case where pdsch_servingcellconfig is NULL, in which case
     //       in principle maxMIMO_layers should be given by the maximum number of layers
@@ -1130,7 +1106,7 @@ void nr_schedule_ue_spec(module_id_t module_id,
                                                                                     pdsch_pdu->rbStart,
                                                                                     pdsch_pdu->BWPSize);
     dci_payload.format_indicator = 1;
-    dci_payload.time_domain_assignment.val = ps->time_domain_allocation;
+    dci_payload.time_domain_assignment.val = sched_pdsch->time_domain_allocation;
     dci_payload.mcs = sched_pdsch->mcs;
     dci_payload.rv = pdsch_pdu->rvIndex[0];
     dci_payload.harq_pid = current_harq_pid;
@@ -1139,7 +1115,7 @@ void nr_schedule_ue_spec(module_id_t module_id,
     dci_payload.tpc = sched_ctrl->tpc1; // TPC for PUCCH: table 7.2.1-1 in 38.213
     dci_payload.pucch_resource_indicator = pucch->resource_indicator;
     dci_payload.pdsch_to_harq_feedback_timing_indicator.val = pucch->timing_indicator; // PDSCH to HARQ TI
-    dci_payload.antenna_ports.val = ps->dmrs_ports_id;
+    dci_payload.antenna_ports.val = dmrs_parms->dmrs_ports_id;
     dci_payload.dmrs_sequence_initialization.val = pdsch_pdu->SCID;
     LOG_D(NR_MAC,
           "%4d.%2d DCI type 1 payload: freq_alloc %d (%d,%d,%d), "
@@ -1319,9 +1295,7 @@ void nr_schedule_ue_spec(module_id_t module_id,
       harq->sched_pdsch = *sched_pdsch;
       /* save which time allocation has been used, to be used on
        * retransmissions */
-      harq->sched_pdsch.time_domain_allocation = ps->time_domain_allocation;
-      /* save nr of layers for retransmissions */
-      harq->sched_pdsch.nrOfLayers = ps->nrOfLayers;
+      harq->sched_pdsch.time_domain_allocation = sched_pdsch->time_domain_allocation;
 
       // ta command is sent, values are reset
       if (sched_ctrl->ta_apply) {
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
index e474315de780568549570a1eff7e5f53acaa908a..5136c7fc53d563e4ae14da873a9a0b133df3d68b 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
@@ -179,8 +179,6 @@ void nr_schedule_css_dlsch_phytest(module_id_t   module_idP,
   }
 }
 
-extern int getNrOfSymbols(NR_BWP_Downlink_t *bwp, int tda);
-extern uint8_t getN_PRB_DMRS(NR_BWP_Downlink_t *bwp, int numDmrsCdmGrpsNoData);
 uint32_t target_dl_mcs = 9;
 uint32_t target_dl_Nl = 1;
 uint32_t target_dl_bw = 50;
@@ -199,10 +197,9 @@ void nr_preprocessor_phytest(module_id_t module_id,
   const int CC_id = 0;
 
   const int tda = get_dl_tda(RC.nrmac[module_id], scc, slot);
-  NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
-  ps->nrOfLayers = target_dl_Nl;
-  if (ps->time_domain_allocation != tda || ps->nrOfLayers != target_dl_Nl)
-    nr_set_pdsch_semi_static(dl_bwp, scc, tda, target_dl_Nl,sched_ctrl , ps);
+  NR_tda_info_t tda_info = nr_get_pdsch_tda_info(dl_bwp, tda);
+  sched_ctrl->sched_pdsch.tda_info = tda_info;
+  sched_ctrl->sched_pdsch.time_domain_allocation = tda;
 
   /* find largest unallocated chunk */
   const int bwpSize = dl_bwp->BWPSize;
@@ -217,12 +214,12 @@ void nr_preprocessor_phytest(module_id_t module_id,
   while (true) {
     /* advance to first free RB */
     while (rbStart < bwpSize &&
-           (vrb_map[rbStart + BWPStart]&SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols)))
+           (vrb_map[rbStart + BWPStart]&SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols)))
       rbStart++;
     rbSize = 1;
     /* iterate until we are at target_dl_bw or no available RBs */
     while (rbStart + rbSize < bwpSize &&
-           !(vrb_map[rbStart + rbSize + BWPStart]&SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols)) &&
+           !(vrb_map[rbStart + rbSize + BWPStart]&SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols)) &&
            rbSize < target_dl_bw)
       rbSize++;
     /* found target_dl_bw? */
@@ -308,18 +305,24 @@ void nr_preprocessor_phytest(module_id_t module_id,
   sched_pdsch->rbStart = rbStart;
   sched_pdsch->rbSize = rbSize;
 
+  sched_pdsch->dmrs_parms = get_dl_dmrs_params(scc,
+                                               dl_bwp,
+                                               &tda_info,
+                                               target_dl_Nl);
+
   sched_pdsch->mcs = target_dl_mcs;
+  sched_pdsch->nrOfLayers = target_dl_Nl;
   sched_pdsch->Qm = nr_get_Qm_dl(sched_pdsch->mcs, dl_bwp->mcsTableIdx);
   sched_pdsch->R = nr_get_code_rate_dl(sched_pdsch->mcs, dl_bwp->mcsTableIdx);
   sched_ctrl->dl_bler_stats.mcs = target_dl_mcs; /* for logging output */
   sched_pdsch->tb_size = nr_compute_tbs(sched_pdsch->Qm,
                                         sched_pdsch->R,
                                         sched_pdsch->rbSize,
-                                        ps->nrOfSymbols,
-                                        ps->N_PRB_DMRS * ps->N_DMRS_SLOT,
+                                        tda_info.nrOfSymbols,
+                                        sched_pdsch->dmrs_parms.N_PRB_DMRS * sched_pdsch->dmrs_parms.N_DMRS_SLOT,
                                         0 /* N_PRB_oh, 0 for initialBWP */,
                                         0 /* tb_scaling */,
-                                        ps->nrOfLayers)
+                                        sched_pdsch->nrOfLayers)
                          >> 3;
 
   /* get the PID of a HARQ process awaiting retransmission, or -1 otherwise */
@@ -327,7 +330,7 @@ void nr_preprocessor_phytest(module_id_t module_id,
 
   /* mark the corresponding RBs as used */
   for (int rb = 0; rb < sched_pdsch->rbSize; rb++)
-    vrb_map[rb + sched_pdsch->rbStart + BWPStart] = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    vrb_map[rb + sched_pdsch->rbStart + BWPStart] = SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols);
 
   if ((frame&127) == 0) LOG_D(MAC,"phytest: %d.%d DL mcs %d, DL rbStart %d, DL rbSize %d\n", frame, slot, sched_pdsch->mcs, rbStart,rbSize);
 }
@@ -378,14 +381,6 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
   if (!is_xlsch_in_slot(ulsch_slot_bitmap, sched_slot))
     return false;
 
-  /* we want to avoid a lengthy deduction of DMRS and other parameters in
-   * every TTI if we can save it, so check whether TDA, or
-   * num_dmrs_cdm_grps_no_data has changed and only then recompute */
-  NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
-  if (ps->time_domain_allocation != tda
-      || ps->nrOfLayers != target_ul_Nl)
-    nr_set_pusch_semi_static(ul_bwp, scc, tda, target_ul_Nl,ps);
-
   uint16_t rbStart = 0;
   uint16_t rbSize;
 
@@ -397,10 +392,13 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
   else
     rbSize = target_ul_bw;
 
+  NR_tda_info_t tda_info = nr_get_pusch_tda_info(ul_bwp, tda);
+  sched_ctrl->sched_pusch.tda_info = tda_info;
+
   uint16_t *vrb_map_UL =
       &RC.nrmac[module_id]->common_channels[CC_id].vrb_map_UL[sched_slot * MAX_BWP_SIZE];
   for (int i = rbStart; i < rbStart + rbSize; ++i) {
-    if ((vrb_map_UL[i+BWPStart] & SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols)) != 0) {
+    if ((vrb_map_UL[i+BWPStart] & SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols)) != 0) {
       LOG_E(MAC,
             "%s(): %4d.%2d RB %d is already reserved, cannot schedule UE\n",
             __func__,
@@ -452,7 +450,7 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
   sched_pusch->ul_harq_pid = sched_ctrl->retrans_ul_harq.head;
 
   /* Calculate TBS from MCS */
-  ps->nrOfLayers = target_ul_Nl;
+  sched_pusch->nrOfLayers = target_ul_Nl;
   sched_pusch->R = nr_get_code_rate_ul(mcs, ul_bwp->mcs_table);
   sched_pusch->Qm = nr_get_Qm_ul(mcs, ul_bwp->mcs_table);
   if (ul_bwp->pusch_Config->tp_pi2BPSK
@@ -460,14 +458,21 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
     sched_pusch->R >>= 1;
     sched_pusch->Qm <<= 1;
   }
+
+  NR_pusch_dmrs_t dmrs = get_ul_dmrs_params(scc,
+                                            ul_bwp,
+                                            &tda_info,
+                                            sched_pusch->nrOfLayers);
+  sched_ctrl->sched_pusch.dmrs_info = dmrs;
+
   sched_pusch->tb_size = nr_compute_tbs(sched_pusch->Qm,
                                         sched_pusch->R,
                                         sched_pusch->rbSize,
-                                        ps->nrOfSymbols,
-                                        ps->N_PRB_DMRS * ps->num_dmrs_symb,
+                                        tda_info.nrOfSymbols,
+                                        dmrs.N_PRB_DMRS * dmrs.num_dmrs_symb,
                                         0, // nb_rb_oh
                                         0,
-                                        ps->nrOfLayers /* NrOfLayers */)
+                                        sched_pusch->nrOfLayers /* NrOfLayers */)
                          >> 3;
 
   /* mark the corresponding RBs as used */
@@ -478,6 +483,6 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
                      sched_ctrl->aggregation_level);
 
   for (int rb = rbStart; rb < rbStart + rbSize; rb++)
-    vrb_map_UL[rb+BWPStart] |= SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    vrb_map_UL[rb+BWPStart] |= SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols);
   return true;
 }
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
index e991c2450a4da77468229d365d294ff40e1d0426..690bb2211b361ef7815053964320c7bde7672822 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
@@ -142,31 +142,39 @@ static inline uint8_t get_max_cces(uint8_t scs) {
   return (nr_max_number_of_cces_per_slot[scs]);
 }
 
-uint8_t set_dl_nrOfLayers(NR_UE_sched_ctrl_t *sched_ctrl) {
+uint8_t get_dl_nrOfLayers(const NR_UE_sched_ctrl_t *sched_ctrl,
+                          const nr_dci_format_t dci_format) {
 
   // TODO check this but it should be enough for now
-  // if there is not csi report RI is 0 from initialization
-  return (sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.ri + 1);
+  // if there is no CSI report activated, RI is 0 from initialization
+  if(dci_format == NR_DL_DCI_FORMAT_1_0)
+    return 1;
+  else
+    return (sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.ri + 1);
 
 }
 
-uint16_t set_pm_index(NR_UE_sched_ctrl_t *sched_ctrl,
+uint16_t get_pm_index(const NR_UE_info_t *UE,
                       int layers,
-                      int N1, int N2,
-                      int xp_pdsch_antenna_ports,
-                      int codebook_mode) {
+                      int xp_pdsch_antenna_ports) {
+
+  if (layers == 1) return 0;
+
+  const NR_UE_sched_ctrl_t *sched_ctrl = &UE->UE_sched_ctrl;
+  const int report_id = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.csi_report_id;
+  const nr_csi_report_t *csi_report = &UE->csi_report_template[report_id];
+  const int N1 = csi_report->N1;
+  const int N2 = csi_report->N2;
+  const int antenna_ports = (N1*N2)<<1;
 
-  int antenna_ports = (N1*N2)<<1;
   if (xp_pdsch_antenna_ports == 1 &&
       antenna_ports>1)
     return 0; //identity matrix (basic 5G configuration handled by PMI report is with XP antennas)
 
-  int x1 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x1;
-  int x2 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x2;
+  const int x1 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x1;
+  const int x2 = sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x2;
   LOG_D(NR_MAC,"PMI report: x1 %d x2 %d\n",x1,x2);
 
-  sched_ctrl->set_pmi = false;
-
   if (antenna_ports == 2)
     return x2;
   else
@@ -214,41 +222,68 @@ uint8_t get_mcs_from_cqi(int mcs_table, int cqi_table, int cqi_idx)
   return 9;
 }
 
+NR_pdsch_dmrs_t get_dl_dmrs_params(const NR_ServingCellConfigCommon_t *scc,
+                                   const NR_UE_DL_BWP_t *dl_bwp,
+                                   const NR_tda_info_t *tda_info,
+                                   const int Layers) {
 
-void set_dl_dmrs_ports(NR_pdsch_semi_static_t *ps) {
-
-  //TODO first basic implementation of dmrs port selection
-  //     only vaild for a single codeword
-  //     for now it assumes a selection of Nl consecutive dmrs ports
-  //     and a single front loaded symbol
-  //     dmrs_ports_id is the index of Tables 7.3.1.2.2-1/2/3/4
-  //     number of front loaded symbols need to be consistent with maxLength
-  //     when a more complete implementation is done
+  NR_pdsch_dmrs_t dmrs = {0};
+  int frontloaded_symb = 1; // default value
+  nr_dci_format_t dci_format = dl_bwp ? dl_bwp->dci_format : NR_DL_DCI_FORMAT_1_0;
+  if (dci_format == NR_DL_DCI_FORMAT_1_0) {
+    dmrs.numDmrsCdmGrpsNoData = tda_info->nrOfSymbols == 2 ? 1 : 2;
+    dmrs.dmrs_ports_id = 0;
+  }
+  else {
+    //TODO first basic implementation of dmrs port selection
+    //     only valid for a single codeword
+    //     for now it assumes a selection of Nl consecutive dmrs ports
+    //     and a single front loaded symbol
+    //     dmrs_ports_id is the index of Tables 7.3.1.2.2-1/2/3/4
+    //     number of front loaded symbols need to be consistent with maxLength
+    //     when a more complete implementation is done
+
+    switch (Layers) {
+      case 1:
+        dmrs.dmrs_ports_id = 0;
+        dmrs.numDmrsCdmGrpsNoData = 1;
+        frontloaded_symb = 1;
+        break;
+      case 2:
+        dmrs.dmrs_ports_id = 2;
+        dmrs.numDmrsCdmGrpsNoData = 1;
+        frontloaded_symb = 1;
+        break;
+      case 3:
+        dmrs.dmrs_ports_id = 9;
+        dmrs.numDmrsCdmGrpsNoData = 2;
+        frontloaded_symb = 1;
+        break;
+      case 4:
+        dmrs.dmrs_ports_id = 10;
+        dmrs.numDmrsCdmGrpsNoData = 2;
+        frontloaded_symb = 1;
+        break;
+      default:
+        AssertFatal(1==0,"Number of layers %d\n not supported or not valid\n",Layers);
+    }
+  }
 
-  switch (ps->nrOfLayers) {
-    case 1:
-      ps->dmrs_ports_id = 0;
-      ps->numDmrsCdmGrpsNoData = 1;
-      ps->frontloaded_symb = 1;
-      break;
-    case 2:
-      ps->dmrs_ports_id = 2;
-      ps->numDmrsCdmGrpsNoData = 1;
-      ps->frontloaded_symb = 1;
-      break;
-    case 3:
-      ps->dmrs_ports_id = 9;
-      ps->numDmrsCdmGrpsNoData = 2;
-      ps->frontloaded_symb = 1;
-      break;
-    case 4:
-      ps->dmrs_ports_id = 10;
-      ps->numDmrsCdmGrpsNoData = 2;
-      ps->frontloaded_symb = 1;
-      break;
-    default:
-      AssertFatal(1==0,"Number of layers %d\n not supported or not valid\n",ps->nrOfLayers);
+  NR_PDSCH_Config_t *pdsch_Config = dl_bwp ? dl_bwp->pdsch_Config : NULL;
+  if (pdsch_Config) {
+    if (tda_info->mapping_type == typeB)
+      dmrs.dmrsConfigType = pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeB->choice.setup->dmrs_Type != NULL;
+    else
+      dmrs.dmrsConfigType = pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup->dmrs_Type != NULL;
   }
+  else
+    dmrs.dmrsConfigType = NFAPI_NR_DMRS_TYPE1;
+
+  dmrs.N_PRB_DMRS = dmrs.numDmrsCdmGrpsNoData * (dmrs.dmrsConfigType == NFAPI_NR_DMRS_TYPE1 ? 6 : 4);
+  dmrs.dl_dmrs_symb_pos = fill_dmrs_mask(pdsch_Config, scc->dmrs_TypeA_Position, tda_info->nrOfSymbols, tda_info->startSymbolIndex, tda_info->mapping_type, frontloaded_symb);
+  dmrs.N_DMRS_SLOT = get_num_dmrs(dmrs.dl_dmrs_symb_pos);
+  LOG_D(NR_MAC,"Filling dmrs info, ps->N_PRB_DMRS %d, ps->dl_dmrs_symb_pos %x, ps->N_DMRS_SLOT %d\n",dmrs.N_PRB_DMRS,dmrs.dl_dmrs_symb_pos,dmrs.N_DMRS_SLOT);
+  return dmrs;
 }
 
 NR_ControlResourceSet_t *get_coreset(gNB_MAC_INST *nrmac,
@@ -514,109 +549,70 @@ bool nr_find_nb_rb(uint16_t Qm,
   return *tbs >= bytes && *nb_rb <= nb_rb_max;
 }
 
-void nr_set_pdsch_semi_static(const NR_UE_DL_BWP_t *dl_bwp,
-                              const NR_ServingCellConfigCommon_t *scc,
-                              int tda,
-                              uint8_t layers,
-                              NR_UE_sched_ctrl_t *sched_ctrl,
-                              NR_pdsch_semi_static_t *ps)
-{
-  bool reset_dmrs = false;
+NR_tda_info_t nr_get_pdsch_tda_info(const NR_UE_DL_BWP_t *dl_bwp, // BWP whose tdaList is read
+                                    const int tda) { // index of the entry to read from dl_bwp->tdaList
 
-  NR_PDSCH_Config_t *pdsch_Config = dl_bwp->pdsch_Config;
-  LOG_D(NR_MAC,"tda %d, ps->time_domain_allocation %d,layers %d, ps->nrOfLayers %d, pdsch_config %p\n",tda,ps->time_domain_allocation,layers,ps->nrOfLayers,pdsch_Config);
-  reset_dmrs = true;
-  ps->time_domain_allocation = tda;
+  NR_tda_info_t tda_info = {0}; // returned by value; fields not set below stay zero
   NR_PDSCH_TimeDomainResourceAllocationList_t *tdaList = dl_bwp->tdaList;
   AssertFatal(tda < tdaList->list.count, "time_domain_allocation %d>=%d\n", tda, tdaList->list.count);
-  ps->mapping_type = tdaList->list.array[tda]->mappingType;
-  if (pdsch_Config) {
-    if (ps->mapping_type == NR_PDSCH_TimeDomainResourceAllocation__mappingType_typeB)
-      ps->dmrsConfigType = pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeB->choice.setup->dmrs_Type != NULL;
-    else
-      ps->dmrsConfigType = pdsch_Config->dmrs_DownlinkForPDSCH_MappingTypeA->choice.setup->dmrs_Type != NULL;
-  }
-  else
-    ps->dmrsConfigType = NFAPI_NR_DMRS_TYPE1;
+  tda_info.mapping_type = tdaList->list.array[tda]->mappingType; // mapping type copied from the selected list entry
   const int startSymbolAndLength = tdaList->list.array[tda]->startSymbolAndLength;
-  SLIV2SL(startSymbolAndLength, &ps->startSymbolIndex, &ps->nrOfSymbols);
-
-  if (dl_bwp->dci_format == NR_DL_DCI_FORMAT_1_0) {
-    if (ps->nrOfSymbols == 2)
-      ps->numDmrsCdmGrpsNoData = 1;
-    else
-      ps->numDmrsCdmGrpsNoData = 2;
-    ps->dmrs_ports_id = 0;
-    ps->frontloaded_symb = 1;
-    ps->nrOfLayers = 1;
-  }
-  else {
-    LOG_D(NR_MAC,"checking layers\n");
-    if (ps->nrOfLayers != layers || ps->numDmrsCdmGrpsNoData == 0) {
-      reset_dmrs = true;
-      ps->nrOfLayers = layers;
-      set_dl_dmrs_ports(ps);
-    }
-  }
-
-  ps->N_PRB_DMRS = ps->numDmrsCdmGrpsNoData * (ps->dmrsConfigType == NFAPI_NR_DMRS_TYPE1 ? 6 : 4);
-
-  if (reset_dmrs) {
-    ps->dl_dmrs_symb_pos = fill_dmrs_mask(pdsch_Config, scc ? scc->dmrs_TypeA_Position : 0, ps->nrOfSymbols, ps->startSymbolIndex, ps->mapping_type, ps->frontloaded_symb);
-    ps->N_DMRS_SLOT = get_num_dmrs(ps->dl_dmrs_symb_pos);
-  }
-  LOG_D(NR_MAC,"Filling dmrs info, ps->N_PRB_DMRS %d, ps->dl_dmrs_symb_pos %x, ps->N_DMRS_SLOT %d\n",ps->N_PRB_DMRS,ps->dl_dmrs_symb_pos,ps->N_DMRS_SLOT);
+  SLIV2SL(startSymbolAndLength, &tda_info.startSymbolIndex, &tda_info.nrOfSymbols); // SLIV2SL fills start symbol and symbol count from the entry's SLIV
+  return tda_info;
 }
 
-void nr_set_pusch_semi_static(const NR_UE_UL_BWP_t *ul_bwp,
-                              const NR_ServingCellConfigCommon_t *scc,
-                              int tda,
-                              uint8_t nrOfLayers,
-                              NR_pusch_semi_static_t *ps) {
+NR_tda_info_t nr_get_pusch_tda_info(const NR_UE_UL_BWP_t *ul_bwp,
+                                    const int tda) {
 
-  ps->time_domain_allocation = tda;
+  NR_PUSCH_TimeDomainResourceAllocationList_t *alloc_list = ul_bwp->tdaList; // list indexed by tda
+  AssertFatal(tda < alloc_list->list.count, "time_domain_allocation %d>=%d\n", tda, alloc_list->list.count);
+  const int sliv = alloc_list->list.array[tda]->startSymbolAndLength;
+  NR_tda_info_t tda_info = {0};
+  tda_info.mapping_type = alloc_list->list.array[tda]->mappingType;
+  SLIV2SL(sliv, &tda_info.startSymbolIndex, &tda_info.nrOfSymbols); // SLIV2SL fills start symbol and symbol count
+  return tda_info;
+}
 
-  const int startSymbolAndLength = ul_bwp->tdaList->list.array[tda]->startSymbolAndLength;
-  SLIV2SL(startSymbolAndLength,
-          &ps->startSymbolIndex,
-          &ps->nrOfSymbols);
+NR_pusch_dmrs_t get_ul_dmrs_params(const NR_ServingCellConfigCommon_t *scc,
+                                   const NR_UE_UL_BWP_t *ul_bwp,
+                                   const NR_tda_info_t *tda_info,
+                                   const int Layers) {
 
-  ps->nrOfLayers = nrOfLayers;
+  NR_pusch_dmrs_t dmrs = {0};
   // TODO setting of cdm groups with no data to be redone for MIMO
-  if (ul_bwp->transform_precoding || nrOfLayers<3)
-    ps->num_dmrs_cdm_grps_no_data = (ul_bwp->dci_format == NR_UL_DCI_FORMAT_0_1) ? 1 : (ps->nrOfSymbols == 2 ? 1 : 2);
+  if (ul_bwp->transform_precoding || Layers<3)
+    dmrs.num_dmrs_cdm_grps_no_data = ul_bwp->dci_format == NR_UL_DCI_FORMAT_0_1 || tda_info->nrOfSymbols == 2 ? 1 : 2;
   else
-    ps->num_dmrs_cdm_grps_no_data = 2;
-
-  /* DMRS calculations */
-  ps->mapping_type = ul_bwp->tdaList->list.array[tda]->mappingType;
-  ps->NR_DMRS_UplinkConfig = ul_bwp->pusch_Config ?
-    (ps->mapping_type == NR_PUSCH_TimeDomainResourceAllocation__mappingType_typeA ?
-     ul_bwp->pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA->choice.setup :
-     ul_bwp->pusch_Config->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup) : NULL;
-  ps->dmrs_config_type = ps->NR_DMRS_UplinkConfig ? ((ps->NR_DMRS_UplinkConfig->dmrs_Type == NULL ? 0 : 1)) : 0;
-  const pusch_dmrs_AdditionalPosition_t additional_pos =
-						     ps->NR_DMRS_UplinkConfig ? (ps->NR_DMRS_UplinkConfig->dmrs_AdditionalPosition == NULL
-										 ? 2
-										 : (*ps->NR_DMRS_UplinkConfig->dmrs_AdditionalPosition ==
-										    NR_DMRS_UplinkConfig__dmrs_AdditionalPosition_pos3
-										    ? 3
-										    : *ps->NR_DMRS_UplinkConfig->dmrs_AdditionalPosition)):2;
-  const pusch_maxLength_t pusch_maxLength =
-    ps->NR_DMRS_UplinkConfig ? (ps->NR_DMRS_UplinkConfig->maxLength == NULL ? 1 : 2) : 1;
-  ps->ul_dmrs_symb_pos = get_l_prime(ps->nrOfSymbols,
-                                            ps->mapping_type,
-                                            additional_pos,
-                                            pusch_maxLength,
-                                            ps->startSymbolIndex,
-                                            scc->dmrs_TypeA_Position);
+    dmrs.num_dmrs_cdm_grps_no_data = 2;
+
+  NR_DMRS_UplinkConfig_t *NR_DMRS_UplinkConfig = ul_bwp->pusch_Config ?
+                                                 (tda_info->mapping_type == typeA ?
+                                                 ul_bwp->pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA->choice.setup :
+                                                 ul_bwp->pusch_Config->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup) : NULL;
+
+  dmrs.dmrs_config_type = NR_DMRS_UplinkConfig && NR_DMRS_UplinkConfig->dmrs_Type ? 1 : 0;
+
+  const pusch_dmrs_AdditionalPosition_t additional_pos = (NR_DMRS_UplinkConfig && NR_DMRS_UplinkConfig->dmrs_AdditionalPosition) ?
+                                                         (*NR_DMRS_UplinkConfig->dmrs_AdditionalPosition ==
+                                                         NR_DMRS_UplinkConfig__dmrs_AdditionalPosition_pos3 ?
+                                                         3 : *NR_DMRS_UplinkConfig->dmrs_AdditionalPosition) : 2;
+
+  const pusch_maxLength_t pusch_maxLength = NR_DMRS_UplinkConfig ? (NR_DMRS_UplinkConfig->maxLength == NULL ? 1 : 2) : 1;
+  dmrs.ul_dmrs_symb_pos = get_l_prime(tda_info->nrOfSymbols,
+                                       tda_info->mapping_type,
+                                       additional_pos,
+                                       pusch_maxLength,
+                                       tda_info->startSymbolIndex,
+                                       scc->dmrs_TypeA_Position);
+
   uint8_t num_dmrs_symb = 0;
-  for(int i = ps->startSymbolIndex; i < ps->startSymbolIndex + ps->nrOfSymbols; i++)
-    num_dmrs_symb += (ps->ul_dmrs_symb_pos >> i) & 1;
-  ps->num_dmrs_symb = num_dmrs_symb;
-  ps->N_PRB_DMRS = ps->dmrs_config_type == 0
-      ? ps->num_dmrs_cdm_grps_no_data * 6
-      : ps->num_dmrs_cdm_grps_no_data * 4;
+  for(int i = tda_info->startSymbolIndex; i < tda_info->startSymbolIndex + tda_info->nrOfSymbols; i++)
+    num_dmrs_symb += (dmrs.ul_dmrs_symb_pos >> i) & 1;
+  dmrs.num_dmrs_symb = num_dmrs_symb;
+  dmrs.N_PRB_DMRS = dmrs.num_dmrs_cdm_grps_no_data * (dmrs.dmrs_config_type == 0 ? 6 : 4);
+
+  dmrs.NR_DMRS_UplinkConfig = NR_DMRS_UplinkConfig;
+  return dmrs;
 }
 
 #define BLER_UPDATE_FRAME 10
@@ -886,6 +882,7 @@ void config_uldci(const NR_SIB1_t *sib1,
                   const NR_ServingCellConfigCommon_t *scc,
                   const nfapi_nr_pusch_pdu_t *pusch_pdu,
                   dci_pdu_rel15_t *dci_pdu_rel15,
+                  nr_srs_feedback_t *srs_feedback,
                   int time_domain_assignment,
                   uint8_t tpc,
                   NR_UE_UL_BWP_t *ul_bwp) {
@@ -920,13 +917,14 @@ void config_uldci(const NR_SIB1_t *sib1,
           pusch_Config->txConfig != NULL) {
         AssertFatal(*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook,
                     "Non Codebook configuration non supported\n");
-        dci_pdu_rel15->srs_resource_indicator.val = 0; // taking resource 0 for SRS
+        compute_srs_resource_indicator(ul_bwp->pusch_servingcellconfig, pusch_Config, ul_bwp->srs_Config, srs_feedback, &dci_pdu_rel15->srs_resource_indicator.val);
       }
-      dci_pdu_rel15->precoding_information.val= 0;
-      if (pusch_pdu->nrOfLayers == 2)
-        dci_pdu_rel15->precoding_information.val = 4;
-      else if (pusch_pdu->nrOfLayers == 4)
-        dci_pdu_rel15->precoding_information.val = 11;
+      compute_precoding_information(pusch_Config,
+                                    ul_bwp->srs_Config,
+                                    dci_pdu_rel15->srs_resource_indicator,
+                                    srs_feedback,
+                                    &pusch_pdu->nrOfLayers,
+                                    &dci_pdu_rel15->precoding_information.val);
 
       // antenna_ports.val = 0 for transform precoder is disabled, dmrs-Type=1, maxLength=1, Rank=1/2/3/4
       // Antenna Ports
@@ -1022,7 +1020,7 @@ int nr_get_pucch_resource(NR_ControlResourceSet_t *coreset,
 // This function configures pucch pdu fapi structure
 void nr_configure_pucch(nfapi_nr_pucch_pdu_t* pucch_pdu,
                         NR_ServingCellConfigCommon_t *scc,
-                        NR_UE_info_t* UE,
+                        NR_UE_info_t *UE,
                         uint8_t pucch_resource,
                         uint16_t O_csi,
                         uint16_t O_ack,
@@ -2460,6 +2458,12 @@ void configure_UE_BWP(gNB_MAC_INST *nr_mac,
                                           false);
 }
 
+void reset_srs_stats(NR_UE_info_t *UE) {
+  // Truncate the UE's srs_stats buffer to an empty string; nothing is done for a NULL UE
+  if (UE != NULL)
+    UE->mac_stats.srs_stats[0] = '\0';
+}
+
 //------------------------------------------------------------------------------
 NR_UE_info_t *add_new_nr_ue(gNB_MAC_INST *nr_mac, rnti_t rntiP, NR_CellGroupConfig_t *CellGroup)
 {
@@ -2501,8 +2505,8 @@ NR_UE_info_t *add_new_nr_ue(gNB_MAC_INST *nr_mac, rnti_t rntiP, NR_CellGroupConf
   configure_UE_BWP(nr_mac, scc, sched_ctrl, NULL, UE);
 
   /* set illegal time domain allocation to force recomputation of all fields */
-  sched_ctrl->pdsch_semi_static.time_domain_allocation = -1;
-  sched_ctrl->pusch_semi_static.time_domain_allocation = -1;
+  sched_ctrl->sched_pdsch.time_domain_allocation = -1;
+  sched_ctrl->sched_pusch.time_domain_allocation = -1;
 
   /* Set default BWPs */
   sched_ctrl->next_dl_bwp_id = -1;
@@ -2526,6 +2530,8 @@ NR_UE_info_t *add_new_nr_ue(gNB_MAC_INST *nr_mac, rnti_t rntiP, NR_CellGroupConf
   create_nr_list(&sched_ctrl->feedback_ul_harq, 16);
   create_nr_list(&sched_ctrl->retrans_ul_harq, 16);
 
+  reset_srs_stats(UE);
+
   pthread_mutex_lock(&UE_info->mutex);
   int i;
   for(i=0; i<MAX_MOBILES_PER_GNB; i++) {
@@ -2933,6 +2939,8 @@ void nr_mac_update_timers(module_id_t module_id,
       if (sched_ctrl->rrc_processing_timer == 0) {
         LOG_I(NR_MAC, "(%d.%d) De-activating RRC processing timer for UE %04x\n", frame, slot, UE->rnti);
 
+        reset_srs_stats(UE);
+
         NR_CellGroupConfig_t *cg = NULL;
         uper_decode(NULL,
                     &asn_DEF_NR_CellGroupConfig,   //might be added prefix later
@@ -2957,28 +2965,6 @@ void nr_mac_update_timers(module_id_t module_id,
           // add all available DL HARQ processes for this UE in SA
           create_dl_harq_list(sched_ctrl, UE->current_DL_BWP.pdsch_servingcellconfig);
         }
-
-        NR_pdsch_semi_static_t *ps = &sched_ctrl->pdsch_semi_static;
-        const uint8_t layers = set_dl_nrOfLayers(sched_ctrl);
-        const int tda = get_dl_tda(RC.nrmac[module_id], scc, slot);
-
-        nr_set_pdsch_semi_static(&UE->current_DL_BWP,
-                                 scc,
-                                 tda,
-                                 layers,
-                                 sched_ctrl,
-                                 ps);
-
-
-        NR_pusch_semi_static_t *ups = &sched_ctrl->pusch_semi_static;
-        const uint8_t nrOfLayers = 1;
-        const int utda = get_ul_tda(RC.nrmac[module_id], scc, slot);
-
-        nr_set_pusch_semi_static(&UE->current_UL_BWP,
-                                 scc,
-                                 utda,
-                                 nrOfLayers,
-                                 ups);
       }
     }
 
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_srs.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_srs.c
index 65bf2a4ae956e6c956be6254e7f8c241aff031e1..8eddea2eabeae4ca8712bc29a422f8614236a26d 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_srs.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_srs.c
@@ -35,7 +35,12 @@
 
 extern RAN_CONTEXT_t RC;
 
-void nr_configure_srs(nfapi_nr_srs_pdu_t *srs_pdu, int module_id, int CC_id,NR_UE_info_t*  UE, NR_SRS_Resource_t *srs_resource) {
+const uint16_t m_SRS[64] = { 4, 8, 12, 16, 16, 20, 24, 24, 28, 32, 36, 40, 48, 48, 52, 56, 60, 64, 72, 72, 76, 80, 88,
+                             96, 96, 104, 112, 120, 120, 120, 128, 128, 128, 132, 136, 144, 144, 144, 144, 152, 160,
+                             160, 160, 168, 176, 184, 192, 192, 192, 192, 208, 216, 224, 240, 240, 240, 240, 256, 256,
+                             256, 264, 272, 272, 272 }; // indexed by srs_pdu->config_index in nr_configure_srs; presumably the m_SRS,0 column of the 38.211 SRS bandwidth table - TODO confirm
+
+void nr_configure_srs(nfapi_nr_srs_pdu_t *srs_pdu, int module_id, int CC_id,NR_UE_info_t*  UE, NR_SRS_ResourceSet_t *srs_resource_set, NR_SRS_Resource_t *srs_resource) {
 
   NR_UE_UL_BWP_t *current_BWP = &UE->current_UL_BWP;
 
@@ -74,9 +79,22 @@ void nr_configure_srs(nfapi_nr_srs_pdu_t *srs_pdu, int module_id, int CC_id,NR_U
   srs_pdu->resource_type = srs_resource->resourceType.present - 1;
   srs_pdu->t_srs = srs_period[srs_resource->resourceType.choice.periodic->periodicityAndOffset_p.present];
   srs_pdu->t_offset = get_nr_srs_offset(srs_resource->resourceType.choice.periodic->periodicityAndOffset_p);
+
+  // TODO: This should be completed
+  srs_pdu->srs_parameters_v4.srs_bandwidth_size = m_SRS[srs_pdu->config_index];
+  srs_pdu->srs_parameters_v4.usage = 1<<srs_resource_set->usage;
+  srs_pdu->srs_parameters_v4.report_type[0] = 1;
+  srs_pdu->srs_parameters_v4.iq_representation = 1;
+  srs_pdu->srs_parameters_v4.prg_size = 1;
+  srs_pdu->srs_parameters_v4.num_total_ue_antennas = 1<<srs_pdu->num_ant_ports;
+  if (srs_resource_set->usage == NR_SRS_ResourceSet__usage_beamManagement) {
+    srs_pdu->beamforming.trp_scheme = 0;
+    srs_pdu->beamforming.num_prgs = m_SRS[srs_pdu->config_index];
+    srs_pdu->beamforming.prg_size = 1;
+  }
 }
 
-void nr_fill_nfapi_srs(int module_id, int CC_id, NR_UE_info_t* UE, sub_frame_t slot, NR_SRS_Resource_t *srs_resource) {
+void nr_fill_nfapi_srs(int module_id, int CC_id, NR_UE_info_t* UE, sub_frame_t slot, NR_SRS_ResourceSet_t *srs_resource_set, NR_SRS_Resource_t *srs_resource) {
 
   nfapi_nr_ul_tti_request_t *future_ul_tti_req = &RC.nrmac[module_id]->UL_tti_req_ahead[0][slot];
   AssertFatal(future_ul_tti_req->n_pdus <
@@ -88,7 +106,7 @@ void nr_fill_nfapi_srs(int module_id, int CC_id, NR_UE_info_t* UE, sub_frame_t s
   memset(srs_pdu, 0, sizeof(nfapi_nr_srs_pdu_t));
   future_ul_tti_req->n_pdus += 1;
 
-  nr_configure_srs(srs_pdu, module_id, CC_id, UE, srs_resource);
+  nr_configure_srs(srs_pdu, module_id, CC_id, UE, srs_resource_set, srs_resource);
 }
 
 /*******************************************************************
@@ -158,7 +176,7 @@ void nr_schedule_srs(int module_id, frame_t frame) {
       // Check if UE will transmit the SRS in this frame
       if ( ((frame - offset/n_slots_frame)*n_slots_frame)%period == 0) {
         LOG_D(NR_MAC,"Scheduling SRS reception for %d.%d\n", frame, offset%n_slots_frame);
-        nr_fill_nfapi_srs(module_id, CC_id, UE, offset%n_slots_frame, srs_resource);
+        nr_fill_nfapi_srs(module_id, CC_id, UE, offset%n_slots_frame, srs_resource_set, srs_resource);
         sched_ctrl->sched_srs.frame = frame;
         sched_ctrl->sched_srs.slot = offset%n_slots_frame;
         sched_ctrl->sched_srs.srs_scheduled = true;
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
index 7f4311c10d89297909a8136c371c36345e2b8077..c347ea5bea586d09848f7929da2899f02db6d4c4 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
@@ -47,14 +47,14 @@ static void nr_fill_nfapi_pucch(gNB_MAC_INST *nrmac,
 {
   nfapi_nr_ul_tti_request_t *future_ul_tti_req =
       &nrmac->UL_tti_req_ahead[0][pucch->ul_slot];
-  AssertFatal(future_ul_tti_req->SFN == pucch->frame
-              && future_ul_tti_req->Slot == pucch->ul_slot,
-              "Current %4d.%2d : future UL_tti_req's frame.slot %4d.%2d does not match PUCCH %4d.%2d\n",
-              frame,slot,
-              future_ul_tti_req->SFN,
-              future_ul_tti_req->Slot,
-              pucch->frame,
-              pucch->ul_slot);
+  if (future_ul_tti_req->SFN != pucch->frame || future_ul_tti_req->Slot != pucch->ul_slot)
+    LOG_W(MAC,
+          "Current %d.%d : future UL_tti_req's frame.slot %4d.%2d does not match PUCCH %4d.%2d\n",
+          frame,slot,
+          future_ul_tti_req->SFN,
+          future_ul_tti_req->Slot,
+          pucch->frame,
+          pucch->ul_slot);
   // n_pdus is number of pdus, so, in the array, it is the index of the next free element
   if (future_ul_tti_req->n_pdus >= sizeofArray(future_ul_tti_req->pdus_list) ) {
     LOG_E(NR_MAC,"future_ul_tti_req->n_pdus %d is full, slot: %d, sr flag %d dropping request\n",
@@ -707,7 +707,6 @@ uint8_t evaluate_pmi_report(uint8_t *payload,
         sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x1,
         sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x2);
 
-  sched_ctrl->set_pmi = true;
   return tot_bitlen;
 
 }
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
index d9635d98bb532846574a7aa279df11c7232a1594..55ddf388a78cf1e7b4004e8cdd61798c4c5d0cfa 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
@@ -53,30 +53,28 @@ const int get_ul_tda(const gNB_MAC_INST *nrmac, const NR_ServingCellConfigCommon
   return 0; // if FDD or not mixed slot in TDD, for now use default TDA (TODO handle CSI-RS slots)
 }
 
-int compute_bw_factor(int mu, int rb) {
-  // 38.213 7.1.1
-  return (10 * log10(rb << mu));
-}
-
-int compute_delta_tf(int tbs_bits,
-                     int rb,
-                     int n_layers,
-                     int n_symbols,
-                     int n_dmrs,
-                     long *deltaMCS) {
+int compute_ph_factor(int mu,         // shift applied to rb in the bandwidth term (rb << mu)
+                      int tbs_bits,   // transport block size in bits
+                      int rb,         // number of allocated resource blocks
+                      int n_layers,   // delta_tf is only computed for exactly one layer
+                      int n_symbols,  // symbols in the allocation
+                      int n_dmrs,     // amount subtracted from the per-RB RE count (DMRS REs per RB)
+                      long *deltaMCS) { // NULL disables the delta_tf term
 
   // 38.213 7.1.1
   // if the PUSCH transmission is over more than one layer delta_tf = 0
-  if(deltaMCS == NULL || n_layers>1)
-    return 0;
-  else
-    AssertFatal(1==0,"Compute DeltaTF not yet fully supported\n");
+  int delta_tf = 0;
+  if(deltaMCS != NULL && n_layers == 1) {
+    const int n_re = (NR_NB_SC_PER_RB * n_symbols - n_dmrs) * rb;
+    const float BPRE = (float) tbs_bits/n_re;  // keep the fraction: an integer quotient is 0 whenever tbs_bits < n_re, which made f == 1 and log10(0) below //TODO change for PUSCH with CSI
+    const float f = pow(2, (float) BPRE * 1.25);
+    const float beta = 1.0f; //TODO change for PUSCH with CSI
+    delta_tf = (10 * log10((f - 1) * beta));
+  }
 
-  const int n_re = (NR_NB_SC_PER_RB * n_symbols - n_dmrs) * rb;
-  const int BPRE = tbs_bits/n_re;  //TODO change for PUSCH with CSI
-  const float f = pow(2, (float) BPRE * 1.25);
-  const float beta = 1.0f; //TODO change for PUSCH with CSI
-  return(10 * log10((f - 1) * beta));
+  const int bw_factor = 10 * log10(rb << mu);
+  // Result is the sum of both terms, each truncated to an integer
+  return (delta_tf + bw_factor);
 }
 
 //  For both UL-SCH except:
@@ -114,7 +112,6 @@ int nr_process_mac_pdu(instance_t module_idP,
 {
 
   uint8_t done = 0;
-
   int sdus = 0;
   NR_UE_UL_BWP_t *ul_bwp = &UE->current_UL_BWP;
   NR_UE_sched_ctrl_t *sched_ctrl = &UE->UE_sched_ctrl;
@@ -247,13 +244,13 @@ int nr_process_mac_pdu(instance_t module_idP,
         // in sched_ctrl we set normalized PH wrt MCS and PRBs
         long *deltaMCS = ul_bwp->pusch_Config ? ul_bwp->pusch_Config->pusch_PowerControl->deltaMCS : NULL;
         sched_ctrl->ph = PH +
-                         compute_bw_factor(sched_pusch->mu, sched_pusch->rbSize) +
-                         compute_delta_tf(sched_pusch->tb_size<<3,
-                                          sched_pusch->rbSize,
-                                          0, //n_layers
-                                          0, //n_symbols
-                                          0, //n_dmrs
-                                          deltaMCS);
+                         compute_ph_factor(sched_pusch->mu,
+                                           sched_pusch->tb_size<<3,
+                                           sched_pusch->rbSize,
+                                           sched_pusch->nrOfLayers,
+                                           sched_pusch->tda_info.nrOfSymbols, //n_symbols
+                                           sched_pusch->dmrs_info.num_dmrs_symb*sched_pusch->dmrs_info.N_PRB_DMRS, //n_dmrs
+                                           deltaMCS);
         /* 38.133 Table10.1.18.1-1 */
         sched_ctrl->pcmax = PCMAX - 29;
         LOG_D(NR_MAC, "SINGLE ENTRY PHR R1 %d PH %d (%d dB) R2 %d PCMAX %d (%d dBm)\n",
@@ -263,6 +260,7 @@ int nr_process_mac_pdu(instance_t module_idP,
       case UL_SCH_LCID_MULTI_ENTRY_PHR_1_OCT:
         //38.321 section 6.1.3.9
         //  varialbe length
+        AssertFatal(1==0,"Multi entry PHR not supported\n");
         if (!get_mac_len(pduP, pdu_len, &mac_len, &mac_subheader_len))
           return 0;
         /* Extract MULTI ENTRY PHR elements from single octet bitmap for PHR calculation */
@@ -271,6 +269,7 @@ int nr_process_mac_pdu(instance_t module_idP,
       case UL_SCH_LCID_MULTI_ENTRY_PHR_4_OCT:
         //38.321 section 6.1.3.9
         //  varialbe length
+        AssertFatal(1==0,"Multi entry PHR not supported\n");
         if (!get_mac_len(pduP, pdu_len, &mac_len, &mac_subheader_len))
           return 0;
         /* Extract MULTI ENTRY PHR elements from four octets bitmap for PHR calculation */
@@ -722,8 +721,6 @@ void nr_rx_sdu(const module_id_t gnb_mod_idP,
           // the function is only called to decode the contention resolution sub-header
           if (nr_process_mac_pdu(gnb_mod_idP, UE, CC_idP, frameP, slotP, sduP, sdu_lenP, -1) == 0) {
             ra->state = Msg4;
-            ra->Msg4_frame = (frameP + 2) % 1024;
-            ra->Msg4_slot = 1;
             
             if (ra->msg3_dcch_dtch) {
               // Check if the UE identified by C-RNTI still exists at the gNB
@@ -744,8 +741,8 @@ void nr_rx_sdu(const module_id_t gnb_mod_idP,
                 reset_ul_harq_list(&UE_C->UE_sched_ctrl);
               }
             }
-            LOG_I(NR_MAC, "Scheduling RA-Msg4 for TC_RNTI 0x%04x (state %d, frame %d, slot %d)\n",
-                  (ra->msg3_dcch_dtch?ra->crnti:ra->rnti), ra->state, ra->Msg4_frame, ra->Msg4_slot);
+            LOG_I(NR_MAC, "Activating scheduling RA-Msg4 for TC_RNTI 0x%04x (state %d)\n",
+                  (ra->msg3_dcch_dtch?ra->crnti:ra->rnti), ra->state);
           }
           else {
              nr_mac_remove_ra_rnti(gnb_mod_idP, ra->rnti);
@@ -787,60 +784,460 @@ void nr_rx_sdu(const module_id_t gnb_mod_idP,
   }
 }
 
+uint32_t calc_power_complex(const int16_t *x, const int16_t *y, const uint32_t size) {
+
+  // Running sums of the samples and of their squares,
+  // for the real part (x) and the imaginary part (y)
+  int64_t acc_re = 0;
+  int64_t acc_re_sq = 0;
+  int64_t acc_im = 0;
+  int64_t acc_im_sq = 0;
+  for(int n = 0; n<size; n++) {
+    acc_re += x[n];
+    acc_re_sq += x[n]*x[n];
+    acc_im += y[n];
+    acc_im_sq += y[n]*y[n];
+  }
+
+  // Per-component power: mean of squares minus squared mean (integer division)
+  const int64_t mean_re = acc_re/size;
+  const int64_t mean_im = acc_im/size;
+  uint32_t power_re = acc_re_sq/size - mean_re*mean_re;
+  uint32_t power_im = acc_im_sq/size - mean_im*mean_im;
+  return power_re+power_im;
+}
+
+c16_t nr_h_times_w(c16_t h, char w) {
+  // Multiplies the channel coefficient h by the precoder element encoded in w:
+  //   '0' -> 0, '1' -> 1, 'n' -> -1, 'j' -> j, 'o' -> -j
+  // Any other character is a fatal error.
+  c16_t product;
+
+  if (w == '0') {
+    product.r = 0;
+    product.i = 0;
+  } else if (w == '1') {
+    product.r = h.r;
+    product.i = h.i;
+  } else if (w == 'n') {
+    product.r = -h.r;
+    product.i = -h.i;
+  } else if (w == 'j') {
+    // (r + j*i) * j = -i + j*r
+    product.r = -h.i;
+    product.i = h.r;
+  } else if (w == 'o') {
+    // (r + j*i) * (-j) = i - j*r
+    product.r = h.i;
+    product.i = -h.r;
+  } else {
+    AssertFatal(1==0,"Invalid precoder value %c\n", w);
+  }
+  return product;
+}
+
+uint8_t get_max_tpmi(const NR_PUSCH_Config_t *pusch_Config,
+                     const uint16_t num_ue_srs_ports,
+                     const uint8_t *nrOfLayers,
+                     int *additional_max_tpmi) {
+  // Returns the highest TPMI index to try for the given SRS port count, layer count and PUSCH codebook settings (0 when no codebook search applies)
+  uint8_t max_tpmi = 0;
+  // *additional_max_tpmi is written only when one extra, non-contiguous TPMI (13) is also allowed; otherwise it is left as the caller set it
+  if (pusch_Config == NULL || (pusch_Config->txConfig != NULL && *pusch_Config->txConfig == NR_PUSCH_Config__txConfig_nonCodebook) ||
+      num_ue_srs_ports == 1) {
+    return max_tpmi;
+  }
+  // maxRank is read through a pointer: fall back to rank 1 when it is absent instead of dereferencing NULL
+  long max_rank = pusch_Config->maxRank ? *pusch_Config->maxRank : 1;
+  long *ul_FullPowerTransmission = pusch_Config->ext1 ? pusch_Config->ext1->ul_FullPowerTransmission_r16 : NULL;
+  long *codebookSubset = pusch_Config->codebookSubset;
+  // NOTE(review): *nrOfLayers is compared against 1..4 below (a layer count); confirm every caller passes a count and not a 0-based rank index
+  if (num_ue_srs_ports == 2) {
+
+    if (max_rank == 1) {
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        max_tpmi = 2;
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          max_tpmi = 1;
+        } else {
+          max_tpmi = 5;
+        }
+      }
+    } else {
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        max_tpmi = *nrOfLayers == 1 ? 2 : 0;
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          max_tpmi = *nrOfLayers == 1 ? 1 : 0;
+        } else {
+          max_tpmi = *nrOfLayers == 1 ? 5 : 2;
+        }
+      }
+    }
+
+  } else if (num_ue_srs_ports == 4) {
+
+    if (max_rank == 1) {
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          max_tpmi = 3;
+          *additional_max_tpmi = 13;
+        } else {
+          max_tpmi = 15;
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          max_tpmi = 3;
+        } else if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent) {
+          max_tpmi = 11;
+        } else {
+          max_tpmi = 27;
+        }
+      }
+    } else {
+      if (ul_FullPowerTransmission && *ul_FullPowerTransmission == NR_PUSCH_Config__ext1__ul_FullPowerTransmission_r16_fullpowerMode1) {
+        if (max_rank == 2) {
+          if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+            max_tpmi = *nrOfLayers == 1 ? 3 : 6;
+            if (*nrOfLayers == 1) {
+              *additional_max_tpmi = 13;
+            }
+          } else {
+            max_tpmi = *nrOfLayers == 1 ? 15 : 13;
+          }
+        } else {
+          if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+            switch (*nrOfLayers) {
+              case 1:
+                max_tpmi = 3;
+                *additional_max_tpmi = 13;
+                break;
+              case 2:
+                max_tpmi = 6;
+                break;
+              case 3:
+                max_tpmi = 1;
+                break;
+              case 4:
+                max_tpmi = 0;
+                break;
+              default:
+                LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+            }
+          } else {
+            switch (*nrOfLayers) {
+              case 1:
+                max_tpmi = 15;
+                break;
+              case 2:
+                max_tpmi = 13;
+                break;
+              case 3:
+              case 4:
+                max_tpmi = 2;
+                break;
+              default:
+                LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+            }
+          }
+        }
+      } else {
+        if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent) {
+          switch (*nrOfLayers) {
+            case 1:
+              max_tpmi = 3;
+              break;
+            case 2:
+              max_tpmi = 5;
+              break;
+            case 3:
+            case 4:
+              max_tpmi = 0;
+              break;
+            default:
+              LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+          }
+        } else if (codebookSubset && *codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent) {
+          switch (*nrOfLayers) {
+            case 1:
+              max_tpmi = 11;
+              break;
+            case 2:
+              max_tpmi = 13;
+              break;
+            case 3:
+            case 4:
+              max_tpmi = 2;
+              break;
+            default:
+              LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+          }
+        } else {
+          switch (*nrOfLayers) {
+            case 1:
+              max_tpmi = 27; // highest index, not entry count: the result is used as an inclusive loop bound
+              break;
+            case 2:
+              max_tpmi = 21; // highest index (22 entries)
+              break;
+            case 3:
+              max_tpmi = 6; // highest index (7 entries)
+              break;
+            case 4:
+              max_tpmi = 4; // highest index (5 entries)
+              break;
+            default:
+              LOG_E(NR_MAC,"Number of layers %d is invalid!\n", *nrOfLayers);
+          }
+        }
+      }
+    }
+
+  }
+
+  return max_tpmi;
+}
+
+void get_precoder_matrix_coef(char *w,
+                              const uint8_t ul_ri,
+                              const uint16_t num_ue_srs_ports,
+                              const uint8_t transform_precoding,
+                              const uint8_t tpmi,
+                              const uint8_t uI) {
+  // Only ul_ri == 0 (reported as ul_ri+1 layers in the message) is handled; anything else is fatal
+  if (ul_ri != 0) {
+    AssertFatal(1==0,"Function get_precoder_matrix_coef() does not support %i layers yet!\n", ul_ri+1);
+    return;
+  }
+  // Store in *w the value pointed to by entry [tpmi][uI] of the table selected
+  // by the SRS port count and, when it is not 2, by the transform precoding setting
+  if (num_ue_srs_ports == 2)
+    *w = *table_38211_6_3_1_5_1[tpmi][uI];
+  else if (transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled)
+    *w = *table_38211_6_3_1_5_2[tpmi][uI];
+  else
+    *w = *table_38211_6_3_1_5_3[tpmi][uI];
+}
+
+int nr_srs_tpmi_estimation(const NR_PUSCH_Config_t *pusch_Config,
+                           const uint8_t transform_precoding,
+                           const uint8_t *channel_matrix,
+                           const uint8_t normalized_iq_representation,
+                           const uint16_t num_gnb_antenna_elements,
+                           const uint16_t num_ue_srs_ports,
+                           const uint16_t prg_size,
+                           const uint16_t num_prgs,
+                           const uint8_t ul_ri) {
+  // Returns the TPMI candidate whose precoded channel has the highest calc_power_complex() value (0 if no candidate gives a power above 0).
+  uint8_t tpmi_sel = 0;
+  int16_t precoded_channel_matrix_re[num_prgs*num_gnb_antenna_elements];
+  int16_t precoded_channel_matrix_im[num_prgs*num_gnb_antenna_elements];
+  c16_t *channel_matrix16 = (c16_t*)channel_matrix;
+  uint32_t max_precoded_signal_power = 0;
+  int additional_max_tpmi = -1;
+  char w;
+  // NOTE(review): normalized_iq_representation and prg_size are not read here; the matrix is always viewed as c16_t - confirm 8-bit reports cannot reach this function.
+  uint8_t max_tpmi = get_max_tpmi(pusch_Config,
+                                  num_ue_srs_ports,
+                                  &ul_ri,
+                                  &additional_max_tpmi);
+
+  uint8_t end_tpmi_loop = additional_max_tpmi > max_tpmi ? additional_max_tpmi : max_tpmi;
+
+  //                      channel_matrix                          x   precoder_matrix
+  // [ (gI=0,uI=0) (gI=0,uI=1) ... (gI=0,uI=num_ue_srs_ports-1) ] x   [uI=0]
+  // [ (gI=1,uI=0) (gI=1,uI=1) ... (gI=1,uI=num_ue_srs_ports-1) ]     [uI=1]
+  // [ (gI=2,uI=0) (gI=2,uI=1) ... (gI=2,uI=num_ue_srs_ports-1) ]     [uI=2]
+  //                           ...                                     ...
+
+  for(uint8_t tpmi = 0; tpmi<=end_tpmi_loop; tpmi++) {
+    // Past max_tpmi only additional_max_tpmi itself is evaluated: jump straight to the last loop value
+    if (tpmi > max_tpmi) {
+      tpmi = end_tpmi_loop;
+    }
+
+    for(int pI = 0; pI <num_prgs; pI++) {
+      for(int gI = 0; gI < num_gnb_antenna_elements; gI++) {
+        // 32-bit offsets: products of the uint16_t dimensions do not fit in uint16_t for large antenna/PRG counts
+        uint32_t index_gI_pI = gI*num_prgs + pI;
+        precoded_channel_matrix_re[index_gI_pI] = 0;
+        precoded_channel_matrix_im[index_gI_pI] = 0;
+
+        for(int uI = 0; uI < num_ue_srs_ports; uI++) {
+
+          uint32_t index = uI*num_gnb_antenna_elements*num_prgs + index_gI_pI;
+          get_precoder_matrix_coef(&w, ul_ri, num_ue_srs_ports, transform_precoding, tpmi, uI);
+          c16_t h_times_w = nr_h_times_w(channel_matrix16[index], w);
+          // NOTE(review): the int16_t accumulators can overflow when several ports add up - confirm the report's I/Q scaling leaves headroom
+          precoded_channel_matrix_re[index_gI_pI] += h_times_w.r;
+          precoded_channel_matrix_im[index_gI_pI] += h_times_w.i;
+
+#ifdef SRS_IND_DEBUG
+          LOG_I(NR_MAC, "(uI %i, gI %i, pI %i) channel_matrix --> real %i, imag %i\n",
+                uI, gI, pI, channel_matrix16[index].r, channel_matrix16[index].i);
+#endif
+        }
+
+#ifdef SRS_IND_DEBUG
+        LOG_I(NR_MAC, "(gI %i, pI %i) precoded_channel_coef --> real %i, imag %i\n",
+              gI, pI, precoded_channel_matrix_re[index_gI_pI], precoded_channel_matrix_im[index_gI_pI]);
+#endif
+      }
+    }
+
+    uint32_t precoded_signal_power = calc_power_complex(precoded_channel_matrix_re,
+                                                        precoded_channel_matrix_im,
+                                                        num_prgs*num_gnb_antenna_elements);
+
+#ifdef SRS_IND_DEBUG
+    LOG_I(NR_MAC, "(tpmi %i) precoded_signal_power = %u\n", tpmi, precoded_signal_power);
+#endif
+
+    if (precoded_signal_power > max_precoded_signal_power) {
+      max_precoded_signal_power = precoded_signal_power;
+      tpmi_sel = tpmi;
+    }
+  }
+
+  return tpmi_sel;
+}
+
 void handle_nr_srs_measurements(const module_id_t module_id,
                                 const frame_t frame,
                                 const sub_frame_t slot,
-                                const rnti_t rnti,
-                                const uint16_t timing_advance,
-                                const uint8_t num_symbols,
-                                const uint8_t wide_band_snr,
-                                const uint8_t num_reported_symbols,
-                                nfapi_nr_srs_indication_reported_symbol_t *reported_symbol_list)
+                                const nfapi_nr_srs_indication_pdu_t *srs_ind) // SRS indication PDU; its report_tlv is decoded according to srs_ind->srs_usage
 {
-  LOG_D(NR_MAC, "(%d.%d) Received SRS indication for rnti: 0x%04x\n", frame, slot, rnti);
+  LOG_D(NR_MAC, "(%d.%d) Received SRS indication for UE %04x\n", frame, slot, srs_ind->rnti);
 
 #ifdef SRS_IND_DEBUG
   LOG_I(NR_MAC, "frame = %i\n", frame);
   LOG_I(NR_MAC, "slot = %i\n", slot);
-  LOG_I(NR_MAC, "rnti = 0x%04x\n", rnti);
-  LOG_I(NR_MAC, "timing_advance = %i\n", timing_advance);
-  LOG_I(NR_MAC, "num_symbols = %i\n", num_symbols);
-  LOG_I(NR_MAC, "wide_band_snr = %i (%i dB)\n", wide_band_snr, (wide_band_snr >> 1) - 64);
-  LOG_I(NR_MAC, "num_reported_symbols = %i\n", num_reported_symbols);
-  LOG_I(NR_MAC, "reported_symbol_list[0].num_rbs = %i\n", reported_symbol_list[0].num_rbs);
-  for (int rb = 0; rb < reported_symbol_list[0].num_rbs; rb++) {
-    LOG_I(NR_MAC, "reported_symbol_list[0].rb_list[%3i].rb_snr = %i (%i dB)\n", rb, reported_symbol_list[0].rb_list[rb].rb_snr, (reported_symbol_list[0].rb_list[rb].rb_snr >> 1) - 64);
-  }
+  LOG_I(NR_MAC, "srs_ind->rnti = %04x\n", srs_ind->rnti);
+  LOG_I(NR_MAC, "srs_ind->timing_advance_offset = %i\n", srs_ind->timing_advance_offset);
+  LOG_I(NR_MAC, "srs_ind->timing_advance_offset_nsec = %i\n", srs_ind->timing_advance_offset_nsec);
+  LOG_I(NR_MAC, "srs_ind->srs_usage = %i\n", srs_ind->srs_usage);
+  LOG_I(NR_MAC, "srs_ind->report_type = %i\n", srs_ind->report_type);
 #endif
 
-  NR_UE_info_t *UE = find_nr_UE(&RC.nrmac[module_id]->UE_info, rnti);
+  NR_UE_info_t *UE = find_nr_UE(&RC.nrmac[module_id]->UE_info, srs_ind->rnti);
   if (!UE) {
-    LOG_E(NR_MAC, "Could not find UE for RNTI %04x\n", rnti);
+    LOG_W(NR_MAC, "Could not find UE for RNTI %04x\n", srs_ind->rnti);
     return;
   }
 
-  if (wide_band_snr == 0xFF) {
-    LOG_W(NR_MAC, "Invalid wide_band_snr for RNTI %04x\n", rnti);
+  if (srs_ind->timing_advance_offset == 0xFFFF) {
+    LOG_W(NR_MAC, "Invalid timing advance offset for RNTI %04x\n", srs_ind->rnti);
     return;
   }
 
-  int wide_band_snr_dB = (wide_band_snr >> 1) - 64;
-
   gNB_MAC_INST *nr_mac = RC.nrmac[module_id];
   NR_mac_stats_t *stats = &UE->mac_stats;
-  stats->srs_wide_band_snr = wide_band_snr_dB;
 
-  const int ul_prbblack_SNR_threshold = nr_mac->ul_prbblack_SNR_threshold;
-  uint16_t *ulprbbl = nr_mac->ulprbbl;
+  switch (srs_ind->srs_usage) {
+    case NR_SRS_ResourceSet__usage_beamManagement: {
+      nfapi_nr_srs_beamforming_report_t nr_srs_beamforming_report;
+      unpack_nr_srs_beamforming_report(srs_ind->report_tlv->value,
+                                       srs_ind->report_tlv->length,
+                                       &nr_srs_beamforming_report,
+                                       sizeof(nfapi_nr_srs_beamforming_report_t));
+
+      if (nr_srs_beamforming_report.wide_band_snr == 0xFF) {
+        LOG_W(NR_MAC, "Invalid wide_band_snr for RNTI %04x\n", srs_ind->rnti);
+        return;
+      }
+
+      int wide_band_snr_dB = (nr_srs_beamforming_report.wide_band_snr >> 1) - 64;
+
+#ifdef SRS_IND_DEBUG
+      LOG_I(NR_MAC, "nr_srs_beamforming_report.prg_size = %i\n", nr_srs_beamforming_report.prg_size);
+      LOG_I(NR_MAC, "nr_srs_beamforming_report.num_symbols = %i\n", nr_srs_beamforming_report.num_symbols);
+      LOG_I(NR_MAC, "nr_srs_beamforming_report.wide_band_snr = %i (%i dB)\n", nr_srs_beamforming_report.wide_band_snr, wide_band_snr_dB);
+      LOG_I(NR_MAC, "nr_srs_beamforming_report.num_reported_symbols = %i\n", nr_srs_beamforming_report.num_reported_symbols);
+      LOG_I(NR_MAC, "nr_srs_beamforming_report.prgs[0].num_prgs = %i\n", nr_srs_beamforming_report.prgs[0].num_prgs);
+      for (int prg_idx = 0; prg_idx < nr_srs_beamforming_report.prgs[0].num_prgs; prg_idx++) {
+        LOG_I(NR_MAC,
+              "nr_srs_beamforming_report.prgs[0].prg_list[%3i].rb_snr = %i (%i dB)\n",
+              prg_idx,
+              nr_srs_beamforming_report.prgs[0].prg_list[prg_idx].rb_snr,
+              (nr_srs_beamforming_report.prgs[0].prg_list[prg_idx].rb_snr >> 1) - 64);
+      }
+#endif
 
-  memset(ulprbbl, 0, reported_symbol_list[0].num_rbs * sizeof(uint16_t));
+      sprintf(stats->srs_stats, "UL-SNR %i dB", wide_band_snr_dB);
 
-  for (int rb = 0; rb < reported_symbol_list[0].num_rbs; rb++) {
-    int snr = (reported_symbol_list[0].rb_list[rb].rb_snr >> 1) - 64;
-    if (snr < wide_band_snr_dB - ul_prbblack_SNR_threshold) {
-      ulprbbl[rb] = 0x3FFF; // all symbols taken
+      const int ul_prbblack_SNR_threshold = nr_mac->ul_prbblack_SNR_threshold;
+      uint16_t *ulprbbl = nr_mac->ulprbbl;
+      // num_rbs must be 16 bits wide: prg_size * num_prgs exceeds 255 for wide SRS allocations and would be truncated in a uint8_t
+      uint16_t num_rbs = nr_srs_beamforming_report.prg_size * nr_srs_beamforming_report.prgs[0].num_prgs;
+      memset(ulprbbl, 0, num_rbs * sizeof(uint16_t));
+      for (int rb = 0; rb < num_rbs; rb++) {
+        int snr = (nr_srs_beamforming_report.prgs[0].prg_list[rb / nr_srs_beamforming_report.prg_size].rb_snr >> 1) - 64;
+        if (snr < wide_band_snr_dB - ul_prbblack_SNR_threshold) {
+          ulprbbl[rb] = 0x3FFF; // all symbols taken
+        }
+        LOG_D(NR_MAC, "ulprbbl[%3i] = 0x%x\n", rb, ulprbbl[rb]);
+      }
+
+      break;
     }
-    LOG_D(NR_MAC, "ulprbbl[%3i] = 0x%x\n", rb, ulprbbl[rb]);
+
+    case NR_SRS_ResourceSet__usage_codebook: {
+      nfapi_nr_srs_normalized_channel_iq_matrix_t nr_srs_normalized_channel_iq_matrix;
+      unpack_nr_srs_normalized_channel_iq_matrix(srs_ind->report_tlv->value,
+                                                 srs_ind->report_tlv->length,
+                                                 &nr_srs_normalized_channel_iq_matrix,
+                                                 sizeof(nfapi_nr_srs_normalized_channel_iq_matrix_t));
+
+#ifdef SRS_IND_DEBUG
+      LOG_I(NR_MAC, "nr_srs_normalized_channel_iq_matrix.normalized_iq_representation = %i\n", nr_srs_normalized_channel_iq_matrix.normalized_iq_representation);
+      LOG_I(NR_MAC, "nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements = %i\n", nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements);
+      LOG_I(NR_MAC, "nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports = %i\n", nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports);
+      LOG_I(NR_MAC, "nr_srs_normalized_channel_iq_matrix.prg_size = %i\n", nr_srs_normalized_channel_iq_matrix.prg_size);
+      LOG_I(NR_MAC, "nr_srs_normalized_channel_iq_matrix.num_prgs = %i\n", nr_srs_normalized_channel_iq_matrix.num_prgs);
+      c16_t *channel_matrix16 = (c16_t *)nr_srs_normalized_channel_iq_matrix.channel_matrix;
+      c8_t *channel_matrix8 = (c8_t *)nr_srs_normalized_channel_iq_matrix.channel_matrix;
+      for (int uI = 0; uI < nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports; uI++) {
+        for (int gI = 0; gI < nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements; gI++) {
+          for (int pI = 0; pI < nr_srs_normalized_channel_iq_matrix.num_prgs; pI++) {
+            uint16_t index = uI * nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements * nr_srs_normalized_channel_iq_matrix.num_prgs + gI * nr_srs_normalized_channel_iq_matrix.num_prgs + pI;
+            LOG_I(NR_MAC,
+                  "(uI %i, gI %i, pI %i) channel_matrix --> real %i, imag %i\n",
+                  uI,
+                  gI,
+                  pI,
+                  nr_srs_normalized_channel_iq_matrix.normalized_iq_representation == 0 ? channel_matrix8[index].r : channel_matrix16[index].r,
+                  nr_srs_normalized_channel_iq_matrix.normalized_iq_representation == 0 ? channel_matrix8[index].i : channel_matrix16[index].i);
+          }
+        }
+      }
+#endif
+
+      // TODO: This should be improved
+      NR_UE_sched_ctrl_t *sched_ctrl = &UE->UE_sched_ctrl;
+      NR_UE_UL_BWP_t *current_BWP = &UE->current_UL_BWP;
+      sched_ctrl->srs_feedback.sri = NR_SRS_SRI_0;
+      sched_ctrl->srs_feedback.ul_ri = 0; // TODO: Compute this
+      sched_ctrl->srs_feedback.tpmi = nr_srs_tpmi_estimation(current_BWP->pusch_Config,
+                                                             current_BWP->transform_precoding,
+                                                             nr_srs_normalized_channel_iq_matrix.channel_matrix,
+                                                             nr_srs_normalized_channel_iq_matrix.normalized_iq_representation,
+                                                             nr_srs_normalized_channel_iq_matrix.num_gnb_antenna_elements,
+                                                             nr_srs_normalized_channel_iq_matrix.num_ue_srs_ports,
+                                                             nr_srs_normalized_channel_iq_matrix.prg_size,
+                                                             nr_srs_normalized_channel_iq_matrix.num_prgs,
+                                                             sched_ctrl->srs_feedback.ul_ri);
+      sprintf(stats->srs_stats, "UL-RI %d, TPMI %d", sched_ctrl->srs_feedback.ul_ri + 1, sched_ctrl->srs_feedback.tpmi);
+      break;
+    }
+
+    case NR_SRS_ResourceSet__usage_nonCodebook:
+    case NR_SRS_ResourceSet__usage_antennaSwitching:
+      LOG_W(NR_MAC, "MAC procedures for this SRS usage are not implemented yet!\n");
+      break;
+
+    default:
+      AssertFatal(1 == 0, "Invalid SRS usage\n");
   }
 }
 
@@ -910,8 +1307,7 @@ void update_ul_ue_R_Qm(int mcs, int mcs_table, const NR_PUSCH_Config_t *pusch_Co
   }
 }
 
-
-void nr_ue_max_mcs_min_rb(int mu, int ph_limit, NR_pusch_semi_static_t *ps, NR_UE_UL_BWP_t *ul_bwp, uint16_t minRb, uint32_t tbs, uint16_t *Rb, uint8_t *mcs)
+void nr_ue_max_mcs_min_rb(int mu, int ph_limit, NR_sched_pusch_t *sched_pusch, NR_UE_UL_BWP_t *ul_bwp, uint16_t minRb, uint32_t tbs, uint16_t *Rb, uint8_t *mcs)
 {
   AssertFatal(*Rb >= minRb, "illegal Rb %d < minRb %d\n", *Rb, minRb);
   AssertFatal(*mcs >= 0 && *mcs <= 28, "illegal MCS %d\n", *mcs);
@@ -922,35 +1318,35 @@ void nr_ue_max_mcs_min_rb(int mu, int ph_limit, NR_pusch_semi_static_t *ps, NR_U
   update_ul_ue_R_Qm(*mcs, ul_bwp->mcs_table, ul_bwp->pusch_Config, &R, &Qm);
 
   long *deltaMCS = ul_bwp->pusch_Config ? ul_bwp->pusch_Config->pusch_PowerControl->deltaMCS : NULL;
-  int tx_power = compute_bw_factor(mu, *Rb) +
-                 compute_delta_tf(tbs_bits,
-                                  *Rb,
-                                  ps->nrOfLayers,
-                                  ps->nrOfSymbols,
-                                  ps->N_PRB_DMRS*ps->num_dmrs_symb,
-                                  deltaMCS);
+  int tx_power = compute_ph_factor(mu,
+                                   tbs_bits,
+                                   *Rb,
+                                   sched_pusch->nrOfLayers,
+                                   sched_pusch->tda_info.nrOfSymbols,
+                                   sched_pusch->dmrs_info.N_PRB_DMRS*sched_pusch->dmrs_info.num_dmrs_symb,
+                                   deltaMCS);
 
   while (ph_limit < tx_power && *Rb >= minRb) {
     (*Rb)--;
-    tx_power = compute_bw_factor(mu, *Rb) +
-               compute_delta_tf(tbs_bits,
-                                *Rb,
-                                ps->nrOfLayers,
-                                ps->nrOfSymbols,
-                                ps->N_PRB_DMRS*ps->num_dmrs_symb,
-                                deltaMCS);
+    tx_power = compute_ph_factor(mu,
+                                 tbs_bits,
+                                 *Rb,
+                                 sched_pusch->nrOfLayers,
+                                 sched_pusch->tda_info.nrOfSymbols,
+                                 sched_pusch->dmrs_info.N_PRB_DMRS*sched_pusch->dmrs_info.num_dmrs_symb,
+                                 deltaMCS);
   }
 
   while (ph_limit < tx_power && *mcs > 6) {
     (*mcs)--;
     update_ul_ue_R_Qm(*mcs, ul_bwp->mcs_table, ul_bwp->pusch_Config, &R, &Qm);
-    tx_power = compute_bw_factor(mu, *Rb) +
-               compute_delta_tf(tbs_bits,
-                                *Rb,
-                                ps->nrOfLayers,
-                                ps->nrOfSymbols,
-                                ps->N_PRB_DMRS*ps->num_dmrs_symb,
-                                deltaMCS);
+    tx_power = compute_ph_factor(mu,
+                                 tbs_bits,
+                                 *Rb,
+                                 sched_pusch->nrOfLayers,
+                                 sched_pusch->tda_info.nrOfSymbols,
+                                 sched_pusch->dmrs_info.N_PRB_DMRS*sched_pusch->dmrs_info.num_dmrs_symb,
+                                 deltaMCS);
   }
 
   if (ph_limit < tx_power)
@@ -978,22 +1374,11 @@ static bool allocate_ul_retransmission(gNB_MAC_INST *nrmac,
   const uint8_t nrOfLayers = 1;
   LOG_D(NR_MAC,"retInfo->time_domain_allocation = %d, tda = %d\n", retInfo->time_domain_allocation, tda);
   LOG_D(NR_MAC,"tbs %d\n",retInfo->tb_size);
-  if (tda == retInfo->time_domain_allocation) {
-    /* check whether we need to switch the TDA allocation since tha last
-     * (re-)transmission */
-    NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
-
-    if (ps->time_domain_allocation != tda
-        || ps->nrOfLayers != nrOfLayers) {
-      nr_set_pusch_semi_static(&UE->current_UL_BWP,
-                               scc,
-                               tda,
-                               nrOfLayers,
-                               ps);
-    }
+  if (tda == retInfo->time_domain_allocation &&
+      nrOfLayers == retInfo->nrOfLayers) {
 
     /* Check the resource is enough for retransmission */
-    const uint16_t slbitmap = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    const uint16_t slbitmap = SL_to_bitmap(retInfo->tda_info.startSymbolIndex, retInfo->tda_info.nrOfSymbols);
     while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
       rbStart++;
     if (rbStart + retInfo->rbSize > bwpSize) {
@@ -1002,15 +1387,15 @@ static bool allocate_ul_retransmission(gNB_MAC_INST *nrmac,
     }
     LOG_D(NR_MAC, "%s(): retransmission keeping TDA %d and TBS %d\n", __func__, tda, retInfo->tb_size);
   } else {
-    NR_pusch_semi_static_t temp_ps;
-    nr_set_pusch_semi_static(&UE->current_UL_BWP,
-                             scc,
-                             tda,
-                             nrOfLayers,
-                             &temp_ps);
+
+    NR_tda_info_t tda_info = nr_get_pusch_tda_info(&UE->current_UL_BWP, tda);
+    NR_pusch_dmrs_t dmrs_info = get_ul_dmrs_params(scc,
+                                                   &UE->current_UL_BWP,
+                                                   &tda_info,
+                                                   nrOfLayers);
     /* the retransmission will use a different time domain allocation, check
      * that we have enough resources */
-    const uint16_t slbitmap = SL_to_bitmap(temp_ps.startSymbolIndex, temp_ps.nrOfSymbols);
+    const uint16_t slbitmap = SL_to_bitmap(tda_info.startSymbolIndex, tda_info.nrOfSymbols);
     while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
       rbStart++;
     int rbSize = 0;
@@ -1021,8 +1406,8 @@ static bool allocate_ul_retransmission(gNB_MAC_INST *nrmac,
     bool success = nr_find_nb_rb(retInfo->Qm,
                                  retInfo->R,
                                  1, // layers
-                                 temp_ps.nrOfSymbols,
-                                 temp_ps.N_PRB_DMRS * temp_ps.num_dmrs_symb,
+                                 tda_info.nrOfSymbols,
+                                 dmrs_info.N_PRB_DMRS * dmrs_info.num_dmrs_symb,
                                  retInfo->tb_size,
                                  1, /* minimum of 1RB: need to find exact TBS, don't preclude any number */
                                  rbSize,
@@ -1038,7 +1423,8 @@ static bool allocate_ul_retransmission(gNB_MAC_INST *nrmac,
     retInfo->tb_size = new_tbs;
     retInfo->rbSize = new_rbSize;
     retInfo->time_domain_allocation = tda;
-    sched_ctrl->pusch_semi_static = temp_ps;
+    retInfo->dmrs_info = dmrs_info;
+    retInfo->tda_info = tda_info;
   }
 
   /* Find a free CCE */
@@ -1096,7 +1482,7 @@ static bool allocate_ul_retransmission(gNB_MAC_INST *nrmac,
   /* Mark the corresponding RBs as used */
   n_rb_sched -= sched_pusch->rbSize;
   for (int rb = 0; rb < sched_ctrl->sched_pusch.rbSize; rb++)
-    rballoc_mask[rb + sched_ctrl->sched_pusch.rbStart] ^= SL_to_bitmap(sched_ctrl->pusch_semi_static.startSymbolIndex, sched_ctrl->pusch_semi_static.nrOfSymbols);
+    rballoc_mask[rb + sched_ctrl->sched_pusch.rbStart] ^= SL_to_bitmap(sched_pusch->tda_info.startSymbolIndex, sched_pusch->tda_info.nrOfSymbols);
   return true;
 }
 
@@ -1143,7 +1529,6 @@ void pf_ul(module_id_t module_id,
 
     const uint16_t bwpSize = current_BWP->BWPSize;
     NR_sched_pusch_t *sched_pusch = &sched_ctrl->sched_pusch;
-    NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
     const NR_mac_dir_stats_t *stats = &UE->mac_stats.ul;
 
     /* Calculate throughput */
@@ -1235,24 +1620,17 @@ void pf_ul(module_id_t module_id,
       if (remainUEs == 0)
         return;
 
-      /* Save PUSCH field */
-      /* we want to avoid a lengthy deduction of DMRS and other parameters in
-       * every TTI if we can save it, so check whether TDA, or
-       * num_dmrs_cdm_grps_no_data has changed and only then recompute */
-      const uint8_t nrOfLayers = 1;
-      const int tda = get_ul_tda(nrmac, scc, sched_pusch->slot);
-      if (ps->time_domain_allocation != tda
-          || ps->nrOfLayers != nrOfLayers) {
-        nr_set_pusch_semi_static(current_BWP,
-                                 scc,
-                                 tda,
-                                 nrOfLayers,
-                                 ps);
-      }
+      sched_pusch->nrOfLayers = 1;
+      sched_pusch->time_domain_allocation = get_ul_tda(nrmac, scc, sched_pusch->slot);
+      sched_pusch->tda_info = nr_get_pusch_tda_info(current_BWP, sched_pusch->time_domain_allocation);
+      sched_pusch->dmrs_info = get_ul_dmrs_params(scc,
+                                                  current_BWP,
+                                                  &sched_pusch->tda_info,
+                                                  sched_pusch->nrOfLayers);
 
       LOG_D(NR_MAC,"Looking for min_rb %d RBs, starting at %d num_dmrs_cdm_grps_no_data %d\n",
-            min_rb, rbStart, ps->num_dmrs_cdm_grps_no_data);
-      const uint16_t slbitmap = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+            min_rb, rbStart, sched_pusch->dmrs_info.num_dmrs_cdm_grps_no_data);
+      const uint16_t slbitmap = SL_to_bitmap(sched_pusch->tda_info.startSymbolIndex, sched_pusch->tda_info.nrOfSymbols);
       while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
         rbStart++;
       if (rbStart + min_rb >= bwpSize) {
@@ -1276,11 +1654,11 @@ void pf_ul(module_id_t module_id,
       sched_pusch->tb_size = nr_compute_tbs(sched_pusch->Qm,
                                             sched_pusch->R,
                                             sched_pusch->rbSize,
-                                            ps->nrOfSymbols,
-                                            ps->N_PRB_DMRS * ps->num_dmrs_symb,
+                                            sched_pusch->tda_info.nrOfSymbols,
+                                            sched_pusch->dmrs_info.N_PRB_DMRS * sched_pusch->dmrs_info.num_dmrs_symb,
                                             0, // nb_rb_oh
                                             0,
-                                            ps->nrOfLayers)
+                                            sched_pusch->nrOfLayers)
                              >> 3;
 
       /* Mark the corresponding RBs as used */
@@ -1340,26 +1718,19 @@ void pf_ul(module_id_t module_id,
 
     const uint16_t bwpSize = current_BWP->BWPSize;
     NR_sched_pusch_t *sched_pusch = &sched_ctrl->sched_pusch;
-    NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
-
-    /* Save PUSCH field */
-    /* we want to avoid a lengthy deduction of DMRS and other parameters in
-     * every TTI if we can save it, so check whether TDA, or
-     * num_dmrs_cdm_grps_no_data has changed and only then recompute */
-    const uint8_t nrOfLayers = 1;
-    const int tda = get_ul_tda(nrmac, scc, sched_pusch->slot);
-    if (ps->time_domain_allocation != tda
-        || ps->nrOfLayers != nrOfLayers) {
-      nr_set_pusch_semi_static(current_BWP,
-                               scc,
-                               tda,
-                               nrOfLayers,
-                               ps);
-    }
+
+    sched_pusch->nrOfLayers = 1;
+    sched_pusch->time_domain_allocation = get_ul_tda(nrmac, scc, sched_pusch->slot);
+    sched_pusch->tda_info = nr_get_pusch_tda_info(current_BWP, sched_pusch->time_domain_allocation);
+    sched_pusch->dmrs_info = get_ul_dmrs_params(scc,
+                                                current_BWP,
+                                                &sched_pusch->tda_info,
+                                                sched_pusch->nrOfLayers);
+
     update_ul_ue_R_Qm(sched_pusch->mcs, current_BWP->mcs_table, current_BWP->pusch_Config, &sched_pusch->R, &sched_pusch->Qm);
 
     int rbStart = 0;
-    const uint16_t slbitmap = SL_to_bitmap(ps->startSymbolIndex, ps->nrOfSymbols);
+    const uint16_t slbitmap = SL_to_bitmap(sched_pusch->tda_info.startSymbolIndex, sched_pusch->tda_info.nrOfSymbols);
     while (rbStart < bwpSize && (rballoc_mask[rbStart] & slbitmap) != slbitmap)
       rbStart++;
     sched_pusch->rbStart = rbStart;
@@ -1381,7 +1752,7 @@ void pf_ul(module_id_t module_id,
     sched_pusch->mu  = scc->uplinkConfigCommon->initialUplinkBWP->genericParameters.subcarrierSpacing;
     if(sched_ctrl->pcmax!=0 ||
        sched_ctrl->ph!=0) // verify if the PHR related parameter have been initialized
-      nr_ue_max_mcs_min_rb(sched_pusch->mu, sched_ctrl->ph, ps, current_BWP, min_rbSize, B, &max_rbSize, &sched_pusch->mcs);
+      nr_ue_max_mcs_min_rb(current_BWP->scs, sched_ctrl->ph, sched_pusch, current_BWP, min_rbSize, B, &max_rbSize, &sched_pusch->mcs);
 
     if (sched_pusch->mcs < sched_ctrl->ul_bler_stats.mcs)
       sched_ctrl->ul_bler_stats.mcs = sched_pusch->mcs; /* force estimated MCS down */
@@ -1392,8 +1763,8 @@ void pf_ul(module_id_t module_id,
     nr_find_nb_rb(sched_pusch->Qm,
                   sched_pusch->R,
                   1, // layers
-                  ps->nrOfSymbols,
-                  ps->N_PRB_DMRS * ps->num_dmrs_symb,
+                  sched_pusch->tda_info.nrOfSymbols,
+                  sched_pusch->dmrs_info.N_PRB_DMRS * sched_pusch->dmrs_info.num_dmrs_symb,
                   B,
                   min_rbSize,
                   max_rbSize,
@@ -1403,7 +1774,8 @@ void pf_ul(module_id_t module_id,
     sched_pusch->rbSize = rbSize;
     sched_pusch->tb_size = TBS;
     LOG_D(NR_MAC,"rbSize %d (max_rbSize %d), TBS %d, est buf %d, sched_ul %d, B %d, CCE %d, num_dmrs_symb %d, N_PRB_DMRS %d\n",
-          rbSize, max_rbSize,sched_pusch->tb_size, sched_ctrl->estimated_ul_buffer, sched_ctrl->sched_ul_bytes, B,sched_ctrl->cce_index,ps->num_dmrs_symb,ps->N_PRB_DMRS);
+          rbSize, max_rbSize,sched_pusch->tb_size, sched_ctrl->estimated_ul_buffer, sched_ctrl->sched_ul_bytes, B,
+          sched_ctrl->cce_index,sched_pusch->dmrs_info.num_dmrs_symb,sched_pusch->dmrs_info.N_PRB_DMRS);
 
     /* Mark the corresponding RBs as used */
 
@@ -1628,11 +2000,6 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
 
     int rnti_types[2] = { NR_RNTI_C, 0 };
 
-    /* pre-computed PUSCH values that only change if time domain allocation,
-     * DCI format, or DMRS parameters change. Updated in the preprocessor
-     * through nr_set_pusch_semi_static() */
-    NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
-
     /* Statistics */
     AssertFatal(cur_harq->round < nr_mac->ul_bler.harq_round_max, "Indexing ulsch_rounds[%d] is out of bounds\n", cur_harq->round);
     UE->mac_stats.ul.rounds[cur_harq->round]++;
@@ -1643,7 +2010,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
       cur_harq->sched_pusch = *sched_pusch;
       /* save which time allocation has been used, to be used on
        * retransmissions */
-      cur_harq->sched_pusch.time_domain_allocation = ps->time_domain_allocation;
+      cur_harq->sched_pusch.time_domain_allocation = sched_pusch->time_domain_allocation;
       sched_ctrl->sched_ul_bytes += sched_pusch->tb_size;
       UE->mac_stats.ul.total_rbs += sched_pusch->rbSize;
 
@@ -1675,12 +2042,12 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
           sched_ctrl->aggregation_level,
           sched_pusch->rbStart,
           sched_pusch->rbSize,
-          ps->startSymbolIndex,
-          ps->nrOfSymbols,
-          ps->ul_dmrs_symb_pos,
+          sched_pusch->tda_info.startSymbolIndex,
+          sched_pusch->tda_info.nrOfSymbols,
+          sched_pusch->dmrs_info.ul_dmrs_symb_pos,
           sched_pusch->mcs,
-          ps->nrOfLayers,
-          ps->num_dmrs_cdm_grps_no_data,
+          sched_pusch->nrOfLayers,
+          sched_pusch->dmrs_info.num_dmrs_cdm_grps_no_data,
           sched_pusch->tb_size,
           harq_id,
           cur_harq->round,
@@ -1693,17 +2060,18 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
 
     /* PUSCH in a later slot, but corresponding DCI now! */
     nfapi_nr_ul_tti_request_t *future_ul_tti_req = &RC.nrmac[module_id]->UL_tti_req_ahead[0][sched_pusch->slot];
-    AssertFatal(future_ul_tti_req->SFN == sched_pusch->frame
-                && future_ul_tti_req->Slot == sched_pusch->slot,
-                "%d.%d future UL_tti_req's frame.slot %d.%d does not match PUSCH %d.%d\n",
-                frame, slot,
-                future_ul_tti_req->SFN,
-                future_ul_tti_req->Slot,
-                sched_pusch->frame,
-                sched_pusch->slot);
+    if (future_ul_tti_req->SFN != sched_pusch->frame || future_ul_tti_req->Slot != sched_pusch->slot)
+      LOG_W(MAC,
+            "%d.%d future UL_tti_req's frame.slot %d.%d does not match PUSCH %d.%d\n",
+            frame, slot,
+            future_ul_tti_req->SFN,
+            future_ul_tti_req->Slot,
+            sched_pusch->frame,
+            sched_pusch->slot);
     AssertFatal(future_ul_tti_req->n_pdus <
                 sizeof(future_ul_tti_req->pdus_list) / sizeof(future_ul_tti_req->pdus_list[0]),
                 "Invalid future_ul_tti_req->n_pdus %d\n", future_ul_tti_req->n_pdus);
+
     future_ul_tti_req->pdus_list[future_ul_tti_req->n_pdus].pdu_type = NFAPI_NR_UL_CONFIG_PUSCH_PDU_TYPE;
     future_ul_tti_req->pdus_list[future_ul_tti_req->n_pdus].pdu_size = sizeof(nfapi_nr_pusch_pdu_t);
     nfapi_nr_pusch_pdu_t *pusch_pdu = &future_ul_tti_req->pdus_list[future_ul_tti_req->n_pdus].pusch_pdu;
@@ -1734,18 +2102,19 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
       pusch_pdu->data_scrambling_id = *current_BWP->pusch_Config->dataScramblingIdentityPUSCH;
     else
       pusch_pdu->data_scrambling_id = *scc->physCellId;
-    pusch_pdu->nrOfLayers = ps->nrOfLayers;
-    pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
+    pusch_pdu->nrOfLayers = sched_pusch->nrOfLayers;
+    pusch_pdu->num_dmrs_cdm_grps_no_data = sched_pusch->dmrs_info.num_dmrs_cdm_grps_no_data;
 
     /* FAPI: DMRS */
-    pusch_pdu->ul_dmrs_symb_pos = ps->ul_dmrs_symb_pos;
-    pusch_pdu->dmrs_config_type = ps->dmrs_config_type;
+    pusch_pdu->ul_dmrs_symb_pos = sched_pusch->dmrs_info.ul_dmrs_symb_pos;
+    pusch_pdu->dmrs_config_type = sched_pusch->dmrs_info.dmrs_config_type;
+    const NR_DMRS_UplinkConfig_t *NR_DMRS_UplinkConfig = sched_pusch->dmrs_info.NR_DMRS_UplinkConfig;
     if (pusch_pdu->transform_precoding) { // transform precoding disabled
       long *scramblingid=NULL;
-      if (ps->NR_DMRS_UplinkConfig && pusch_pdu->scid == 0)
-        scramblingid = ps->NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID0;
-      else if (ps->NR_DMRS_UplinkConfig)
-        scramblingid = ps->NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID1;
+      if (NR_DMRS_UplinkConfig && pusch_pdu->scid == 0)
+        scramblingid = NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID0;
+      else if (NR_DMRS_UplinkConfig)
+        scramblingid = NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID1;
       if (scramblingid == NULL)
         pusch_pdu->ul_dmrs_scrambling_id = *scc->physCellId;
       else
@@ -1753,14 +2122,14 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     }
     else {
       pusch_pdu->ul_dmrs_scrambling_id = *scc->physCellId;
-      if (ps->NR_DMRS_UplinkConfig && ps->NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity != NULL)
-        pusch_pdu->pusch_identity = *ps->NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity;
-      else if (ps->NR_DMRS_UplinkConfig)
+      if (NR_DMRS_UplinkConfig && NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity != NULL)
+        pusch_pdu->pusch_identity = *NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity;
+      else if (NR_DMRS_UplinkConfig)
         pusch_pdu->pusch_identity = *scc->physCellId;
     }
     pusch_pdu->scid = 0;      // DMRS sequence initialization [TS38.211, sec 6.4.1.1.1]
-    pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
-    pusch_pdu->dmrs_ports = ((1<<ps->nrOfLayers) - 1);
+    pusch_pdu->num_dmrs_cdm_grps_no_data = sched_pusch->dmrs_info.num_dmrs_cdm_grps_no_data;
+    pusch_pdu->dmrs_ports = ((1<<sched_pusch->nrOfLayers) - 1);
 
     /* FAPI: Pusch Allocation in frequency domain */
     pusch_pdu->resource_alloc = 1; //type 1
@@ -1773,8 +2142,8 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
       pusch_pdu->frequency_hopping = 1;
 
     /* FAPI: Resource Allocation in time domain */
-    pusch_pdu->start_symbol_index = ps->startSymbolIndex;
-    pusch_pdu->nr_of_symbols = ps->nrOfSymbols;
+    pusch_pdu->start_symbol_index = sched_pusch->tda_info.startSymbolIndex;
+    pusch_pdu->nr_of_symbols = sched_pusch->tda_info.nrOfSymbols;
 
     /* PUSCH PDU */
     AssertFatal(cur_harq->round < nr_mac->ul_bler.harq_round_max, "Indexing nr_rv_round_map[%d] is out of bounds\n", cur_harq->round%4);
@@ -1810,8 +2179,8 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
       pusch_pdu->dfts_ofdm.low_papr_group_number = pusch_pdu->pusch_identity % 30;
 
       // V as specified in section 6.4.1.1.1.2 in 38.211 V = 0 if sequence hopping and group hopping are disabled
-      if ((ps->NR_DMRS_UplinkConfig==NULL) || ((ps->NR_DMRS_UplinkConfig->transformPrecodingEnabled->sequenceGroupHopping == NULL) &&
-					       (ps->NR_DMRS_UplinkConfig->transformPrecodingEnabled->sequenceHopping == NULL)))
+      if ((NR_DMRS_UplinkConfig==NULL) || ((NR_DMRS_UplinkConfig->transformPrecodingEnabled->sequenceGroupHopping == NULL) &&
+					       (NR_DMRS_UplinkConfig->transformPrecodingEnabled->sequenceHopping == NULL)))
         pusch_pdu->dfts_ofdm.low_papr_sequence_number = 0;
       else
         AssertFatal(1==0,"SequenceGroupHopping or sequenceHopping are NOT Supported\n");
@@ -1822,10 +2191,10 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     /*-----------------------------------------------------------------------------*/
 
     /* PUSCH PTRS */
-    if (ps->NR_DMRS_UplinkConfig && ps->NR_DMRS_UplinkConfig->phaseTrackingRS != NULL) {
+    if (NR_DMRS_UplinkConfig && NR_DMRS_UplinkConfig->phaseTrackingRS != NULL) {
       bool valid_ptrs_setup = false;
       pusch_pdu->pusch_ptrs.ptrs_ports_list   = (nfapi_nr_ptrs_ports_t *) malloc(2*sizeof(nfapi_nr_ptrs_ports_t));
-      valid_ptrs_setup = set_ul_ptrs_values(ps->NR_DMRS_UplinkConfig->phaseTrackingRS->choice.setup,
+      valid_ptrs_setup = set_ul_ptrs_values(NR_DMRS_UplinkConfig->phaseTrackingRS->choice.setup,
                                             pusch_pdu->rb_size, pusch_pdu->mcs_index, pusch_pdu->mcs_table,
                                             &pusch_pdu->pusch_ptrs.ptrs_freq_density,&pusch_pdu->pusch_ptrs.ptrs_time_density,
                                             &pusch_pdu->pusch_ptrs.ptrs_ports_list->ptrs_re_offset,&pusch_pdu->pusch_ptrs.num_ptrs_ports,
@@ -1881,9 +2250,11 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
                  scc,
                  pusch_pdu,
                  &uldci_payload,
-                 ps->time_domain_allocation,
+                 &sched_ctrl->srs_feedback,
+                 sched_pusch->time_domain_allocation,
                  UE->UE_sched_ctrl.tpc0,
                  current_BWP);
+
     fill_dci_pdu_rel15(scc,
                        cg,
                        &UE->current_DL_BWP,
diff --git a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
index 6a3f778af9a48eaa4103701ec7cea4b06a105e74..4497e89030b54e9c9e3b402caa5967e0c3d7874e 100644
--- a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+++ b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
@@ -87,9 +87,8 @@ uint32_t schedule_control_sib1(module_id_t module_id,
                                int CC_id,
                                NR_Type0_PDCCH_CSS_config_t *type0_PDCCH_CSS_config,
                                int time_domain_allocation,
-                               int startSymbolIndex,
-                               int nrOfSymbols,
-                               uint16_t dlDmrsSymbPos,
+                               NR_pdsch_dmrs_t *dmrs_parms,
+                               NR_tda_info_t *tda_info,
                                uint8_t candidate_idx,
                                uint16_t num_total_bytes);
 
@@ -190,6 +189,7 @@ void config_uldci(const NR_SIB1_t *sib1,
                   const NR_ServingCellConfigCommon_t *scc,
                   const nfapi_nr_pusch_pdu_t *pusch_pdu,
                   dci_pdu_rel15_t *dci_pdu_rel15,
+                  nr_srs_feedback_t *srs_feedback,
                   int time_domain_assignment,
                   uint8_t tpc,
                   NR_UE_UL_BWP_t *ul_bwp);
@@ -325,18 +325,16 @@ long get_K2(NR_PUSCH_TimeDomainResourceAllocationList_t *tdaList,
             int time_domain_assignment,
             int mu);
 
-void nr_set_pdsch_semi_static(const NR_UE_DL_BWP_t *dl_bwp,
-                              const NR_ServingCellConfigCommon_t *scc,
-                              int tda,
-                              uint8_t layers,
-                              NR_UE_sched_ctrl_t *sched_ctrl,
-                              NR_pdsch_semi_static_t *ps);
+NR_tda_info_t nr_get_pdsch_tda_info(const NR_UE_DL_BWP_t *dl_bwp,
+                                    const int tda);
 
-void nr_set_pusch_semi_static(const NR_UE_UL_BWP_t *ul_bwp,
-                              const NR_ServingCellConfigCommon_t *scc,
-                              int tda,
-                              uint8_t nrOfLayers,
-                              NR_pusch_semi_static_t *ps);
+NR_tda_info_t nr_get_pusch_tda_info(const NR_UE_UL_BWP_t *ul_bwp,
+                                    const int tda);
+
+NR_pusch_dmrs_t get_ul_dmrs_params(const NR_ServingCellConfigCommon_t *scc,
+                                   const NR_UE_UL_BWP_t *ul_bwp,
+                                   const NR_tda_info_t *tda_info,
+                                   const int Layers);
 
 uint8_t nr_get_tpc(int target, uint8_t cqi, int incr);
 
@@ -444,12 +442,7 @@ void handle_nr_ul_harq(const int CC_idP,
 void handle_nr_srs_measurements(const module_id_t module_id,
                                 const frame_t frame,
                                 const sub_frame_t slot,
-                                const rnti_t rnti,
-                                const uint16_t timing_advance,
-                                const uint8_t num_symbols,
-                                const uint8_t wide_band_snr,
-                                const uint8_t num_reported_symbols,
-                                nfapi_nr_srs_indication_reported_symbol_t* reported_symbol_list);
+                                const nfapi_nr_srs_indication_pdu_t *srs_ind);
 
 int16_t ssb_index_from_prach(module_id_t module_idP,
                              frame_t frameP,
@@ -460,16 +453,18 @@ int16_t ssb_index_from_prach(module_id_t module_idP,
 
 void find_SSB_and_RO_available(module_id_t module_idP);
 
-void set_dl_dmrs_ports(NR_pdsch_semi_static_t *ps);
+NR_pdsch_dmrs_t get_dl_dmrs_params(const NR_ServingCellConfigCommon_t *scc,
+                                   const NR_UE_DL_BWP_t *BWP,
+                                   const NR_tda_info_t *tda_info,
+                                   const int Layers);
 
-uint16_t set_pm_index(NR_UE_sched_ctrl_t *sched_ctrl,
+uint16_t get_pm_index(const NR_UE_info_t *UE,
                       int layers,
-                      int N1, int N2,
-                      int xp_pdsch_antenna_ports,
-                      int codebook_mode);
+                      int xp_pdsch_antenna_ports);
 
 uint8_t get_mcs_from_cqi(int mcs_table, int cqi_table, int cqi_idx);
-uint8_t set_dl_nrOfLayers(NR_UE_sched_ctrl_t *sched_ctrl);
+
+uint8_t get_dl_nrOfLayers(const NR_UE_sched_ctrl_t *sched_ctrl, const nr_dci_format_t dci_format);
 
 const int get_dl_tda(const gNB_MAC_INST *nrmac, const NR_ServingCellConfigCommon_t *scc, int slot);
 const int get_ul_tda(const gNB_MAC_INST *nrmac, const NR_ServingCellConfigCommon_t *scc, int slot);
diff --git a/openair2/LAYER2/NR_MAC_gNB/main.c b/openair2/LAYER2/NR_MAC_gNB/main.c
index b103182a67e1b86e7cb6ead8bc64b38e4099b8cc..6d592b797fee79cff9abda3698e82b68973f09d1 100644
--- a/openair2/LAYER2/NR_MAC_gNB/main.c
+++ b/openair2/LAYER2/NR_MAC_gNB/main.c
@@ -93,14 +93,14 @@ size_t dump_mac_stats(gNB_MAC_INST *gNB, char *output, size_t strlen, bool reset
 
     output += snprintf(output,
                        end - output,
-                       "UE RNTI %04x (%d) PH %d dB PCMAX %d dBm, average RSRP %d (%d meas), UL-SNR %d dB\n",
+                       "UE RNTI %04x (%d) PH %d dB PCMAX %d dBm, average RSRP %d (%d meas)\n",
                        UE->rnti,
                        num++,
                        sched_ctrl->ph,
                        sched_ctrl->pcmax,
                        avg_rsrp,
-                       stats->num_rsrp_meas,
-                       stats->srs_wide_band_snr);
+                       stats->num_rsrp_meas);
+
     output += snprintf(output,
                        end - output,
                        "UE %04x: CQI %d, RI %d, PMI (%d,%d)\n",
@@ -110,6 +110,10 @@ size_t dump_mac_stats(gNB_MAC_INST *gNB, char *output, size_t strlen, bool reset
                        sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x1,
                        sched_ctrl->CSI_report.cri_ri_li_pmi_cqi_report.pmi_x2);
 
+    if (stats->srs_stats[0] != '\0') {
+      output += snprintf(output, end - output, "UE %04x: %s\n", UE->rnti, stats->srs_stats);
+    }
+
     output += snprintf(output,
                        end - output,
                        "UE %04x: dlsch_rounds ", UE->rnti);
diff --git a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
index 346f1f8516121b5cfad28d8418624743eff2fad9..4d65de8403e5f82949f576a985c98054ae26fae1 100644
--- a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
+++ b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
@@ -43,7 +43,7 @@
 #include <string.h>
 
 /* Commmon */
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 #include "COMMON/platform_constants.h"
 #include "common/ran_context.h"
 #include "collection/linear_alloc.h"
@@ -171,10 +171,6 @@ typedef struct {
   frame_t Msg3_frame;
   /// Msg3 time domain allocation index
   uint8_t Msg3_tda_id;
-  /// Subframe where Msg4 is to be sent
-  sub_frame_t Msg4_slot;
-  /// Frame where Msg4 is to be sent
-  frame_t Msg4_frame;
   /// harq_pid used for Msg4 transmission
   uint8_t harq_pid;
   /// UE RNTI allocated during RAR
@@ -373,23 +369,20 @@ typedef struct NR_sched_pucch {
   int start_symb;
 } NR_sched_pucch_t;
 
-/* PUSCH semi-static configuration: as long as the TDA and DCI format remain
- * the same over the same uBWP and search space, there is no need to
- * recalculate all S/L, MCS table, or DMRS-related parameters over and over
- * again. Hence, we store them in this struct for easy reference. */
-typedef struct NR_pusch_semi_static_t {
-  int time_domain_allocation;
-  uint8_t nrOfLayers;
-  uint8_t num_dmrs_cdm_grps_no_data;
+typedef struct NR_tda_info {
+  mappingType_t mapping_type;
   int startSymbolIndex;
   int nrOfSymbols;
-  long mapping_type;
-  NR_DMRS_UplinkConfig_t *NR_DMRS_UplinkConfig;
-  uint16_t dmrs_config_type;
-  uint16_t ul_dmrs_symb_pos;
-  uint8_t num_dmrs_symb;
+} NR_tda_info_t;
+
+typedef struct NR_pusch_dmrs {
   uint8_t N_PRB_DMRS;
-} NR_pusch_semi_static_t;
+  uint8_t num_dmrs_symb;
+  uint16_t ul_dmrs_symb_pos;
+  uint8_t num_dmrs_cdm_grps_no_data;
+  nfapi_nr_dmrs_type_e dmrs_config_type;
+  NR_DMRS_UplinkConfig_t *NR_DMRS_UplinkConfig;
+} NR_pusch_dmrs_t;
 
 typedef struct NR_sched_pusch {
   int frame;
@@ -411,10 +404,11 @@ typedef struct NR_sched_pusch {
   /// UL HARQ PID to use for this UE, or -1 for "any new"
   int8_t ul_harq_pid;
 
-  /// the Time Domain Allocation used for this transmission. Note that this is
-  /// only important for retransmissions; otherwise, the TDA in
-  /// NR_pusch_semi_static_t has precedence
+  uint8_t nrOfLayers;
+  // time_domain_allocation is an index into the list of time domain allocations (TDA)
   int time_domain_allocation;
+  NR_tda_info_t tda_info;
+  NR_pusch_dmrs_t dmrs_info;
 } NR_sched_pusch_t;
 
 typedef struct NR_sched_srs {
@@ -423,24 +417,14 @@ typedef struct NR_sched_srs {
   bool srs_scheduled;
 } NR_sched_srs_t;
 
-/* PDSCH semi-static configuratio: as long as the TDA/DMRS/mcsTable remains the
- * same, there is no need to recalculate all S/L or DMRS-related parameters
- * over and over again.  Hence, we store them in this struct for easy
- * reference. */
-typedef struct NR_pdsch_semi_static {
-  int time_domain_allocation;
-  uint8_t numDmrsCdmGrpsNoData;
-  uint8_t frontloaded_symb;
-  int mapping_type;
-  int startSymbolIndex;
-  int nrOfSymbols;
-  uint8_t nrOfLayers;
+typedef struct NR_pdsch_dmrs {
   uint8_t dmrs_ports_id;
   uint8_t N_PRB_DMRS;
   uint8_t N_DMRS_SLOT;
   uint16_t dl_dmrs_symb_pos;
+  uint8_t numDmrsCdmGrpsNoData;
   nfapi_nr_dmrs_type_e dmrsConfigType;
-} NR_pdsch_semi_static_t;
+} NR_pdsch_dmrs_t;
 
 typedef struct NR_sched_pdsch {
   /// RB allocation within active BWP
@@ -461,11 +445,13 @@ typedef struct NR_sched_pdsch {
   // pucch format allocation
   uint8_t pucch_allocation;
 
-  /// the Time Domain Allocation used for this transmission. Note that this is
-  /// only important for retransmissions; otherwise, the TDA in
-  /// NR_pdsch_semi_static_t has precedence
-  int time_domain_allocation;
+  uint16_t pm_index;
   uint8_t nrOfLayers;
+
+  NR_pdsch_dmrs_t dmrs_parms;
+  // time_domain_allocation is an index into the list of time domain allocations (TDA)
+  int time_domain_allocation;
+  NR_tda_info_t tda_info;
 } NR_sched_pdsch_t;
 
 typedef struct NR_UE_harq {
@@ -578,8 +564,6 @@ typedef struct {
   /// CSI in second.  This order is important for nr_acknack_scheduling()!
   NR_sched_pucch_t sched_pucch[2];
 
-  /// PUSCH semi-static configuration: is not cleared across TTIs
-  NR_pusch_semi_static_t pusch_semi_static;
   /// Sched PUSCH: scheduling decisions, copied into HARQ and cleared every TTI
   NR_sched_pusch_t sched_pusch;
 
@@ -596,8 +580,6 @@ typedef struct {
   /// PHR info: nominal UE transmit power levels (dBm)
   int pcmax;
 
-  /// PDSCH semi-static configuration: is not cleared across TTIs
-  NR_pdsch_semi_static_t pdsch_semi_static;
   /// Sched PDSCH: scheduling decisions, copied into HARQ and cleared every TTI
   NR_sched_pdsch_t sched_pdsch;
   /// UE-estimated maximum MCS (from CSI-RS)
@@ -632,7 +614,6 @@ typedef struct {
   int ul_failure;
   struct CSI_Report CSI_report;
   bool SR;
-  bool set_pmi;
   /// information about every HARQ process
   NR_UE_harq_t harq_processes[NR_MAX_NB_HARQ_PROCESSES];
   /// HARQ processes that are free
@@ -649,7 +630,7 @@ typedef struct {
   NR_list_t feedback_ul_harq;
   /// UL HARQ processes that await retransmission
   NR_list_t retrans_ul_harq;
-  NR_UE_mac_ce_ctrl_t UE_mac_ce_ctrl;// MAC CE related information
+  NR_UE_mac_ce_ctrl_t UE_mac_ce_ctrl; // MAC CE related information
   /// number of active DL LCs
   uint8_t dl_lc_num;
   /// order in which DLSCH scheduler should allocate LCs
@@ -657,6 +638,9 @@ typedef struct {
 
   /// Timer for RRC processing procedures
   uint32_t rrc_processing_timer;
+
+  /// Feedback derived from SRS: sri, ul_ri and tpmi
+  nr_srs_feedback_t srs_feedback;
 } NR_UE_sched_ctrl_t;
 
 typedef struct {
@@ -683,7 +667,7 @@ typedef struct NR_mac_stats {
   uint32_t pucch0_DTX;
   int cumul_rsrp;
   uint8_t num_rsrp_meas;
-  int8_t srs_wide_band_snr;
+  char srs_stats[50]; // Statistics may differ depending on SRS usage
 } NR_mac_stats_t;
 
 typedef struct NR_bler_options {
@@ -720,7 +704,6 @@ typedef struct {
   NR_gNB_UCI_STATS_t uci_statS;
   float ul_thr_ue;
   float dl_thr_ue;
-  int layers; 
 } NR_UE_info_t;
 
 typedef struct {
diff --git a/openair2/LAYER2/RLC/rlc.c b/openair2/LAYER2/RLC/rlc.c
index ec90817500d55bdf30b901518754e05f7a08f9cb..1e2266adb17fc9d299ced5c86cd9f554e280c8d7 100644
--- a/openair2/LAYER2/RLC/rlc.c
+++ b/openair2/LAYER2/RLC/rlc.c
@@ -590,7 +590,7 @@ void rlc_data_ind     (
     T(T_ENB_RLC_UL, T_INT(ctxt_pP->module_id), T_INT(ctxt_pP->rnti), T_INT(rb_idP), T_INT(sdu_sizeP));
 #endif
     const ngran_node_t type = RC.rrc[ctxt_pP->module_id]->node_type;
-    AssertFatal(type != ngran_eNB_CU && type != ngran_ng_eNB_CU && type != ngran_gNB_CU,
+    AssertFatal(!NODE_IS_CU(type),
                 "Can't be CU, bad node type %d\n", type);
 
     if (NODE_IS_DU(type) && srb_flagP == 1) {
diff --git a/openair2/LAYER2/nr_pdcp/nr_pdcp_entity.h b/openair2/LAYER2/nr_pdcp/nr_pdcp_entity.h
index 19445887f43f385d8fdddf9cb18e7f099a1428d6..dc50ca07eb338f7ab9f945ac16ccb642f9b98a72 100644
--- a/openair2/LAYER2/nr_pdcp/nr_pdcp_entity.h
+++ b/openair2/LAYER2/nr_pdcp/nr_pdcp_entity.h
@@ -25,6 +25,7 @@
 #include <stdint.h>
 
 #include "nr_pdcp_sdu.h"
+#include "openair2/RRC/NR/rrc_gNB_radio_bearers.h"
 
 typedef enum {
   NR_PDCP_DRB_AM,
diff --git a/openair2/LAYER2/nr_pdcp/nr_pdcp_oai_api.c b/openair2/LAYER2/nr_pdcp/nr_pdcp_oai_api.c
index ece0b0274d20562a590c629f48387ca70f309ddf..f1bcfe39074e3fc8edf32d6667ac356ba7aecbbb 100644
--- a/openair2/LAYER2/nr_pdcp/nr_pdcp_oai_api.c
+++ b/openair2/LAYER2/nr_pdcp/nr_pdcp_oai_api.c
@@ -265,7 +265,7 @@ static void do_pdcp_data_ind(
     else
       rb = ue->srb[rb_id - 1];
   } else {
-    if (rb_id < 1 || rb_id > 5)
+    if (rb_id < 1 || rb_id > MAX_DRBS_PER_UE)
       rb = NULL;
     else
       rb = ue->drb[rb_id - 1];
@@ -631,7 +631,7 @@ static void deliver_sdu_drb(void *_ue, nr_pdcp_entity_t *entity,
                   size);
   }
   else{
-    for (i = 0; i < 5; i++) {
+    for (i = 0; i < MAX_DRBS_PER_UE; i++) {
         if (entity == ue->drb[i]) {
           rb_id = i+1;
           goto rb_found;
@@ -666,7 +666,7 @@ static void deliver_pdu_drb(void *_ue, nr_pdcp_entity_t *entity,
   int i;
   mem_block_t *memblock;
 
-  for (i = 0; i < 5; i++) {
+  for (i = 0; i < MAX_DRBS_PER_UE; i++) {
     if (entity == ue->drb[i]) {
       rb_id = i+1;
       goto rb_found;
@@ -711,7 +711,7 @@ rb_found:
     
     memblock = get_free_mem_block(size, __FUNCTION__);
     memcpy(memblock->data, buf, size);
-    LOG_D(PDCP, "%s(): (srb %d) calling rlc_data_req size %d\n", __func__, rb_id, size);
+    LOG_D(PDCP, "%s(): (drb %d) calling rlc_data_req size %d\n", __func__, rb_id, size);
     //for (i = 0; i < size; i++) printf(" %2.2x", (unsigned char)memblock->data[i]);
     //printf("\n");
     enqueue_rlc_data_req(&ctxt, 0, MBMS_FLAG_NO, rb_id, sdu_id, 0, size, memblock);
@@ -999,14 +999,14 @@ static void add_drb_am(int is_gnb, int rnti, struct NR_DRB_ToAddMod *s,
 
     LOG_D(PDCP, "%s:%d:%s: added drb %d to ue rnti %x\n", __FILE__, __LINE__, __FUNCTION__, drb_id, rnti);
 
-    new_nr_sdap_entity(has_sdap,
+    new_nr_sdap_entity(is_gnb,
+                       has_sdap,
                        rnti,
                        pdusession_id,
                        is_sdap_DefaultDRB,
                        drb_id,
                        mappedQFIs2Add,
                        mappedQFIs2AddCount);
-    LOG_D(SDAP, "Added SDAP entity to ue rnti %x with pdusession_id %d\n", rnti, pdusession_id);
   }
   nr_pdcp_manager_unlock(nr_pdcp_ue_manager);
 }
@@ -1347,7 +1347,7 @@ static bool pdcp_data_req_drb(protocol_ctxt_t  *ctxt_pP,
 
   ue = nr_pdcp_manager_get_ue(nr_pdcp_ue_manager, rnti);
 
-  if (rb_id < 1 || rb_id > 5)
+  if (rb_id < 1 || rb_id > MAX_DRBS_PER_UE)
     rb = NULL;
   else
     rb = ue->drb[rb_id - 1];
diff --git a/openair2/LAYER2/nr_pdcp/nr_pdcp_ue_manager.h b/openair2/LAYER2/nr_pdcp/nr_pdcp_ue_manager.h
index 742e0e84be8c67f0e1a109a95e32879f2884ce9d..a3a6e076d708e3712d3ea96c287ed5aafd98d03f 100644
--- a/openair2/LAYER2/nr_pdcp/nr_pdcp_ue_manager.h
+++ b/openair2/LAYER2/nr_pdcp/nr_pdcp_ue_manager.h
@@ -29,7 +29,7 @@ typedef void nr_pdcp_ue_manager_t;
 typedef struct nr_pdcp_ue_t {
   int rnti;
   nr_pdcp_entity_t *srb[3];
-  nr_pdcp_entity_t *drb[5];
+  nr_pdcp_entity_t *drb[MAX_DRBS_PER_UE];
 } nr_pdcp_ue_t;
 
 /***********************************************************************/
diff --git a/openair2/LAYER2/nr_rlc/nr_rlc_entity.h b/openair2/LAYER2/nr_rlc/nr_rlc_entity.h
index cd26094cf7ee68779a475e3a75d021a78bd57328..675f4a3e96177caa81194ba336b70edbcba4aeae 100644
--- a/openair2/LAYER2/nr_rlc/nr_rlc_entity.h
+++ b/openair2/LAYER2/nr_rlc/nr_rlc_entity.h
@@ -23,6 +23,7 @@
 #define _NR_RLC_ENTITY_H_
 
 #include <stdint.h>
+#include "openair2/RRC/NR/rrc_gNB_radio_bearers.h"
 
 #include "common/utils/time_stat.h"
 
diff --git a/openair2/LAYER2/nr_rlc/nr_rlc_oai_api.c b/openair2/LAYER2/nr_rlc/nr_rlc_oai_api.c
index 6ac5c93f2a6c7f6531b5a63fd26ede3b6fae2c29..b809e4caba11186e3acd4b5abfd8c45ad8d257f0 100644
--- a/openair2/LAYER2/nr_rlc/nr_rlc_oai_api.c
+++ b/openair2/LAYER2/nr_rlc/nr_rlc_oai_api.c
@@ -165,7 +165,7 @@ void mac_rlc_data_ind     (
 
   switch (channel_idP) {
   case 1 ... 3: rb = ue->srb[channel_idP - 1]; break;
-  case 4 ... 8: rb = ue->drb[channel_idP - 4]; break;
+  case 4 ... 32: rb = ue->drb[channel_idP - 4]; break;
   default:      rb = NULL;                     break;
   }
 
@@ -206,7 +206,7 @@ tbs_size_t mac_rlc_data_req(
 
   switch (channel_idP) {
   case 1 ... 3: rb = ue->srb[channel_idP - 1]; break;
-  case 4 ... 8: rb = ue->drb[channel_idP - 4]; break;
+  case 4 ... 32: rb = ue->drb[channel_idP - 4]; break;
   default:
   rb = NULL;
   LOG_E(RLC, "In %s:%d:%s: data request for unknown RB with LCID 0x%02x !\n", __FILE__, __LINE__, __FUNCTION__, channel_idP);
@@ -254,7 +254,7 @@ mac_rlc_status_resp_t mac_rlc_status_ind(
 
   switch (channel_idP) {
   case 1 ... 3: rb = ue->srb[channel_idP - 1]; break;
-  case 4 ... 8: rb = ue->drb[channel_idP - 4]; break;
+  case 4 ... NGAP_MAX_DRBS_PER_UE: rb = ue->drb[channel_idP - 4]; break;
   default:      rb = NULL;                     break;
   }
 
@@ -317,7 +317,7 @@ rlc_buffer_occupancy_t mac_rlc_get_buffer_occupancy_ind(
 
   switch (channel_idP) {
   case 1 ... 3: rb = ue->srb[channel_idP - 1]; break;
-  case 4 ... 8: rb = ue->drb[channel_idP - 4]; break;
+  case 4 ... NGAP_MAX_DRBS_PER_UE: rb = ue->drb[channel_idP - 4]; break;
   default:      rb = NULL;                     break;
   }
 
@@ -379,7 +379,7 @@ rlc_op_status_t rlc_data_req     (const protocol_ctxt_t *const ctxt_pP,
     if (rb_idP >= 1 && rb_idP <= 2)
       rb = ue->srb[rb_idP - 1];
   } else {
-    if (rb_idP >= 1 && rb_idP <= 5)
+    if (rb_idP >= 1 && rb_idP <= MAX_DRBS_PER_UE)
       rb = ue->drb[rb_idP - 1];
   }
 
@@ -410,7 +410,7 @@ int nr_rlc_get_available_tx_space(
 
   switch (channel_idP) {
   case 1 ... 3: rb = ue->srb[channel_idP - 1]; break;
-  case 4 ... 8: rb = ue->drb[channel_idP - 4]; break;
+  case 4 ... NGAP_MAX_DRBS_PER_UE: rb = ue->drb[channel_idP - 4]; break;
   default:      rb = NULL;                     break;
   }
 
@@ -514,7 +514,7 @@ rb_found:
       T_INT(ue->rnti), T_INT(rb_id), T_INT(size));
 
     const ngran_node_t type = RC.nrrrc[0 /*ctxt_pP->module_id*/]->node_type;
-    AssertFatal(type != ngran_eNB_CU && type != ngran_ng_eNB_CU && type != ngran_gNB_CU,
+    AssertFatal(!NODE_IS_CU(type),
                 "Can't be CU, bad node type %d\n", type);
 
     // if (NODE_IS_DU(type) && is_srb == 0) {
@@ -587,7 +587,7 @@ static void successful_delivery(void *_ue, nr_rlc_entity_t *entity, int sdu_id)
   }
 
   /* maybe DRB? */
-  for (i = 0; i < 5; i++) {
+  for (i = 0; i < MAX_DRBS_PER_UE; i++) {
     if (entity == ue->drb[i]) {
       is_srb = 0;
       rb_id = i+1;
@@ -645,7 +645,7 @@ static void max_retx_reached(void *_ue, nr_rlc_entity_t *entity)
   }
 
   /* maybe DRB? */
-  for (i = 0; i < 5; i++) {
+  for (i = 0; i < MAX_DRBS_PER_UE; i++) {
     if (entity == ue->drb[i]) {
       is_srb = 0;
       rb_id = i+1;
@@ -791,7 +791,7 @@ static void add_drb_am(int rnti, struct NR_DRB_ToAddMod *s, NR_RLC_BearerConfig_
   int t_reassembly;
   int sn_field_length;
 
-  if (!(drb_id >= 1 && drb_id <= 5)) {
+  if (!(drb_id >= 1 && drb_id <= MAX_DRBS_PER_UE)) {
     LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n",
           __FILE__, __LINE__, __FUNCTION__, drb_id);
     exit(1);
@@ -868,7 +868,7 @@ static void add_drb_um(int rnti, struct NR_DRB_ToAddMod *s, NR_RLC_BearerConfig_
   int sn_field_length;
   int t_reassembly;
 
-  if (!(drb_id >= 1 && drb_id <= 5)) {
+  if (!(drb_id >= 1 && drb_id <= MAX_DRBS_PER_UE)) {
     LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n",
           __FILE__, __LINE__, __FUNCTION__, drb_id);
     exit(1);
@@ -1041,7 +1041,7 @@ rlc_op_status_t rrc_rlc_config_req   (
     exit(1);
   }
   if ((srb_flagP && !(rb_idP >= 1 && rb_idP <= 2)) ||
-      (!srb_flagP && !(rb_idP >= 1 && rb_idP <= 5))) {
+      (!srb_flagP && !(rb_idP >= 1 && rb_idP <= MAX_DRBS_PER_UE))) {
     LOG_E(RLC, "%s:%d:%s: bad rb_id (%ld) (is_srb %d)\n", __FILE__, __LINE__, __FUNCTION__, rb_idP, srb_flagP);
     exit(1);
   }
@@ -1066,10 +1066,10 @@ rlc_op_status_t rrc_rlc_config_req   (
     if (ue->srb[i] != NULL)
       break;
   if (i == 2) {
-    for (i = 0; i < 5; i++)
+    for (i = 0; i < MAX_DRBS_PER_UE; i++)
       if (ue->drb[i] != NULL)
         break;
-    if (i == 5)
+    if (i == MAX_DRBS_PER_UE)
       nr_rlc_manager_remove_ue(nr_rlc_ue_manager, ctxt_pP->rnti);
   }
   nr_rlc_manager_unlock(nr_rlc_ue_manager);
diff --git a/openair2/LAYER2/nr_rlc/nr_rlc_ue_manager.h b/openair2/LAYER2/nr_rlc/nr_rlc_ue_manager.h
index f578faaf380f4ae6712c04ceeee3a80de2ae476b..73c7e4c3b49052be3bf0b34abc578081b8573c31 100644
--- a/openair2/LAYER2/nr_rlc/nr_rlc_ue_manager.h
+++ b/openair2/LAYER2/nr_rlc/nr_rlc_ue_manager.h
@@ -29,7 +29,7 @@ typedef void nr_rlc_ue_manager_t;
 typedef struct nr_rlc_ue_t {
   int rnti;
   nr_rlc_entity_t *srb[3];
-  nr_rlc_entity_t *drb[5];
+  nr_rlc_entity_t *drb[MAX_DRBS_PER_UE];
 } nr_rlc_ue_t;
 
 /***********************************************************************/
diff --git a/openair2/LAYER2/rlc_v2/rlc_oai_api.c b/openair2/LAYER2/rlc_v2/rlc_oai_api.c
index 11ae770e3ecdd8944c56b04713803124ebd41136..914484c1b612fa714616e01437ae0f96f1877d86 100644
--- a/openair2/LAYER2/rlc_v2/rlc_oai_api.c
+++ b/openair2/LAYER2/rlc_v2/rlc_oai_api.c
@@ -415,7 +415,7 @@ rb_found:
       T_INT(ue->rnti), T_INT(rb_id), T_INT(size));
 
     const ngran_node_t type = RC.rrc[0 /*ctxt_pP->module_id*/]->node_type;
-    AssertFatal(type != ngran_eNB_CU && type != ngran_ng_eNB_CU && type != ngran_gNB_CU,
+    AssertFatal(!NODE_IS_CU(type),
                 "Can't be CU, bad node type %d\n", type);
 
     if (NODE_IS_DU(type)) {
diff --git a/openair2/MCE_APP/mce_config.c b/openair2/MCE_APP/mce_config.c
index 9d9d9028e5c01d6a716571c12a4fcdf298f9f912..dfbea5c6522e067f9ac7d5d7bd18563d6a3497a7 100644
--- a/openair2/MCE_APP/mce_config.c
+++ b/openair2/MCE_APP/mce_config.c
@@ -45,7 +45,7 @@
 #include "LAYER2/MAC/mac_proto.h"
 #include "PHY/phy_extern.h"
 #include "PHY/INIT/phy_init.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "nfapi_vnf.h"
 #include "nfapi_pnf.h"
 
diff --git a/openair2/NETWORK_DRIVER/UE_IP/device.c b/openair2/NETWORK_DRIVER/UE_IP/device.c
index bf858a0005db261f9aace3bbf014c6f44ea744f8..84c7a0b7146a62fa1be0da0dd4b8044ac2b23525 100644
--- a/openair2/NETWORK_DRIVER/UE_IP/device.c
+++ b/openair2/NETWORK_DRIVER/UE_IP/device.c
@@ -304,7 +304,7 @@ void ue_ip_change_rx_flags(struct net_device *dev_pP, int flagsP) {
 
 //---------------------------------------------------------------------------
 
-#if  LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
+#if  LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) || (defined RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == 2055)
 void ue_ip_tx_timeout(struct net_device *dev_pP, unsigned int txqueue)
 #else
 void ue_ip_tx_timeout(struct net_device *dev_pP)
@@ -335,7 +335,7 @@ static const struct net_device_ops ue_ip_netdev_ops = {
   .ndo_set_mac_address    = ue_ip_set_mac_address,
   .ndo_set_config         = ue_ip_set_config,
   .ndo_do_ioctl           = NULL,
-#if defined RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= 1797 && RHEL_RELEASE_CODE != 2403
+#if defined RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= 1797 && RHEL_RELEASE_CODE != 2403 && RHEL_RELEASE_CODE != 2055
   .extended.ndo_change_mtu         = ue_ip_change_mtu,
 #else
   .ndo_change_mtu   = ue_ip_change_mtu,
diff --git a/openair2/NR_PHY_INTERFACE/NR_IF_Module.c b/openair2/NR_PHY_INTERFACE/NR_IF_Module.c
index f295d95184b84f8dcecfbe2c8a4897e8277f0505..a0a7cf5d11bfda82f33edd420e3eb8843daea53f 100644
--- a/openair2/NR_PHY_INTERFACE/NR_IF_Module.c
+++ b/openair2/NR_PHY_INTERFACE/NR_IF_Module.c
@@ -239,12 +239,7 @@ void handle_nr_srs(NR_UL_IND_t *UL_info) {
     handle_nr_srs_measurements(module_id,
                                frame,
                                slot,
-                               srs_ind->rnti,
-                               srs_ind->timing_advance,
-                               srs_ind->num_symbols,
-                               srs_ind->wide_band_snr,
-                               srs_ind->num_reported_symbols,
-                               srs_ind->reported_symbol_list);
+                               &srs_list[i]);
   }
 
   UL_info->srs_ind.number_of_pdus = 0;
diff --git a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c
index cefafcba80fb6683ce6e832e5f7a062bdfd1552c..fad6d4df04e0bab132fcefecb0c0a1d907607952 100644
--- a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c
+++ b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c
@@ -45,7 +45,7 @@
 #include "openair2/RRC/NR_UE/rrc_vars.h"
 #include "openair2/GNB_APP/L1_nr_paramdef.h"
 #include "openair2/GNB_APP/gnb_paramdef.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 #include <stdio.h>
 
 #define MAX_IF_MODULES 100
@@ -762,7 +762,7 @@ void check_and_process_dci(nfapi_nr_dl_tti_request_t *dl_tti_request,
     ul_info.slot_rx = slot;
     ul_info.slot_tx = (slot + slot_ahead) % slots_per_frame;
     ul_info.frame_tx = (ul_info.slot_rx + slot_ahead >= slots_per_frame) ? ul_info.frame_rx + 1 : ul_info.frame_rx;
-    ul_info.ue_sched_mode = SCHED_ALL;
+    ul_info.ue_sched_mode = SCHED_PUSCH;
     if (mac->scc || mac->scc_SIB)
     {
         if (is_nr_UL_slot(mac->scc ?
@@ -772,7 +772,7 @@ void check_and_process_dci(nfapi_nr_dl_tti_request_t *dl_tti_request,
                           mac->frame_type) && mac->ra.ra_state != RA_SUCCEEDED)
         {
             nr_ue_scheduler(NULL, &ul_info);
-            nr_ue_prach_scheduler(ul_info.module_id, ul_info.frame_tx, ul_info.slot_tx, ul_info.thread_id);
+            nr_ue_prach_scheduler(ul_info.module_id, ul_info.frame_tx, ul_info.slot_tx);
         }
     }
 }
@@ -1129,24 +1129,24 @@ int nr_ue_ul_indication(nr_uplink_indication_t *ul_info){
   module_id_t module_id = ul_info->module_id;
   NR_UE_MAC_INST_t *mac = get_mac_inst(module_id);
 
-  if (ul_info->ue_sched_mode == ONLY_PUSCH) {
-    ret = nr_ue_scheduler(NULL, ul_info);
-    return 0;
-  }
-  if (ul_info->ue_sched_mode == SCHED_ALL) {
-    ret = nr_ue_scheduler(NULL, ul_info);
-  }
-  else
-    LOG_T(NR_MAC, "In %s():%d not calling scheduler. sched mode = %d and mac->ra.ra_state = %d\n",
-        __FUNCTION__, __LINE__, ul_info->ue_sched_mode, mac->ra.ra_state);
-
   NR_TDD_UL_DL_ConfigCommon_t *tdd_UL_DL_ConfigurationCommon = mac->scc != NULL ? mac->scc->tdd_UL_DL_ConfigurationCommon : mac->scc_SIB->tdd_UL_DL_ConfigurationCommon;
 
-  if (is_nr_UL_slot(tdd_UL_DL_ConfigurationCommon, ul_info->slot_tx, mac->frame_type) && !get_softmodem_params()->phy_test)
-    nr_ue_prach_scheduler(module_id, ul_info->frame_tx, ul_info->slot_tx, ul_info->thread_id);
-
-  if (is_nr_UL_slot(tdd_UL_DL_ConfigurationCommon, ul_info->slot_tx, mac->frame_type))
-    nr_ue_pucch_scheduler(module_id, ul_info->frame_tx, ul_info->slot_tx, ul_info->thread_id);
+  switch (ul_info->ue_sched_mode) { // dispatch on the scheduling mode requested by the caller
+    case SCHED_PUSCH: // run the UE scheduler, then PRACH scheduling on UL slots when not in phy_test mode
+      ret = nr_ue_scheduler(NULL, ul_info);
+      if (is_nr_UL_slot(tdd_UL_DL_ConfigurationCommon, ul_info->slot_tx, mac->frame_type) && !get_softmodem_params()->phy_test)
+        nr_ue_prach_scheduler(module_id, ul_info->frame_tx, ul_info->slot_tx);
+      LOG_T(NR_MAC, "In %s():%d scheduler called. sched mode = %d and mac->ra.ra_state = %d\n",
+            __FUNCTION__, __LINE__, ul_info->ue_sched_mode, mac->ra.ra_state);
+      break;
+
+    case SCHED_PUCCH: // only PUCCH scheduling on UL slots; nr_ue_scheduler() is not invoked on this path
+      if (is_nr_UL_slot(tdd_UL_DL_ConfigurationCommon, ul_info->slot_tx, mac->frame_type))
+        nr_ue_pucch_scheduler(module_id, ul_info->frame_tx, ul_info->slot_tx, ul_info->phy_data);
+      LOG_T(NR_MAC, "In %s():%d not calling scheduler. sched mode = %d and mac->ra.ra_state = %d\n",
+            __FUNCTION__, __LINE__, ul_info->ue_sched_mode, mac->ra.ra_state);
+      break;
+  }
 
   switch(ret){
   case UE_CONNECTION_OK:
@@ -1204,7 +1204,7 @@ int nr_ue_dl_indication(nr_downlink_indication_t *dl_info, NR_UL_TIME_ALIGNMENT_
         if (ret >= 0) {
           AssertFatal( nr_ue_if_module_inst[module_id] != NULL, "IF module is NULL!\n" );
           AssertFatal( nr_ue_if_module_inst[module_id]->scheduled_response != NULL, "scheduled_response is NULL!\n" );
-          fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, dl_info->module_id, dl_info->cc_id, dl_info->frame, dl_info->slot, dl_info->thread_id, dl_info->phy_data);
+          fill_scheduled_response(&scheduled_response, dl_config, NULL, NULL, dl_info->module_id, dl_info->cc_id, dl_info->frame, dl_info->slot, dl_info->phy_data);
           nr_ue_if_module_inst[module_id]->scheduled_response(&scheduled_response);
         }
         memset(def_dci_pdu_rel15, 0, sizeof(*def_dci_pdu_rel15));
diff --git a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.h b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.h
index f663c17d10f310530483b65dfa506811b7cb3037..722b1be9ec580fbb4ab97589f1f4544ebd13b6af 100755
--- a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.h
+++ b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.h
@@ -47,9 +47,8 @@ extern slot_rnti_mcs_s slot_rnti_mcs[NUM_NFAPI_SLOT];
 typedef struct NR_UL_TIME_ALIGNMENT NR_UL_TIME_ALIGNMENT_t;
 
 typedef enum {
-  ONLY_PUSCH,
-  NOT_PUSCH,
-  SCHED_ALL,
+  SCHED_PUSCH,
+  SCHED_PUCCH,
 } NR_UE_SCHED_MODE_t;
 
 typedef struct {
@@ -79,8 +78,6 @@ typedef struct {
     frame_t frame;
     /// slot
     int slot;
-    /// index of the current UE RX/TX thread
-    int thread_id;
 
     /// NR UE FAPI-like P7 message, direction: L1 to L2
     /// data reception indication structure
@@ -111,13 +108,12 @@ typedef struct {
     frame_t frame_tx;
     /// slot tx
     uint32_t slot_tx;
-    /// index of the current UE RX/TX thread
-    int thread_id;
 
     /// dci reception indication structure
     fapi_nr_dci_indication_t *dci_ind;
 
     NR_UE_SCHED_MODE_t ue_sched_mode;
+    void *phy_data;
 
 } nr_uplink_indication_t;
 
@@ -133,8 +129,6 @@ typedef struct {
     frame_t frame;
     /// slot
     int slot;
-    /// index of the current UE RX/TX thread
-    int thread_id;
 
     /// NR UE FAPI-like P7 message, direction: L2 to L1
     /// downlink transmission configuration request structure
@@ -284,5 +278,6 @@ int handle_bcch_dlsch(module_id_t module_id, int cc_id, unsigned int gNB_index,
 
 int handle_dci(module_id_t module_id, int cc_id, unsigned int gNB_index, frame_t frame, int slot, fapi_nr_dci_indication_pdu_t *dci);
 
+
 #endif
 
diff --git a/openair2/PHY_INTERFACE/IF_Module.h b/openair2/PHY_INTERFACE/IF_Module.h
index b1123dce5f7ff735880ffcfe479eab4cd1d9c016..d446c239387d456f499eb59ed5b848079d668292 100644
--- a/openair2/PHY_INTERFACE/IF_Module.h
+++ b/openair2/PHY_INTERFACE/IF_Module.h
@@ -1,5 +1,4 @@
-/*
- * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ /* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The OpenAirInterface Software Alliance licenses this file to You under
@@ -39,6 +38,8 @@
 #include "nfapi_interface.h"
 #include "platform_types.h"
 #include <common/utils/threadPool/thread-pool.h>
+#include <sdr/COMMON/common_lib.h>
+
 
 #define MAX_NUM_DL_PDU 100
 #define MAX_NUM_UL_PDU 100
@@ -135,7 +136,6 @@ typedef struct {
     int CC_id;
     nfapi_config_request_t *cfg;
 } PHY_Config_t;
-#include <targets/ARCH/COMMON/common_lib.h>
 
 typedef struct IF_Module_s{
 //define the function pointer
diff --git a/openair2/PHY_INTERFACE/phy_stub_UE.c b/openair2/PHY_INTERFACE/phy_stub_UE.c
index cd57ea9ff101ea225e6169408cc5e5fad5d3d255..28aeac880a59baae846577c5499a8d91c437dc46 100644
--- a/openair2/PHY_INTERFACE/phy_stub_UE.c
+++ b/openair2/PHY_INTERFACE/phy_stub_UE.c
@@ -27,7 +27,7 @@
 #include "openair2/PHY_INTERFACE/phy_stub_UE.h"
 #include "openair2/ENB_APP/L1_paramdef.h"
 #include "openair2/ENB_APP/enb_paramdef.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 #include "common/config/config_load_configmodule.h"
 #include "common/config/config_userapi.h"
 #include <arpa/inet.h>
diff --git a/openair2/RRC/LTE/rrc_eNB.c b/openair2/RRC/LTE/rrc_eNB.c
index 8be30175e1173b82675f02c44b842ee7da70b3ea..fb72899458d1fc72bd018ece3251b215489ad5e5 100644
--- a/openair2/RRC/LTE/rrc_eNB.c
+++ b/openair2/RRC/LTE/rrc_eNB.c
@@ -979,7 +979,7 @@ rrc_eNB_free_UE(
 void put_UE_in_freelist(module_id_t mod_id, rnti_t rnti, bool removeFlag) {
   eNB_MAC_INST                             *eNB_MAC = RC.mac[mod_id];
   pthread_mutex_lock(&lock_ue_freelist);
-  LOG_I(PHY,"add ue %d in fre list, context flag: %d\n", rnti, removeFlag);
+  LOG_I(PHY,"add ue %d in free list, context flag: %d\n", rnti, removeFlag);
   int i;
   for (i=0; i < sizeofArray(eNB_MAC->UE_free_ctrl); i++) 
     if (eNB_MAC->UE_free_ctrl[i].rnti == 0)
diff --git a/openair2/RRC/LTE/rrc_eNB_S1AP.c b/openair2/RRC/LTE/rrc_eNB_S1AP.c
index aefbc0183157fb5189a834f830f8e42b8f34d1ac..7154b34962e00a387e771c2964eccb032b6d9895 100644
--- a/openair2/RRC/LTE/rrc_eNB_S1AP.c
+++ b/openair2/RRC/LTE/rrc_eNB_S1AP.c
@@ -1004,9 +1004,7 @@ int rrc_eNB_process_S1AP_INITIAL_CONTEXT_SETUP_REQ(MessageDef *msg_p, const char
     }
 
     
-    if ((RC.rrc[ctxt.module_id]->node_type == ngran_eNB_CU) ||
-	(RC.rrc[ctxt.module_id]->node_type == ngran_ng_eNB_CU) ||
-	(RC.rrc[ctxt.module_id]->node_type == ngran_gNB_CU) ){
+    if (NODE_IS_CU(RC.rrc[ctxt.module_id]->node_type)) {
       struct eNB_RRC_INST_s *rrc= RC.rrc[0];
       MessageDef *message_p = itti_alloc_new_message (TASK_RRC_ENB, 0, F1AP_UE_CONTEXT_SETUP_REQ);
       f1ap_ue_context_setup_t *req=&F1AP_UE_CONTEXT_SETUP_REQ (message_p);
diff --git a/openair2/RRC/NR/MESSAGES/asn1_msg.c b/openair2/RRC/NR/MESSAGES/asn1_msg.c
index 47b6e384028d59177214b52f6666cfc438d003a4..85a50710ec5effaa3d96ee977775833df668b9eb 100755
--- a/openair2/RRC/NR/MESSAGES/asn1_msg.c
+++ b/openair2/RRC/NR/MESSAGES/asn1_msg.c
@@ -983,7 +983,6 @@ uint8_t do_RRCReject(uint8_t Mod_id,
     return((enc_rval.encoded+7)/8);
 }
 
-
 void fill_initial_SpCellConfig(int uid,
                                NR_SpCellConfig_t *SpCellConfig,
                                const NR_ServingCellConfigCommon_t *scc,
@@ -1003,9 +1002,10 @@ void fill_initial_SpCellConfig(int uid,
 
   SpCellConfig->spCellConfigDedicated = calloc(1,sizeof(*SpCellConfig->spCellConfigDedicated));
   SpCellConfig->spCellConfigDedicated->uplinkConfig = calloc(1,sizeof(*SpCellConfig->spCellConfigDedicated->uplinkConfig));
+  NR_UplinkConfig_t *uplinkConfig = SpCellConfig->spCellConfigDedicated->uplinkConfig;
 
   NR_BWP_UplinkDedicated_t *initialUplinkBWP = calloc(1,sizeof(*initialUplinkBWP));
-  SpCellConfig->spCellConfigDedicated->uplinkConfig->initialUplinkBWP = initialUplinkBWP;
+  uplinkConfig->initialUplinkBWP = initialUplinkBWP;
   initialUplinkBWP->pucch_Config = calloc(1,sizeof(*initialUplinkBWP->pucch_Config));
   initialUplinkBWP->pucch_Config->present = NR_SetupRelease_PUCCH_Config_PR_setup;
   NR_PUCCH_Config_t *pucch_Config = calloc(1,sizeof(*pucch_Config));
@@ -1061,7 +1061,8 @@ void fill_initial_SpCellConfig(int uid,
   ASN_SEQUENCE_ADD(&pusch_Config->pusch_PowerControl->pathlossReferenceRSToAddModList->list,plrefRS);
   pusch_Config->pusch_PowerControl->pathlossReferenceRSToReleaseList = NULL;
   pusch_Config->pusch_PowerControl->twoPUSCH_PC_AdjustmentStates = NULL;
-  pusch_Config->pusch_PowerControl->deltaMCS = NULL;
+  pusch_Config->pusch_PowerControl->deltaMCS = calloc(1, sizeof(*pusch_Config->pusch_PowerControl->deltaMCS));
+  *pusch_Config->pusch_PowerControl->deltaMCS = NR_PUSCH_PowerControl__deltaMCS_enabled;
   pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToAddModList = calloc(1,sizeof(*pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToAddModList));
   NR_SRI_PUSCH_PowerControl_t *sriPUSCHPC=calloc(1,sizeof(*sriPUSCHPC));
   sriPUSCHPC->sri_PUSCH_PowerControlId=0;
@@ -1090,9 +1091,15 @@ void fill_initial_SpCellConfig(int uid,
   pusch_Config->uci_OnPUSCH=NULL;
   pusch_Config->tp_pi2BPSK=NULL;
 
+  long maxMIMO_Layers = uplinkConfig &&
+                                uplinkConfig->pusch_ServingCellConfig &&
+                                uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1 &&
+                                uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers ?
+                            *uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers : 1;
+
   // We are using do_srs = 0 here because the periodic SRS will only be enabled in update_cellGroupConfig() if do_srs == 1
   initialUplinkBWP->srs_Config = calloc(1,sizeof(*initialUplinkBWP->srs_Config));
-  config_srs(initialUplinkBWP->srs_Config, NULL, curr_bwp, uid, 0, 0);
+  config_srs(initialUplinkBWP->srs_Config, NULL, curr_bwp, uid, 0, maxMIMO_Layers, 0);
 
   scheduling_request_config(scc, pucch_Config);
 
@@ -1216,7 +1223,7 @@ void fill_initial_SpCellConfig(int uid,
     n_ul_bwp = servingcellconfigdedicated->uplinkConfig->uplinkBWP_ToAddModList->list.count;
   }
   if(n_ul_bwp>0) {
-    SpCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList = calloc(1,sizeof(*SpCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList));
+    uplinkConfig->uplinkBWP_ToAddModList = calloc(1,sizeof(*uplinkConfig->uplinkBWP_ToAddModList));
     for (int bwp_loop = 0; bwp_loop < n_ul_bwp; bwp_loop++) {
       NR_BWP_Uplink_t *ubwp = calloc(1, sizeof(*ubwp));
       config_uplinkBWP(ubwp, bwp_loop, true, uid,
@@ -1224,13 +1231,12 @@ void fill_initial_SpCellConfig(int uid,
                        servingcellconfigdedicated,
                        scc,
                        NULL);
-      ASN_SEQUENCE_ADD(&SpCellConfig->spCellConfigDedicated->uplinkConfig->uplinkBWP_ToAddModList->list, ubwp);
+      ASN_SEQUENCE_ADD(&uplinkConfig->uplinkBWP_ToAddModList->list, ubwp);
     }
-    SpCellConfig->spCellConfigDedicated->uplinkConfig->firstActiveUplinkBWP_Id = calloc(1,sizeof(*SpCellConfig->spCellConfigDedicated->uplinkConfig->firstActiveUplinkBWP_Id));
-    *SpCellConfig->spCellConfigDedicated->uplinkConfig->firstActiveUplinkBWP_Id = servingcellconfigdedicated->uplinkConfig->firstActiveUplinkBWP_Id ? *servingcellconfigdedicated->uplinkConfig->firstActiveUplinkBWP_Id : 1;
+    uplinkConfig->firstActiveUplinkBWP_Id = calloc(1,sizeof(*uplinkConfig->firstActiveUplinkBWP_Id));
+    *uplinkConfig->firstActiveUplinkBWP_Id = servingcellconfigdedicated->uplinkConfig->firstActiveUplinkBWP_Id ? *servingcellconfigdedicated->uplinkConfig->firstActiveUplinkBWP_Id : 1;
   }
 
-
   SpCellConfig->spCellConfigDedicated->csi_MeasConfig=calloc(1,sizeof(*SpCellConfig->spCellConfigDedicated->csi_MeasConfig));
   SpCellConfig->spCellConfigDedicated->csi_MeasConfig->present = NR_SetupRelease_CSI_MeasConfig_PR_setup;
 
@@ -1585,6 +1591,12 @@ void update_cellGroupConfig(NR_CellGroupConfig_t *cellGroupConfig,
 
   if(scc) {
     int curr_bwp = NRRIV2BW(scc->downlinkConfigCommon->initialDownlinkBWP->genericParameters.locationAndBandwidth,MAX_BWP_SIZE);
+    NR_UplinkConfig_t *uplinkConfig = SpCellConfig && SpCellConfig->spCellConfigDedicated ? SpCellConfig->spCellConfigDedicated->uplinkConfig : NULL;
+    long maxMIMO_Layers = uplinkConfig &&
+                                  uplinkConfig->pusch_ServingCellConfig &&
+                                  uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1 &&
+                                  uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers ?
+                              *uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers : 1;
     // SRS configuration
     if (configuration->do_SRS &&
         SpCellConfig &&
@@ -1600,6 +1612,7 @@ void update_cellGroupConfig(NR_CellGroupConfig_t *cellGroupConfig,
                  curr_bwp,
                  uid,
                  0,
+                 maxMIMO_Layers,
                  configuration->do_SRS);
     }
 
@@ -1625,6 +1638,7 @@ void update_cellGroupConfig(NR_CellGroupConfig_t *cellGroupConfig,
                    bwp_size,
                    uid,
                    i+1,
+                   maxMIMO_Layers,
                    configuration->do_SRS);
       }
     }
diff --git a/openair2/RRC/NR/nr_rrc_config.c b/openair2/RRC/NR/nr_rrc_config.c
index deb2c4965e282853c2163f2671b25cdf6401f721..b98c25fc5435ace9b1a15ccfc5c09a31237adf7b 100644
--- a/openair2/RRC/NR/nr_rrc_config.c
+++ b/openair2/RRC/NR/nr_rrc_config.c
@@ -356,8 +356,9 @@ void config_srs(NR_SetupRelease_SRS_Config_t *setup_release_srs_Config,
                 const int curr_bwp,
                 const int uid,
                 const int res_id,
-                const int do_srs) {
-
+                const long maxMIMO_Layers,
+                const int do_srs)
+{
   setup_release_srs_Config->present = NR_SetupRelease_SRS_Config_PR_setup;
 
   NR_SRS_Config_t *srs_Config;
@@ -419,26 +420,45 @@ void config_srs(NR_SetupRelease_SRS_Config_t *setup_release_srs_Config,
   NR_SRS_Resource_t *srs_res0=calloc(1,sizeof(*srs_res0));
   srs_res0->srs_ResourceId = res_id;
   srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_port1;
-  //  if (uecap &&
-  //      uecap->featureSets &&
-  //      uecap->featureSets->featureSetsUplink &&
-  //      uecap->featureSets->featureSetsUplink->list.count > 0) {
-  //    NR_FeatureSetUplink_t *ul_feature_setup = uecap->featureSets->featureSetsUplink->list.array[0];
-  //    switch (ul_feature_setup->supportedSRS_Resources->maxNumberSRS_Ports_PerResource) {
-  //      case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n1:
-  //        srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_port1;
-  //        break;
-  //      case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n2:
-  //        srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_ports2;
-  //        break;
-  //      case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n4:
-  //        srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_ports4;
-  //        break;
-  //      default:
-  //        LOG_E(NR_RRC, "Max Number of SRS Ports Per Resource %ld is invalid!\n",
-  //              ul_feature_setup->supportedSRS_Resources->maxNumberSRS_Ports_PerResource);
-  //    }
-  //  }
+  if (do_srs) { // port selection below only runs when SRS is enabled
+    long nrofSRS_Ports = 1; // default: one port when no UE capability is available
+    if (uecap &&
+        uecap->featureSets &&
+        uecap->featureSets->featureSetsUplink &&
+        uecap->featureSets->featureSetsUplink->list.count > 0) {
+      NR_FeatureSetUplink_t *ul_feature_setup = uecap->featureSets->featureSetsUplink->list.array[0];
+      switch (ul_feature_setup->supportedSRS_Resources->maxNumberSRS_Ports_PerResource) {
+        case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n1:
+          nrofSRS_Ports = 1;
+          break;
+        case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n2:
+          nrofSRS_Ports = 2;
+          break;
+        case NR_SRS_Resources__maxNumberSRS_Ports_PerResource_n4:
+          nrofSRS_Ports = 4;
+          break;
+        default:
+          LOG_E(NR_RRC, "Max Number of SRS Ports Per Resource %ld is invalid!\n",
+                ul_feature_setup->supportedSRS_Resources->maxNumberSRS_Ports_PerResource);
+      }
+      nrofSRS_Ports = min(nrofSRS_Ports, maxMIMO_Layers); // cap the UE-reported port count by maxMIMO_Layers
+      switch (nrofSRS_Ports) {
+        case 1:
+          srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_port1;
+          break;
+        case 2:
+          srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_ports2;
+          break;
+        case 4:
+          srs_res0->nrofSRS_Ports = NR_SRS_Resource__nrofSRS_Ports_ports4;
+          break;
+        default: // only logs; srs_res0->nrofSRS_Ports keeps the value assigned before this block
+          LOG_E(NR_RRC, "Number of SRS Ports Per Resource %ld is invalid!\n",
+                nrofSRS_Ports);
+      }
+    }
+    LOG_I(NR_RRC, "SRS configured with %d ports\n", 1<<srs_res0->nrofSRS_Ports); // NOTE(review): assumes enum values port1/ports2/ports4 are 0/1/2 - confirm
+  }
   srs_res0->ptrs_PortIndex = NULL;
   srs_res0->transmissionComb.present = NR_SRS_Resource__transmissionComb_PR_n2;
   srs_res0->transmissionComb.choice.n2 = calloc(1,sizeof(*srs_res0->transmissionComb.choice.n2));
@@ -584,21 +604,24 @@ void nr_rrc_config_ul_tda(NR_ServingCellConfigCommon_t *scc, int min_fb_delay){
         pusch_timedomainresourceallocation->mappingType = NR_PUSCH_TimeDomainResourceAllocation__mappingType_typeB;
         pusch_timedomainresourceallocation->startSymbolAndLength = get_SLIV(14-ul_symb,ul_symb-1); // starting in fist ul symbol til the last but one
         ASN_SEQUENCE_ADD(&scc->uplinkConfigCommon->initialUplinkBWP->pusch_ConfigCommon->choice.setup->pusch_TimeDomainAllocationList->list,pusch_timedomainresourceallocation);
-
-        // UL TDA index 2 for msg3 in the mixed slot (TDD)
-        int nb_periods_per_frame = get_nb_periods_per_frame(scc->tdd_UL_DL_ConfigurationCommon->pattern1.dl_UL_TransmissionPeriodicity);
-        int nb_slots_per_period = ((1<<mu) * 10)/nb_periods_per_frame;
-        struct NR_PUSCH_TimeDomainResourceAllocation *pusch_timedomainresourceallocation_msg3 = CALLOC(1,sizeof(struct NR_PUSCH_TimeDomainResourceAllocation));
-        pusch_timedomainresourceallocation_msg3->k2  = CALLOC(1,sizeof(long));
-        *pusch_timedomainresourceallocation_msg3->k2 = nb_slots_per_period - DELTA[mu];
-        if(*pusch_timedomainresourceallocation_msg3->k2 < min_fb_delay)
-          *pusch_timedomainresourceallocation_msg3->k2 += nb_slots_per_period;
-        AssertFatal(*pusch_timedomainresourceallocation_msg3->k2<33,"Computed k2 for msg3 %ld is larger than the range allowed by RRC (0..32)\n",
-                    *pusch_timedomainresourceallocation_msg3->k2);
-        pusch_timedomainresourceallocation_msg3->mappingType = NR_PUSCH_TimeDomainResourceAllocation__mappingType_typeB;
-        pusch_timedomainresourceallocation_msg3->startSymbolAndLength = get_SLIV(14-ul_symb,ul_symb-1); // starting in fist ul symbol til the last but one
-        ASN_SEQUENCE_ADD(&scc->uplinkConfigCommon->initialUplinkBWP->pusch_ConfigCommon->choice.setup->pusch_TimeDomainAllocationList->list,pusch_timedomainresourceallocation_msg3);
       }
+      // UL TDA index 2 for msg3 in the mixed slot (TDD)
+      int nb_periods_per_frame = get_nb_periods_per_frame(scc->tdd_UL_DL_ConfigurationCommon->pattern1.dl_UL_TransmissionPeriodicity);
+      int nb_slots_per_period = ((1<<mu) * 10)/nb_periods_per_frame;
+      struct NR_PUSCH_TimeDomainResourceAllocation *pusch_timedomainresourceallocation_msg3 = CALLOC(1,sizeof(struct NR_PUSCH_TimeDomainResourceAllocation));
+      pusch_timedomainresourceallocation_msg3->k2  = CALLOC(1,sizeof(long));
+      int no_mix_slot = ul_symb < 3 ? 1 : 0; // we need at least 2 symbols for scheduling Msg3
+      *pusch_timedomainresourceallocation_msg3->k2 = nb_slots_per_period - DELTA[mu] + no_mix_slot;
+      if(*pusch_timedomainresourceallocation_msg3->k2 < min_fb_delay)
+        *pusch_timedomainresourceallocation_msg3->k2 += nb_slots_per_period;
+      AssertFatal(*pusch_timedomainresourceallocation_msg3->k2<33,"Computed k2 for msg3 %ld is larger than the range allowed by RRC (0..32)\n",
+                  *pusch_timedomainresourceallocation_msg3->k2);
+      pusch_timedomainresourceallocation_msg3->mappingType = NR_PUSCH_TimeDomainResourceAllocation__mappingType_typeB;
+      if(no_mix_slot)
+        pusch_timedomainresourceallocation_msg3->startSymbolAndLength = get_SLIV(0,13); // full allocation if there is no mixed slot
+      else
+        pusch_timedomainresourceallocation_msg3->startSymbolAndLength = get_SLIV(14-ul_symb,ul_symb-1); // starting in first UL symbol till the last but one
+      ASN_SEQUENCE_ADD(&scc->uplinkConfigCommon->initialUplinkBWP->pusch_ConfigCommon->choice.setup->pusch_TimeDomainAllocationList->list,pusch_timedomainresourceallocation_msg3);
     }
   }
 }
@@ -1112,7 +1135,8 @@ void config_uplinkBWP(NR_BWP_Uplink_t *ubwp,
   pusch_Config->pusch_PowerControl->pathlossReferenceRSToAddModList = NULL;
   pusch_Config->pusch_PowerControl->pathlossReferenceRSToReleaseList = NULL;
   pusch_Config->pusch_PowerControl->twoPUSCH_PC_AdjustmentStates = NULL;
-  pusch_Config->pusch_PowerControl->deltaMCS = NULL;
+  pusch_Config->pusch_PowerControl->deltaMCS = calloc(1, sizeof(*pusch_Config->pusch_PowerControl->deltaMCS));
+  *pusch_Config->pusch_PowerControl->deltaMCS = NR_PUSCH_PowerControl__deltaMCS_enabled;
   pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToAddModList = NULL;
   pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToReleaseList = NULL;
   pusch_Config->frequencyHopping=NULL;
@@ -1135,12 +1159,20 @@ void config_uplinkBWP(NR_BWP_Uplink_t *ubwp,
   pusch_Config->uci_OnPUSCH=NULL;
   pusch_Config->tp_pi2BPSK=NULL;
 
+  long maxMIMO_Layers = servingcellconfigdedicated &&
+                                servingcellconfigdedicated->uplinkConfig
+                                && servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig
+                                && servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1
+                                && servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers ?
+                            *servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers : 1;
+
   ubwp->bwp_Dedicated->srs_Config = calloc(1,sizeof(*ubwp->bwp_Dedicated->srs_Config));
   config_srs(ubwp->bwp_Dedicated->srs_Config,
              NULL,
              curr_bwp,
              uid,
              bwp_loop+1,
+             maxMIMO_Layers,
              configuration->do_SRS);
 
   ubwp->bwp_Dedicated->configuredGrantConfig = NULL;
diff --git a/openair2/RRC/NR/nr_rrc_config.h b/openair2/RRC/NR/nr_rrc_config.h
index f8bdc4be5342524f3a39918b7544ba697d27750e..27d6c5f080dfcd09350d6d0c428fef7c82ad6d3c 100644
--- a/openair2/RRC/NR/nr_rrc_config.h
+++ b/openair2/RRC/NR/nr_rrc_config.h
@@ -150,6 +150,7 @@ void config_srs(NR_SetupRelease_SRS_Config_t *setup_release_srs_Config,
                 const int curr_bwp,
                 const int uid,
                 const int res_id,
+                const long maxMIMO_Layers,
                 const int do_srs);
 void set_dl_mcs_table(int scs,
                       NR_UE_NR_Capability_t *cap,
diff --git a/openair2/RRC/NR/nr_rrc_defs.h b/openair2/RRC/NR/nr_rrc_defs.h
index e9d9766f41b5a7621acc97b1a8a6ffef372d05ce..f99e89a4705ed9b218437714676572bf6b78063d 100644
--- a/openair2/RRC/NR/nr_rrc_defs.h
+++ b/openair2/RRC/NR/nr_rrc_defs.h
@@ -91,6 +91,12 @@
 #define NR_RRC_RECONFIGURATION_DELAY_MS 10
 #define NR_RRC_BWP_SWITCHING_DELAY_MS   6
 
+// 3GPP TS 38.133 - Section 8 - Table 8.2.1.2.7-2: Parameters which cause interruption other than SCS
+// This table was recently added to 3GPP. It shows that changing the parameters locationAndBandwidth, nrofSRS-Ports or
+// maxMIMO-Layers-r16 causes an interruption. The constant defined below is not yet used in the code; it has been
+// placed here for future reference.
+#define NR_OF_SRS_PORTS_SWITCHING_DELAY_MS 30
+
 #define NR_UE_MODULE_INVALID ((module_id_t) ~0) // FIXME attention! depends on type uint8_t!!!
 #define NR_UE_INDEX_INVALID  ((module_id_t) ~0) // FIXME attention! depends on type uint8_t!!! used to be -1
 
@@ -277,7 +283,7 @@ typedef struct gNB_RRC_UE_s {
   NR_DRB_ToAddModList_t             *DRB_configList;
   NR_DRB_ToAddModList_t             *DRB_configList2[NR_RRC_TRANSACTION_IDENTIFIER_NUMBER];
   NR_DRB_ToReleaseList_t            *DRB_Release_configList2[NR_RRC_TRANSACTION_IDENTIFIER_NUMBER];
-  uint8_t                            DRB_active[8];
+  uint8_t                            DRB_active[NGAP_MAX_DRBS_PER_UE];
 
   NR_SRB_INFO                       SI;
   NR_SRB_INFO                       Srb0;
@@ -353,7 +359,7 @@ typedef struct gNB_RRC_UE_s {
   /* list of e_rab to be setup by RRC layers */
   /* list of pdu session to be setup by RRC layers */
   nr_e_rab_param_t                   e_rab[NB_RB_MAX];//[S1AP_MAX_E_RAB];
-  pdu_session_param_t                pduSession[NR_NB_RB_MAX];//[NGAP_MAX_PDU_SESSION];
+  pdu_session_param_t                pduSession[NGAP_MAX_PDU_SESSION];
   //release e_rabs
   uint8_t                            nb_release_of_e_rabs;
   e_rab_failed_t                     e_rabs_release_failed[S1AP_MAX_E_RAB];
diff --git a/openair2/RRC/NR/rrc_gNB.c b/openair2/RRC/NR/rrc_gNB.c
index d9e2de5272336d541028bbe1e9f4969ebcba6cc5..b5941a5b0c004c7a9320232358838b2ff3a62a07 100755
--- a/openair2/RRC/NR/rrc_gNB.c
+++ b/openair2/RRC/NR/rrc_gNB.c
@@ -40,6 +40,7 @@
 #include "assertions.h"
 #include "common/ran_context.h"
 #include "asn1_conversions.h"
+#include "rrc_gNB_radio_bearers.h"
 
 #include "RRC/L2_INTERFACE/openair_rrc_L2_interface.h"
 #include "LAYER2/RLC/rlc.h"
@@ -524,7 +525,7 @@ rrc_gNB_process_RRCSetupComplete(
   ue_context_pP->ue_context.Srb1.Srb_info.Srb_id = 1;
   ue_context_pP->ue_context.StatusRrc = NR_RRC_CONNECTED;
 
-  if (AMF_MODE_ENABLED) {
+  if (get_softmodem_params()->sa) {
     rrc_gNB_send_NGAP_NAS_FIRST_REQ(ctxt_pP, ue_context_pP, rrcSetupComplete);
   } else {
     rrc_gNB_generate_SecurityModeCommand(ctxt_pP, ue_context_pP);
@@ -765,12 +766,9 @@ rrc_gNB_generate_dedicatedRRCReconfiguration(
 //-----------------------------------------------------------------------------
 {
   gNB_RRC_INST                  *rrc = RC.nrrrc[ctxt_pP->module_id];
-  NR_DRB_ToAddMod_t             *DRB_config           = NULL;
-  NR_SRB_ToAddMod_t             *SRB2_config          = NULL;
-  NR_SDAP_Config_t              *sdap_config          = NULL;
+  gNB_RRC_UE_t                  *ue_p = &ue_context_pP->ue_context;
   NR_DRB_ToAddModList_t        **DRB_configList  = NULL;
   NR_DRB_ToAddModList_t        **DRB_configList2 = NULL;
-  NR_SRB_ToAddModList_t        **SRB_configList2 = NULL;
   NR_SRB_ToAddModList_t        *SRB_configList  = ue_context_pP->ue_context.SRB_configList;
   struct NR_RRCReconfiguration_v1530_IEs__dedicatedNAS_MessageList
                                 *dedicatedNAS_MessageList = NULL;
@@ -781,22 +779,13 @@ rrc_gNB_generate_dedicatedRRCReconfiguration(
   int                            pdu_sessions_done = 0;
   int i;
   uint8_t drb_id_to_setup_start = 1;
-  uint8_t nb_drb_to_setup = 0;
-  long drb_priority[1] = {13}; // For now, we assume only one drb per pdu sessions with a default preiority (will be dynamique in future)
+  uint8_t nb_drb_to_setup = rrc->configuration.drbs;
+  long drb_priority[NGAP_MAX_DRBS_PER_UE];
   NR_CellGroupConfig_t *cellGroupConfig = NULL;
 
   uint8_t xid = rrc_gNB_get_next_transaction_identifier(ctxt_pP->module_id);
 
-  /* Configure SRB2 */
-  SRB_configList2 = &ue_context_pP->ue_context.SRB_configList2[xid];
-  if (*SRB_configList2 == NULL) {
-    *SRB_configList2 = CALLOC(1, sizeof(**SRB_configList2));
-    memset(*SRB_configList2, 0, sizeof(**SRB_configList2));
-    SRB2_config = CALLOC(1, sizeof(*SRB2_config));
-    SRB2_config->srb_Identity = 2;
-    ASN_SEQUENCE_ADD(&(*SRB_configList2)->list, SRB2_config);
-    ASN_SEQUENCE_ADD(&SRB_configList->list, SRB2_config);
-  }
+  NR_SRB_ToAddModList_t **SRB_configList2 = generateSRB2_confList(ue_p, SRB_configList, xid);
 
   DRB_configList = &ue_context_pP->ue_context.DRB_configList;
   if (*DRB_configList) {
@@ -823,98 +812,97 @@ rrc_gNB_generate_dedicatedRRCReconfiguration(
       continue;
     }
 
-    DRB_config = CALLOC(1, sizeof(*DRB_config));
-    DRB_config->drb_Identity = i+1;
-    if (drb_id_to_setup_start == 1) drb_id_to_setup_start = DRB_config->drb_Identity;
-    nb_drb_to_setup++;
-    DRB_config->cnAssociation = CALLOC(1, sizeof(*DRB_config->cnAssociation));
-    DRB_config->cnAssociation->present = NR_DRB_ToAddMod__cnAssociation_PR_sdap_Config;
-    // sdap_Config
-    sdap_config = CALLOC(1, sizeof(NR_SDAP_Config_t));
-    memset(sdap_config, 0, sizeof(NR_SDAP_Config_t));
-    sdap_config->pdu_Session = ue_context_pP->ue_context.pduSession[i].param.pdusession_id;
-    if (rrc->configuration.enable_sdap) {
-      sdap_config->sdap_HeaderDL = NR_SDAP_Config__sdap_HeaderDL_present;
-      sdap_config->sdap_HeaderUL = NR_SDAP_Config__sdap_HeaderUL_present;
-    } else {
-      sdap_config->sdap_HeaderDL = NR_SDAP_Config__sdap_HeaderDL_absent;
-      sdap_config->sdap_HeaderUL = NR_SDAP_Config__sdap_HeaderUL_absent;
-    }
-    sdap_config->defaultDRB = true;
-    sdap_config->mappedQoS_FlowsToAdd = calloc(1, sizeof(struct NR_SDAP_Config__mappedQoS_FlowsToAdd));
-    memset(sdap_config->mappedQoS_FlowsToAdd, 0, sizeof(struct NR_SDAP_Config__mappedQoS_FlowsToAdd));
-
-    for (qos_flow_index = 0; qos_flow_index < ue_context_pP->ue_context.pduSession[i].param.nb_qos; qos_flow_index++) {
-      NR_QFI_t *qfi = calloc(1, sizeof(NR_QFI_t));
-      *qfi = ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].qfi;
-      ASN_SEQUENCE_ADD(&sdap_config->mappedQoS_FlowsToAdd->list, qfi);
-    }
-    sdap_config->mappedQoS_FlowsToRelease = NULL;
-    DRB_config->cnAssociation->choice.sdap_Config = sdap_config;
-
-    // pdcp_Config
-    DRB_config->reestablishPDCP = NULL;
-    DRB_config->recoverPDCP = NULL;
-    DRB_config->pdcp_Config = calloc(1, sizeof(*DRB_config->pdcp_Config));
-    DRB_config->pdcp_Config->drb = calloc(1,sizeof(*DRB_config->pdcp_Config->drb));
-    DRB_config->pdcp_Config->drb->discardTimer = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->discardTimer));
-    *DRB_config->pdcp_Config->drb->discardTimer = NR_PDCP_Config__drb__discardTimer_infinity;
-    DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL));
-    *DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL = NR_PDCP_Config__drb__pdcp_SN_SizeUL_len18bits;
-    DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL));
-    *DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL = NR_PDCP_Config__drb__pdcp_SN_SizeDL_len18bits;
-    DRB_config->pdcp_Config->drb->headerCompression.present = NR_PDCP_Config__drb__headerCompression_PR_notUsed;
-    DRB_config->pdcp_Config->drb->headerCompression.choice.notUsed = 0;
-
-    DRB_config->pdcp_Config->drb->integrityProtection = NULL;
-    DRB_config->pdcp_Config->drb->statusReportRequired = NULL;
-    DRB_config->pdcp_Config->drb->outOfOrderDelivery = calloc(1,sizeof(*DRB_config->pdcp_Config->drb->outOfOrderDelivery));
-    *DRB_config->pdcp_Config->drb->outOfOrderDelivery = NR_PDCP_Config__drb__outOfOrderDelivery_true;
-    DRB_config->pdcp_Config->moreThanOneRLC = NULL;
-
-    DRB_config->pdcp_Config->t_Reordering = calloc(1, sizeof(*DRB_config->pdcp_Config->t_Reordering));
-    *DRB_config->pdcp_Config->t_Reordering = NR_PDCP_Config__t_Reordering_ms20;
-    DRB_config->pdcp_Config->ext1 = NULL;
-
-    if (rrc->security.do_drb_integrity) {
-      DRB_config->pdcp_Config->drb->integrityProtection = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->integrityProtection));
-      *DRB_config->pdcp_Config->drb->integrityProtection = NR_PDCP_Config__drb__integrityProtection_enabled;
-    }
+    for(long drb_id_add = 1; drb_id_add <= nb_drb_to_setup; drb_id_add++){
+      uint8_t drb_id;
 
-    if (!rrc->security.do_drb_ciphering) {
-      DRB_config->pdcp_Config->ext1 = calloc(1, sizeof(*DRB_config->pdcp_Config->ext1));
-      DRB_config->pdcp_Config->ext1->cipheringDisabled = calloc(1, sizeof(*DRB_config->pdcp_Config->ext1->cipheringDisabled));
-      *DRB_config->pdcp_Config->ext1->cipheringDisabled = NR_PDCP_Config__ext1__cipheringDisabled_true;
-    }
+      // Reference TS23501 Table 5.7.4-1: Standardized 5QI to QoS characteristics mapping
+      for (qos_flow_index = 0; qos_flow_index < ue_context_pP->ue_context.pduSession[i].param.nb_qos; qos_flow_index++) {
+        switch (ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].fiveQI) {
+          case 1 ... 4:  /* GBR */
+            drb_id = next_available_drb(ue_p, ue_context_pP->ue_context.pduSession[i].param.pdusession_id, GBR_FLOW);
+            break;
+          case 5 ... 9:  /* Non-GBR */
+            if(rrc->configuration.drbs > 1) /* Force the creation from gNB Conf file - Should be used only in noS1 mode and rfsim for testing purposes. */
+              drb_id = next_available_drb(ue_p, ue_context_pP->ue_context.pduSession[i].param.pdusession_id, GBR_FLOW);
+            else
+              drb_id = next_available_drb(ue_p, ue_context_pP->ue_context.pduSession[i].param.pdusession_id, NONGBR_FLOW);
+            break;
 
-    // Reference TS23501 Table 5.7.4-1: Standardized 5QI to QoS characteristics mapping
-    for (qos_flow_index = 0; qos_flow_index < ue_context_pP->ue_context.pduSession[i].param.nb_qos; qos_flow_index++) {
-      switch (ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].fiveQI) {
-        case 1: //100ms
-        case 2: //150ms
-        case 3: //50ms
-        case 4: //300ms
-        case 5: //100ms
-        case 6: //300ms
-        case 7: //100ms
-        case 8: //300ms
-        case 9: //300ms Video (Buffered Streaming)TCP-based (e.g., www, e-mail, chat, ftp, p2p file sharing, progressive video, etc.)
-          // TODO
-          break;
+          default:
+            LOG_E(NR_RRC,"not supported 5qi %lu\n", ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].fiveQI);
+            ue_context_pP->ue_context.pduSession[i].status = PDU_SESSION_STATUS_FAILED;
+            ue_context_pP->ue_context.pduSession[i].xid = xid;
+            pdu_sessions_done++;
+            continue;
+        }
 
-        default:
-          LOG_E(NR_RRC,"not supported 5qi %lu\n", ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].fiveQI);
-          ue_context_pP->ue_context.pduSession[i].status = PDU_SESSION_STATUS_FAILED;
-          ue_context_pP->ue_context.pduSession[i].xid = xid;
-          pdu_sessions_done++;
-          free(DRB_config);
-          continue;
+        switch(ue_context_pP->ue_context.pduSession[i].param.qos[qos_flow_index].allocation_retention_priority.priority_level) {
+          case NGAP_PRIORITY_LEVEL_HIGHEST:
+            drb_priority[drb_id-1] = 1;
+            break;
+          case NGAP_PRIORITY_LEVEL_2:
+            drb_priority[drb_id-1] = 2;
+            break;
+          case NGAP_PRIORITY_LEVEL_3:
+            drb_priority[drb_id-1] = 3;
+            break;
+          case NGAP_PRIORITY_LEVEL_4:
+            drb_priority[drb_id-1] = 4;
+            break;
+          case NGAP_PRIORITY_LEVEL_5:
+            drb_priority[drb_id-1] = 5;
+            break;
+          case NGAP_PRIORITY_LEVEL_6:
+            drb_priority[drb_id-1] = 6;
+            break;
+          case NGAP_PRIORITY_LEVEL_7:
+            drb_priority[drb_id-1] = 7;
+            break;
+          case NGAP_PRIORITY_LEVEL_8:
+            drb_priority[drb_id-1] = 8;
+            break;
+          case NGAP_PRIORITY_LEVEL_9:
+            drb_priority[drb_id-1] = 9;
+            break;
+          case NGAP_PRIORITY_LEVEL_10:
+            drb_priority[drb_id-1] = 10;
+            break;
+          case NGAP_PRIORITY_LEVEL_11:
+            drb_priority[drb_id-1] = 11;
+            break;
+          case NGAP_PRIORITY_LEVEL_12:
+            drb_priority[drb_id-1] = 12;
+            break;
+          case NGAP_PRIORITY_LEVEL_13:
+            drb_priority[drb_id-1] = 13;
+            break;
+          case NGAP_PRIORITY_LEVEL_LOWEST:
+            drb_priority[drb_id-1] = 14;
+            break;
+          case NGAP_PRIORITY_LEVEL_NO_PRIORITY:
+            drb_priority[drb_id-1] = 15;
+            break;
+
+          default:
+            LOG_E(NR_RRC,"Not supported priority level\n");
+            break;
+        }
+
+        if(drb_is_active(ue_p, drb_id)){ /* Non-GBR flow using the same DRB or a GBR flow with no available DRBs*/
+          nb_drb_to_setup--;
+        } else {
+          NR_DRB_ToAddMod_t *DRB_config = generateDRB(ue_p,
+                                                    drb_id,
+                                                    &ue_context_pP->ue_context.pduSession[i],
+                                                    rrc->configuration.enable_sdap,
+                                                    rrc->security.do_drb_integrity,
+                                                    rrc->security.do_drb_ciphering);
+          ASN_SEQUENCE_ADD(&(*DRB_configList)->list, DRB_config);
+          ASN_SEQUENCE_ADD(&(*DRB_configList2)->list, DRB_config);
+        }
       }
     }
 
-    ASN_SEQUENCE_ADD(&(*DRB_configList)->list, DRB_config);
-    ASN_SEQUENCE_ADD(&(*DRB_configList2)->list, DRB_config);
-
     ue_context_pP->ue_context.pduSession[i].status = PDU_SESSION_STATUS_DONE;
     ue_context_pP->ue_context.pduSession[i].xid = xid;
 
@@ -1617,7 +1605,7 @@ rrc_gNB_process_RRCConnectionReestablishmentComplete(
   ue_context_pP->ue_context.Srb1.Active = 1;
   //ue_context_pP->ue_context.Srb2.Srb_info.Srb_id = 2;
 
-  if (AMF_MODE_ENABLED) {
+  if (get_softmodem_params()->sa) {
     hashtable_rc_t    h_rc;
     int               j;
     rrc_ue_ngap_ids_t *rrc_ue_ngap_ids_p = NULL;
@@ -1694,13 +1682,13 @@ rrc_gNB_process_RRCConnectionReestablishmentComplete(
       ue_context_pP->ue_context.ul_failure_timer = 0;
       return;
     }
-  } /* AMF_MODE_ENABLED */
+  } 
 
   /* Update RNTI in ue_context */
   ue_context_pP->ue_id_rnti                    = ctxt_pP->rnti; // here ue_id_rnti is just a key, may be something else
   ue_context_pP->ue_context.rnti               = ctxt_pP->rnti;
 
-  if (AMF_MODE_ENABLED) {
+  if (get_softmodem_params()->sa) {
     uint8_t send_security_mode_command = false;
     nr_rrc_pdcp_config_security(
       ctxt_pP,
@@ -2343,7 +2331,7 @@ rrc_gNB_decode_dcch(
                 ul_dcch_msg->message.choice.c1->choice.rrcReconfigurationComplete->rrc_TransactionIdentifier);
         }
 
-        if (AMF_MODE_ENABLED) {
+        if (get_softmodem_params()->sa) {
           if(ue_context_p->ue_context.pdu_session_release_command_flag == 1) {
             xid = ul_dcch_msg->message.choice.c1->choice.rrcReconfigurationComplete->rrc_TransactionIdentifier;
             ue_context_p->ue_context.pdu_session_release_command_flag = 0;
@@ -2476,7 +2464,7 @@ rrc_gNB_decode_dcch(
             LOG_DUMPMSG(RRC,DEBUG_RRC,(char *)Rx_sdu,sdu_sizeP,
                         "[MSG] RRC UL Information Transfer \n");
 
-            if (AMF_MODE_ENABLED == 1) {
+            if (get_softmodem_params()->sa) {
                 rrc_gNB_send_NGAP_UPLINK_NAS(ctxt_pP,
                                           ue_context_p,
                                           ul_dcch_msg);
@@ -2624,7 +2612,7 @@ rrc_gNB_decode_dcch(
           if(eutra_index == -1)
           break;
       }
-      if (AMF_MODE_ENABLED == 1) {
+      if (get_softmodem_params()->sa) {
           rrc_gNB_send_NGAP_UE_CAPABILITIES_IND(ctxt_pP,
                                     ue_context_p,
                                     ul_dcch_msg);
diff --git a/openair2/RRC/NR/rrc_gNB_radio_bearers.c b/openair2/RRC/NR/rrc_gNB_radio_bearers.c
new file mode 100644
index 0000000000000000000000000000000000000000..6c8f2ecddaa4ca59d767d2819fdbf6f33441ea6c
--- /dev/null
+++ b/openair2/RRC/NR/rrc_gNB_radio_bearers.c
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rrc_gNB_radio_bearers.h"
+
+/* Allocate one SRB-ToAddMod entry with CALLOC and tag it as SRB2 (srb_Identity = 2).
+ * No other field is assigned here. Declared with (void) to match the prototype in
+ * rrc_gNB_radio_bearers.h instead of an old-style empty parameter list. */
+NR_SRB_ToAddMod_t *generateSRB2(void) {
+  NR_SRB_ToAddMod_t *srb2 = CALLOC(1, sizeof(*srb2));
+  srb2->srb_Identity = 2;
+  return srb2;
+}
+
+NR_SRB_ToAddModList_t **generateSRB2_confList(gNB_RRC_UE_t *ue,
+                                              NR_SRB_ToAddModList_t *SRB_configList,
+                                              uint8_t xid) {
+  /* Per-transaction SRB list slot of the UE; it is filled only while still NULL. */
+  NR_SRB_ToAddModList_t **slot = &ue->SRB_configList2[xid];
+  if (*slot != NULL)
+    return slot;
+
+  *slot = CALLOC(1, sizeof(**slot));
+  memset(*slot, 0, sizeof(**slot));
+  NR_SRB_ToAddMod_t *srb2 = generateSRB2();
+  /* The same SRB2 entry (not a copy) is appended to both lists. */
+  ASN_SEQUENCE_ADD(&(*slot)->list, srb2);
+  ASN_SEQUENCE_ADD(&SRB_configList->list, srb2);
+  return slot;
+}
+
+/* Build the DRB-ToAddMod entry for drb_id (1-based) of pduSession: an SDAP config
+ * listing every QoS flow of the session and a PDCP config (18-bit SNs, infinite
+ * discard timer, t-Reordering ms0). Also marks the DRB as used in ue.
+ * NOTE(review): drb_id == 0, the "none free" result of next_available_drb(), would
+ * index element -1 of DRB_active/used_drbs; callers must not pass it. */
+NR_DRB_ToAddMod_t *generateDRB(gNB_RRC_UE_t *ue,
+                               uint8_t drb_id,
+                               const pdu_session_param_t *pduSession,
+                               bool enable_sdap,
+                               int do_drb_integrity,
+                               int do_drb_ciphering) {
+  NR_DRB_ToAddMod_t *DRB_config = CALLOC(1, sizeof(*DRB_config));
+  DRB_config->drb_Identity = drb_id;
+  DRB_config->cnAssociation = CALLOC(1, sizeof(*DRB_config->cnAssociation));
+  DRB_config->cnAssociation->present = NR_DRB_ToAddMod__cnAssociation_PR_sdap_Config;
+
+  /* SDAP Configuration */
+  NR_SDAP_Config_t *SDAP_config = CALLOC(1, sizeof(NR_SDAP_Config_t));
+  memset(SDAP_config, 0, sizeof(NR_SDAP_Config_t));
+  /* calloc() already zero-fills, so the flow list needs no additional memset */
+  SDAP_config->mappedQoS_FlowsToAdd = calloc(1, sizeof(struct NR_SDAP_Config__mappedQoS_FlowsToAdd));
+  SDAP_config->pdu_Session = pduSession->param.pdusession_id;
+
+  if (enable_sdap) {
+    SDAP_config->sdap_HeaderDL = NR_SDAP_Config__sdap_HeaderDL_present;
+    SDAP_config->sdap_HeaderUL = NR_SDAP_Config__sdap_HeaderUL_present;
+  } else {
+    SDAP_config->sdap_HeaderDL = NR_SDAP_Config__sdap_HeaderDL_absent;
+    SDAP_config->sdap_HeaderUL = NR_SDAP_Config__sdap_HeaderUL_absent;
+  }
+  SDAP_config->defaultDRB = true;
+
+  for (int qos_flow_index = 0; qos_flow_index < pduSession->param.nb_qos; qos_flow_index++)
+  {
+    NR_QFI_t *qfi = calloc(1, sizeof(NR_QFI_t));
+    *qfi = pduSession->param.qos[qos_flow_index].qfi;
+    ASN_SEQUENCE_ADD(&SDAP_config->mappedQoS_FlowsToAdd->list, qfi);
+    /* 5QI 1..4 are GBR, 5..9 Non-GBR (TS 23.501 Table 5.7.4-1), so 5 counts as Non-GBR.
+     * Every flow overwrites the marker: the last flow of the session decides it. */
+    if(pduSession->param.qos[qos_flow_index].fiveQI >= 5)
+      ue->pduSession[pduSession->param.pdusession_id].param.used_drbs[drb_id-1] = DRB_ACTIVE_NONGBR;
+    else
+      ue->pduSession[pduSession->param.pdusession_id].param.used_drbs[drb_id-1] = DRB_ACTIVE;
+  }
+
+  SDAP_config->mappedQoS_FlowsToRelease = NULL;
+  DRB_config->cnAssociation->choice.sdap_Config = SDAP_config;
+
+  /* PDCP Configuration */
+  DRB_config->reestablishPDCP  = NULL;
+  DRB_config->recoverPDCP      = NULL;
+  DRB_config->pdcp_Config      = calloc(1, sizeof(*DRB_config->pdcp_Config));
+  DRB_config->pdcp_Config->drb = calloc(1,sizeof(*DRB_config->pdcp_Config->drb));
+
+  DRB_config->pdcp_Config->drb->discardTimer    = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->discardTimer));
+  *DRB_config->pdcp_Config->drb->discardTimer   = NR_PDCP_Config__drb__discardTimer_infinity;
+  DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL  = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL));
+  *DRB_config->pdcp_Config->drb->pdcp_SN_SizeUL = NR_PDCP_Config__drb__pdcp_SN_SizeUL_len18bits;
+  DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL  = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL));
+  *DRB_config->pdcp_Config->drb->pdcp_SN_SizeDL = NR_PDCP_Config__drb__pdcp_SN_SizeDL_len18bits;
+
+  DRB_config->pdcp_Config->drb->headerCompression.present = NR_PDCP_Config__drb__headerCompression_PR_notUsed;
+  DRB_config->pdcp_Config->drb->headerCompression.choice.notUsed = 0;
+
+  DRB_config->pdcp_Config->drb->integrityProtection  = NULL;
+  DRB_config->pdcp_Config->drb->statusReportRequired = NULL;
+  DRB_config->pdcp_Config->drb->outOfOrderDelivery   = NULL;
+  DRB_config->pdcp_Config->moreThanOneRLC            = NULL;
+
+  DRB_config->pdcp_Config->t_Reordering  = calloc(1, sizeof(*DRB_config->pdcp_Config->t_Reordering));
+  *DRB_config->pdcp_Config->t_Reordering = NR_PDCP_Config__t_Reordering_ms0;
+  DRB_config->pdcp_Config->ext1          = NULL;
+
+  if (do_drb_integrity) {
+    DRB_config->pdcp_Config->drb->integrityProtection = calloc(1, sizeof(*DRB_config->pdcp_Config->drb->integrityProtection));
+    *DRB_config->pdcp_Config->drb->integrityProtection = NR_PDCP_Config__drb__integrityProtection_enabled;
+  }
+
+  if (!do_drb_ciphering) {
+    DRB_config->pdcp_Config->ext1 = calloc(1, sizeof(*DRB_config->pdcp_Config->ext1));
+    DRB_config->pdcp_Config->ext1->cipheringDisabled = calloc(1, sizeof(*DRB_config->pdcp_Config->ext1->cipheringDisabled));
+    *DRB_config->pdcp_Config->ext1->cipheringDisabled = NR_PDCP_Config__ext1__cipheringDisabled_true;
+  }
+
+  ue->DRB_active[drb_id-1] = DRB_ACTIVE;
+  return DRB_config;
+}
+
+uint8_t next_available_drb(gNB_RRC_UE_t *ue, uint8_t pdusession_id, bool is_gbr) {
+  /* Result is a 1-based DRB id; DRB_INACTIVE (0) signals that no slot is free. */
+  if (is_gbr == false) {
+    /* Non-GBR flow: reuse the Non-GBR DRB of this PDU session when one exists. */
+    for (uint8_t slot = 0; slot < NGAP_MAX_DRBS_PER_UE; slot++)
+      if (DRB_ACTIVE_NONGBR == ue->pduSession[pdusession_id].param.used_drbs[slot])
+        return slot + 1;
+  }
+  /* GBR flow, or nothing to reuse: hand out the first DRB the UE has not activated. */
+  for (uint8_t slot = 0; slot < NGAP_MAX_DRBS_PER_UE; slot++)
+    if (DRB_INACTIVE == ue->DRB_active[slot])
+      return slot + 1;
+  /* Every DRB slot is taken; the 0 result still has to be handled by callers. */
+  LOG_E(RRC, "Error - All the DRBs are used - Handle this\n");
+  return DRB_INACTIVE;
+}
+
+bool drb_is_active(gNB_RRC_UE_t *ue, uint8_t drb_id) {
+  /* drb_id is 1-based; only the exact DRB_ACTIVE marker counts as active. */
+  const bool active = (DRB_ACTIVE == ue->DRB_active[drb_id-1]);
+  return active;
+}
\ No newline at end of file
diff --git a/openair2/RRC/NR/rrc_gNB_radio_bearers.h b/openair2/RRC/NR/rrc_gNB_radio_bearers.h
new file mode 100644
index 0000000000000000000000000000000000000000..729b5b781930260a1c38fa799fd00d37fe81a71d
--- /dev/null
+++ b/openair2/RRC/NR/rrc_gNB_radio_bearers.h
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RRC_GNB_DRBS_H_
+#define _RRC_GNB_DRBS_H_
+/* Helpers that build the SRB2 / DRB ToAddMod entries and track per-UE DRB usage. */
+#include "nr_rrc_defs.h"
+#include "NR_SDAP-Config.h"
+#include "NR_DRB-ToAddMod.h"
+#include "NR_SRB-ToAddMod.h"
+/* NOTE(review): the accompanying .c file bounds its loops with NGAP_MAX_DRBS_PER_UE, not MAX_DRBS_PER_UE. */
+#define MAX_DRBS_PER_UE         (32)  /* Maximum number of Data Radio Bearers per UE */
+#define MAX_PDUS_PER_UE         (8)   /* Maximum number of PDU Sessions per UE */
+#define DRB_ACTIVE_NONGBR       (2)   /* DRB is used for Non-GBR Flows */
+#define DRB_ACTIVE              (1)   /* DRB slot is in use (value written by generateDRB) */
+#define DRB_INACTIVE            (0)   /* DRB slot is free; also returned by next_available_drb when none is free */
+#define GBR_FLOW                (1)   /* is_gbr argument of next_available_drb: always take a free DRB */
+#define NONGBR_FLOW             (0)   /* is_gbr argument of next_available_drb: first try the session's Non-GBR DRB */
+/* SRB2 / DRB configuration builders */
+NR_SRB_ToAddMod_t *generateSRB2(void);
+NR_SRB_ToAddModList_t **generateSRB2_confList(gNB_RRC_UE_t *ue, 
+                                              NR_SRB_ToAddModList_t *SRB_configList, 
+                                              uint8_t xid);
+NR_DRB_ToAddMod_t *generateDRB(gNB_RRC_UE_t *rrc_ue,
+                               uint8_t drb_id,
+                               const pdu_session_param_t *pduSession,
+                               bool enable_sdap,
+                               int do_drb_integrity,
+                               int do_drb_ciphering);
+/* DRB bookkeeping; DRB ids are 1-based (index drb_id-1 into the per-UE arrays) */
+uint8_t next_available_drb(gNB_RRC_UE_t *ue, uint8_t pdusession_id, bool is_gbr);
+bool drb_is_active(gNB_RRC_UE_t *ue, uint8_t drb_id);
+
+#endif
\ No newline at end of file
diff --git a/openair2/RRC/NR/rrc_gNB_reconfig.c b/openair2/RRC/NR/rrc_gNB_reconfig.c
index 1dca25b6180beceb57dfdba74051436bc1578610..01abd5865974eb8d12e2844bbb34eb04766cc32c 100644
--- a/openair2/RRC/NR/rrc_gNB_reconfig.c
+++ b/openair2/RRC/NR/rrc_gNB_reconfig.c
@@ -424,7 +424,8 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
   pusch_Config->pusch_PowerControl->pathlossReferenceRSToAddModList = NULL;
   pusch_Config->pusch_PowerControl->pathlossReferenceRSToReleaseList = NULL;
   pusch_Config->pusch_PowerControl->twoPUSCH_PC_AdjustmentStates = NULL;
-  pusch_Config->pusch_PowerControl->deltaMCS = NULL;
+  pusch_Config->pusch_PowerControl->deltaMCS = calloc(1, sizeof(*pusch_Config->pusch_PowerControl->deltaMCS));
+  *pusch_Config->pusch_PowerControl->deltaMCS = NR_PUSCH_PowerControl__deltaMCS_enabled;
   pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToAddModList = NULL;
   pusch_Config->pusch_PowerControl->sri_PUSCH_MappingToReleaseList = NULL;
   pusch_Config->frequencyHopping=NULL;
@@ -478,9 +479,15 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
     LOG_I(RRC,"TRANSFORM PRECODING ENABLED......\n");
   }
 
+  long maxMIMO_Layers = servingcellconfigdedicated->uplinkConfig &&
+                                servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig &&
+                                servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1 &&
+                                servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers ?
+                            *servingcellconfigdedicated->uplinkConfig->pusch_ServingCellConfig->choice.setup->ext1->maxMIMO_Layers : 1;
+
   int curr_bwp = NRRIV2BW(servingcellconfigcommon->downlinkConfigCommon->initialDownlinkBWP->genericParameters.locationAndBandwidth,MAX_BWP_SIZE);
   initialUplinkBWP->srs_Config = calloc(1,sizeof(*initialUplinkBWP->srs_Config));
-  config_srs(initialUplinkBWP->srs_Config, NULL, curr_bwp, uid, 0, configuration->do_SRS);
+  config_srs(initialUplinkBWP->srs_Config, NULL, curr_bwp, uid, 0, maxMIMO_Layers, configuration->do_SRS);
 
   // Downlink BWPs
   int n_dl_bwp = 1;
diff --git a/openair2/RRC/NR_UE/rrc_UE.c b/openair2/RRC/NR_UE/rrc_UE.c
index bf4f053ba6fe5ec3d9023550d2e4e91bef7c2713..c6935136c36bb4960bea014e67113b60a826629b 100644
--- a/openair2/RRC/NR_UE/rrc_UE.c
+++ b/openair2/RRC/NR_UE/rrc_UE.c
@@ -152,7 +152,7 @@ static int nr_rrc_set_state (module_id_t ue_mod_idP, Rrc_State_NR_t state) {
 }
 
 static int nr_rrc_set_sub_state( module_id_t ue_mod_idP, Rrc_Sub_State_NR_t subState ) {
-  if (AMF_MODE_ENABLED) {
+  if (get_softmodem_params()->sa) {
     switch (NR_UE_rrc_inst[ue_mod_idP].nrRrcState) {
       case RRC_STATE_INACTIVE_NR:
         AssertFatal ((RRC_SUB_STATE_INACTIVE_FIRST_NR <= subState) && (subState <= RRC_SUB_STATE_INACTIVE_LAST_NR),
@@ -835,7 +835,7 @@ int nr_decode_SI( const protocol_ctxt_t *const ctxt_pP, const uint8_t gNB_index
 
           // After SI is received, prepare RRCConnectionRequest
           if (NR_UE_rrc_inst[ctxt_pP->module_id].MBMS_flag < 3) // see -Q option
-            if (AMF_MODE_ENABLED) {
+            if (get_softmodem_params()->sa) {
               nr_rrc_ue_generate_RRCSetupRequest( ctxt_pP->module_id, gNB_index );
             }
 
@@ -1159,7 +1159,7 @@ int8_t nr_rrc_ue_decode_NR_BCCH_DL_SCH_Message(module_id_t module_id,
            dec_rval.consumed );
     log_dump(NR_RRC, Sdu, Sdu_len, LOG_DUMP_CHAR,"   Received bytes:\n" );
     // free the memory
-    SEQUENCE_free( &asn_DEF_LTE_BCCH_DL_SCH_Message, (void *)bcch_message, 1 );
+    SEQUENCE_free( &asn_DEF_NR_BCCH_DL_SCH_Message, (void *)bcch_message, 1 );
     VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME( VCD_SIGNAL_DUMPER_FUNCTIONS_UE_DECODE_BCCH, VCD_FUNCTION_OUT );
     return -1;
   }
@@ -1227,7 +1227,7 @@ int8_t nr_rrc_ue_decode_NR_BCCH_DL_SCH_Message(module_id_t module_id,
   }
 
   if (nr_rrc_get_sub_state(module_id) == RRC_SUB_STATE_IDLE_SIB_COMPLETE_NR) {
-    //if ( (NR_UE_rrc_inst[ctxt_pP->module_id].initialNasMsg.data != NULL) || (!AMF_MODE_ENABLED)) {
+    //if ( (NR_UE_rrc_inst[ctxt_pP->module_id].initialNasMsg.data != NULL) || (!get_softmodem_params()->sa)) {
       nr_rrc_ue_generate_RRCSetupRequest(module_id, 0);
       nr_rrc_set_sub_state( module_id, RRC_SUB_STATE_IDLE_CONNECTING );
     //}
@@ -1341,20 +1341,16 @@ static void rrc_ue_generate_RRCSetupComplete(
   const char *nas_msg;
   int   nas_msg_length;
 
- if (AMF_MODE_ENABLED) {
-    if (get_softmodem_params()->sa) {
-      as_nas_info_t initialNasMsg;
-      generateRegistrationRequest(&initialNasMsg, ctxt_pP->module_id);
-      nas_msg = (char*)initialNasMsg.data;
-      nas_msg_length = initialNasMsg.length;
-    } else {
-      nas_msg         = (char *) NR_UE_rrc_inst[ctxt_pP->module_id].initialNasMsg.data;
-      nas_msg_length  = NR_UE_rrc_inst[ctxt_pP->module_id].initialNasMsg.length;
-    }
+  if (get_softmodem_params()->sa) {
+    as_nas_info_t initialNasMsg;
+    generateRegistrationRequest(&initialNasMsg, ctxt_pP->module_id);
+    nas_msg = (char*)initialNasMsg.data;
+    nas_msg_length = initialNasMsg.length;
   } else {
     nas_msg         = nr_nas_attach_req_imsi;
     nas_msg_length  = sizeof(nr_nas_attach_req_imsi);
   }
+
   size = do_RRCSetupComplete(ctxt_pP->module_id, buffer, sizeof(buffer),
                              Transaction_id, sel_plmn_id, nas_msg_length, nas_msg);
   LOG_I(NR_RRC,"[UE %d][RAPROC] Frame %d : Logical Channel UL-DCCH (SRB1), Generating RRCSetupComplete (bytes%d, gNB %d)\n",
@@ -1708,9 +1704,6 @@ int8_t nr_rrc_ue_decode_ccch( const protocol_ctxt_t *const ctxt_pP, const NR_SRB
  void nr_rrc_ue_generate_RRCSetupRequest(module_id_t module_id, const uint8_t gNB_index) {
    uint8_t i=0,rv[6];
 
-   if(get_softmodem_params()->sa) {
-     AMF_MODE_ENABLED = 1;
-   }
    if(NR_UE_rrc_inst[module_id].Srb0[gNB_index].Tx_buffer.payload_size ==0) {
      // Get RRCConnectionRequest, fill random for now
      // Generate random byte stream for contention resolution
@@ -1802,7 +1795,7 @@ nr_rrc_ue_establish_srb2(
    LOG_I(NR_RRC,"[UE %d] Frame %d: processing RRCReconfiguration: reconfiguring DRB %ld\n",
 	 ue_mod_idP, frameP, DRB_config->drb_Identity);
 
-  if(!AMF_MODE_ENABLED) {
+  if(!get_softmodem_params()->sa) {
     ip_addr_offset3 = 0;
     ip_addr_offset4 = 1;
     LOG_I(OIP, "[UE %d] trying to bring up the OAI interface %d, IP X.Y.%d.%d\n", ue_mod_idP, ip_addr_offset3+ue_mod_idP,
@@ -2798,13 +2791,13 @@ void process_lte_nsa_msg(nsa_msg_t *msg, int msg_len)
 
             LTE_MeasObjectToAddMod_t *nr_meas_obj = NULL;
             asn_dec_rval_t dec_rval = uper_decode_complete(NULL,
-                            &asn_DEF_LTE_MeasObjectToAddMod,
+                            &asn_DEF_NR_MeasObjectToAddMod,
                             (void **)&nr_meas_obj,
                             msg_buffer,
                             msg_len);
             if ((dec_rval.code != RC_OK) && (dec_rval.consumed == 0))
             {
-              SEQUENCE_free(&asn_DEF_LTE_MeasObjectToAddMod, nr_meas_obj, ASFM_FREE_EVERYTHING);
+              SEQUENCE_free(&asn_DEF_NR_MeasObjectToAddMod, nr_meas_obj, ASFM_FREE_EVERYTHING);
               LOG_E(RRC, "Failed to decode measurement object (%zu bits) %d\n", dec_rval.consumed, dec_rval.code);
               break;
             }
diff --git a/openair2/SDAP/nr_sdap/nr_sdap.c b/openair2/SDAP/nr_sdap/nr_sdap.c
index 6457843ae06b003f9ac3d07926cb0e8c601b60a1..b71381b1162b50757c2d054c9f25dbde66ba0bf0 100644
--- a/openair2/SDAP/nr_sdap/nr_sdap.c
+++ b/openair2/SDAP/nr_sdap/nr_sdap.c
@@ -73,7 +73,7 @@ void sdap_data_ind(rb_id_t pdcp_entity,
   sdap_entity = nr_sdap_get_entity(rnti, pdusession_id);
 
   if(sdap_entity == NULL) {
-    LOG_E(SDAP, "%s:%d:%s: Entity not found\n", __FILE__, __LINE__, __FUNCTION__);
+    LOG_E(SDAP, "%s:%d:%s: Entity not found for ue rnti: %x and pdusession id: %d\n", __FILE__, __LINE__, __FUNCTION__, rnti, pdusession_id);
     return;
   }
 
diff --git a/openair2/SDAP/nr_sdap/nr_sdap_entity.c b/openair2/SDAP/nr_sdap/nr_sdap_entity.c
index c1fcdced7966ce9dc5a3291a86af78359ee85279..efa93a29f7290472c629b2f7f9f334690aea5f4d 100644
--- a/openair2/SDAP/nr_sdap/nr_sdap_entity.c
+++ b/openair2/SDAP/nr_sdap/nr_sdap_entity.c
@@ -43,6 +43,8 @@ void nr_pdcp_submit_sdap_ctrl_pdu(int rnti, rb_id_t sdap_ctrl_pdu_drb, nr_sdap_u
   nr_pdcp_ue_manager = nr_pdcp_sdap_get_ue_manager();
   ue = nr_pdcp_manager_get_ue(nr_pdcp_ue_manager, rnti);
   ue->drb[sdap_ctrl_pdu_drb-1]->recv_sdu(ue->drb[sdap_ctrl_pdu_drb-1], (char*)&ctrl_pdu, SDAP_HDR_LENGTH, RLC_MUI_UNDEFINED);
+  LOG_D(SDAP, "Control PDU - Submitting Control PDU to DRB ID:  %ld\n", sdap_ctrl_pdu_drb);
+  LOG_D(SDAP, "QFI: %u\n R: %u\n D/C: %u\n", ctrl_pdu.QFI, ctrl_pdu.R, ctrl_pdu.DC);
   return;
 }
 
@@ -77,9 +79,11 @@ static bool nr_sdap_tx_entity(nr_sdap_entity_t *entity,
   if(pdcp_entity){
     sdap_drb_id = pdcp_entity;
     pdcp_ent_has_sdap = entity->qfi2drb_table[qfi].hasSdap;
+    LOG_D(SDAP, "TX - QFI: %u is mapped to DRB ID: %ld\n", qfi, entity->qfi2drb_table[qfi].drb_id);
   }
 
   if(!pdcp_ent_has_sdap){
+    LOG_D(SDAP, "TX - DRB ID: %ld does not have SDAP\n", entity->qfi2drb_table[qfi].drb_id);
     ret = pdcp_data_req(ctxt_p,
                         srb_flag,
                         sdap_drb_id,
@@ -137,8 +141,8 @@ static bool nr_sdap_tx_entity(nr_sdap_entity_t *entity,
     memcpy(&sdap_buf[0], &sdap_hdr, SDAP_HDR_LENGTH);
     memcpy(&sdap_buf[SDAP_HDR_LENGTH], sdu_buffer, sdu_buffer_size);
     LOG_D(SDAP, "TX Entity QFI: %u \n", sdap_hdr.QFI);
-    LOG_D(SDAP, "TX Entity R: %u \n", sdap_hdr.R);
-    LOG_D(SDAP, "TX Entity DC: %u \n", sdap_hdr.DC);
+    LOG_D(SDAP, "TX Entity R:   %u \n", sdap_hdr.R);
+    LOG_D(SDAP, "TX Entity DC:  %u \n", sdap_hdr.DC);
   }
 
   /*
@@ -181,9 +185,9 @@ static void nr_sdap_rx_entity(nr_sdap_entity_t *entity,
     if(has_sdap && has_sdapHeader ) { // Handling the SDAP Header
       offset = SDAP_HDR_LENGTH;
       nr_sdap_ul_hdr_t *sdap_hdr = (nr_sdap_ul_hdr_t *)buf;
-      LOG_D(SDAP, "RX Entity Received QFI : %u\n", sdap_hdr->QFI);
-      LOG_D(SDAP, "RX Entity Received Reserved bit : %u\n", sdap_hdr->R);
-      LOG_D(SDAP, "RX Entity Received DC bit : %u\n", sdap_hdr->DC);
+      LOG_D(SDAP, "RX Entity Received QFI:    %u\n", sdap_hdr->QFI); // the three values are padded into one column
+      LOG_D(SDAP, "RX Entity Received R bit:  %u\n", sdap_hdr->R); // R is the reserved bit of the header
+      LOG_D(SDAP, "RX Entity Received DC bit: %u\n", sdap_hdr->DC); // DC selects the case of the switch below
 
       switch (sdap_hdr->DC) {
         case SDAP_HDR_UL_DATA_PDU:
@@ -237,6 +241,7 @@ static void nr_sdap_rx_entity(nr_sdap_entity_t *entity,
        * Perform reflective QoS flow to DRB mapping as specified in the subclause 5.3.2.
        */
       if(sdap_hdr->RDI == SDAP_REFLECTIVE_MAPPING) {
+        LOG_D(SDAP, "RX - Performing Reflective Mapping\n"); // trace entry into the reflective-mapping procedure described in the comment below
         /*
          * TS 37.324 5.3 QoS flow to DRB Mapping 
          * 5.3.2 Reflective mapping
@@ -298,22 +303,22 @@ static void nr_sdap_rx_entity(nr_sdap_entity_t *entity,
   }
 }
 
-void nr_sdap_qfi2drb_map_update(nr_sdap_entity_t *entity, uint8_t qfi, rb_id_t drb, bool hasSdap) {
+void nr_sdap_qfi2drb_map_update(nr_sdap_entity_t *entity, uint8_t qfi, rb_id_t drb, bool hasSdap){ // Record the rule QFI -> DRB, and whether that DRB has SDAP, in entity->qfi2drb_table
   if(qfi < SDAP_MAX_QFI &&
      qfi > SDAP_MAP_RULE_EMPTY &&
      drb > 0 &&
-     drb <= AVLBL_DRB)
-  {
+     drb <= AVLBL_DRB){ // both ids must be in range before the table is written
     entity->qfi2drb_table[qfi].drb_id = drb;
     entity->qfi2drb_table[qfi].hasSdap = hasSdap;
-    LOG_D(SDAP, "Updated QFI to DRB Map: QFI %u -> DRB %ld \n", qfi, entity->qfi2drb_table[qfi].drb_id);
-    LOG_D(SDAP, "DRB %ld %s\n", entity->qfi2drb_table[qfi].drb_id, hasSdap ? "has SDAP" : "does not have SDAP");
+    LOG_D(SDAP, "Updated mapping: QFI %u -> DRB %ld \n", qfi, entity->qfi2drb_table[qfi].drb_id);
+  } else { // rejected: the table is left untouched and the caller is not told
+    LOG_D(SDAP, "Map update failed, QFI: %u, DRB: %ld\n", qfi, drb);
   }
 }
 
 void nr_sdap_qfi2drb_map_del(nr_sdap_entity_t *entity, uint8_t qfi){
   entity->qfi2drb_table[qfi].drb_id = SDAP_NO_MAPPING_RULE;
-  LOG_D(SDAP, "Deleted QFI to DRB Map for QFI %u \n", qfi);
+  LOG_D(SDAP, "Deleted mapping for QFI: %u \n", qfi); // NOTE(review): qfi indexes the table unchecked here, whereas nr_sdap_qfi2drb_map_update() range-checks it
 }
 
 rb_id_t nr_sdap_qfi2drb_map(nr_sdap_entity_t *entity, uint8_t qfi, rb_id_t upper_layer_rb_id){
@@ -322,11 +327,14 @@ rb_id_t nr_sdap_qfi2drb_map(nr_sdap_entity_t *entity, uint8_t qfi, rb_id_t upper
   pdcp_entity = entity->qfi2drb_table[qfi].drb_id;
 
   if(pdcp_entity){
+    LOG_D(SDAP, "Mapping rule exists for QFI: %u\n", qfi); // a stored rule takes precedence over the default DRB
     return pdcp_entity;
   } else if(entity->default_drb) {
-    LOG_D(SDAP, "Mapped QFI %u to Default DRB\n", qfi);
+    LOG_D(SDAP, "Mapping QFI: %u to Default DRB: %ld\n", qfi, entity->default_drb); // no rule stored for this QFI, but a default DRB is set
+    entity->qfi2drb_map_update(entity, qfi, entity->default_drb, entity->qfi2drb_table[qfi].hasSdap); // keep the fallback as a rule; hasSdap is reused from the existing entry - TODO confirm it is meaningful when no rule existed
     return entity->default_drb;
   } else {
+    LOG_D(SDAP, "Mapping rule and default DRB do not exist for QFI:%u\n", qfi); // neither a rule nor a default DRB: the caller gets SDAP_MAP_RULE_EMPTY
     return SDAP_MAP_RULE_EMPTY;
   }
 
@@ -348,11 +356,11 @@ rb_id_t nr_sdap_map_ctrl_pdu(nr_sdap_entity_t *entity, rb_id_t pdcp_entity, int
   rb_id_t drb_of_endmarker = 0;
   if(map_type == SDAP_CTRL_PDU_MAP_DEF_DRB){
     drb_of_endmarker = entity->default_drb;
-    LOG_D(SDAP, "Mapped Control PDU to default drb\n");
+    LOG_D(SDAP, "Mapping Control PDU QFI: %u to Default DRB: %ld\n", dl_qfi, drb_of_endmarker); // the entity's default DRB is used unchanged
   }
   if(map_type == SDAP_CTRL_PDU_MAP_RULE_DRB){
     drb_of_endmarker = entity->qfi2drb_map(entity, dl_qfi, pdcp_entity);
-    LOG_D(SDAP, "Mapped Control PDU according to the mapping rule, qfi %u \n", dl_qfi);
+    LOG_D(SDAP, "Mapping Control PDU QFI: %u to DRB: %ld\n", dl_qfi, drb_of_endmarker); // DRB comes from the entity's qfi2drb_map() lookup for dl_qfi
   }
   return drb_of_endmarker;
 }
@@ -371,7 +379,9 @@ void nr_sdap_ue_qfi2drb_config(nr_sdap_entity_t *existing_sdap_entity,
                                uint8_t mappedQFIs2AddCount,
                                uint8_t drb_identity)
 {
+  LOG_D(SDAP, "RRC Configuring SDAP Entity\n"); // entry trace; nothing has been configured yet at this point
   uint8_t qfi = 0;
+  bool hasSdap = true; // handed to qfi2drb_map_update() for every QFI configured by the loop below (NOTE(review): hard-coded - confirm)
 
   for(int i = 0; i < mappedQFIs2AddCount; i++){
     qfi = mapped_qfi_2_add[i];
@@ -385,10 +395,13 @@ void nr_sdap_ue_qfi2drb_config(nr_sdap_entity_t *existing_sdap_entity,
       rb_id_t sdap_ctrl_pdu_drb = existing_sdap_entity->sdap_map_ctrl_pdu(existing_sdap_entity, pdcp_entity, SDAP_CTRL_PDU_MAP_RULE_DRB, qfi);
       existing_sdap_entity->sdap_submit_ctrl_pdu(rnti, sdap_ctrl_pdu_drb, sdap_ctrl_pdu);
     }
+    LOG_D(SDAP, "Storing the configured QoS flow to DRB mapping rule\n"); // reached once per loop iteration, after the control-PDU block above
+    existing_sdap_entity->qfi2drb_map_update(existing_sdap_entity, qfi, drb_identity, hasSdap); // rule stored: qfi -> drb_identity, flagged with hasSdap
   }
 }
 
-nr_sdap_entity_t *new_nr_sdap_entity(int has_sdap,
+nr_sdap_entity_t *new_nr_sdap_entity(int is_gnb, // when non-zero, an already existing entity is returned without re-running nr_sdap_ue_qfi2drb_config()
+                                     int has_sdap,
                                      uint16_t rnti,
                                      int pdusession_id,
                                      bool is_defaultDRB,
@@ -397,10 +410,11 @@ nr_sdap_entity_t *new_nr_sdap_entity(int has_sdap,
                                      uint8_t mappedQFIs2AddCount)
 {
   if(nr_sdap_get_entity(rnti, pdusession_id)) {
-    LOG_E(SDAP, "SDAP Entity for UE already exists.\n");
+    LOG_E(SDAP, "SDAP Entity for UE already exists with RNTI: %u and PDU SESSION ID: %d\n", rnti, pdusession_id); // identifies the entity that was found; it is still returned below
     nr_sdap_entity_t *existing_sdap_entity = nr_sdap_get_entity(rnti, pdusession_id);
     rb_id_t pdcp_entity = existing_sdap_entity->default_drb;
-    nr_sdap_ue_qfi2drb_config(existing_sdap_entity, pdcp_entity, rnti, mapped_qfi_2_add, mappedQFIs2AddCount, drb_identity);
+    if(!is_gnb) // the QFI -> DRB reconfiguration of an existing entity is skipped when is_gnb is non-zero
+      nr_sdap_ue_qfi2drb_config(existing_sdap_entity, pdcp_entity, rnti, mapped_qfi_2_add, mappedQFIs2AddCount, drb_identity);
     return existing_sdap_entity;
   }
 
@@ -431,9 +445,9 @@ nr_sdap_entity_t *new_nr_sdap_entity(int has_sdap,
     LOG_I(SDAP, "Default DRB for the created SDAP entity: %ld \n", sdap_entity->default_drb);
 
     if(mappedQFIs2AddCount) {
+      LOG_D(SDAP, "RRC updating mapping rules\n"); // each listed QFI is mapped to sdap_entity->default_drb by the loop below
       for (int i = 0; i < mappedQFIs2AddCount; i++)
       {
-        LOG_D(SDAP, "Mapped QFI to Add : %ld \n", mapped_qfi_2_add[i]);
         sdap_entity->qfi2drb_map_update(sdap_entity, mapped_qfi_2_add[i], sdap_entity->default_drb, has_sdap);
       }
     }
diff --git a/openair2/SDAP/nr_sdap/nr_sdap_entity.h b/openair2/SDAP/nr_sdap/nr_sdap_entity.h
index 635289d4d48cfc4f7d41564f87c8668236bc19a6..82aa267ebf11a5f38f11e80279c2259670bc3fb0 100644
--- a/openair2/SDAP/nr_sdap/nr_sdap_entity.h
+++ b/openair2/SDAP/nr_sdap/nr_sdap_entity.h
@@ -23,6 +23,7 @@
 #define _NR_SDAP_ENTITY_H_
 
 #include <stdint.h>
+#include <stdbool.h>
 #include "openair2/COMMON/platform_types.h"
 #include "openair2/LAYER2/nr_pdcp/nr_pdcp_entity.h"
 #include "NR_RadioBearerConfig.h"
@@ -164,7 +165,8 @@ void nr_sdap_ue_qfi2drb_config(nr_sdap_entity_t *existing_sdap_entity,
  * TS 37.324 4.4 5.1.1 SDAP entity establishment
  * Establish an SDAP entity.
  */
-nr_sdap_entity_t *new_nr_sdap_entity(int has_sdap,
+nr_sdap_entity_t *new_nr_sdap_entity(int is_gnb, // when non-zero, an already existing entity is returned without the QFI -> DRB reconfiguration
+                                     int has_sdap,
                                      uint16_t rnti,
                                      int pdusession_id,
                                      bool is_defaultDRB,
diff --git a/openair3/LPP/MESSAGES/37355-g60.asn b/openair3/LPP/MESSAGES/37355-g60.asn
new file mode 100644
index 0000000000000000000000000000000000000000..af7e51bd4c67bc739ef62fe63979bc898c672623
--- /dev/null
+++ b/openair3/LPP/MESSAGES/37355-g60.asn
@@ -0,0 +1,6532 @@
+-- ASN1START
+
+LPP-PDU-Definitions {
+itu-t (0) identified-organization (4) etsi (0) mobileDomain (0)
+eps-Access (21) modules (3) lpp (7) version1 (1) lpp-PDU-Definitions (1) }
+
+DEFINITIONS AUTOMATIC TAGS ::=
+
+BEGIN
+
+-- ASN1STOP
+-- ASN1START
+
+LPP-Message ::= SEQUENCE {
+	transactionID			LPP-TransactionID	OPTIONAL,	-- Need ON
+	endTransaction			BOOLEAN,
+	sequenceNumber			SequenceNumber		OPTIONAL,	-- Need ON
+	acknowledgement			Acknowledgement		OPTIONAL,	-- Need ON
+	lpp-MessageBody			LPP-MessageBody		OPTIONAL	-- Need ON
+}
+
+SequenceNumber ::= INTEGER (0..255)
+
+Acknowledgement ::= SEQUENCE {
+	ackRequested	BOOLEAN,
+	ackIndicator	SequenceNumber		OPTIONAL
+}
+
+-- ASN1STOP
+-- ASN1START
+
+LPP-MessageBody ::= CHOICE {
+	c1						CHOICE {
+		requestCapabilities			RequestCapabilities,
+		provideCapabilities			ProvideCapabilities,
+		requestAssistanceData		RequestAssistanceData,
+		provideAssistanceData		ProvideAssistanceData,
+		requestLocationInformation	RequestLocationInformation,
+		provideLocationInformation	ProvideLocationInformation,
+		abort						Abort,
+		error						Error,
+		spare7 NULL, spare6 NULL, spare5 NULL, spare4 NULL,
+		spare3 NULL, spare2 NULL, spare1 NULL, spare0 NULL
+	},
+	messageClassExtension	SEQUENCE {}
+}
+
+-- ASN1STOP
+-- ASN1START
+
+LPP-TransactionID ::= SEQUENCE {
+	initiator				Initiator,
+	transactionNumber		TransactionNumber,
+	...
+}
+
+Initiator ::= ENUMERATED {
+	locationServer,
+	targetDevice,
+	...
+}
+
+TransactionNumber ::= INTEGER (0..255)
+
+-- ASN1STOP
+-- ASN1START
+
+RequestCapabilities ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			requestCapabilities-r9		RequestCapabilities-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+RequestCapabilities-r9-IEs ::= SEQUENCE {
+	commonIEsRequestCapabilities		CommonIEsRequestCapabilities		OPTIONAL,	-- Need ON
+	a-gnss-RequestCapabilities			A-GNSS-RequestCapabilities			OPTIONAL,	-- Need ON
+	otdoa-RequestCapabilities			OTDOA-RequestCapabilities			OPTIONAL,	-- Need ON
+	ecid-RequestCapabilities			ECID-RequestCapabilities			OPTIONAL,	-- Need ON
+	epdu-RequestCapabilities			EPDU-Sequence						OPTIONAL,	-- Need ON
+	...,
+	[[	sensor-RequestCapabilities-r13	Sensor-RequestCapabilities-r13		OPTIONAL,	-- Need ON
+		tbs-RequestCapabilities-r13		TBS-RequestCapabilities-r13			OPTIONAL,	-- Need ON
+		wlan-RequestCapabilities-r13	WLAN-RequestCapabilities-r13		OPTIONAL,	-- Need ON
+		bt-RequestCapabilities-r13		BT-RequestCapabilities-r13			OPTIONAL	-- Need ON
+	]],
+	[[	nr-ECID-RequestCapabilities-r16	NR-ECID-RequestCapabilities-r16		OPTIONAL,	-- Need ON
+		nr-Multi-RTT-RequestCapabilities-r16
+										NR-Multi-RTT-RequestCapabilities-r16	
+																			OPTIONAL,	-- Need ON
+		nr-DL-AoD-RequestCapabilities-r16	
+										NR-DL-AoD-RequestCapabilities-r16	OPTIONAL,	-- Need ON
+		nr-DL-TDOA-RequestCapabilities-r16
+										NR-DL-TDOA-RequestCapabilities-r16	OPTIONAL,	-- Need ON
+		nr-UL-RequestCapabilities-r16	NR-UL-RequestCapabilities-r16		OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ProvideCapabilities ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			provideCapabilities-r9		ProvideCapabilities-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+ProvideCapabilities-r9-IEs ::= SEQUENCE {
+	commonIEsProvideCapabilities		CommonIEsProvideCapabilities			OPTIONAL,
+	a-gnss-ProvideCapabilities			A-GNSS-ProvideCapabilities				OPTIONAL,
+	otdoa-ProvideCapabilities			OTDOA-ProvideCapabilities				OPTIONAL,
+	ecid-ProvideCapabilities			ECID-ProvideCapabilities				OPTIONAL,
+	epdu-ProvideCapabilities			EPDU-Sequence							OPTIONAL,
+	...,
+	[[	sensor-ProvideCapabilities-r13	Sensor-ProvideCapabilities-r13			OPTIONAL,
+		tbs-ProvideCapabilities-r13		TBS-ProvideCapabilities-r13				OPTIONAL,
+		wlan-ProvideCapabilities-r13	WLAN-ProvideCapabilities-r13			OPTIONAL,
+		bt-ProvideCapabilities-r13		BT-ProvideCapabilities-r13				OPTIONAL
+	]],
+	[[	nr-ECID-ProvideCapabilities-r16	NR-ECID-ProvideCapabilities-r16			OPTIONAL,
+		nr-Multi-RTT-ProvideCapabilities-r16	
+										NR-Multi-RTT-ProvideCapabilities-r16	OPTIONAL,
+		nr-DL-AoD-ProvideCapabilities-r16
+										NR-DL-AoD-ProvideCapabilities-r16		OPTIONAL,
+		nr-DL-TDOA-ProvideCapabilities-r16
+										NR-DL-TDOA-ProvideCapabilities-r16		OPTIONAL,
+		nr-UL-ProvideCapabilities-r16	NR-UL-ProvideCapabilities-r16			OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+RequestAssistanceData ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			requestAssistanceData-r9	RequestAssistanceData-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+RequestAssistanceData-r9-IEs ::= SEQUENCE {
+	commonIEsRequestAssistanceData		CommonIEsRequestAssistanceData		OPTIONAL,
+	a-gnss-RequestAssistanceData		A-GNSS-RequestAssistanceData		OPTIONAL,
+	otdoa-RequestAssistanceData			OTDOA-RequestAssistanceData			OPTIONAL,
+	epdu-RequestAssistanceData			EPDU-Sequence						OPTIONAL,
+	...,
+	[[	sensor-RequestAssistanceData-r14
+										Sensor-RequestAssistanceData-r14	OPTIONAL,
+		tbs-RequestAssistanceData-r14	TBS-RequestAssistanceData-r14		OPTIONAL,
+		wlan-RequestAssistanceData-r14	WLAN-RequestAssistanceData-r14		OPTIONAL
+	]],
+	[[	nr-Multi-RTT-RequestAssistanceData-r16	NR-Multi-RTT-RequestAssistanceData-r16	OPTIONAL,
+		nr-DL-AoD-RequestAssistanceData-r16		NR-DL-AoD-RequestAssistanceData-r16		OPTIONAL,
+		nr-DL-TDOA-RequestAssistanceData-r16	NR-DL-TDOA-RequestAssistanceData-r16	OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ProvideAssistanceData ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			provideAssistanceData-r9	ProvideAssistanceData-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+ProvideAssistanceData-r9-IEs ::= SEQUENCE {
+	commonIEsProvideAssistanceData		CommonIEsProvideAssistanceData		OPTIONAL,	-- Need ON
+	a-gnss-ProvideAssistanceData		A-GNSS-ProvideAssistanceData		OPTIONAL,	-- Need ON
+	otdoa-ProvideAssistanceData			OTDOA-ProvideAssistanceData			OPTIONAL,	-- Need ON
+	epdu-Provide-Assistance-Data		EPDU-Sequence						OPTIONAL,	-- Need ON
+	...,
+	[[
+	sensor-ProvideAssistanceData-r14	Sensor-ProvideAssistanceData-r14	OPTIONAL,	-- Need ON
+	tbs-ProvideAssistanceData-r14		TBS-ProvideAssistanceData-r14		OPTIONAL,	-- Need ON
+	wlan-ProvideAssistanceData-r14		WLAN-ProvideAssistanceData-r14		OPTIONAL	-- Need ON
+	]],
+	[[	nr-Multi-RTT-ProvideAssistanceData-r16
+										NR-Multi-RTT-ProvideAssistanceData-r16
+																			OPTIONAL,	-- Need ON
+		nr-DL-AoD-ProvideAssistanceData-r16
+										NR-DL-AoD-ProvideAssistanceData-r16	OPTIONAL,	-- Need ON
+		nr-DL-TDOA-ProvideAssistanceData-r16
+										NR-DL-TDOA-ProvideAssistanceData-r16
+																			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+RequestLocationInformation ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			requestLocationInformation-r9	RequestLocationInformation-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+RequestLocationInformation-r9-IEs ::= SEQUENCE {
+	commonIEsRequestLocationInformation
+										CommonIEsRequestLocationInformation	OPTIONAL,	-- Need ON
+	a-gnss-RequestLocationInformation	A-GNSS-RequestLocationInformation	OPTIONAL,	-- Need ON
+	otdoa-RequestLocationInformation	OTDOA-RequestLocationInformation	OPTIONAL,	-- Need ON
+	ecid-RequestLocationInformation		ECID-RequestLocationInformation		OPTIONAL,	-- Need ON
+	epdu-RequestLocationInformation		EPDU-Sequence						OPTIONAL,	-- Need ON
+	...,
+	[[
+	sensor-RequestLocationInformation-r13
+										Sensor-RequestLocationInformation-r13
+																			OPTIONAL,	-- Need ON
+	tbs-RequestLocationInformation-r13	TBS-RequestLocationInformation-r13	OPTIONAL,	-- Need ON
+	wlan-RequestLocationInformation-r13	WLAN-RequestLocationInformation-r13	OPTIONAL,	-- Need ON
+	bt-RequestLocationInformation-r13	BT-RequestLocationInformation-r13	OPTIONAL	-- Need ON
+	]],
+	[[	nr-ECID-RequestLocationInformation-r16
+										NR-ECID-RequestLocationInformation-r16
+																			OPTIONAL,	-- Need ON
+		nr-Multi-RTT-RequestLocationInformation-r16
+										NR-Multi-RTT-RequestLocationInformation-r16
+																			OPTIONAL,	-- Need ON
+		nr-DL-AoD-RequestLocationInformation-r16
+										NR-DL-AoD-RequestLocationInformation-r16
+																			OPTIONAL,	-- Need ON
+		nr-DL-TDOA-RequestLocationInformation-r16
+										NR-DL-TDOA-RequestLocationInformation-r16
+																			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ProvideLocationInformation ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			provideLocationInformation-r9	ProvideLocationInformation-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+ProvideLocationInformation-r9-IEs ::= SEQUENCE {
+	commonIEsProvideLocationInformation
+										CommonIEsProvideLocationInformation	OPTIONAL,
+	a-gnss-ProvideLocationInformation	A-GNSS-ProvideLocationInformation	OPTIONAL,
+	otdoa-ProvideLocationInformation	OTDOA-ProvideLocationInformation	OPTIONAL,
+	ecid-ProvideLocationInformation		ECID-ProvideLocationInformation		OPTIONAL,
+	epdu-ProvideLocationInformation		EPDU-Sequence						OPTIONAL,
+	...,
+	[[
+	sensor-ProvideLocationInformation-r13
+										Sensor-ProvideLocationInformation-r13
+																			OPTIONAL,
+	tbs-ProvideLocationInformation-r13	TBS-ProvideLocationInformation-r13	OPTIONAL,
+	wlan-ProvideLocationInformation-r13	WLAN-ProvideLocationInformation-r13	OPTIONAL,
+	bt-ProvideLocationInformation-r13	BT-ProvideLocationInformation-r13	OPTIONAL
+	]],
+	[[	nr-ECID-ProvideLocationInformation-r16
+									NR-ECID-ProvideLocationInformation-r16		OPTIONAL,
+		nr-Multi-RTT-ProvideLocationInformation-r16
+									NR-Multi-RTT-ProvideLocationInformation-r16 OPTIONAL,
+		nr-DL-AoD-ProvideLocationInformation-r16	
+									NR-DL-AoD-ProvideLocationInformation-r16	OPTIONAL,
+		nr-DL-TDOA-ProvideLocationInformation-r16
+									NR-DL-TDOA-ProvideLocationInformation-r16	OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Abort ::= SEQUENCE {
+	criticalExtensions		CHOICE {
+		c1						CHOICE {
+			abort-r9		Abort-r9-IEs,
+			spare3 NULL, spare2 NULL, spare1 NULL
+		},
+		criticalExtensionsFuture	SEQUENCE {}
+	}
+}
+
+Abort-r9-IEs ::= SEQUENCE {
+	commonIEsAbort		CommonIEsAbort			OPTIONAL,	-- Need ON
+	...,
+	epdu-Abort			EPDU-Sequence			OPTIONAL	-- Need ON
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Error ::= CHOICE {
+	error-r9					Error-r9-IEs,
+	criticalExtensionsFuture	SEQUENCE {}
+}
+
+Error-r9-IEs ::= SEQUENCE {
+	commonIEsError		CommonIEsError			OPTIONAL,	-- Need ON
+	...,
+	epdu-Error			EPDU-Sequence			OPTIONAL	-- Need ON
+}
+-- ASN1STOP
+-- ASN1START
+
+AccessTypes ::= SEQUENCE {
+	accessTypes		BIT STRING {	eutra		(0),
+									utra		(1),
+									gsm			(2),
+									nb-iot		(3),
+									nr-v1510	(4) } (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ARFCN-ValueEUTRA ::= INTEGER (0..maxEARFCN)
+
+ARFCN-ValueEUTRA-v9a0 ::=	INTEGER (maxEARFCN-Plus1..maxEARFCN2)
+
+ARFCN-ValueEUTRA-r14 ::=	INTEGER (0..maxEARFCN2)
+
+-- ASN1STOP
+-- ASN1START
+
+ARFCN-ValueNR-r15 ::= INTEGER (0..3279165)
+
+-- ASN1STOP
+-- ASN1START
+
+ARFCN-ValueUTRA ::=	INTEGER (0..16383)
+
+-- ASN1STOP
+-- ASN1START
+
+CarrierFreq-NB-r14 ::=		SEQUENCE {
+	carrierFreq-r14				ARFCN-ValueEUTRA-r14,
+	carrierFreqOffset-r14		CarrierFreqOffsetNB-r14				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CarrierFreqOffsetNB-r14 ::=		ENUMERATED {
+									v-10, v-9, v-8,	v-7, v-6, v-5, v-4, v-3, v-2, v-1, v-0dot5,
+									v0, v1, v2, v3, v4, v5, v6, v7, v8, v9
+									}
+
+-- ASN1STOP
+-- ASN1START
+
+CellGlobalIdEUTRA-AndUTRA ::= SEQUENCE {
+	plmn-Identity		SEQUENCE {
+							mcc		SEQUENCE (SIZE (3))	OF INTEGER (0..9),
+							mnc		SEQUENCE (SIZE (2..3))	OF INTEGER (0..9)
+						},
+	cellIdentity		CHOICE {
+		eutra	BIT STRING (SIZE (28)),
+		utra	BIT STRING (SIZE (32))
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CellGlobalIdGERAN ::= SEQUENCE {
+	plmn-Identity		SEQUENCE {
+							mcc		SEQUENCE (SIZE (3))	OF INTEGER (0..9),
+							mnc		SEQUENCE (SIZE (2..3))	OF INTEGER (0..9)
+							},
+	locationAreaCode		BIT STRING (SIZE (16)),
+	cellIdentity			BIT STRING (SIZE (16)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECGI ::= SEQUENCE {
+	mcc				SEQUENCE (SIZE (3))	OF INTEGER (0..9),
+	mnc				SEQUENCE (SIZE (2..3))	OF INTEGER (0..9),
+	cellidentity	BIT STRING (SIZE (28))
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Ellipsoid-Point ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607)		-- 24 bit field
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Ellipsoid-PointWithUncertaintyCircle ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607),	-- 24 bit field
+	uncertainty					INTEGER (0..127)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+EllipsoidPointWithUncertaintyEllipse ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607),	-- 24 bit field
+	uncertaintySemiMajor		INTEGER (0..127),
+	uncertaintySemiMinor		INTEGER (0..127),
+	orientationMajorAxis		INTEGER (0..179),
+	confidence					INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+EllipsoidPointWithAltitude ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607),	-- 24 bit field
+	altitudeDirection			ENUMERATED {height, depth},
+	altitude					INTEGER (0..32767)				-- 15 bit field
+}
+
+-- ASN1STOP
+-- ASN1START
+
+EllipsoidPointWithAltitudeAndUncertaintyEllipsoid ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607),	-- 24 bit field
+	altitudeDirection			ENUMERATED {height, depth},
+	altitude					INTEGER (0..32767),				-- 15 bit field
+	uncertaintySemiMajor		INTEGER (0..127),
+	uncertaintySemiMinor		INTEGER (0..127),
+	orientationMajorAxis		INTEGER (0..179),
+	uncertaintyAltitude			INTEGER (0..127),
+	confidence					INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+EllipsoidArc ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607),	-- 24 bit field
+	innerRadius					INTEGER (0..65535),				-- 16 bit field,
+	uncertaintyRadius			INTEGER (0..127),
+	offsetAngle					INTEGER (0..179),
+	includedAngle				INTEGER (0..179),
+	confidence					INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+EPDU-Sequence ::= SEQUENCE (SIZE (1..maxEPDU)) OF EPDU
+
+maxEPDU INTEGER ::= 16
+
+EPDU ::= SEQUENCE {
+	ePDU-Identifier			EPDU-Identifier,
+	ePDU-Body				EPDU-Body
+}
+
+EPDU-Identifier ::= SEQUENCE {
+	ePDU-ID					EPDU-ID,
+	ePDU-Name				EPDU-Name		OPTIONAL,
+	...
+}
+
+EPDU-ID ::= INTEGER (1..256)
+
+EPDU-Name ::= VisibleString (SIZE (1..32))
+
+EPDU-Body ::= OCTET STRING
+
+-- ASN1STOP
+-- ASN1START
+
+FreqBandIndicatorNR-r16 ::= INTEGER (1..1024)
+
+-- ASN1STOP
+-- ASN1START
+
+HighAccuracyEllipsoidPointWithUncertaintyEllipse-r15 ::= SEQUENCE {
+	degreesLatitude-r15				INTEGER(-2147483648..2147483647),
+	degreesLongitude-r15			INTEGER(-2147483648..2147483647),
+	uncertaintySemiMajor-r15		INTEGER (0..255),
+	uncertaintySemiMinor-r15		INTEGER (0..255),
+	orientationMajorAxis-r15		INTEGER (0..179),
+	confidence-r15					INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+HighAccuracyEllipsoidPointWithAltitudeAndUncertaintyEllipsoid-r15 ::= SEQUENCE {
+	degreesLatitude-r15				INTEGER(-2147483648..2147483647),
+	degreesLongitude-r15			INTEGER(-2147483648..2147483647),
+	altitude-r15					INTEGER(-64000..1280000),
+	uncertaintySemiMajor-r15		INTEGER (0..255),
+	uncertaintySemiMinor-r15		INTEGER (0..255),
+	orientationMajorAxis-r15		INTEGER (0..179),
+	horizontalConfidence-r15		INTEGER (0..100),
+	uncertaintyAltitude-r15			INTEGER (0..255),
+	verticalConfidence-r15			INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+HorizontalVelocity ::= SEQUENCE {
+	bearing						INTEGER(0..359),
+	horizontalSpeed				INTEGER(0..2047)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+HorizontalWithVerticalVelocity ::= SEQUENCE {
+	bearing						INTEGER(0..359),
+	horizontalSpeed				INTEGER(0..2047),
+	verticalDirection			ENUMERATED{upward, downward},
+	verticalSpeed				INTEGER(0..255)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+HorizontalVelocityWithUncertainty ::= SEQUENCE {
+	bearing						INTEGER(0..359),
+	horizontalSpeed				INTEGER(0..2047),
+	uncertaintySpeed			INTEGER(0..255)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+HorizontalWithVerticalVelocityAndUncertainty ::= SEQUENCE {
+	bearing						INTEGER(0..359),
+	horizontalSpeed				INTEGER(0..2047),
+	verticalDirection			ENUMERATED{upward, downward},
+	verticalSpeed				INTEGER(0..255),
+	horizontalUncertaintySpeed	INTEGER(0..255),
+	verticalUncertaintySpeed	INTEGER(0..255)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+LocationCoordinateTypes ::= SEQUENCE {
+	ellipsoidPoint											BOOLEAN,
+	ellipsoidPointWithUncertaintyCircle						BOOLEAN,
+	ellipsoidPointWithUncertaintyEllipse					BOOLEAN,
+	polygon													BOOLEAN,
+	ellipsoidPointWithAltitude								BOOLEAN,
+	ellipsoidPointWithAltitudeAndUncertaintyEllipsoid		BOOLEAN,
+	ellipsoidArc											BOOLEAN,
+	...,
+	[[
+		highAccuracyEllipsoidPointWithUncertaintyEllipse-r15
+															BOOLEAN		OPTIONAL, -- Need ON
+		highAccuracyEllipsoidPointWithAltitudeAndUncertaintyEllipsoid-r15
+															BOOLEAN		OPTIONAL  -- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NCGI-r15 ::= SEQUENCE {
+	mcc-r15					SEQUENCE (SIZE (3)) 	OF INTEGER (0..9),
+	mnc-r15					SEQUENCE (SIZE (2..3)) 	OF INTEGER (0..9),
+	nr-cellidentity-r15		BIT STRING (SIZE (36))
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-PhysCellID-r16 ::= INTEGER (0..1007)
+
+-- ASN1STOP
+-- ASN1START
+
+PeriodicAssistanceDataControlParameters-r15 ::= SEQUENCE {
+	periodicSessionID-r15			PeriodicSessionID-r15,
+	...,
+	[[
+		updateCapabilities-r15		UpdateCapabilities-r15		OPTIONAL	 -- Need ON
+	]]
+}
+
+PeriodicSessionID-r15 ::= SEQUENCE {
+	periodicSessionInitiator-r15	ENUMERATED { locationServer, targetDevice, ... },
+	periodicSessionNumber-r15		INTEGER (0..255),
+	...
+}
+
+UpdateCapabilities-r15 ::= BIT STRING {primaryCellID-r15	(0)} (SIZE(1..8))
+
+-- ASN1STOP
+-- ASN1START
+
+Polygon ::= SEQUENCE (SIZE (3..15)) OF PolygonPoints
+
+PolygonPoints ::= SEQUENCE {
+	latitudeSign				ENUMERATED {north, south},
+	degreesLatitude				INTEGER (0..8388607),			-- 23 bit field
+	degreesLongitude			INTEGER (-8388608..8388607)		-- 24 bit field
+}
+
+-- ASN1STOP
+-- ASN1START
+
+PositioningModes ::= SEQUENCE {
+	posModes		BIT STRING {	standalone	(0),
+									ue-based	(1),
+									ue-assisted	(2)
+	} (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+SegmentationInfo-r14 ::= ENUMERATED { noMoreMessages, moreMessagesOnTheWay }
+
+-- ASN1STOP
+-- ASN1START
+
+VelocityTypes ::= SEQUENCE {
+	horizontalVelocity										BOOLEAN,
+	horizontalWithVerticalVelocity							BOOLEAN,
+	horizontalVelocityWithUncertainty						BOOLEAN,
+	horizontalWithVerticalVelocityAndUncertainty			BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsRequestCapabilities ::= SEQUENCE {
+	...,
+	[[
+	lpp-message-segmentation-req-r14	BIT STRING {	serverToTarget	(0),
+														targetToServer	(1) }	OPTIONAL -- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsProvideCapabilities ::= SEQUENCE {
+	...,
+	[[
+	segmentationInfo-r14			SegmentationInfo-r14			OPTIONAL,	-- Cond Segmentation
+	lpp-message-segmentation-r14	BIT STRING { serverToTarget	(0),
+												targetToServer	(1) }	OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsRequestAssistanceData ::= SEQUENCE {
+	primaryCellID		ECGI		OPTIONAL,	-- Cond EUTRA
+	...,
+	[[
+		segmentationInfo-r14		SegmentationInfo-r14		OPTIONAL	-- Cond Segmentation
+	]],
+	[[
+		periodicAssistanceDataReq-r15
+									PeriodicAssistanceDataControlParameters-r15
+																OPTIONAL,	-- Cond PerADreq
+		primaryCellID-r15			NCGI-r15					OPTIONAL	-- Cond NR
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsProvideAssistanceData ::= SEQUENCE {
+	...,
+	[[
+		segmentationInfo-r14		SegmentationInfo-r14		OPTIONAL	-- Need ON
+	]],
+	[[
+		periodicAssistanceData-r15	PeriodicAssistanceDataControlParameters-r15
+																OPTIONAL	-- Cond PerAD
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsRequestLocationInformation ::= SEQUENCE {
+	locationInformationType		LocationInformationType,
+	triggeredReporting			TriggeredReportingCriteria	OPTIONAL,	-- Cond ECID
+	periodicalReporting			PeriodicalReportingCriteria OPTIONAL,	-- Need ON
+	additionalInformation		AdditionalInformation		OPTIONAL,	-- Need ON
+	qos							QoS							OPTIONAL,	-- Need ON
+	environment					Environment					OPTIONAL,	-- Need ON
+	locationCoordinateTypes		LocationCoordinateTypes		OPTIONAL,	-- Need ON
+	velocityTypes				VelocityTypes				OPTIONAL,	-- Need ON
+	...,
+	[[
+		messageSizeLimitNB-r14	MessageSizeLimitNB-r14		OPTIONAL	-- Need ON
+	]],
+	[[
+		segmentationInfo-r14	SegmentationInfo-r14		OPTIONAL	-- Need ON
+	]]
+}
+
+LocationInformationType ::= ENUMERATED {
+	locationEstimateRequired,
+	locationMeasurementsRequired,
+	locationEstimatePreferred,
+	locationMeasurementsPreferred,
+	...
+}
+
+PeriodicalReportingCriteria ::=		SEQUENCE {
+	reportingAmount						ENUMERATED {
+											ra1, ra2, ra4, ra8, ra16, ra32,
+											ra64, ra-Infinity
+										} DEFAULT ra-Infinity,
+	reportingInterval					ENUMERATED {
+											noPeriodicalReporting, ri0-25,
+											ri0-5, ri1, ri2, ri4, ri8, ri16, ri32, ri64
+										}
+}
+
+TriggeredReportingCriteria ::=		SEQUENCE {
+	cellChange							BOOLEAN,
+	reportingDuration					ReportingDuration,
+	...
+}
+
+ReportingDuration ::=				INTEGER (0..255)
+
+AdditionalInformation ::= ENUMERATED {
+	onlyReturnInformationRequested,
+	mayReturnAditionalInformation,
+	...
+}
+
+QoS ::= SEQUENCE {
+	horizontalAccuracy			HorizontalAccuracy		OPTIONAL,	-- Need ON
+	verticalCoordinateRequest	BOOLEAN,
+	verticalAccuracy			VerticalAccuracy		OPTIONAL,	-- Need ON
+	responseTime				ResponseTime			OPTIONAL,	-- Need ON
+	velocityRequest				BOOLEAN,				
+	...,
+	[[	responseTimeNB-r14		ResponseTimeNB-r14		OPTIONAL	-- Need ON
+	]],
+	[[	horizontalAccuracyExt-r15	HorizontalAccuracyExt-r15		OPTIONAL,	-- Need ON
+		verticalAccuracyExt-r15		VerticalAccuracyExt-r15			OPTIONAL	-- Need ON
+	]]
+}
+
+HorizontalAccuracy ::= SEQUENCE {
+	accuracy		INTEGER(0..127),
+	confidence		INTEGER(0..100),
+	...
+}
+
+VerticalAccuracy ::= SEQUENCE {
+	accuracy		INTEGER(0..127),
+	confidence		INTEGER(0..100),
+	...
+}
+
+HorizontalAccuracyExt-r15 ::= SEQUENCE {
+	accuracyExt-r15		INTEGER(0..255),
+	confidence-r15		INTEGER(0..100),
+	...
+}
+
+VerticalAccuracyExt-r15 ::= SEQUENCE {
+	accuracyExt-r15		INTEGER(0..255),
+	confidence-r15		INTEGER(0..100),
+	...
+}
+
+ResponseTime ::= SEQUENCE {
+	time								INTEGER (1..128),
+	...,	
+	[[	responseTimeEarlyFix-r12		INTEGER (1..128)		OPTIONAL		-- Need ON
+	]],
+	[[	unit-r15				ENUMERATED { ten-seconds, ... }	OPTIONAL		-- Need ON
+	]]
+}
+
+ResponseTimeNB-r14 ::= SEQUENCE {
+	timeNB-r14							INTEGER (1..512),
+	responseTimeEarlyFixNB-r14			INTEGER (1..512)		OPTIONAL,		-- Need ON
+	...,
+	[[	unitNB-r15				ENUMERATED { ten-seconds, ... }	OPTIONAL		-- Need ON
+	]]
+}
+
+Environment ::= ENUMERATED {
+	badArea,
+	notBadArea,
+	mixedArea,
+	...
+}
+
+MessageSizeLimitNB-r14 ::= SEQUENCE {
+	measurementLimit-r14				INTEGER (1..512)		OPTIONAL,		-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsProvideLocationInformation ::= SEQUENCE {	-- Common location report: every component is OPTIONAL; r12, r13 and r14 additions follow the extension marker
+	locationEstimate			LocationCoordinates		OPTIONAL,
+	velocityEstimate			Velocity				OPTIONAL,
+	locationError				LocationError			OPTIONAL,
+	...,
+	[[	earlyFixReport-r12		EarlyFixReport-r12		OPTIONAL
+	]],
+	[[	locationSource-r13		LocationSource-r13		OPTIONAL,
+		locationTimestamp-r13	UTCTime					OPTIONAL
+	]],
+	[[
+		segmentationInfo-r14	SegmentationInfo-r14	OPTIONAL		-- Cond Segmentation
+	]]
+}
+
+LocationCoordinates ::= CHOICE {	-- Exactly one location shape is carried; the two high-accuracy alternatives were added after the extension marker
+	ellipsoidPoint								Ellipsoid-Point,
+	ellipsoidPointWithUncertaintyCircle			Ellipsoid-PointWithUncertaintyCircle,
+	ellipsoidPointWithUncertaintyEllipse		EllipsoidPointWithUncertaintyEllipse,
+	polygon										Polygon,
+	ellipsoidPointWithAltitude					EllipsoidPointWithAltitude,
+	ellipsoidPointWithAltitudeAndUncertaintyEllipsoid
+												EllipsoidPointWithAltitudeAndUncertaintyEllipsoid,
+	ellipsoidArc								EllipsoidArc,
+	...,
+	highAccuracyEllipsoidPointWithUncertaintyEllipse-v1510
+								HighAccuracyEllipsoidPointWithUncertaintyEllipse-r15,
+	highAccuracyEllipsoidPointWithAltitudeAndUncertaintyEllipsoid-v1510
+								HighAccuracyEllipsoidPointWithAltitudeAndUncertaintyEllipsoid-r15
+}
+
+Velocity ::= CHOICE {	-- One of four velocity representations (with or without vertical component, with or without uncertainty); extensible
+	horizontalVelocity							HorizontalVelocity,
+	horizontalWithVerticalVelocity				HorizontalWithVerticalVelocity,
+	horizontalVelocityWithUncertainty			HorizontalVelocityWithUncertainty,
+	horizontalWithVerticalVelocityAndUncertainty
+												HorizontalWithVerticalVelocityAndUncertainty,
+	...
+}
+
+LocationError ::= SEQUENCE {	-- Extensible wrapper around the mandatory failure cause
+	locationfailurecause			LocationFailureCause,
+	...
+}
+
+LocationFailureCause ::= ENUMERATED {	-- Extensible failure-cause codes carried in LocationError
+	undefined,
+	requestedMethodNotSupported,
+	positionMethodFailure,
+	periodicLocationMeasurementsNotAvailable,
+	...
+}
+
+EarlyFixReport-r12 ::= ENUMERATED {	-- Two-value indicator; there is no extension marker, so the value set is closed
+	noMoreMessages,
+	moreMessagesOnTheWay
+}
+
+LocationSource-r13 ::= BIT STRING {	a-gnss				(0),	-- Named-bit bitmap, one bit per source; bits 0..8 are named and the string may be 1..16 bits long
+									wlan				(1),
+									bt					(2),
+									tbs					(3),
+									sensor				(4),
+									ha-gnss-v1510		(5),
+									motion-sensor-v1550 (6),
+									dl-tdoa-r16 		(7),
+									dl-aod-r16			(8) } (SIZE(1..16))
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsAbort ::= SEQUENCE {	-- Carries only the abort cause; the SEQUENCE itself has no extension marker
+	abortCause			ENUMERATED {
+		undefined,
+		stopPeriodicReporting,
+		targetDeviceAbort,
+		networkAbort,
+		...,
+		stopPeriodicAssistanceDataDelivery-v1510
+	}
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CommonIEsError ::= SEQUENCE {	-- Carries only the error cause; the SEQUENCE itself has no extension marker
+	errorCause		ENUMERATED {
+		undefined,
+		lppMessageHeaderError,
+		lppMessageBodyError,
+		epduError,
+		incorrectDataValue,
+		...,
+		lppSegmentationError-v1450
+	}
+}
+
+-- ASN1STOP
+-- ASN1START
+
+DL-PRS-ID-Info-r16 ::= SEQUENCE {	-- DL-PRS ID (0..255), optionally narrowed by a resource-ID list and/or a resource-set ID; not extensible
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-DL-PRS-ResourceID-List-r16	SEQUENCE (SIZE (1..nrMaxResourceIDs-r16)) OF
+													NR-DL-PRS-ResourceID-r16
+																			OPTIONAL, -- Need ON
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16
+																			OPTIONAL  -- Need ON
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-AdditionalPathList-r16 ::= SEQUENCE (SIZE(1..2)) OF NR-AdditionalPath-r16	-- one or two additional-path entries
+
+NR-AdditionalPath-r16 ::= SEQUENCE {	-- One additional path: a relative time difference in one of six value ranges (k0..k5) plus an optional quality
+	nr-RelativeTimeDifference-r16	CHOICE {
+				k0-r16					INTEGER(0..16351),
+				k1-r16					INTEGER(0..8176),
+				k2-r16					INTEGER(0..4088),
+				k3-r16					INTEGER(0..2044),
+				k4-r16					INTEGER(0..1022),
+				k5-r16					INTEGER(0..511),
+				...
+	},
+	nr-PathQuality-r16				NR-TimingQuality-r16					OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-AssistanceData-r16 ::= SEQUENCE {	-- Top-level DL-PRS assistance data: reference info, a list bounded by nrMaxFreqLayers-r16, and optional SSB configurations
+	nr-DL-PRS-ReferenceInfo-r16 		DL-PRS-ID-Info-r16,
+	nr-DL-PRS-AssistanceDataList-r16	SEQUENCE (SIZE (1..nrMaxFreqLayers-r16)) OF
+														NR-DL-PRS-AssistanceDataPerFreq-r16,
+	nr-SSB-Config-r16					SEQUENCE (SIZE (1..nrMaxTRPs-r16)) OF
+														NR-SSB-Config-r16	OPTIONAL,	-- Need ON
+	...
+}
+
+NR-DL-PRS-AssistanceDataPerFreq-r16 ::= SEQUENCE {	-- One list entry of NR-DL-PRS-AssistanceData-r16: frequency-layer parameters plus a per-TRP list
+	nr-DL-PRS-PositioningFrequencyLayer-r16	
+										NR-DL-PRS-PositioningFrequencyLayer-r16,
+	nr-DL-PRS-AssistanceDataPerFreq-r16 SEQUENCE (SIZE (1..nrMaxTRPsPerFreq-r16)) OF
+														NR-DL-PRS-AssistanceDataPerTRP-r16,
+	...
+}
+
+NR-DL-PRS-AssistanceDataPerTRP-r16 ::= SEQUENCE {	-- Per-TRP entry: identifiers, SFN0 offset, expected RSTD with its uncertainty, and the DL-PRS configuration
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16			OPTIONAL,	-- Need ON
+	nr-CellGlobalID-r16				NCGI-r15					OPTIONAL,	-- Need ON
+	nr-ARFCN-r16					ARFCN-ValueNR-r15			OPTIONAL,	-- Need ON
+	nr-DL-PRS-SFN0-Offset-r16		NR-DL-PRS-SFN0-Offset-r16,
+	nr-DL-PRS-ExpectedRSTD-r16		INTEGER (-3841..3841),
+	nr-DL-PRS-ExpectedRSTD-Uncertainty-r16
+									INTEGER (0..246),
+	nr-DL-PRS-Info-r16				NR-DL-PRS-Info-r16,
+	...,
+	[[
+		prs-OnlyTP-r16				ENUMERATED { true }		OPTIONAL	-- Need ON	
+	]]
+}
+
+NR-DL-PRS-PositioningFrequencyLayer-r16 ::= SEQUENCE {	-- Parameters of one positioning frequency layer; all six root components are mandatory
+	dl-PRS-SubcarrierSpacing-r16	ENUMERATED {kHz15, kHz30, kHz60, kHz120, ...},
+	dl-PRS-ResourceBandwidth-r16	INTEGER (1..63),
+	dl-PRS-StartPRB-r16				INTEGER (0..2176),
+	dl-PRS-PointA-r16				ARFCN-ValueNR-r15,
+	dl-PRS-CombSizeN-r16			ENUMERATED {n2, n4, n6, n12, ...},
+	dl-PRS-CyclicPrefix-r16			ENUMERATED {normal, extended, ...},
+	...
+}
+
+NR-DL-PRS-SFN0-Offset-r16 ::= SEQUENCE {	-- SFN offset (0..1023) plus an integer subframe offset (0..9); extensible
+	sfn-Offset-r16					INTEGER (0..1023),
+	integerSubframeOffset-r16		INTEGER (0..9),
+	...}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-BeamInfo-r16 ::= SEQUENCE (SIZE (1..nrMaxFreqLayers-r16)) OF	-- outer list, bounded by nrMaxFreqLayers-r16
+																NR-DL-PRS-BeamInfoPerFreqLayer-r16
+
+NR-DL-PRS-BeamInfoPerFreqLayer-r16 ::= SEQUENCE (SIZE (1..nrMaxTRPsPerFreq-r16)) OF	-- inner list, bounded by nrMaxTRPsPerFreq-r16
+																NR-DL-PRS-BeamInfoPerTRP-r16
+
+NR-DL-PRS-BeamInfoPerTRP-r16 ::= SEQUENCE {	-- Beam information for one TRP: only dl-PRS-ID-r16 is mandatory, all other components are OPTIONAL
+	dl-PRS-ID-r16						INTEGER (0..255),
+	nr-PhysCellID-r16					NR-PhysCellID-r16		OPTIONAL,	-- Need ON
+	nr-CellGlobalID-r16					NCGI-r15				OPTIONAL,	-- Need ON
+	nr-ARFCN-r16						ARFCN-ValueNR-r15		OPTIONAL,	-- Need ON
+	associated-DL-PRS-ID-r16			INTEGER (0..255)		OPTIONAL,	-- Need OP
+	lcs-GCS-TranslationParameter-r16	LCS-GCS-TranslationParameter-r16	
+																OPTIONAL,	-- Need OP
+	dl-PRS-BeamInfoSet-r16				DL-PRS-BeamInfoSet-r16	OPTIONAL,	-- Need OP
+	...
+}
+
+DL-PRS-BeamInfoSet-r16 ::= SEQUENCE (SIZE(1..nrMaxSetsPerTrpPerFreqLayer-r16)) OF	-- list of resource sets, bounded by nrMaxSetsPerTrpPerFreqLayer-r16
+																DL-PRS-BeamInfoResourceSet-r16
+
+DL-PRS-BeamInfoResourceSet-r16 ::= SEQUENCE (SIZE(1..nrMaxResourcesPerSet-r16)) OF	-- list of beam elements, bounded by nrMaxResourcesPerSet-r16
+																DL-PRS-BeamInfoElement-r16
+
+DL-PRS-BeamInfoElement-r16 ::= SEQUENCE {	-- Beam direction: mandatory azimuth (0..359); optional fine azimuth, elevation (0..180) and fine elevation
+	dl-PRS-Azimuth-r16				INTEGER (0..359),
+	dl-PRS-Azimuth-fine-r16			INTEGER (0..9)					OPTIONAL,	-- Need ON
+	dl-PRS-Elevation-r16			INTEGER (0..180)				OPTIONAL,	-- Need ON
+	dl-PRS-Elevation-fine-r16		INTEGER (0..9)					OPTIONAL,	-- Need ON
+	...
+}
+
+LCS-GCS-TranslationParameter-r16 ::= SEQUENCE {	-- Three mandatory values alpha/beta/gamma (0..359), each with an optional fine part (0..9)
+	alpha-r16						INTEGER (0..359),
+	alpha-fine-r16					INTEGER (0..9)					OPTIONAL,	-- Cond AzElFine
+	beta-r16						INTEGER (0..359),
+	beta-fine-r16					INTEGER (0..9)					OPTIONAL,	-- Cond AzElFine
+	gamma-r16						INTEGER (0..359),
+	gamma-fine-r16					INTEGER (0..9) 					OPTIONAL,	-- Cond AzElFine
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-Info-r16 ::= SEQUENCE {	-- Extensible wrapper around the mandatory list of DL-PRS resource sets
+	nr-DL-PRS-ResourceSetList-r16		SEQUENCE (SIZE (1..nrMaxSetsPerTrpPerFreqLayer-r16)) OF
+																	NR-DL-PRS-ResourceSet-r16,
+	...
+}
+
+NR-DL-PRS-ResourceSet-r16 ::= SEQUENCE {	-- One DL-PRS resource set: set ID, periodicity/offset, optional repetition and muting settings, power, and the resource list
+	nr-DL-PRS-ResourceSetID-r16			NR-DL-PRS-ResourceSetID-r16,
+	dl-PRS-Periodicity-and-ResourceSetSlotOffset-r16
+										NR-DL-PRS-Periodicity-and-ResourceSetSlotOffset-r16,
+	dl-PRS-ResourceRepetitionFactor-r16	ENUMERATED {n2, n4, n6, n8, n16, n32, ...}
+																			OPTIONAL,	-- Need OP
+	dl-PRS-ResourceTimeGap-r16			ENUMERATED {s1, s2, s4, s8, s16, s32, ...}
+																			OPTIONAL, 	-- Cond Rep
+	dl-PRS-NumSymbols-r16				ENUMERATED {n2, n4, n6, n12, ...},
+	dl-PRS-MutingOption1-r16			DL-PRS-MutingOption1-r16			OPTIONAL,	-- Need OP
+	dl-PRS-MutingOption2-r16			DL-PRS-MutingOption2-r16			OPTIONAL,	-- Need OP
+	dl-PRS-ResourcePower-r16			INTEGER (-60..50),	
+	dl-PRS-ResourceList-r16				SEQUENCE (SIZE (1..nrMaxResourcesPerSet-r16)) OF
+																	NR-DL-PRS-Resource-r16,
+	...
+}
+
+DL-PRS-MutingOption1-r16 ::= SEQUENCE {	-- Option-1 muting: optional bit repetition factor plus the mandatory muting pattern
+	dl-prs-MutingBitRepetitionFactor-r16
+										ENUMERATED { n1, n2, n4, n8, ... }	OPTIONAL,	-- Need OP
+	nr-option1-muting-r16				NR-MutingPattern-r16,
+	...
+}
+
+DL-PRS-MutingOption2-r16 ::= SEQUENCE {	-- Option-2 muting: only the mandatory muting pattern (no repetition factor, unlike option 1)
+	nr-option2-muting-r16				NR-MutingPattern-r16,
+	...
+}
+
+NR-MutingPattern-r16 ::= CHOICE {	-- Fixed-length muting bitmap; the alternative name poN gives the bit length N (2, 4, 6, 8, 16 or 32)
+	po2-r16								BIT STRING (SIZE(2)),
+	po4-r16								BIT STRING (SIZE(4)),
+	po6-r16								BIT STRING (SIZE(6)),
+	po8-r16								BIT STRING (SIZE(8)),
+	po16-r16							BIT STRING (SIZE(16)),
+	po32-r16							BIT STRING (SIZE(32)),
+	...
+}
+
+NR-DL-PRS-Resource-r16 ::= SEQUENCE {	-- One DL-PRS resource: ID, sequence ID, comb size with RE offset, slot and symbol offsets, optional QCL info
+	nr-DL-PRS-ResourceID-r16			NR-DL-PRS-ResourceID-r16,
+	dl-PRS-SequenceID-r16				INTEGER (0.. 4095),
+	dl-PRS-CombSizeN-AndReOffset-r16	CHOICE {
+			n2-r16							INTEGER (0..1),
+			n4-r16							INTEGER (0..3),
+			n6-r16							INTEGER (0..5),
+			n12-r16							INTEGER (0..11),
+			...
+	},
+	dl-PRS-ResourceSlotOffset-r16		INTEGER (0..nrMaxResourceOffsetValue-1-r16),
+	dl-PRS-ResourceSymbolOffset-r16		INTEGER (0..12),
+	dl-PRS-QCL-Info-r16					DL-PRS-QCL-Info-r16		OPTIONAL,	-- Need ON
+	...
+}
+
+DL-PRS-QCL-Info-r16 ::= CHOICE {	-- QCL source is either an SSB (cell ID, index, type) or another DL-PRS resource (resource ID, set ID); not extensible
+	ssb-r16						SEQUENCE {
+		pci-r16							NR-PhysCellID-r16,
+		ssb-Index-r16					INTEGER (0..63),
+		rs-Type-r16						ENUMERATED {typeC, typeD, typeC-plus-typeD}
+	},
+	dl-PRS-r16					SEQUENCE {
+		qcl-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16,
+		qcl-DL-PRS-ResourceSetID-r16	NR-DL-PRS-ResourceSetID-r16
+	}
+}
+
+NR-DL-PRS-Periodicity-and-ResourceSetSlotOffset-r16 ::= CHOICE {	-- Outer CHOICE selects the subcarrier spacing; each inner alternative nX carries an INTEGER in 0..X-1
+	scs15-r16		CHOICE {	-- alternatives n4 .. n10240
+						n4-r16					INTEGER (0..3),
+						n5-r16					INTEGER (0..4),
+						n8-r16					INTEGER (0..7),
+						n10-r16					INTEGER (0..9),
+						n16-r16					INTEGER (0..15),
+						n20-r16					INTEGER (0..19),
+						n32-r16					INTEGER (0..31),
+						n40-r16					INTEGER (0..39),
+						n64-r16					INTEGER (0..63),
+						n80-r16					INTEGER (0..79),
+						n160-r16				INTEGER (0..159),
+						n320-r16				INTEGER (0..319),
+						n640-r16				INTEGER (0..639),
+						n1280-r16				INTEGER (0..1279),
+						n2560-r16				INTEGER (0..2559),
+						n5120-r16				INTEGER (0..5119),
+						n10240-r16				INTEGER (0..10239),
+						...
+	},
+	scs30-r16		CHOICE {	-- alternatives n8 .. n20480
+						n8-r16					INTEGER (0..7),
+						n10-r16					INTEGER (0..9),
+						n16-r16					INTEGER (0..15),
+						n20-r16					INTEGER (0..19),
+						n32-r16					INTEGER (0..31),
+						n40-r16					INTEGER (0..39),
+						n64-r16					INTEGER (0..63),
+						n80-r16					INTEGER (0..79),
+						n128-r16				INTEGER (0..127),
+						n160-r16				INTEGER (0..159),
+						n320-r16				INTEGER (0..319),
+						n640-r16				INTEGER (0..639),
+						n1280-r16				INTEGER (0..1279),
+						n2560-r16				INTEGER (0..2559),
+						n5120-r16				INTEGER (0..5119),
+						n10240-r16				INTEGER (0..10239),
+						n20480-r16				INTEGER (0..20479),
+						...
+	},
+	scs60-r16		CHOICE {	-- alternatives n16 .. n40960
+						n16-r16					INTEGER (0..15),
+						n20-r16					INTEGER (0..19),
+						n32-r16					INTEGER (0..31),
+						n40-r16					INTEGER (0..39),
+						n64-r16					INTEGER (0..63),
+						n80-r16					INTEGER (0..79),
+						n128-r16				INTEGER (0..127),
+						n160-r16				INTEGER (0..159),
+						n256-r16				INTEGER (0..255),
+						n320-r16				INTEGER (0..319),
+						n640-r16				INTEGER (0..639),
+						n1280-r16				INTEGER (0..1279),
+						n2560-r16				INTEGER (0..2559),
+						n5120-r16				INTEGER (0..5119),
+						n10240-r16				INTEGER (0..10239),
+						n20480-r16				INTEGER (0..20479),
+						n40960-r16				INTEGER (0..40959),
+						...
+	},
+	scs120-r16		CHOICE {	-- alternatives n32 .. n81920
+						n32-r16					INTEGER (0..31),
+						n40-r16					INTEGER (0..39),
+						n64-r16					INTEGER (0..63),
+						n80-r16					INTEGER (0..79),
+						n128-r16				INTEGER (0..127),
+						n160-r16				INTEGER (0..159),
+						n256-r16				INTEGER (0..255),
+						n320-r16				INTEGER (0..319),
+						n512-r16				INTEGER (0..511),
+						n640-r16				INTEGER (0..639),
+						n1280-r16				INTEGER (0..1279),
+						n2560-r16				INTEGER (0..2559),
+						n5120-r16				INTEGER (0..5119),
+						n10240-r16				INTEGER (0..10239),
+						n20480-r16				INTEGER (0..20479),
+						n40960-r16				INTEGER (0..40959),
+						n81920-r16				INTEGER (0..81919),
+						...
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-ProcessingCapability-r16 ::= SEQUENCE {	-- Per-band processing capability list, supported frequency-layer count (1..4), optional simultaneous LTE/NR PRS flag
+	prs-ProcessingCapabilityBandList-r16	SEQUENCE (SIZE (1..nrMaxBands-r16)) OF
+												PRS-ProcessingCapabilityPerBand-r16,
+	maxSupportedFreqLayers-r16				INTEGER (1..4),
+	simulLTE-NR-PRS-r16						ENUMERATED { supported}	OPTIONAL,
+	...
+}
+
+PRS-ProcessingCapabilityPerBand-r16 ::= SEQUENCE {	-- Capability for one band: supported PRS bandwidth, buffer type, processing duration, and per-SCS resources per slot
+	freqBandIndicatorNR-r16				FreqBandIndicatorNR-r16,
+	supportedBandwidthPRS-r16			CHOICE {
+		fr1										ENUMERATED {mhz5, mhz10, mhz20, mhz40,
+															mhz50, mhz80, mhz100},
+		fr2										ENUMERATED {mhz50, mhz100, mhz200, mhz400},
+		...
+	},
+	dl-PRS-BufferType-r16		 		ENUMERATED {type1, type2, ...},
+	durationOfPRS-Processing-r16		SEQUENCE {
+		durationOfPRS-ProcessingSymbols-r16	ENUMERATED {nDot125, nDot25, nDot5, n1,
+															 n2, n4, n6, n8, n12, n16, n20, n25,
+															 n30, n32, n35, n40, n45, n50},
+		durationOfPRS-ProcessingSymbolsInEveryTms-r16	
+												ENUMERATED {n8, n16, n20, n30, n40, n80,
+															 n160,n320, n640, n1280},
+		...
+	},
+	maxNumOfDL-PRS-ResProcessedPerSlot-r16	SEQUENCE {	-- every per-SCS entry is OPTIONAL and uses the same value set
+		scs15-r16								ENUMERATED {n1, n2, n4, n8, n16, n24, n32,
+															 n48, n64}					OPTIONAL,
+		scs30-r16								ENUMERATED {n1, n2, n4, n8, n16, n24, n32,
+															 n48, n64}					OPTIONAL,
+		scs60-r16								ENUMERATED {n1, n2, n4, n8, n16, n24, n32,
+															 n48, n64}					OPTIONAL,
+		scs120-r16								ENUMERATED {n1, n2, n4, n8, n16, n24, n32,
+															 n48, n64}					OPTIONAL,
+		...
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-QCL-ProcessingCapability-r16 ::= SEQUENCE {	-- Extensible wrapper around the per-band QCL processing capability list
+	dl-PRS-QCL-ProcessingCapabilityBandList-r16			SEQUENCE (SIZE (1..nrMaxBands-r16)) OF
+														DL-PRS-QCL-ProcessingCapabilityPerBand-r16,
+	...
+}
+
+DL-PRS-QCL-ProcessingCapabilityPerBand-r16 ::= SEQUENCE {	-- Band indicator plus two optional single-value support flags
+	freqBandIndicatorNR-r16						FreqBandIndicatorNR-r16,
+	ssb-FromNeighCellAsQCL-r16					ENUMERATED { supported}	OPTIONAL,
+	prs-FromServNeighCellAsQCL-r16				ENUMERATED { supported} OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-ResourceID-r16 ::= INTEGER (0..nrMaxNumDL-PRS-ResourcesPerSet-1-r16)	-- zero-based resource ID; the upper bound is a constant defined outside this section
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-ResourcesCapability-r16 ::= SEQUENCE {	-- DL-PRS resource capability: overall limits plus per-band and per-band-combination lists; all root components are mandatory
+	maxNrOfDL-PRS-ResourceSetPerTrpPerFrequencyLayer-r16	
+												INTEGER (1..2),
+	maxNrOfTRP-AcrossFreqs-r16					ENUMERATED { n4, n6, n12, n16, n32,
+															 n64, n128, n256, ...},
+	maxNrOfPosLayer-r16							INTEGER (1..4),
+	dl-PRS-ResourcesCapabilityBandList-r16		SEQUENCE (SIZE (1..nrMaxBands-r16)) OF
+													DL-PRS-ResourcesCapabilityPerBand-r16,
+	dl-PRS-ResourcesBandCombinationList-r16		DL-PRS-ResourcesBandCombinationList-r16,
+	...
+}
+
+DL-PRS-ResourcesCapabilityPerBand-r16 ::= SEQUENCE {	-- Per-band limits: resources per resource set and resources per positioning frequency layer
+	freqBandIndicatorNR-r16						FreqBandIndicatorNR-r16,
+	maxNrOfDL-PRS-ResourcesPerResourceSet-r16	ENUMERATED { n1, n2, n4, n8, n16, n32, n64, ...},
+	maxNrOfDL-PRS-ResourcesPerPositioningFrequencylayer-r16	
+												ENUMERATED { n6, n24, n32, n64, n96, n128,
+															 n256, n512, n1024, ...},
+	...
+}
+
+DL-PRS-ResourcesBandCombinationList-r16 ::=	SEQUENCE (SIZE (1..maxBandComb-r16)) OF	-- list of band combinations, bounded by maxBandComb-r16
+														DL-PRS-ResourcesBandCombination-r16
+
+DL-PRS-ResourcesBandCombination-r16 ::=	SEQUENCE {	-- One band combination: its band list and a resource limit given for FR1 only, FR2 only, or an FR1/FR2 mix
+	bandList-r16							SEQUENCE (SIZE (1..maxSimultaneousBands-r16)) OF
+															FreqBandIndicatorNR-r16,
+	maxNrOfDL-PRS-ResourcesAcrossAllFL-TRP-ResourceSet-r16	
+											CHOICE {
+		fr1-Only-r16							ENUMERATED {n6, n24, n64, n128, n192,
+															 n256, n512, n1024, n2048},
+		fr2-Only-r16							ENUMERATED {n24, n64, n96, n128, n192,
+															 n256, n512, n1024, n2048},
+		fr1-FR2Mix-r16							SEQUENCE {	-- the mixed case carries one value per frequency range
+			fr1-r16									ENUMERATED {n6, n24, n64, n96, n128,
+																 n192, n256, n512, n1024, n2048},
+			fr2-r16									ENUMERATED {n24, n64, n96, n128, n192,
+																 n256, n512, n1024, n2048},
+			...
+		},
+		...
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-PRS-ResourceSetID-r16 ::= INTEGER (0..nrMaxNumDL-PRS-ResourceSetsPerTRP-1-r16)	-- zero-based resource-set ID; the upper bound is a constant defined outside this section
+
+-- ASN1STOP
+-- ASN1START
+
+NR-PositionCalculationAssistance-r16 ::= SEQUENCE {	-- Three independent, all-optional parts: TRP locations, beam information, RTD information
+	nr-TRP-LocationInfo-r16 		NR-TRP-LocationInfo-r16				OPTIONAL,	-- Need ON
+	nr-DL-PRS-BeamInfo-r16			NR-DL-PRS-BeamInfo-r16				OPTIONAL,	-- Need ON
+	nr-RTD-Info-r16					NR-RTD-Info-r16						OPTIONAL,	-- Need ON
+	...
+}
+-- ASN1STOP
+-- ASN1START
+
+NR-RTD-Info-r16 ::= SEQUENCE {	-- RTD information: the reference TRP description followed by the RTD list; both are mandatory
+	referenceTRP-RTD-Info-r16		ReferenceTRP-RTD-Info-r16,
+	rtd-InfoList-r16				RTD-InfoList-r16,
+	...
+}
+
+ReferenceTRP-RTD-Info-r16 ::= SEQUENCE {	-- Reference TRP for RTD: identifiers, a reference time (system frame number or UTC), and an optional quality
+	dl-PRS-ID-Ref-r16				INTEGER (0..255),
+	nr-PhysCellID-Ref-r16			NR-PhysCellID-r16		OPTIONAL,	-- Need ON
+	nr-CellGlobalID-Ref-r16			NCGI-r15				OPTIONAL,	-- Need ON
+	nr-ARFCN-Ref-r16				ARFCN-ValueNR-r15		OPTIONAL,	-- Need ON
+	refTime-r16						CHOICE {
+			systemFrameNumber-r16		BIT STRING (SIZE (10)),
+			utc-r16						UTCTime,
+			...
+	},
+	rtd-RefQuality-r16				NR-TimingQuality-r16	OPTIONAL,	-- Need ON
+	...
+}
+
+RTD-InfoList-r16 ::= SEQUENCE (SIZE (1..nrMaxFreqLayers-r16)) OF RTD-InfoListPerFreqLayer-r16	-- outer list, bounded by nrMaxFreqLayers-r16
+
+RTD-InfoListPerFreqLayer-r16 ::= SEQUENCE (SIZE(1..nrMaxTRPsPerFreq-r16)) OF RTD-InfoElement-r16	-- inner list, bounded by nrMaxTRPsPerFreq-r16
+
+RTD-InfoElement-r16 ::= SEQUENCE {	-- RTD entry for one TRP: identifiers, a mandatory subframe offset (0..1966079) and a mandatory quality
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16		OPTIONAL,	-- Need ON
+	nr-CellGlobalID-r16				NCGI-r15				OPTIONAL,	-- Need ON
+	nr-ARFCN-r16					ARFCN-ValueNR-r15		OPTIONAL,	-- Need ON
+	subframeOffset-r16				INTEGER (0..1966079),
+	rtd-Quality-r16					NR-TimingQuality-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-SelectedDL-PRS-IndexList-r16 ::=	SEQUENCE (SIZE (1..nrMaxFreqLayers-r16)) OF	-- list of per-frequency selections, bounded by nrMaxFreqLayers-r16
+										NR-SelectedDL-PRS-PerFreq-r16
+
+NR-SelectedDL-PRS-PerFreq-r16 ::= SEQUENCE {	-- Selection for one frequency layer: a mandatory layer index plus an optional per-TRP index list
+	nr-SelectedDL-PRS-FrequencyLayerIndex-r16	INTEGER (0..nrMaxFreqLayers-1-r16),
+	nr-SelectedDL-PRS-IndexListPerFreq-r16 		SEQUENCE (SIZE (1..nrMaxTRPsPerFreq-r16)) OF
+														NR-SelectedDL-PRS-IndexPerTRP-r16
+																			OPTIONAL,	-- Need OP
+	...
+}
+
+NR-SelectedDL-PRS-IndexPerTRP-r16 ::= SEQUENCE {	-- Selection for one TRP: a mandatory TRP index plus an optional resource-set index list
+	nr-SelectedTRP-Index-r16					INTEGER (0..nrMaxTRPsPerFreq-1-r16),
+	dl-SelectedPRS-ResourceSetIndexList-r16		SEQUENCE (SIZE (1..nrMaxSetsPerTrpPerFreqLayer-r16))
+													OF	DL-SelectedPRS-ResourceSetIndex-r16
+																			OPTIONAL,	-- Need OP
+	...
+}
+
+DL-SelectedPRS-ResourceSetIndex-r16 ::= SEQUENCE {	-- Selection for one resource set: a mandatory set index plus an optional resource index list; no extension marker
+	nr-DL-SelectedPRS-ResourceSetIndex-r16		INTEGER (0..nrMaxSetsPerTrpPerFreqLayer-1-r16),
+	dl-SelectedPRS-ResourceIndexList-r16		SEQUENCE (SIZE (1..nrMaxResourcesPerSet-r16)) OF
+														DL-SelectedPRS-ResourceIndex-r16
+																			OPTIONAL	-- Need OP
+}
+
+DL-SelectedPRS-ResourceIndex-r16 ::= SEQUENCE {	-- Extensible wrapper around a single mandatory resource index
+	nr-DL-SelectedPRS-ResourceIdIndex-r16		INTEGER (0..nrMaxNumDL-PRS-ResourcesPerSet-1-r16),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-SSB-Config-r16 ::= SEQUENCE {	-- SSB configuration of one cell; only ssb-PositionsInBurst-r16 is OPTIONAL
+	nr-PhysCellID-r16					NR-PhysCellID-r16,
+	nr-ARFCN-r16						ARFCN-ValueNR-r15,
+	ss-PBCH-BlockPower-r16				INTEGER (-60..50),
+	halfFrameIndex-r16					INTEGER (0..1),
+	ssb-periodicity-r16					ENUMERATED { ms5, ms10, ms20, ms40, ms80, ms160, ...},
+	ssb-PositionsInBurst-r16			CHOICE {
+		shortBitmap-r16						BIT STRING (SIZE (4)),
+		mediumBitmap-r16					BIT STRING (SIZE (8)),
+		longBitmap-r16						BIT STRING (SIZE (64))
+	}																			OPTIONAL, -- Need OR
+	ssb-SubcarrierSpacing-r16			ENUMERATED {kHz15, kHz30, kHz60, kHz120, kHz240, ...},
+	sfn-SSB-Offset-r16					INTEGER (0..15),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-TimeStamp-r16 ::= SEQUENCE {	-- Time stamp: TRP identifiers, SFN (0..1023) and a slot number whose range depends on the chosen subcarrier spacing
+	dl-PRS-ID-r16				INTEGER (0..255),
+	nr-PhysCellID-r16			NR-PhysCellID-r16			OPTIONAL,	-- Need ON
+	nr-CellGlobalID-r16			NCGI-r15					OPTIONAL,	-- Need ON
+	nr-ARFCN-r16				ARFCN-ValueNR-r15			OPTIONAL,	-- Need ON
+	nr-SFN-r16					INTEGER (0..1023),
+	nr-Slot-r16 				CHOICE {
+			scs15-r16				INTEGER (0..9),
+			scs30-r16				INTEGER (0..19),
+			scs60-r16				INTEGER (0..39),
+			scs120-r16				INTEGER (0..79)
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-TimingQuality-r16 ::= SEQUENCE {	-- Quality as a value (0..31) paired with an enumerated resolution
+	timingQualityValue-r16			INTEGER (0..31),
+	timingQualityResolution-r16		ENUMERATED {mdot1, m1, m10, m30, ...},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-TRP-LocationInfo-r16 ::= SEQUENCE (SIZE (1..nrMaxFreqLayers-r16)) OF	-- list bounded by nrMaxFreqLayers-r16
+									NR-TRP-LocationInfoPerFreqLayer-r16
+
+NR-TRP-LocationInfoPerFreqLayer-r16 ::= SEQUENCE {	-- One list entry: an optional reference point plus the mandatory per-TRP location list
+	referencePoint-r16			ReferencePoint-r16				OPTIONAL,	-- Cond NotSameAsPrev
+	trp-LocationInfoList-r16	SEQUENCE (SIZE (1..nrMaxTRPsPerFreq-r16)) OF
+										TRP-LocationInfoElement-r16,
+	...
+}
+
+TRP-LocationInfoElement-r16 ::= SEQUENCE {	-- Location entry for one TRP: only dl-PRS-ID-r16 is mandatory; the location and per-resource-set list are OPTIONAL
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16			OPTIONAL,	-- Need ON
+	nr-CellGlobalID-r16				NCGI-r15					OPTIONAL,	-- Need ON
+	nr-ARFCN-r16					ARFCN-ValueNR-r15			OPTIONAL,	-- Need ON
+	associated-DL-PRS-ID-r16		INTEGER (0..255)			OPTIONAL,	-- Need OP
+	trp-Location-r16				RelativeLocation-r16					OPTIONAL,	-- Need OP
+	trp-DL-PRS-ResourceSets-r16		SEQUENCE (SIZE(1..nrMaxSetsPerTrpPerFreqLayer-r16)) OF
+										DL-PRS-ResourceSets-TRP-Element-r16	OPTIONAL,	-- Need OP
+	...
+}
+
+DL-PRS-ResourceSets-TRP-Element-r16 ::= SEQUENCE {	-- Per-resource-set entry: an optional set-level relative location plus an optional per-resource list
+	dl-PRS-ResourceSetARP-r16			RelativeLocation-r16				OPTIONAL,	-- Need OP
+	dl-PRS-Resource-ARP-List-r16		SEQUENCE (SIZE(1..nrMaxResourcesPerSet-r16)) OF
+											DL-PRS-Resource-ARP-Element-r16	OPTIONAL,	-- Need OP
+	...
+}
+
+DL-PRS-Resource-ARP-Element-r16 ::= SEQUENCE {	-- Extensible wrapper around one optional per-resource relative location
+	dl-PRS-Resource-ARP-location-r16	RelativeLocation-r16				OPTIONAL,	-- Need OP
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-UL-SRS-Capability-r16 ::= SEQUENCE {	-- UL SRS capability: mandatory per-band list; optional CA band list and two optional all-serving-cell limits
+	srs-CapabilityBandList-r16					SEQUENCE (SIZE (1..nrMaxBands-r16)) OF
+													SRS-CapabilityPerBand-r16,
+	srs-PosResourceConfigCA-BandList-r16		SEQUENCE (SIZE (1..nrMaxConfiguredBands-r16)) OF
+													SRS-PosResourcesPerBand-r16			OPTIONAL,
+	maxNumberSRS-PosPathLossEstimateAllServingCells-r16	
+												ENUMERATED {n1, n4, n8, n16}			OPTIONAL,
+	maxNumberSRS-PosSpatialRelationsAllServingCells-r16	
+												ENUMERATED {n0, n1, n2, n4, n8, n16}	OPTIONAL,
+	...
+}
+
+SRS-CapabilityPerBand-r16 ::= SEQUENCE {	-- Band indicator plus two optional capability groups (OLPC and spatial relations)
+	freqBandIndicatorNR-r16			FreqBandIndicatorNR-r16,
+	olpc-SRS-Pos-r16				OLPC-SRS-Pos-r16									OPTIONAL,
+	spatialRelationsSRS-Pos-r16		SpatialRelationsSRS-Pos-r16							OPTIONAL,
+	...
+}
+
+OLPC-SRS-Pos-r16 ::= SEQUENCE {	-- Three optional single-value support flags plus an optional per-serving-cell path loss estimate limit
+	olpc-SRS-PosBasedOnPRS-Serving-r16		ENUMERATED {supported}						OPTIONAL,
+	olpc-SRS-PosBasedOnSSB-Neigh-r16 		ENUMERATED {supported}						OPTIONAL,
+	olpc-SRS-PosBasedOnPRS-Neigh-r16		ENUMERATED {supported}						OPTIONAL,
+	maxNumberPathLossEstimatePerServing-r16	ENUMERATED {n1, n4, n8, n16}				OPTIONAL,
+	...
+}
+
+SpatialRelationsSRS-Pos-r16 ::=	SEQUENCE {	-- Six optional single-value support flags, one per spatial-relation source
+	spatialRelation-SRS-PosBasedOnSSB-Serving-r16		ENUMERATED {supported}			OPTIONAL,
+	spatialRelation-SRS-PosBasedOnCSI-RS-Serving-r16	ENUMERATED {supported}			OPTIONAL,
+	spatialRelation-SRS-PosBasedOnPRS-Serving-r16		ENUMERATED {supported}			OPTIONAL,
+	spatialRelation-SRS-PosBasedOnSRS-r16				ENUMERATED {supported}			OPTIONAL,
+	spatialRelation-SRS-PosBasedOnSSB-Neigh-r16			ENUMERATED {supported}			OPTIONAL,
+	spatialRelation-SRS-PosBasedOnPRS-Neigh-r16			ENUMERATED {supported}			OPTIONAL,
+	...
+}
+
+SRS-PosResourcesPerBand-r16 ::= SEQUENCE {	-- Per-band, per-BWP SRS limits: three mandatory limits followed by optional AP and SP limits
+	freqBandIndicatorNR-r16							FreqBandIndicatorNR-r16,
+	maxNumberSRS-PosResourceSetsPerBWP-r16			ENUMERATED {n1, n2, n4, n8, n12, n16},
+	maxNumberSRS-PosResourcesPerBWP-r16				ENUMERATED {n1, n2, n4, n8, n16, n32, n64},
+	maxNumberPeriodicSRS-PosResourcesPerBWP-r16		ENUMERATED {n1, n2, n4, n8, n16, n32, n64},
+	maxNumberAP-SRS-PosResourcesPerBWP-r16			ENUMERATED {n1, n2, n4, n8, n16, n32, n64}
+																						OPTIONAL,
+	maxNumberSP-SRS-PosResourcesPerBWP-r16			ENUMERATED {n1, n2, n4, n8, n16, n32, n64}
+																						OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ReferencePoint-r16 ::= SEQUENCE {	-- Reference point given as a 3D location, in either the standard or the high-accuracy ellipsoid form
+	referencePointGeographicLocation-r16 		CHOICE {
+		location3D-r16 			EllipsoidPointWithAltitudeAndUncertaintyEllipsoid,
+		ha-location3D-r16 		HighAccuracyEllipsoidPointWithAltitudeAndUncertaintyEllipsoid-r15,
+		...
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+RelativeLocation-r16 ::= SEQUENCE {	-- Relative location: two unit selectors, three mandatory deltas (latitude, longitude, height), optional uncertainty
+	milli-arc-second-units-r16 	ENUMERATED { mas0-03, mas0-3, mas3, mas30, ...},
+	height-units-r16			ENUMERATED {mm, cm, m, ...},
+	delta-latitude-r16			Delta-Latitude-r16,
+	delta-longitude-r16			Delta-Longitude-r16,
+	delta-height-r16			Delta-Height-r16,
+	locationUNC-r16				LocationUncertainty-r16				OPTIONAL,		-- Need OP
+	...
+}
+
+Delta-Latitude-r16 ::= SEQUENCE {	-- Signed delta (-1024..1023) with an optional coarse part (0..4095)
+	delta-Latitude-r16						INTEGER (-1024..1023),
+	coarse-delta-Latitude-r16				INTEGER (0..4095)		OPTIONAL,		-- Need OP
+	...
+}
+
+Delta-Longitude-r16 ::= SEQUENCE {	-- Signed delta (-1024..1023) with an optional coarse part (0..4095)
+	delta-Longitude-r16						INTEGER (-1024..1023),
+	coarse-delta-Longitude-r16				INTEGER (0..4095)		OPTIONAL,		-- Need OP
+	...
+}
+
+Delta-Height-r16 ::= SEQUENCE {	-- Signed delta (-1024..1023) with an optional coarse part (0..4095)
+	delta-Height-r16						INTEGER (-1024..1023),
+	coarse-delta-Height-r16					INTEGER (0..4095)		OPTIONAL,		-- Need OP
+	...
+}
+
+LocationUncertainty-r16 ::= SEQUENCE {	-- Horizontal and vertical uncertainty (0..255), each with a confidence (0..100); all mandatory, no extension marker
+	horizontalUncertainty-r16				INTEGER (0..255),
+	horizontalConfidence-r16				INTEGER (0..100),
+	verticalUncertainty-r16					INTEGER (0..255),
+	verticalConfidence-r16					INTEGER (0..100)
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-ProvideAssistanceData ::= SEQUENCE {	-- OTDOA assistance data: optional reference cell, neighbour list and error; r14 adds the NB counterparts
+	otdoa-ReferenceCellInfo			OTDOA-ReferenceCellInfo				OPTIONAL,	-- Need ON
+	otdoa-NeighbourCellInfo			OTDOA-NeighbourCellInfoList			OPTIONAL,	-- Need ON
+	otdoa-Error						OTDOA-Error							OPTIONAL,	-- Need ON
+	...,
+	[[
+	 otdoa-ReferenceCellInfoNB-r14	OTDOA-ReferenceCellInfoNB-r14		OPTIONAL,	-- Need ON
+	 otdoa-NeighbourCellInfoNB-r14	OTDOA-NeighbourCellInfoListNB-r14	OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-ReferenceCellInfo ::= SEQUENCE {	-- Reference cell description; physCellId and cpLength are mandatory, four extension groups follow the marker
+	physCellId					INTEGER (0..503),
+	cellGlobalId				ECGI						OPTIONAL,		-- Need ON
+	earfcnRef					ARFCN-ValueEUTRA			OPTIONAL,		-- Cond NotSameAsServ0
+	antennaPortConfig			ENUMERATED {ports1-or-2, ports4, ... }
+															OPTIONAL,		-- Cond NotSameAsServ1
+	cpLength					ENUMERATED { normal, extended, ... },
+	prsInfo						PRS-Info					OPTIONAL,		-- Cond PRS
+	...,
+	[[ earfcnRef-v9a0			ARFCN-ValueEUTRA-v9a0		OPTIONAL		-- Cond NotSameAsServ2
+	]],
+	[[	tpId-r14				INTEGER (0..4095)			OPTIONAL,		-- Need ON
+		cpLengthCRS-r14			ENUMERATED { normal, extended, ... }	
+															OPTIONAL,		-- Cond CRS
+		sameMBSFNconfigRef-r14	BOOLEAN						OPTIONAL,		-- Need ON
+		dlBandwidth-r14			ENUMERATED {n6, n15, n25, n50, n75, n100}
+															OPTIONAL,		-- Cond NotSameAsServ3
+		addPRSconfigRef-r14		SEQUENCE (SIZE (1..maxAddPRSconfig-r14)) OF PRS-Info
+															OPTIONAL		-- Need ON	
+	]],
+	[[
+		nr-LTE-SFN-Offset-r15	INTEGER (0..1023)			OPTIONAL		-- Cond NR
+	]],
+	[[
+		tdd-config-v1520					TDD-Config-v1520	OPTIONAL,		-- Need ON
+		nr-LTE-fineTiming-Offset-r15		INTEGER (0..19)		OPTIONAL		-- Cond FineOffset
+	]]
+}
+
+maxAddPRSconfig-r14			INTEGER ::= 2	-- upper size bound of the addPRSconfigRef-r14 and addPRSconfigNeighbour-r14 lists
+
+-- ASN1STOP
+-- ASN1START
+
+PRS-Info ::= SEQUENCE {	-- PRS configuration: three mandatory root fields; the muting info (r9) and the r14 group are extension additions
+	prs-Bandwidth			ENUMERATED { n6, n15, n25, n50, n75, n100, ... },
+	prs-ConfigurationIndex	INTEGER (0..4095),
+	numDL-Frames			ENUMERATED {sf-1, sf-2, sf-4, sf-6, ..., sf-add-v1420},
+	...,
+	prs-MutingInfo-r9		CHOICE {
+		po2-r9					BIT STRING (SIZE(2)),
+		po4-r9					BIT STRING (SIZE(4)),
+		po8-r9					BIT STRING (SIZE(8)),
+		po16-r9					BIT STRING (SIZE(16)),
+		...,
+		po32-v1420				BIT STRING (SIZE(32)),
+		po64-v1420				BIT STRING (SIZE(64)),
+		po128-v1420				BIT STRING (SIZE(128)),
+		po256-v1420				BIT STRING (SIZE(256)),
+		po512-v1420				BIT STRING (SIZE(512)),
+		po1024-v1420			BIT STRING (SIZE(1024))
+	}														OPTIONAL,				-- Need OP
+	[[	prsID-r14				INTEGER (0..4095)			OPTIONAL,				-- Need ON
+		add-numDL-Frames-r14	INTEGER (1..160)			OPTIONAL,				-- Cond sf-add
+		prsOccGroupLen-r14		ENUMERATED {g2, g4, g8, g16, g32, g64, g128,... }
+															OPTIONAL,				-- Cond Occ-Grp
+		prsHoppingInfo-r14	CHOICE {
+			nb2-r14				INTEGER (0.. maxAvailNarrowBands-Minus1-r14),
+			nb4-r14				SEQUENCE (SIZE (3))
+										OF INTEGER (0.. maxAvailNarrowBands-Minus1-r14)
+	}														OPTIONAL				-- Cond PRS-FH (this brace closes prsHoppingInfo-r14 despite its indent)
+	]]
+}
+
+maxAvailNarrowBands-Minus1-r14		INTEGER ::= 15	-- Maximum number of narrowbands minus 1; upper bound of the nb2-r14 and nb4-r14 values in PRS-Info
+
+-- ASN1STOP
+-- ASN1START
+
+TDD-Config-v1520 ::= SEQUENCE {	-- Extensible wrapper around the subframe assignment (sa0..sa6; the enumeration itself is closed)
+	subframeAssignment-v1520			ENUMERATED { sa0, sa1, sa2, sa3, sa4, sa5, sa6 },
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-NeighbourCellInfoList ::= SEQUENCE (SIZE (1..maxFreqLayers)) OF OTDOA-NeighbourFreqInfo	-- outer list, bounded by maxFreqLayers
+OTDOA-NeighbourFreqInfo ::= SEQUENCE (SIZE (1..24)) OF OTDOA-NeighbourCellInfoElement	-- inner list of 1..24 neighbour cell elements
+
+OTDOA-NeighbourCellInfoElement ::= SEQUENCE {	-- Neighbour cell entry; physCellId, expectedRSTD and expectedRSTD-Uncertainty are mandatory, the rest is conditional or optional
+	physCellId							INTEGER (0..503),
+	cellGlobalId						ECGI				OPTIONAL,		-- Need ON
+	earfcn								ARFCN-ValueEUTRA	OPTIONAL,		-- Cond NotSameAsRef0
+	cpLength							ENUMERATED {normal, extended, ...}
+															OPTIONAL,		-- Cond NotSameAsRef1
+	prsInfo								PRS-Info			OPTIONAL,		-- Cond NotSameAsRef2
+	antennaPortConfig					ENUMERATED {ports-1-or-2, ports-4, ...}
+															OPTIONAL, 		-- Cond NotsameAsRef3
+	slotNumberOffset					INTEGER (0..19)		OPTIONAL,		-- Cond NotSameAsRef4
+	prs-SubframeOffset					INTEGER (0..1279)	OPTIONAL,		-- Cond InterFreq
+	expectedRSTD						INTEGER (0..16383),
+	expectedRSTD-Uncertainty			INTEGER (0..1023),
+	...,
+	[[ earfcn-v9a0					ARFCN-ValueEUTRA-v9a0	OPTIONAL		-- Cond NotSameAsRef5
+	]],
+	[[	tpId-r14					INTEGER (0..4095)		OPTIONAL,		-- Need ON
+		prs-only-tp-r14				ENUMERATED { true }		OPTIONAL,		-- Cond TBS
+		cpLengthCRS-r14				ENUMERATED { normal, extended, ... }	
+															OPTIONAL,		-- Cond CRS
+		sameMBSFNconfigNeighbour-r14	BOOLEAN				OPTIONAL,		-- Need ON
+		dlBandwidth-r14				ENUMERATED {n6, n15, n25, n50, n75, n100}
+															OPTIONAL,		-- Cond NotSameAsRef6
+		addPRSconfigNeighbour-r14	SEQUENCE (SIZE (1..maxAddPRSconfig-r14)) OF
+										Add-PRSconfigNeighbourElement-r14
+															OPTIONAL		-- Need ON
+	]],
+	[[
+		tdd-config-v1520			TDD-Config-v1520		OPTIONAL		-- Need ON
+	]]
+}
+
+Add-PRSconfigNeighbourElement-r14 ::= SEQUENCE {	-- Extensible wrapper around one optional additional PRS-Info
+	add-prsInfo-r14					PRS-Info				OPTIONAL,		-- Cond NotSameAsRef7
+	...
+}
+
+maxFreqLayers	INTEGER ::= 3	-- upper size bound of OTDOA-NeighbourCellInfoList
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-ReferenceCellInfoNB-r14 ::= SEQUENCE {	-- NB reference cell description; every component is OPTIONAL, two extension groups follow the marker
+	physCellIdNB-r14				INTEGER (0..503)			OPTIONAL,	-- Cond NoPRS-AD1
+	cellGlobalIdNB-r14				ECGI						OPTIONAL,	-- Cond NoPRS-AD2
+	carrierFreqRef-r14				CarrierFreq-NB-r14			OPTIONAL,	-- Cond NotSameAsServ1
+	earfcn-r14						ARFCN-ValueEUTRA-r14		OPTIONAL,	-- Cond Inband
+	eutra-NumCRS-Ports-r14			ENUMERATED {ports1-or-2, ports4}
+																OPTIONAL,	-- Cond NoPRS-AD3
+	otdoa-SIB1-NB-repetitions-r14	ENUMERATED { r4, r8, r16 }	OPTIONAL,	-- Cond NotSameAsServ2
+	nprsInfo-r14					PRS-Info-NB-r14				OPTIONAL,	-- Cond NPRS-Type1
+	...,
+	[[
+	nprsInfo-Type2-v1470			PRS-Info-NB-r14				OPTIONAL	-- Cond NPRS-Type2
+	]],
+	[[	tdd-config-r15				TDD-Config-v1520			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+PRS-Info-NB-r14 ::= SEQUENCE (SIZE (1..maxCarrier-r14)) OF NPRS-Info-r14	-- list of NPRS configurations, bounded by maxCarrier-r14
+
+NPRS-Info-r14 ::= SEQUENCE {	-- One NPRS configuration: operation mode, optional carrier/sequence/ID, optional partA and partB, and an r15 TDD variant of partA
+	operationModeInfoNPRS-r14	ENUMERATED { inband, standalone },
+	nprs-carrier-r14			CarrierFreq-NB-r14	OPTIONAL,	-- Cond Standalone/Guardband
+	nprsSequenceInfo-r14		INTEGER (0..174)	OPTIONAL,	-- Cond Inband
+	nprsID-r14					INTEGER (0..4095)	OPTIONAL,	-- Cond NPRS-ID
+	partA-r14					SEQUENCE {
+		nprsBitmap-r14			CHOICE {
+			subframePattern10-r14	BIT STRING (SIZE (10)),
+			subframePattern40-r14 	BIT STRING (SIZE (40))	
+		},
+		nprs-MutingInfoA-r14	CHOICE {
+			po2-r14					BIT STRING (SIZE(2)),
+			po4-r14					BIT STRING (SIZE(4)),
+			po8-r14					BIT STRING (SIZE(8)),
+			po16-r14				BIT STRING (SIZE(16)),
+			...
+		}															OPTIONAL,		-- Cond MutingA
+		...
+	}																OPTIONAL,		-- Cond PartA
+	partB-r14					SEQUENCE {
+		nprs-Period-r14			ENUMERATED { ms160, ms320, ms640, ms1280, ... , ms2560-v1510},
+		nprs-startSF-r14		ENUMERATED { zero, one-eighth, two-eighths, three-eighths,
+											four-eighths, five-eighths, six-eighths,
+											seven-eighths, ...},
+		nprs-NumSF-r14			ENUMERATED { sf10, sf20, sf40, sf80, sf160, sf320,
+											sf640, sf1280, ... , sf2560-v1510},
+		nprs-MutingInfoB-r14	CHOICE {
+			po2-r14					BIT STRING (SIZE(2)),
+			po4-r14					BIT STRING (SIZE(4)),
+			po8-r14					BIT STRING (SIZE(8)),
+			po16-r14				BIT STRING (SIZE(16)),
+			...
+		}															OPTIONAL,		-- Cond MutingB
+		...,
+	[[	sib1-SF-TDD-r15			ENUMERATED {sf0, sf4, sf0and5}		OPTIONAL		-- Cond SIB1-TDD
+	]]	-- this extension group belongs to partB-r14, despite its shallower indent
+	}																OPTIONAL,		-- Cond PartB
+	...,
+	[[
+	partA-TDD-r15					SEQUENCE {
+		nprsBitmap-r15			CHOICE {
+			subframePattern10-TDD-r15	BIT STRING (SIZE (8)),
+			subframePattern40-TDD-r15 	BIT STRING (SIZE (32)),
+			...	
+		},
+		nprs-MutingInfoA-r15	CHOICE {
+			po2-r15					BIT STRING (SIZE(2)),
+			po4-r15					BIT STRING (SIZE(4)),
+			po8-r15					BIT STRING (SIZE(8)),
+			po16-r15				BIT STRING (SIZE(16)),
+			...
+		}															OPTIONAL,	-- Cond MutingA
+		...
+	}																OPTIONAL	-- Cond PartA-TDD
+	]]
+}
+
+maxCarrier-r14	INTEGER ::= 5	-- upper bound on the number of NPRS-Info-r14 entries in PRS-Info-NB-r14
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-NeighbourCellInfoListNB-r14 ::= SEQUENCE (SIZE (1..maxCells-r14)) OF	-- list of 1..maxCells-r14 entries (maxCells-r14 is 72)
+														OTDOA-NeighbourCellInfoNB-r14
+
+OTDOA-NeighbourCellInfoNB-r14 ::= SEQUENCE {	-- every root component is OPTIONAL; two extension addition groups follow the extension marker
+	physCellIdNB-r14				INTEGER (0..503)		OPTIONAL,		-- Cond NoPRS-AD1
+	cellGlobalIdNB-r14				ECGI					OPTIONAL,		-- Cond NoPRS-AD2
+	carrierFreq-r14					CarrierFreq-NB-r14		OPTIONAL,		-- Cond NotSameAsRef1
+	earfcn-r14						ARFCN-ValueEUTRA-r14	OPTIONAL,		-- Cond Inband
+	eutra-NumCRS-Ports-r14			ENUMERATED {ports-1-or-2, ports-4, ...}	-- NOTE(review): identifiers differ from ports1-or-2/ports4 in OTDOA-ReferenceCellInfoNB-r14, and only this list is extensible
+															OPTIONAL, 		-- Cond NotsameAsRef2
+	otdoa-SIB1-NB-repetitions-r14	ENUMERATED { r4, r8, r16 }	
+															OPTIONAL,		-- Cond	NotSameAsRef3
+	nprsInfo-r14					PRS-Info-NB-r14			OPTIONAL,		-- Cond NotsameAsRef4
+	nprs-slotNumberOffset-r14		INTEGER (0..19)			OPTIONAL,		-- Cond NotsameAsRef5
+	nprs-SFN-Offset-r14				INTEGER (0..63)			OPTIONAL,		-- Cond NotsameAsRef6
+	nprs-SubframeOffset-r14			INTEGER (0..1279)		OPTIONAL,		-- Need OP
+	expectedRSTD-r14				INTEGER (0..16383)		OPTIONAL,		-- Cond NoPRS-AD3
+	expectedRSTD-Uncertainty-r14	INTEGER (0..1023)		OPTIONAL,		-- Cond NoPRS-AD3
+	prsNeighbourCellIndex-r14		INTEGER (1..72)			OPTIONAL,		-- Cond PRS-AD
+	...,
+	[[
+	nprsInfo-Type2-v1470			PRS-Info-NB-r14			OPTIONAL		-- Cond NotSameAsRef4
+	]],
+	[[	tdd-config-r15				TDD-Config-v1520		OPTIONAL		-- Need ON
+	]]
+}
+
+maxCells-r14	INTEGER ::= 72	-- upper bound on the number of entries in OTDOA-NeighbourCellInfoListNB-r14
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-RequestAssistanceData ::= SEQUENCE {	-- physCellId is the only root component; two extension addition groups
+	physCellId			INTEGER (0..503),
+	...,
+	[[
+	 adType-r14			BIT STRING { prs (0), nprs (1) } (SIZE (1..8))		OPTIONAL	-- named bits: prs is bit 0, nprs is bit 1; string length 1..8
+	]],
+	[[
+		nrPhysCellId-r15	INTEGER (0..1007)									OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-ProvideLocationInformation ::= SEQUENCE {	-- all components OPTIONAL; the NB-r14 measurement variant is an extension addition
+	otdoaSignalMeasurementInformation	OTDOA-SignalMeasurementInformation	OPTIONAL,
+	otdoa-Error							OTDOA-Error							OPTIONAL,
+	...,
+	[[
+		otdoaSignalMeasurementInformation-NB-r14	OTDOA-SignalMeasurementInformation-NB-r14
+																			OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-SignalMeasurementInformation ::= SEQUENCE {	-- mandatory: systemFrameNumber, physCellIdRef, neighbourMeasurementList; three extension addition groups
+	systemFrameNumber		BIT STRING (SIZE (10)),
+	physCellIdRef			INTEGER (0..503),
+	cellGlobalIdRef			ECGI					OPTIONAL,
+	earfcnRef				ARFCN-ValueEUTRA		OPTIONAL,		-- Cond NotSameAsRef0
+	referenceQuality		OTDOA-MeasQuality		OPTIONAL,
+	neighbourMeasurementList	NeighbourMeasurementList,
+	...,
+	[[ earfcnRef-v9a0		ARFCN-ValueEUTRA-v9a0	OPTIONAL		-- Cond NotSameAsRef1
+	]],
+	[[ tpIdRef-r14			INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer0
+		prsIdRef-r14		INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer1
+		additionalPathsRef-r14	
+							AdditionalPathList-r14	OPTIONAL,
+		nprsIdRef-r14		INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer2
+		carrierFreqOffsetNB-Ref-r14
+							CarrierFreqOffsetNB-r14	OPTIONAL,		-- Cond NB-IoT
+		hyperSFN-r14		BIT STRING (SIZE (10))	OPTIONAL		-- Cond H-SFN
+	]],
+	[[
+		motionTimeSource-r15		MotionTimeSource-r15		OPTIONAL
+	]]
+}
+
+NeighbourMeasurementList ::= SEQUENCE (SIZE(1..24)) OF NeighbourMeasurementElement	-- list of 1..24 entries
+
+NeighbourMeasurementElement ::= SEQUENCE {	-- mandatory: physCellIdNeighbour, rstd, rstd-Quality; three extension addition groups
+	physCellIdNeighbour		INTEGER (0..503),
+	cellGlobalIdNeighbour	ECGI					OPTIONAL,
+	earfcnNeighbour			ARFCN-ValueEUTRA		OPTIONAL,		-- Cond NotSameAsRef2
+	rstd					INTEGER (0..12711),
+	rstd-Quality			OTDOA-MeasQuality,
+	...,
+	[[ earfcnNeighbour-v9a0	ARFCN-ValueEUTRA-v9a0	OPTIONAL		-- Cond NotSameAsRef3
+	]],
+	[[ tpIdNeighbour-r14	INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer0
+		prsIdNeighbour-r14	INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer1
+		delta-rstd-r14		INTEGER (0..5)			OPTIONAL,
+		additionalPathsNeighbour-r14	
+							AdditionalPathList-r14	OPTIONAL,
+		nprsIdNeighbour-r14	INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer2
+		carrierFreqOffsetNB-Neighbour-r14
+							CarrierFreqOffsetNB-r14	OPTIONAL		-- Cond NB-IoT
+	]],
+	[[
+		delta-SFN-r15			INTEGER (-8192..8191)	OPTIONAL
+	]]
+}
+
+AdditionalPathList-r14 ::= SEQUENCE (SIZE(1..maxPaths-r14)) OF AdditionalPath-r14	-- list of 1..maxPaths-r14 entries (maxPaths-r14 is 2)
+
+maxPaths-r14	INTEGER ::= 2	-- upper bound on the number of entries in AdditionalPathList-r14
+
+MotionTimeSource-r15 ::= SEQUENCE {	-- one mandatory, extensible enumeration; the SEQUENCE itself has no extension marker
+	timeSource-r15				ENUMERATED {servingCell, referenceCell, gnss, mixed,
+											other, none, ...}
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-SignalMeasurementInformation-NB-r14 ::= SEQUENCE {	-- the r14 fields that OTDOA-SignalMeasurementInformation carries in an extension group are root components here
+	systemFrameNumber-r14			BIT STRING (SIZE (10)),
+	physCellIdRef-r14				INTEGER (0..503),
+	cellGlobalIdRef-r14				ECGI					OPTIONAL,
+	earfcnRef-r14					ARFCN-ValueEUTRA-r14	OPTIONAL,	-- Cond NotSameAsRef0
+	referenceQuality-r14			OTDOA-MeasQuality		OPTIONAL,
+	neighbourMeasurementList-r14	NeighbourMeasurementList-NB-r14,
+	tpIdRef-r14						INTEGER (0..4095)		OPTIONAL,	-- Cond ProvidedByServer0
+	prsIdRef-r14					INTEGER (0..4095)		OPTIONAL,	-- Cond ProvidedByServer1
+	additionalPathsRef-r14			AdditionalPathList-r14	OPTIONAL,
+	nprsIdRef-r14					INTEGER (0..4095)		OPTIONAL,	-- Cond ProvidedByServer2
+	carrierFreqOffsetNB-Ref-r14		CarrierFreqOffsetNB-r14	OPTIONAL,	-- Cond NB-IoT
+	hyperSFN-r14					BIT STRING (SIZE (10))	OPTIONAL,	-- Cond H-SFN
+	...
+}
+
+NeighbourMeasurementList-NB-r14 ::= SEQUENCE (SIZE(1..24)) OF NeighbourMeasurementElement-NB-r14	-- list of 1..24 entries
+
+NeighbourMeasurementElement-NB-r14 ::= SEQUENCE {	-- mandatory: physCellIdNeighbour-r14, rstd-r14, rstd-Quality-r14; no extension additions defined yet
+	physCellIdNeighbour-r14		INTEGER (0..503),
+	cellGlobalIdNeighbour-r14	ECGI					OPTIONAL,
+	earfcnNeighbour-r14			ARFCN-ValueEUTRA-r14	OPTIONAL,		-- Cond NotSameAsRef2
+	rstd-r14					INTEGER (0..12711),
+	rstd-Quality-r14			OTDOA-MeasQuality,
+	tpIdNeighbour-r14			INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer0
+	prsIdNeighbour-r14			INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer1
+	delta-rstd-r14				INTEGER (0..5)			OPTIONAL,
+	additionalPathsNeighbour-r14	
+								AdditionalPathList-r14	OPTIONAL,
+	nprsIdNeighbour-r14			INTEGER (0..4095)		OPTIONAL,		-- Cond ProvidedByServer2
+	carrierFreqOffsetNB-Neighbour-r14
+								CarrierFreqOffsetNB-r14	OPTIONAL,		-- Cond NB-IoT
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-MeasQuality ::= SEQUENCE {	-- three fixed-size bit strings (2, 5 and 3 bits); only error-NumSamples is OPTIONAL
+	error-Resolution		BIT STRING (SIZE (2)),
+	error-Value				BIT STRING (SIZE (5)),
+	error-NumSamples		BIT STRING (SIZE (3))				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AdditionalPath-r14 ::= SEQUENCE {	-- element type of AdditionalPathList-r14
+	relativeTimeDifference-r14	INTEGER (-256..255),
+	path-Quality-r14			OTDOA-MeasQuality				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-RequestLocationInformation ::= SEQUENCE {	-- assistanceAvailability is the only root component; two extension addition groups (r14, r15)
+	assistanceAvailability		BOOLEAN,
+	...,
+	[[
+		multipathRSTD-r14		ENUMERATED { requested }	OPTIONAL,		-- Need ON
+		maxNoOfRSTDmeas-r14		INTEGER (1..32)				OPTIONAL		-- Need ON
+	]],
+	[[
+		motionMeasurements-r15	ENUMERATED { requested }	OPTIONAL		-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-ProvideCapabilities ::= SEQUENCE {	-- otdoa-Mode is the only root component; it is a BIT STRING of 1..8 bits with three named bits
+	otdoa-Mode		BIT STRING {	ue-assisted				(0),
+									ue-assisted-NB-r14		(1),
+									ue-assisted-NB-TDD-r15	(2) } (SIZE (1..8)),
+	...,	-- every component below is an individual extension addition; this type uses no [[ ]] version brackets
+	supportedBandListEUTRA		SEQUENCE (SIZE (1..maxBands)) OF SupportedBandEUTRA		OPTIONAL,
+	supportedBandListEUTRA-v9a0	SEQUENCE (SIZE (1..maxBands)) OF SupportedBandEUTRA-v9a0
+																						OPTIONAL,
+	interFreqRSTDmeasurement-r10		ENUMERATED { supported }						OPTIONAL,
+	additionalNeighbourCellInfoList-r10	ENUMERATED { supported }						OPTIONAL,
+	prs-id-r14							ENUMERATED { supported }						OPTIONAL,
+	tp-separation-via-muting-r14		ENUMERATED { supported }						OPTIONAL,
+	additional-prs-config-r14			ENUMERATED { supported }						OPTIONAL,
+	prs-based-tbs-r14					ENUMERATED { supported }						OPTIONAL,
+	additionalPathsReport-r14			ENUMERATED { supported }						OPTIONAL,
+	densePrsConfig-r14					ENUMERATED { supported }						OPTIONAL,
+	maxSupportedPrsBandwidth-r14		ENUMERATED { n6, n15, n25, n50, n75, n100, ...}	OPTIONAL,
+	prsOccGroup-r14						ENUMERATED { supported }						OPTIONAL,
+	prsFrequencyHopping-r14				ENUMERATED { supported }						OPTIONAL,
+	maxSupportedPrsConfigs-r14			ENUMERATED { c2, c3 }							OPTIONAL,
+	periodicalReporting-r14				ENUMERATED { supported }						OPTIONAL,
+	multiPrbNprs-r14					ENUMERATED { supported }						OPTIONAL,
+	idleStateForMeasurements-r14		ENUMERATED { required }							OPTIONAL,
+	numberOfRXantennas-r14				ENUMERATED { rx1, ... }							OPTIONAL,
+	motionMeasurements-r15				ENUMERATED { supported }						OPTIONAL,
+	interRAT-RSTDmeasurement-r15		ENUMERATED { supported }						OPTIONAL
+}
+
+maxBands INTEGER ::= 64	-- upper bound of supportedBandListEUTRA and supportedBandListEUTRA-v9a0 in OTDOA-ProvideCapabilities
+
+SupportedBandEUTRA ::= SEQUENCE {	-- one mandatory band number in 1..maxFBI (maxFBI is 64); not extensible
+	bandEUTRA							INTEGER (1..maxFBI)
+}
+
+SupportedBandEUTRA-v9a0 ::=		SEQUENCE {	-- one optional band number in maxFBI-Plus1..maxFBI2, i.e. 65..256; not extensible
+	bandEUTRA-v9a0						INTEGER (maxFBI-Plus1..maxFBI2)		OPTIONAL
+}
+
+maxFBI								INTEGER	::=	64	-- Maximum value of frequency band indicator (upper bound of bandEUTRA in SupportedBandEUTRA)
+maxFBI-Plus1						INTEGER ::= 65	-- lowest value extended FBI range (lower bound of bandEUTRA-v9a0)
+maxFBI2								INTEGER ::= 256	-- highest value extended FBI range (upper bound of bandEUTRA-v9a0)
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-RequestCapabilities ::= SEQUENCE {	-- no components defined; contains only the extension marker
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-Error ::= CHOICE {	-- extensible CHOICE between location-server and target-device error causes
+	locationServerErrorCauses		OTDOA-LocationServerErrorCauses,
+	targetDeviceErrorCauses			OTDOA-TargetDeviceErrorCauses,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-LocationServerErrorCauses ::= SEQUENCE {	-- one mandatory cause; both the enumeration and the SEQUENCE are extensible
+	cause		ENUMERATED	{	undefined,
+								assistanceDataNotSupportedByServer,
+								assistanceDataSupportedButCurrentlyNotAvailableByServer,
+								...
+							},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-TargetDeviceErrorCauses ::= SEQUENCE {	-- one mandatory cause; both the enumeration and the SEQUENCE are extensible
+	cause		ENUMERATED {	undefined,
+								assistance-data-missing,
+								unableToMeasureReferenceCell,
+								unableToMeasureAnyNeighbourCell,
+								attemptedButUnableToMeasureSomeNeighbourCells,
+								...
+							},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-ProvideAssistanceData ::= SEQUENCE {	-- all components OPTIONAL: common data, generic (per GNSS) data, error, plus an r15 extension group
+	gnss-CommonAssistData			GNSS-CommonAssistData				OPTIONAL,	-- Need ON
+	gnss-GenericAssistData			GNSS-GenericAssistData				OPTIONAL,	-- Need ON
+	gnss-Error						A-GNSS-Error						OPTIONAL,	-- Need ON
+	...,
+	[[
+		gnss-PeriodicAssistData-r15	GNSS-PeriodicAssistData-r15			OPTIONAL	-- Cond CtrTrans
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-CommonAssistData ::= SEQUENCE {	-- all components OPTIONAL; two extension addition groups (r15 RTK data, r16 SSR correction points)
+	gnss-ReferenceTime				GNSS-ReferenceTime					OPTIONAL,	-- Need ON
+	gnss-ReferenceLocation			GNSS-ReferenceLocation				OPTIONAL,	-- Need ON
+	gnss-IonosphericModel			GNSS-IonosphericModel				OPTIONAL,	-- Need ON
+	gnss-EarthOrientationParameters	GNSS-EarthOrientationParameters		OPTIONAL,	-- Need ON
+	...,
+	[[
+		gnss-RTK-ReferenceStationInfo-r15
+									GNSS-RTK-ReferenceStationInfo-r15	OPTIONAL,	-- Need ON
+		gnss-RTK-CommonObservationInfo-r15	
+									GNSS-RTK-CommonObservationInfo-r15	OPTIONAL,	-- Cond RTK
+		gnss-RTK-AuxiliaryStationData-r15
+									GNSS-RTK-AuxiliaryStationData-r15	OPTIONAL	-- Need ON
+	]],
+	[[
+		gnss-SSR-CorrectionPoints-r16
+									GNSS-SSR-CorrectionPoints-r16		OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-GenericAssistData ::= SEQUENCE (SIZE (1..16)) OF GNSS-GenericAssistDataElement	-- list of 1..16 entries
+
+GNSS-GenericAssistDataElement ::= SEQUENCE {	-- gnss-ID is the only mandatory component; three extension addition groups (r12, r15, r16)
+	gnss-ID							GNSS-ID,
+	sbas-ID							SBAS-ID							OPTIONAL, 	-- Cond GNSS-ID-SBAS
+	gnss-TimeModels					GNSS-TimeModelList				OPTIONAL,	-- Need ON
+	gnss-DifferentialCorrections	GNSS-DifferentialCorrections	OPTIONAL,	-- Need ON
+	gnss-NavigationModel			GNSS-NavigationModel			OPTIONAL,	-- Need ON
+	gnss-RealTimeIntegrity			GNSS-RealTimeIntegrity			OPTIONAL,	-- Need ON
+	gnss-DataBitAssistance			GNSS-DataBitAssistance			OPTIONAL,	-- Need ON
+	gnss-AcquisitionAssistance		GNSS-AcquisitionAssistance		OPTIONAL,	-- Need ON
+	gnss-Almanac					GNSS-Almanac					OPTIONAL,	-- Need ON
+	gnss-UTC-Model					GNSS-UTC-Model					OPTIONAL,	-- Need ON
+	gnss-AuxiliaryInformation		GNSS-AuxiliaryInformation		OPTIONAL,	-- Need ON
+	...,
+	[[
+		bds-DifferentialCorrections-r12	
+									BDS-DifferentialCorrections-r12	OPTIONAL,	-- Cond	GNSS-ID-BDS
+		bds-GridModel-r12			BDS-GridModelParameter-r12		OPTIONAL	-- Cond	GNSS-ID-BDS
+	]],
+	[[
+		gnss-RTK-Observations-r15	GNSS-RTK-Observations-r15		OPTIONAL,	-- Need ON
+		glo-RTK-BiasInformation-r15	GLO-RTK-BiasInformation-r15		OPTIONAL,	-- Cond GNSS-ID-GLO
+		gnss-RTK-MAC-CorrectionDifferences-r15
+									GNSS-RTK-MAC-CorrectionDifferences-r15
+																	OPTIONAL,	-- Need ON
+		gnss-RTK-Residuals-r15		GNSS-RTK-Residuals-r15			OPTIONAL,	-- Need ON
+		gnss-RTK-FKP-Gradients-r15	GNSS-RTK-FKP-Gradients-r15		OPTIONAL,	-- Need ON
+		gnss-SSR-OrbitCorrections-r15
+									GNSS-SSR-OrbitCorrections-r15	OPTIONAL,	-- Need ON
+		gnss-SSR-ClockCorrections-r15
+									GNSS-SSR-ClockCorrections-r15	OPTIONAL,	-- Need ON
+		gnss-SSR-CodeBias-r15		GNSS-SSR-CodeBias-r15			OPTIONAL	-- Need ON
+	]],
+	[[
+		gnss-SSR-URA-r16					GNSS-SSR-URA-r16		OPTIONAL,	-- Need ON
+		gnss-SSR-PhaseBias-r16				GNSS-SSR-PhaseBias-r16	OPTIONAL,	-- Need ON
+		gnss-SSR-STEC-Correction-r16		GNSS-SSR-STEC-Correction-r16	
+																	OPTIONAL,	-- Need ON
+		gnss-SSR-GriddedCorrection-r16		GNSS-SSR-GriddedCorrection-r16	
+																	OPTIONAL,	-- Need ON
+		navic-DifferentialCorrections-r16	NavIC-DifferentialCorrections-r16
+																OPTIONAL,	-- Cond	GNSS-ID-NavIC
+		navic-GridModel-r16					NavIC-GridModelParameter-r16
+																OPTIONAL	-- Cond	GNSS-ID-NavIC
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-PeriodicAssistData-r15 ::= SEQUENCE {	-- every component, including the r16 extension group, is an OPTIONAL GNSS-PeriodicControlParam-r15
+	gnss-RTK-PeriodicObservations-r15		GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	glo-RTK-PeriodicBiasInformation-r15		GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	gnss-RTK-MAC-PeriodicCorrectionDifferences-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	gnss-RTK-PeriodicResiduals-r15			GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	gnss-RTK-FKP-PeriodicGradients-r15		GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	gnss-SSR-PeriodicOrbitCorrections-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL, 	-- Need ON
+	gnss-SSR-PeriodicClockCorrections-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL, 	-- Need ON
+	gnss-SSR-PeriodicCodeBias-r15			GNSS-PeriodicControlParam-r15	OPTIONAL, 	-- Need ON
+	...,
+	[[
+	gnss-SSR-PeriodicURA-r16				GNSS-PeriodicControlParam-r15	OPTIONAL,	-- Need ON
+	gnss-SSR-PeriodicPhaseBias-r16			GNSS-PeriodicControlParam-r15	OPTIONAL, 	-- Need ON
+	gnss-SSR-PeriodicSTEC-Correction-r16	GNSS-PeriodicControlParam-r15	OPTIONAL, 	-- Need ON
+	gnss-SSR-PeriodicGriddedCorrection-r16	GNSS-PeriodicControlParam-r15	OPTIONAL 	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceTime ::= SEQUENCE {	-- gnss-SystemTime is mandatory; optionally up to 16 GNSS-ReferenceTimeForOneCell entries
+	gnss-SystemTime				GNSS-SystemTime,
+	referenceTimeUnc			INTEGER (0..127)					OPTIONAL,	-- Cond noFTA
+	gnss-ReferenceTimeForCells	SEQUENCE (SIZE (1..16)) OF
+									GNSS-ReferenceTimeForOneCell	OPTIONAL,	-- Need ON
+	...
+}
+
+GNSS-ReferenceTimeForOneCell ::= SEQUENCE {	-- unlike in GNSS-ReferenceTime, referenceTimeUnc is mandatory here
+	networkTime				NetworkTime,
+	referenceTimeUnc			INTEGER (0..127),
+	bsAlign						ENUMERATED {true}	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SystemTime ::= SEQUENCE {	-- mandatory: gnss-TimeID, gnss-DayNumber, gnss-TimeOfDay; the remaining components are OPTIONAL
+	gnss-TimeID						GNSS-ID,
+	gnss-DayNumber					INTEGER (0..32767),
+	gnss-TimeOfDay					INTEGER (0..86399),
+	gnss-TimeOfDayFrac-msec			INTEGER (0..999)		OPTIONAL,	-- Need ON
+	notificationOfLeapSecond		BIT STRING (SIZE(2))	OPTIONAL,	-- Cond gnss-TimeID-glonass
+	gps-TOW-Assist					GPS-TOW-Assist			OPTIONAL,	-- Cond gnss-TimeID-gps
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GPS-TOW-Assist ::= SEQUENCE (SIZE(1..64)) OF GPS-TOW-AssistElement	-- list of 1..64 entries
+
+GPS-TOW-AssistElement ::= SEQUENCE {	-- all five components are mandatory constrained integers
+	satelliteID		INTEGER (1..64),
+	tlmWord			INTEGER (0..16383),
+	antiSpoof		INTEGER (0..1),
+	alert			INTEGER (0..1),
+	tlmRsvdBits		INTEGER (0..3),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NetworkTime ::= SEQUENCE {	-- cellID is an extensible CHOICE: eUTRA, uTRA and gSM in the root, nBIoT-r14 and nr-r15 as extension additions
+	secondsFromFrameStructureStart				INTEGER(0..12533),
+	fractionalSecondsFromFrameStructureStart	INTEGER(0..3999999),
+	frameDrift									INTEGER (-64..63)	OPTIONAL,	-- Cond GNSSsynch
+	cellID		CHOICE {
+				eUTRA		SEQUENCE {
+							physCellId			INTEGER (0..503),
+							cellGlobalIdEUTRA	CellGlobalIdEUTRA-AndUTRA	OPTIONAL,	-- Need ON
+							earfcn				ARFCN-ValueEUTRA,
+							...,
+							[[ earfcn-v9a0		ARFCN-ValueEUTRA-v9a0 OPTIONAL	-- Cond EARFCN-max
+							]]
+							},
+				uTRA		SEQUENCE {
+							mode	CHOICE {
+									fdd		SEQUENCE {
+											primary-CPICH-Info	INTEGER (0..511),
+											...
+											},
+									tdd		SEQUENCE {
+											cellParameters		INTEGER (0..127),
+											...
+											}
+									},
+							cellGlobalIdUTRA	CellGlobalIdEUTRA-AndUTRA	OPTIONAL,	-- Need ON
+							uarfcn				ARFCN-ValueUTRA,
+							...
+							},
+				gSM			SEQUENCE {
+							bcchCarrier			INTEGER (0..1023),
+							bsic				INTEGER (0..63),
+							cellGlobalIdGERAN	CellGlobalIdGERAN			OPTIONAL,	-- Need ON
+							...
+							},
+				...,
+				nBIoT-r14	SEQUENCE {
+							nbPhysCellId-r14	INTEGER (0..503),
+							nbCellGlobalId-r14	ECGI						OPTIONAL,	-- Need ON
+							nbCarrierFreq-r14	CarrierFreq-NB-r14,
+							...
+							},
+				nr-r15		SEQUENCE {
+							nrPhysCellId-r15	INTEGER (0..1007),
+							nrCellGlobalID-r15	NCGI-r15					OPTIONAL,	-- Need ON
+							nrARFCN-r15			ARFCN-ValueNR-r15,
+							...
+							}
+				},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceLocation ::= SEQUENCE {	-- extensible wrapper around one mandatory EllipsoidPointWithAltitudeAndUncertaintyEllipsoid
+		threeDlocation			EllipsoidPointWithAltitudeAndUncertaintyEllipsoid,
+		...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-IonosphericModel ::= SEQUENCE {	-- all models are OPTIONAL; klobucharModel2-r16 is an extension addition
+	klobucharModel			KlobucharModelParameter		OPTIONAL,	-- Need ON
+	neQuickModel			NeQuickModelParameter		OPTIONAL,	-- Need ON
+	...,
+	[[	klobucharModel2-r16		KlobucharModel2Parameter-r16	OPTIONAL	-- Need ON
+	]]
+
+}
+
+-- ASN1STOP
+-- ASN1START
+
+KlobucharModelParameter ::= SEQUENCE {	-- a 2-bit dataID followed by eight mandatory coefficients, each in -128..127
+	dataID			BIT STRING (SIZE (2)),
+	alfa0			INTEGER (-128..127),
+	alfa1			INTEGER (-128..127),
+	alfa2			INTEGER (-128..127),
+	alfa3			INTEGER (-128..127),
+	beta0			INTEGER (-128..127),
+	beta1			INTEGER (-128..127),
+	beta2			INTEGER (-128..127),
+	beta3			INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+KlobucharModel2Parameter-r16 ::= SEQUENCE {	-- nine mandatory coefficients alfa1..alfa9; note the ranges differ per coefficient (signed and unsigned)
+	alfa1-r16			INTEGER (0..1023),
+	alfa2-r16			INTEGER (-128..127),
+	alfa3-r16			INTEGER (0..255),
+	alfa4-r16			INTEGER (0..255),
+	alfa5-r16			INTEGER (0..255),
+	alfa6-r16			INTEGER (-128..127),
+	alfa7-r16			INTEGER (-128..127),
+	alfa8-r16			INTEGER (-128..127),
+	alfa9-r16			INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NeQuickModelParameter ::= SEQUENCE {	-- ai0..ai2 are mandatory; the five one-bit ionoStormFlag components are OPTIONAL
+	ai0			INTEGER (0..2047),
+	ai1			INTEGER (-1024..1023),
+	ai2			INTEGER (-8192..8191),
+	ionoStormFlag1	INTEGER (0..1)		OPTIONAL,	-- Need OP
+	ionoStormFlag2	INTEGER (0..1)		OPTIONAL,	-- Need OP
+	ionoStormFlag3	INTEGER (0..1)		OPTIONAL,	-- Need OP
+	ionoStormFlag4	INTEGER (0..1)		OPTIONAL,	-- Need OP
+	ionoStormFlag5	INTEGER (0..1)		OPTIONAL,	-- Need OP
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-EarthOrientationParameters ::= SEQUENCE {	-- seven mandatory constrained integers; extensible
+	teop				INTEGER (0..65535),
+	pmX					INTEGER (-1048576..1048575),
+	pmXdot				INTEGER (-16384..16383),
+	pmY					INTEGER (-1048576..1048575),
+	pmYdot				INTEGER (-16384..16383),
+	deltaUT1			INTEGER (-1073741824..1073741823),
+	deltaUT1dot			INTEGER (-262144..262143),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ReferenceStationInfo-r15 ::= SEQUENCE {	-- station ID, indicator and the three ECEF coordinates are mandatory; one r16 extension addition group
+	referenceStationID-r15					GNSS-ReferenceStationID-r15,
+	referenceStationIndicator-r15			ENUMERATED {physical, non-physical},
+	antenna-reference-point-ECEF-X-r15		INTEGER (-137438953472..137438953471),	-- range is -2^37..2^37-1 (38-bit signed); same for Y and Z
+	antenna-reference-point-ECEF-Y-r15		INTEGER (-137438953472..137438953471),
+	antenna-reference-point-ECEF-Z-r15		INTEGER (-137438953472..137438953471),
+	antennaHeight-r15						INTEGER (0..65535)					OPTIONAL, -- Need ON
+	antennaDescription-r15					AntennaDescription-r15				OPTIONAL, -- Need ON
+	antenna-reference-point-unc-r15			AntennaReferencePointUnc-r15		OPTIONAL, -- Need ON
+	physical-reference-station-info-r15		PhysicalReferenceStationInfo-r15	OPTIONAL, -- Cond NP
+	...,
+	[[
+	equalIntegerAmbiguityLevel-r16			EqualIntegerAmbiguityLevel-r16		OPTIONAL -- Need ON
+	]]
+}
+
+AntennaDescription-r15 ::= SEQUENCE {	-- mandatory descriptor string of 1..256 characters plus an optional single-valued enumeration
+	antennaDescriptor-r15					VisibleString (SIZE (1..256)),
+	antennaSetUpID-r15						ENUMERATED { non-zero }				OPTIONAL, -- Need OP
+	...
+}
+
+AntennaReferencePointUnc-r15 ::= SEQUENCE {	-- per axis (X, Y, Z): uncertainty in 0..255 and confidence in 0..100, all mandatory
+	uncertainty-X-r15						INTEGER (0..255),
+	confidence-X-r15						INTEGER (0..100),
+	uncertainty-Y-r15						INTEGER (0..255),
+	confidence-Y-r15						INTEGER (0..100),
+	uncertainty-Z-r15						INTEGER (0..255),
+	confidence-Z-r15						INTEGER (0..100),
+	...
+}
+
+PhysicalReferenceStationInfo-r15 ::= SEQUENCE {	-- same ID type and ECEF coordinate ranges as GNSS-RTK-ReferenceStationInfo-r15; only the uncertainty is OPTIONAL
+	physicalReferenceStationID-r15			GNSS-ReferenceStationID-r15,
+	physical-ARP-ECEF-X-r15					INTEGER (-137438953472..137438953471),
+	physical-ARP-ECEF-Y-r15					INTEGER (-137438953472..137438953471),
+	physical-ARP-ECEF-Z-r15					INTEGER (-137438953472..137438953471),
+	physical-ARP-unc-r15					AntennaReferencePointUnc-r15		OPTIONAL, -- Need ON
+	...
+}
+
+EqualIntegerAmbiguityLevel-r16 ::= CHOICE {	-- non-extensible CHOICE: either NULL (all stations) or an explicit station list
+	allReferenceStations-r16				NULL,
+	referenceStationList-r16				ReferenceStationList-r16
+}
+
+ReferenceStationList-r16 ::= SEQUENCE (SIZE(1..16)) OF GNSS-ReferenceStationID-r15	-- list of 1..16 station IDs
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-CommonObservationInfo-r15 ::= SEQUENCE {	-- all five components are mandatory; extensible
+	referenceStationID-r15				GNSS-ReferenceStationID-r15,
+	clockSteeringIndicator-r15			INTEGER (0..3),
+	externalClockIndicator-r15			INTEGER (0..3),
+	smoothingIndicator-r15				BIT STRING (SIZE(1)),
+	smoothingInterval-r15				BIT STRING (SIZE(3)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-AuxiliaryStationData-r15 ::= SEQUENCE {	-- network ID, master station ID and the auxiliary station list are mandatory; subNetworkID-r15 is OPTIONAL
+	networkID-r15					GNSS-NetworkID-r15,
+	subNetworkID-r15				GNSS-SubNetworkID-r15					OPTIONAL,	-- Need ON
+	master-referenceStationID-r15	GNSS-ReferenceStationID-r15,
+	auxiliaryStationList-r15		AuxiliaryStationList-r15,
+	...
+}
+
+AuxiliaryStationList-r15 ::= SEQUENCE (SIZE (1..32)) OF AuxiliaryStationElement-r15	-- list of 1..32 entries
+
+AuxiliaryStationElement-r15 ::= SEQUENCE {	-- station ID plus three mandatory signed deltas; aux-ARP-unc-r15 is OPTIONAL
+	aux-referenceStationID-r15				GNSS-ReferenceStationID-r15,
+	aux-master-delta-latitude-r15			INTEGER (-524288..524287),
+	aux-master-delta-longitude-r15			INTEGER (-1048576..1048575),
+	aux-master-delta-height-r15				INTEGER (-4194304..4194303),
+	aux-ARP-unc-r15							Aux-ARP-Unc-r15					OPTIONAL,	-- Need ON
+	...
+}
+
+Aux-ARP-Unc-r15 ::= SEQUENCE {	-- horizontal uncertainty and confidence are mandatory; the vertical pair is OPTIONAL
+	horizontalUncertainty-r15				INTEGER (0..255),
+	horizontalConfidence-r15				INTEGER (0..100),
+	verticalUncertainty-r15					INTEGER (0..255)				OPTIONAL,	-- Need ON
+	verticalConfidence-r15					INTEGER (0..100)				OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-CorrectionPoints-r16 ::= SEQUENCE {	-- a set ID plus a non-extensible CHOICE between a list and an array representation
+	correctionPointSetID-r16			INTEGER (0..16383),
+	correctionPoints-r16				CHOICE {
+		listOfCorrectionPoints-r16			GNSS-SSR-ListOfCorrectionPoints-r16,
+		arrayOfCorrectionPoints-r16			GNSS-SSR-ArrayOfCorrectionPoints-r16
+	},
+	...
+}
+
+GNSS-SSR-ListOfCorrectionPoints-r16 ::= SEQUENCE {	-- a reference point plus 0..63 relative locations (the list may be empty)
+	referencePointLatitude-r16		INTEGER (-16384..16383),
+	referencePointLongitude-r16		INTEGER (-32768..32767),
+	relativeLocationsList-r16		SEQUENCE (SIZE (0..63)) OF RelativeLocationElement-r16,
+	...
+}
+
+RelativeLocationElement-r16 ::= SEQUENCE {	-- element type of relativeLocationsList-r16; two mandatory signed deltas
+	deltaLatitude-r16				INTEGER (-512..511),
+	deltaLongitude-r16				INTEGER (-1024..1023),
+	...
+}
+
+GNSS-SSR-ArrayOfCorrectionPoints-r16 ::=SEQUENCE {	-- a reference point, step counts and step sizes are mandatory; the 64-bit bitmaskOfGrids-r16 is OPTIONAL
+	referencePointLatitude-r16			INTEGER (-16384..16383),
+	referencePointLongitude-r16			INTEGER (-32768..32767),
+	numberOfStepsLatitude-r16			INTEGER (0..63),
+	numberOfStepsLongitude-r16			INTEGER (0..63),
+	stepOfLatitude-r16					INTEGER (1..511),
+	stepOfLongitude-r16					INTEGER (1..1023),
+	bitmaskOfGrids-r16					BIT STRING (SIZE(64))				OPTIONAL,	-- Need OP
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-TimeModelList ::= SEQUENCE (SIZE (1..15)) OF GNSS-TimeModelElement	-- list of 1..15 entries
+
+GNSS-TimeModelElement ::= SEQUENCE {	-- mandatory: gnss-TimeModelRefTime, tA0, gnss-TO-ID; the remaining components are OPTIONAL
+	gnss-TimeModelRefTime		INTEGER (0..65535),
+	tA0							INTEGER (-67108864..67108863),
+	tA1							INTEGER (-4096..4095)					OPTIONAL,	-- Need ON
+	tA2							INTEGER (-64..63)						OPTIONAL,	-- Need ON
+	gnss-TO-ID					INTEGER (1..15),
+	weekNumber					INTEGER (0..8191)						OPTIONAL,	-- Need ON
+	deltaT						INTEGER (-128..127)						OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DifferentialCorrections ::= SEQUENCE {	-- a reference time plus a list of per-signal corrections, both mandatory
+	dgnss-RefTime		INTEGER (0..3599),
+	dgnss-SgnTypeList	DGNSS-SgnTypeList,
+	...
+}
+
+DGNSS-SgnTypeList ::= SEQUENCE (SIZE (1..3)) OF DGNSS-SgnTypeElement	-- list of 1..3 entries
+
+DGNSS-SgnTypeElement ::= SEQUENCE {	-- signal ID, status/health value and a satellite correction list, all mandatory
+	gnss-SignalID		GNSS-SignalID,
+	gnss-StatusHealth	INTEGER (0..7),
+	dgnss-SatList		DGNSS-SatList,
+	...
+}
+
+DGNSS-SatList ::= SEQUENCE (SIZE (1..64)) OF DGNSS-CorrectionsElement	-- list of 1..64 entries
+
+DGNSS-CorrectionsElement ::= SEQUENCE {	-- note the symmetric ranges -2047..2047 and -127..127 (the most negative two's-complement value is excluded)
+	svID				SV-ID,
+	iod				BIT STRING (SIZE(11)),
+	udre				INTEGER (0..3),		
+	pseudoRangeCor		INTEGER (-2047..2047),
+	rangeRateCor		INTEGER (-127..127),
+	udreGrowthRate		INTEGER (0..7)			OPTIONAL,	-- Need ON
+	udreValidityTime	INTEGER (0..7)			OPTIONAL,	-- Need ON	
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-NavigationModel ::= SEQUENCE {	-- a one-bit flag plus a mandatory satellite list
+	nonBroadcastIndFlag		INTEGER (0..1),
+	gnss-SatelliteList		GNSS-NavModelSatelliteList,
+	...
+}
+
+GNSS-NavModelSatelliteList ::= SEQUENCE (SIZE(1..64)) OF GNSS-NavModelSatelliteElement	-- list of 1..64 entries
+
+GNSS-NavModelSatelliteElement ::= SEQUENCE {	-- all root components are mandatory; svHealthExt-v1240 (4 bits) is an OPTIONAL extension addition
+	svID				SV-ID,
+	svHealth			BIT STRING (SIZE(8)),	
+	iod					BIT STRING (SIZE(11)),	
+	gnss-ClockModel		GNSS-ClockModel,
+	gnss-OrbitModel		GNSS-OrbitModel,
+	...,
+	[[	svHealthExt-v1240 BIT STRING (SIZE(4))			OPTIONAL		-- Need ON
+	]]
+}
+
+GNSS-ClockModel ::= CHOICE {	-- extensible CHOICE: Model-1..5 are root alternatives, Model-6..8 are extension additions
+	standardClockModelList	StandardClockModelList,			-- Model-1
+	nav-ClockModel			NAV-ClockModel,					-- Model-2
+	cnav-ClockModel			CNAV-ClockModel,				-- Model-3
+	glonass-ClockModel		GLONASS-ClockModel,				-- Model-4
+	sbas-ClockModel			SBAS-ClockModel,				-- Model-5
+	...,
+	bds-ClockModel-r12		BDS-ClockModel-r12,				-- Model-6
+	bds-ClockModel2-r16		BDS-ClockModel2-r16,			-- Model-7
+	navic-ClockModel-r16	NavIC-ClockModel-r16			-- Model-8
+}
+
+GNSS-OrbitModel ::= CHOICE {	-- extensible CHOICE: Model-1..5 are root alternatives, Model-6..8 are extension additions
+	keplerianSet			NavModelKeplerianSet,			-- Model-1
+	nav-KeplerianSet		NavModelNAV-KeplerianSet,		-- Model-2
+	cnav-KeplerianSet		NavModelCNAV-KeplerianSet,		-- Model-3
+	glonass-ECEF			NavModel-GLONASS-ECEF,			-- Model-4
+	sbas-ECEF				NavModel-SBAS-ECEF,				-- Model-5
+	...,
+	bds-KeplerianSet-r12	NavModel-BDS-KeplerianSet-r12,	-- Model-6
+	bds-KeplerianSet2-r16	NavModel-BDS-KeplerianSet2-r16,	-- Model-7
+	navic-KeplerianSet-r16	NavModel-NavIC-KeplerianSet-r16	-- Model-8
+}
+
+-- ASN1STOP
+-- ASN1START
+
+StandardClockModelList ::= SEQUENCE (SIZE(1..2)) OF StandardClockModelElement	-- list of 1..2 entries
+
+StandardClockModelElement ::= SEQUENCE {	-- stanClockTgd and stanModelID are OPTIONAL; all other components are mandatory
+	stanClockToc			INTEGER (0..16383),
+	stanClockAF2			INTEGER (-32..31),
+	stanClockAF1			INTEGER (-1048576..1048575),
+	stanClockAF0			INTEGER (-1073741824..1073741823),
+	stanClockTgd			INTEGER (-512..511)				OPTIONAL,	-- Need ON
+	sisa					INTEGER (0..255),
+	stanModelID				INTEGER (0..1)					OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NAV-ClockModel ::= SEQUENCE {	-- five mandatory constrained integers; extensible (Model-2 alternative of GNSS-ClockModel)
+	navToc			INTEGER (0..37799),
+	navaf2			INTEGER (-128..127),
+	navaf1			INTEGER (-32768..32767),
+	navaf0			INTEGER (-2097152..2097151),
+	navTgd			INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+CNAV-ClockModel ::= SEQUENCE {	-- the six cnavISC components are OPTIONAL; everything before them is mandatory (Model-3 alternative of GNSS-ClockModel)
+	cnavToc			INTEGER (0..2015),
+	cnavTop			INTEGER (0..2015),
+	cnavURA0		INTEGER (-16..15),
+	cnavURA1		INTEGER (0..7),
+	cnavURA2		INTEGER (0..7),
+	cnavAf2			INTEGER (-512..511),
+	cnavAf1			INTEGER (-524288..524287),
+	cnavAf0			INTEGER (-33554432..33554431),
+	cnavTgd			INTEGER (-4096..4095),
+	cnavISCl1cp		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	cnavISCl1cd		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	cnavISCl1ca		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	cnavISCl2c		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	cnavISCl5i5		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	cnavISCl5q5		INTEGER (-4096..4095)			OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GLONASS-ClockModel ::= SEQUENCE {	-- gloTau and gloGamma are mandatory; gloDeltaTau is OPTIONAL (Model-4 alternative of GNSS-ClockModel)
+	gloTau			INTEGER (-2097152..2097151),
+	gloGamma		INTEGER (-1024..1023),
+	gloDeltaTau		INTEGER (-16..15)				OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+SBAS-ClockModel ::= SEQUENCE {	-- three mandatory constrained integers; extensible (Model-5 alternative of GNSS-ClockModel)
+	sbasTo			INTEGER (0..5399),
+	sbasAgfo		INTEGER (-2048..2047),
+	sbasAgf1		INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-ClockModel-r12 ::= SEQUENCE {	-- six mandatory constrained integers; extensible (Model-6 alternative of GNSS-ClockModel)
+	bdsAODC-r12			INTEGER (0..31),
+	bdsToc-r12			INTEGER (0..131071),
+	bdsA0-r12			INTEGER (-8388608..8388607),
+	bdsA1-r12			INTEGER (-2097152..2097151),
+	bdsA2-r12			INTEGER (-1024..1023),
+	bdsTgd1-r12			INTEGER (-512..511),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+BDS-ClockModel2-r16 ::= SEQUENCE {	-- six mandatory constrained integers; extensible (Model-7 alternative of GNSS-ClockModel)
+	bdsToc-r16		INTEGER (0..2047),
+	bdsA0-r16		INTEGER (-16777216..16777215),
+	bdsA1-r16		INTEGER (-2097152..2097151),
+	bdsA2-r16		INTEGER (-1024..1023),
+	bdsTgdB1Cp-r16	INTEGER (-2048..2047),
+	bdsIscB1Cd-r16	INTEGER (-2048..2047),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-ClockModel-r16 ::= SEQUENCE {	-- five mandatory constrained integers; extensible (Model-8 alternative of GNSS-ClockModel)
+	navic-Toc-r16			INTEGER (0..65535),
+	navic-af2-r16			INTEGER (-128..127),
+	navic-af1-r16			INTEGER (-32768..32767),
+	navic-af0-r16			INTEGER (-2097152..2097151),
+	navic-Tgd-r16			INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModelKeplerianSet ::= SEQUENCE {	-- sixteen mandatory constrained integers; extensible (Model-1 alternative of GNSS-OrbitModel)
+	keplerToe		INTEGER (0 .. 16383),
+	keplerW			INTEGER (-2147483648..2147483647),
+	keplerDeltaN	INTEGER (-32768..32767),
+	keplerM0		INTEGER (-2147483648..2147483647),
+	keplerOmegaDot	INTEGER (-8388608.. 8388607),
+	keplerE		INTEGER (0..4294967295),
+	keplerIDot		INTEGER (-8192..8191),
+	keplerAPowerHalf INTEGER (0.. 4294967295),
+	keplerI0		INTEGER (-2147483648..2147483647),
+	keplerOmega0	INTEGER (-2147483648..2147483647),
+	keplerCrs		INTEGER (-32768..32767),
+	keplerCis		INTEGER (-32768..32767),
+	keplerCus		INTEGER (-32768..32767),
+	keplerCrc		INTEGER (-32768..32767),
+	keplerCic		INTEGER (-32768..32767),
+	keplerCuc		INTEGER (-32768..32767),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModelNAV-KeplerianSet ::= SEQUENCE {	-- Model-2 alternative of GNSS-OrbitModel; all components mandatory except the addNAVparam group
+	navURA			INTEGER (0..15),
+	navFitFlag		INTEGER (0..1),
+	navToe			INTEGER (0..37799),
+	navOmega		INTEGER (-2147483648..2147483647),
+	navDeltaN		INTEGER (-32768..32767),
+	navM0			INTEGER (-2147483648..2147483647),
+	navOmegaADot	INTEGER (-8388608..8388607),
+	navE			INTEGER (0..4294967295),
+	navIDot			INTEGER (-8192..8191),
+	navAPowerHalf	INTEGER (0..4294967295),
+	navI0			INTEGER (-2147483648..2147483647),
+	navOmegaA0		INTEGER (-2147483648..2147483647),
+	navCrs			INTEGER (-32768..32767),
+	navCis			INTEGER (-32768..32767),
+	navCus			INTEGER (-32768..32767),
+	navCrc			INTEGER (-32768..32767),
+	navCic			INTEGER (-32768..32767),
+	navCuc			INTEGER (-32768..32767),
+	addNAVparam		SEQUENCE {
+		ephemCodeOnL2	INTEGER (0..3),
+		ephemL2Pflag	INTEGER (0..1),
+		ephemSF1Rsvd	SEQUENCE {
+			reserved1		INTEGER (0..8388607),	-- 23-bit field
+			reserved2		INTEGER (0..16777215),	-- 24-bit field
+			reserved3		INTEGER (0..16777215),	-- 24-bit field
+			reserved4		INTEGER (0..65535)		-- 16-bit field
+		},
+		ephemAODA		INTEGER (0..31)
+	}	OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModelCNAV-KeplerianSet ::= SEQUENCE {	-- Extensible record of 19 mandatory INTEGER fields; several ranges are 33 bits wide and therefore need a 64-bit host integer
+	cnavTop				INTEGER (0..2015),
+	cnavURAindex		INTEGER (-16..15),	-- signed 5-bit range
+	cnavDeltaA			INTEGER (-33554432..33554431),	-- signed 26-bit range
+	cnavAdot			INTEGER (-16777216..16777215),	-- signed 25-bit range
+	cnavDeltaNo			INTEGER (-65536..65535),	-- signed 17-bit range
+	cnavDeltaNoDot		INTEGER (-4194304..4194303),	-- signed 23-bit range
+	cnavMo				INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	cnavE				INTEGER (0..8589934591),	-- unsigned 33-bit range; exceeds 32-bit integers
+	cnavOmega			INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	cnavOMEGA0			INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	cnavDeltaOmegaDot	INTEGER (-65536..65535),
+	cnavIo				INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	cnavIoDot			INTEGER (-16384..16383),	-- signed 15-bit range
+	cnavCis				INTEGER (-32768..32767),
+	cnavCic				INTEGER (-32768..32767),
+	cnavCrs				INTEGER (-8388608..8388607),	-- signed 24-bit range
+	cnavCrc				INTEGER (-8388608..8388607),
+	cnavCus				INTEGER (-1048576..1048575),	-- signed 21-bit range
+	cnavCuc				INTEGER (-1048576..1048575),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModel-GLONASS-ECEF ::= SEQUENCE {	-- Extensible record of 13 mandatory fields: four leading parameters, then a (value, dot, dotdot) triple for each of X, Y and Z
+	gloEn				INTEGER (0..31),
+	gloP1				BIT STRING (SIZE(2)),	-- fixed-length 2-bit string
+	gloP2				BOOLEAN,
+	gloM				INTEGER (0..3),
+	gloX				INTEGER (-67108864..67108863),	-- signed 27-bit range (same for gloY and gloZ)
+	gloXdot				INTEGER (-8388608..8388607),	-- signed 24-bit range (same for gloYdot and gloZdot)
+	gloXdotdot			INTEGER (-16..15),	-- signed 5-bit range (same for gloYdotdot and gloZdotdot)
+	gloY				INTEGER (-67108864..67108863),
+	gloYdot				INTEGER (-8388608..8388607),
+	gloYdotdot			INTEGER (-16..15),
+	gloZ				INTEGER (-67108864..67108863),
+	gloZdot				INTEGER (-8388608..8388607),
+	gloZdotdot			INTEGER (-16..15),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModel-SBAS-ECEF ::= SEQUENCE {	-- Extensible record: one conditional field (sbasTo), a 4-bit accuracy string and nine mandatory INTEGER fields for the Xg/Yg/Zg axes
+	sbasTo				INTEGER (0..5399)					OPTIONAL,	-- Cond ClockModel
+	sbasAccuracy		BIT STRING (SIZE(4)),
+	sbasXg				INTEGER (-536870912..536870911),	-- signed 30-bit range (same for sbasYg)
+	sbasYg				INTEGER (-536870912..536870911),
+	sbasZg				INTEGER (-16777216..16777215),	-- signed 25-bit range; narrower than Xg and Yg
+	sbasXgDot			INTEGER (-65536..65535),	-- signed 17-bit range (same for sbasYgDot)
+	sbasYgDot			INTEGER (-65536..65535),
+	sbasZgDot			INTEGER (-131072..131071),	-- signed 18-bit range; wider than XgDot and YgDot
+	sbasXgDotDot		INTEGER (-512..511),
+	sbagYgDotDot		INTEGER (-512..511),	-- NOTE(review): identifier starts with sbag, unlike its sbas siblings; generated code exposes this name, so confirm against the published spec before renaming
+	sbasZgDotDot		INTEGER (-512..511),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModel-BDS-KeplerianSet-r12 ::= SEQUENCE {	-- Extensible record of 18 mandatory INTEGER fields; presumably the BDS orbit model introduced in release 12 (confirm field semantics against the LPP spec)
+	bdsAODE-r12				INTEGER (0..31),
+	bdsURAI-r12				INTEGER (0..15),
+	bdsToe-r12				INTEGER (0..131071),	-- unsigned 17-bit range
+	bdsAPowerHalf-r12		INTEGER (0..4294967295),	-- full unsigned 32-bit range; does not fit a signed 32-bit integer
+	bdsE-r12				INTEGER (0..4294967295),	-- full unsigned 32-bit range; does not fit a signed 32-bit integer
+	bdsW-r12				INTEGER (-2147483648..2147483647),
+	bdsDeltaN-r12			INTEGER (-32768..32767),
+	bdsM0-r12				INTEGER (-2147483648..2147483647),
+	bdsOmega0-r12			INTEGER (-2147483648..2147483647),
+	bdsOmegaDot-r12			INTEGER (-8388608..8388607),
+	bdsI0-r12				INTEGER (-2147483648..2147483647),
+	bdsIDot-r12				INTEGER (-8192..8191),
+	bdsCuc-r12				INTEGER (-131072..131071),	-- the six trailing C* fields all share the signed 18-bit range
+	bdsCus-r12				INTEGER (-131072..131071),
+	bdsCrc-r12				INTEGER (-131072..131071),
+	bdsCrs-r12				INTEGER (-131072..131071),
+	bdsCic-r12				INTEGER (-131072..131071),
+	bdsCis-r12				INTEGER (-131072..131071),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModel-BDS-KeplerianSet2-r16 ::= SEQUENCE {	-- Extensible record of 19 mandatory INTEGER fields; several ranges are 33 bits wide and therefore need a 64-bit host integer
+	bdsIODE-r16			INTEGER (0..255),
+	bdsToe-r16					INTEGER (0..2047),
+	bdsDeltaA-r16			INTEGER (-33554432..33554431),	-- signed 26-bit range
+	bdsAdot-r16					INTEGER (-16777216..16777216),	-- NOTE(review): upper bound is 2^24, not 2^24 - 1 as in the similarly shaped cnavAdot; a bound change alters the value range and possibly the encoded width, so confirm against the published spec before touching it
+	bdsDeltaN0-r16			INTEGER (-65536..65535),
+	bdsDeltaN0dot-r16		INTEGER (-4194304..4194303),
+	bdsM0-r16					INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	bdsE-r16					INTEGER (0..8589934591),	-- unsigned 33-bit range; exceeds 32-bit integers
+	bdsOmega-r16			INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	bdsOmega0-r16			INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	bdsI0-r16					INTEGER (-4294967296..4294967295),	-- signed 33-bit range; exceeds 32-bit integers
+	bdsOmegaDot-r16 		INTEGER (-262144..262143),	-- signed 19-bit range
+	bdsI0Dot-r16			INTEGER (-16384..16383),
+	bdsCuc-r16				INTEGER (-1048576..1048575),
+	bdsCus-r16				INTEGER (-1048576..1048575),
+	bdsCrc-r16				INTEGER (-8388608..8388607),
+	bdsCrs-r16				INTEGER (-8388608..8388607),
+	bdsCic-r16				INTEGER (-32768..32767),
+	bdsCis-r16				INTEGER (-32768..32767),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavModel-NavIC-KeplerianSet-r16	 ::= SEQUENCE {	-- Extensible record of 17 mandatory INTEGER fields; component list parallels NavModelKeplerianSet with an added URAI field
+	navic-Toe-r16			INTEGER (0..65536),	-- NOTE(review): upper bound is 65536 (2^16), one more than navic-Toc-r16 (0..65535) in NavIC-ClockModel-r16; confirm against the published spec before touching it
+	navic-URAI-r16			INTEGER (0..15),
+	navic-W-r16				INTEGER (-2147483648..2147483647),
+	navic-DeltaN-r16		INTEGER (-2097152..2097151),	-- signed 22-bit range
+	navic-M0-r16			INTEGER (-2147483648..2147483647),
+	navic-OmegaDot-r16		INTEGER (-2147483648..2147483647),
+	navic-E-r16				INTEGER (0..4294967295),	-- full unsigned 32-bit range; does not fit a signed 32-bit integer
+	navic-IDot-r16			INTEGER (-8192..8191),
+	navic-APowerHalf-r16 	INTEGER (0.. 4294967295),	-- full unsigned 32-bit range; does not fit a signed 32-bit integer
+	navic-I0-r16			INTEGER (-2147483648..2147483647),
+	navic-Omega0-r16		INTEGER (-2147483648..2147483647),
+	navic-Crs-r16			INTEGER (-32768..32767),
+	navic-Cis-r16			INTEGER (-32768..32767),
+	navic-Cus-r16			INTEGER (-32768..32767),
+	navic-Crc-r16			INTEGER (-32768..32767),
+	navic-Cic-r16			INTEGER (-32768..32767),
+	navic-Cuc-r16			INTEGER (-32768..32767),
+	...	-- extension marker: later versions may append components
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RealTimeIntegrity ::= SEQUENCE {	-- Extensible wrapper whose only root component is the mandatory bad-signal list
+	gnss-BadSignalList	GNSS-BadSignalList,
+	...
+}
+
+GNSS-BadSignalList ::= SEQUENCE (SIZE(1..64)) OF BadSignalElement	-- 1 to 64 entries; an empty list cannot be encoded
+
+BadSignalElement ::= SEQUENCE {	-- One list entry: a satellite identifier plus an optional signal set
+	badSVID			SV-ID,						
+	badSignalID		GNSS-SignalIDs	OPTIONAL,	-- Need OP
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DataBitAssistance ::= SEQUENCE {	-- Extensible record: a time reference (gnss-TOD with optional gnss-TODfrac) and a per-satellite list of data bits
+	gnss-TOD				INTEGER (0..3599),	-- 3600 values; presumably seconds within an hour (confirm against the LPP spec)
+	gnss-TODfrac			INTEGER (0..999)		OPTIONAL,	-- Need ON
+	gnss-DataBitsSatList	GNSS-DataBitsSatList,
+	...
+}
+
+GNSS-DataBitsSatList ::= SEQUENCE (SIZE(1..64))OF GNSS-DataBitsSatElement	-- 1 to 64 satellites
+
+GNSS-DataBitsSatElement ::= SEQUENCE {	-- One satellite: its identifier and the list of per-signal data bits
+	svID					SV-ID,
+	gnss-DataBitsSgnList	GNSS-DataBitsSgnList,
+	...
+}
+
+GNSS-DataBitsSgnList ::= SEQUENCE (SIZE(1..8)) OF GNSS-DataBitsSgnElement	-- 1 to 8 signals per satellite
+
+GNSS-DataBitsSgnElement ::= SEQUENCE {	-- One signal: its identifier and a variable-length bit string
+	gnss-SignalType			GNSS-SignalID,
+	gnss-DataBits			BIT STRING (SIZE (1..1024)),	-- variable length, 1 to 1024 bits
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AcquisitionAssistance ::= SEQUENCE {	-- Extensible record: a signal identifier, a per-satellite list, and one extension addition (confidence-r10)
+	gnss-SignalID				GNSS-SignalID,
+	gnss-AcquisitionAssistList	GNSS-AcquisitionAssistList,
+	...,
+	confidence-r10				INTEGER (0..100)	OPTIONAL	-- Need ON
+}
+
+GNSS-AcquisitionAssistList ::= SEQUENCE (SIZE(1..64)) OF GNSS-AcquisitionAssistElement	-- 1 to 64 satellites
+
+GNSS-AcquisitionAssistElement ::= SEQUENCE {	-- One satellite: nine mandatory root fields plus two optional extension additions
+	svID						SV-ID,
+	doppler0					INTEGER (-2048..2047),	-- signed 12-bit range
+	doppler1					INTEGER (0..63),
+	dopplerUncertainty			INTEGER (0..4),	-- five values; dopplerUncertaintyExt-r10 below is a separate extensible enumeration
+	codePhase					INTEGER (0..1022),	-- note the bound is 1022, not 1023; see codePhase1023 below
+	intCodePhase				INTEGER (0..127),
+	codePhaseSearchWindow		INTEGER (0..31),
+	azimuth						INTEGER (0..511),
+	elevation					INTEGER (0..127),		
+	...,
+	codePhase1023				BOOLEAN				OPTIONAL,	-- Need OP
+	dopplerUncertaintyExt-r10	ENUMERATED {	d60,
+												d80,
+												d100,
+												d120,
+												noInformation, ... }	OPTIONAL	-- Need ON
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-Almanac ::= SEQUENCE {	-- Extensible record: three optional header fields, a completeness flag, the almanac list, and two later extension groups that widen the header ranges
+	weekNumber					INTEGER (0..255)	OPTIONAL,	-- Need ON
+	toa							INTEGER (0..255)	OPTIONAL,	-- Need ON
+	ioda						INTEGER (0..3)		OPTIONAL,	-- Need ON
+	completeAlmanacProvided		BOOLEAN,
+	gnss-AlmanacList			GNSS-AlmanacList,
+	...,
+	[[	toa-ext-v1240			INTEGER (256..1023)	OPTIONAL,	-- Need ON
+		ioda-ext-v1240			INTEGER (4..15)		OPTIONAL	-- Need ON
+	]],
+	[[
+		weekNumber-ext-r16		INTEGER (256..8191)			OPTIONAL,	-- Need ON
+		toa-ext2-r16			INTEGER (256..65535)		OPTIONAL	-- Need ON
+	]]
+}
+
+GNSS-AlmanacList ::= SEQUENCE (SIZE(1..64)) OF GNSS-AlmanacElement	-- 1 to 64 entries; each entry picks its own model via the CHOICE below
+
+GNSS-AlmanacElement ::= CHOICE {	-- Extensible choice: six root almanac models plus two added after the extension marker
+	keplerianAlmanacSet				AlmanacKeplerianSet,		-- Model-1
+	keplerianNAV-Almanac			AlmanacNAV-KeplerianSet,	-- Model-2
+	keplerianReducedAlmanac			AlmanacReducedKeplerianSet,	-- Model-3
+	keplerianMidiAlmanac			AlmanacMidiAlmanacSet,		-- Model-4
+	keplerianGLONASS				AlmanacGLONASS-AlmanacSet,	-- Model-5
+	ecef-SBAS-Almanac				AlmanacECEF-SBAS-AlmanacSet,-- Model-6
+	...,
+	keplerianBDS-Almanac-r12		AlmanacBDS-AlmanacSet-r12,	-- Model-7
+	keplerianNavIC-Almanac-r16		AlmanacNavIC-AlmanacSet-r16	-- Model-8
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacKeplerianSet ::= SEQUENCE {	-- Almanac Model-1 entry (per GNSS-AlmanacElement): satellite identifier, two status bit strings (one optional) and nine INTEGER parameters; extensible
+	svID					SV-ID,
+	kepAlmanacE				INTEGER (0..2047),	-- unsigned 11-bit range
+	kepAlmanacDeltaI		INTEGER (-1024..1023),
+	kepAlmanacOmegaDot		INTEGER (-1024..1023),
+	kepSV-StatusINAV		BIT STRING (SIZE (4)),
+	kepSV-StatusFNAV		BIT STRING (SIZE (2))			OPTIONAL,	-- Need ON
+	kepAlmanacAPowerHalf	INTEGER (-4096..4095),	-- signed range here, whereas the navigation-model APowerHalf fields in this file are unsigned
+	kepAlmanacOmega0		INTEGER (-32768..32767),
+	kepAlmanacW				INTEGER (-32768..32767),
+	kepAlmanacM0			INTEGER (-32768..32767),
+	kepAlmanacAF0			INTEGER (-32768..32767),
+	kepAlmanacAF1			INTEGER (-4096..4095),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacNAV-KeplerianSet ::= SEQUENCE {	-- Almanac Model-2 entry (per GNSS-AlmanacElement): satellite identifier plus ten mandatory INTEGER parameters; extensible
+	svID					SV-ID,
+	navAlmE					INTEGER (0..65535),
+	navAlmDeltaI			INTEGER (-32768..32767),
+	navAlmOMEGADOT			INTEGER (-32768..32767),
+	navAlmSVHealth			INTEGER (0..255),	-- 8-bit value carried as INTEGER rather than BIT STRING
+	navAlmSqrtA				INTEGER (0..16777215),	-- unsigned 24-bit range
+	navAlmOMEGAo			INTEGER (-8388608..8388607),	-- signed 24-bit range (same for the next two fields)
+	navAlmOmega				INTEGER (-8388608..8388607),
+	navAlmMo				INTEGER (-8388608..8388607),
+	navAlmaf0				INTEGER (-1024..1023),
+	navAlmaf1				INTEGER (-1024..1023),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacReducedKeplerianSet ::= SEQUENCE {	-- Almanac Model-3 entry (per GNSS-AlmanacElement): satellite identifier, three small INTEGER parameters and three health flags; extensible
+	svID					SV-ID,
+	redAlmDeltaA			INTEGER (-128..127),	-- signed 8-bit range
+	redAlmOmega0			INTEGER (-64..63),	-- signed 7-bit range
+	redAlmPhi0				INTEGER (-64..63),
+	redAlmL1Health			BOOLEAN,	-- polarity (true meaning healthy or unhealthy) is not visible here; confirm against the LPP spec
+	redAlmL2Health			BOOLEAN,
+	redAlmL5Health			BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacMidiAlmanacSet ::= SEQUENCE {	-- Almanac Model-4 entry (per GNSS-AlmanacElement): satellite identifier, nine INTEGER parameters and three health flags; extensible
+	svID					SV-ID,
+	midiAlmE				INTEGER (0..2047),
+	midiAlmDeltaI			INTEGER (-1024..1023),
+	midiAlmOmegaDot			INTEGER (-1024..1023),
+	midiAlmSqrtA			INTEGER (0..131071),	-- unsigned 17-bit range
+	midiAlmOmega0			INTEGER (-32768..32767),
+	midiAlmOmega			INTEGER (-32768..32767),
+	midiAlmMo				INTEGER (-32768..32767),
+	midiAlmaf0				INTEGER (-1024..1023),	-- signed 11-bit range
+	midiAlmaf1				INTEGER (-512..511),	-- signed 10-bit range; one bit narrower than midiAlmaf0
+	midiAlmL1Health			BOOLEAN,	-- polarity of the three health flags is not visible here; confirm against the LPP spec
+	midiAlmL2Health			BOOLEAN,
+	midiAlmL5Health			BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacGLONASS-AlmanacSet ::= SEQUENCE {	-- Almanac Model-5 entry (per GNSS-AlmanacElement): twelve mandatory INTEGER fields and one optional 2-bit string; has no svID component, unlike the other almanac sets in this file
+	gloAlm-NA				INTEGER (1..1461),	-- range starts at 1, not 0
+	gloAlmnA				INTEGER (1..24),	-- range starts at 1, not 0
+	gloAlmHA				INTEGER (0..31),
+	gloAlmLambdaA			INTEGER (-1048576..1048575),	-- signed 21-bit range
+	gloAlmtlambdaA			INTEGER (0..2097151),	-- unsigned 21-bit range
+	gloAlmDeltaIa			INTEGER (-131072..131071),	-- signed 18-bit range
+	gloAlmDeltaTA			INTEGER (-2097152..2097151),	-- signed 22-bit range
+	gloAlmDeltaTdotA		INTEGER (-64..63),
+	gloAlmEpsilonA			INTEGER (0..32767),	-- unsigned 15-bit range
+	gloAlmOmegaA			INTEGER (-32768..32767),
+	gloAlmTauA				INTEGER (-512..511),
+	gloAlmCA				INTEGER (0..1),	-- two-valued flag carried as INTEGER rather than BOOLEAN
+	gloAlmMA				BIT STRING (SIZE(2))			OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacECEF-SBAS-AlmanacSet ::= SEQUENCE {	-- Almanac Model-6 entry (per GNSS-AlmanacElement): data identifier, satellite identifier, 8-bit health string and seven INTEGER fields; extensible
+	sbasAlmDataID			INTEGER (0..3),
+	svID					SV-ID,	-- second component here, whereas the other almanac sets that carry svID place it first
+	sbasAlmHealth			BIT STRING (SIZE(8)),
+	sbasAlmXg				INTEGER (-16384..16383),	-- signed 15-bit range (same for sbasAlmYg)
+	sbasAlmYg				INTEGER (-16384..16383),
+	sbasAlmZg				INTEGER (-256..255),	-- signed 9-bit range
+	sbasAlmXgdot			INTEGER (-4..3),	-- signed 3-bit range; note the lower-case dot, unlike YgDot and ZgDot
+	sbasAlmYgDot			INTEGER (-4..3),
+	sbasAlmZgDot			INTEGER (-8..7),	-- signed 4-bit range
+	sbasAlmTo				INTEGER (0..2047),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacBDS-AlmanacSet-r12 ::= SEQUENCE {	-- Almanac Model-7 entry (per GNSS-AlmanacElement): satellite identifier, nine mandatory INTEGER fields and two conditionally present fields; extensible
+	svID					SV-ID,
+	bdsAlmToa-r12			INTEGER (0..255)					OPTIONAL,	-- Cond NotSameForAllSV
+	bdsAlmSqrtA-r12			INTEGER (0..16777215),	-- unsigned 24-bit range
+	bdsAlmE-r12				INTEGER (0..131071),	-- unsigned 17-bit range
+	bdsAlmW-r12				INTEGER (-8388608..8388607),	-- signed 24-bit range (same for the next two fields)
+	bdsAlmM0-r12			INTEGER (-8388608..8388607),
+	bdsAlmOmega0-r12		INTEGER (-8388608..8388607),
+	bdsAlmOmegaDot-r12		INTEGER (-65536..65535),	-- signed 17-bit range
+	bdsAlmDeltaI-r12		INTEGER (-32768..32767),
+	bdsAlmA0-r12			INTEGER (-1024..1023),
+	bdsAlmA1-r12			INTEGER (-1024..1023),
+	bdsSvHealth-r12			BIT STRING (SIZE(9))				OPTIONAL,	-- Cond SV-ID
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+AlmanacNavIC-AlmanacSet-r16	 ::= SEQUENCE {	-- Almanac Model-8 entry (per GNSS-AlmanacElement): satellite identifier, one conditionally present field and eight mandatory INTEGER fields; ranges match the same-named fields of AlmanacNAV-KeplerianSet
+	svID-r16					SV-ID,
+	navic-AlmToa-r16			INTEGER (0..65535)			OPTIONAL,	-- Cond NotSameForAllSV		
+	navic-AlmE-r16				INTEGER (0..65535),
+	navic-AlmOMEGADOT-r16		INTEGER (-32768..32767),
+	navic-AlmSqrtA-r16			INTEGER (0..16777215),	-- unsigned 24-bit range
+	navic-AlmOMEGAo-r16			INTEGER (-8388608..8388607),	-- signed 24-bit range (same for the next two fields)
+	navic-AlmOmega-r16			INTEGER (-8388608..8388607),
+	navic-AlmMo-r16				INTEGER (-8388608..8388607),
+	navic-Almaf0-r16			INTEGER (-1024..1023),
+	navic-Almaf1-r16			INTEGER (-1024..1023),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-UTC-Model ::= CHOICE {	-- Extensible choice between four root UTC model sets and one alternative added after the extension marker
+	utcModel1			UTC-ModelSet1,			-- Model-1
+	utcModel2			UTC-ModelSet2,			-- Model-2
+	utcModel3			UTC-ModelSet3,			-- Model-3
+	utcModel4			UTC-ModelSet4,			-- Model-4
+	...,
+	utcModel5-r12		UTC-ModelSet5-r12		-- Model-5
+}
+
+-- ASN1STOP
+-- ASN1START
+
+UTC-ModelSet1 ::= SEQUENCE {	-- UTC Model-1 (per GNSS-UTC-Model): eight mandatory INTEGER fields; extensible
+	gnss-Utc-A1			INTEGER (-8388608..8388607),	-- signed 24-bit range; note A1 precedes A0 in this set
+	gnss-Utc-A0			INTEGER (-2147483648..2147483647),	-- full signed 32-bit range
+	gnss-Utc-Tot		INTEGER (0..255),
+	gnss-Utc-WNt		INTEGER (0..255),
+	gnss-Utc-DeltaTls	INTEGER (-128..127),
+	gnss-Utc-WNlsf		INTEGER (0..255),
+	gnss-Utc-DN			INTEGER (-128..127),
+	gnss-Utc-DeltaTlsf	INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+UTC-ModelSet2 ::= SEQUENCE {	-- UTC Model-2 (per GNSS-UTC-Model): nine mandatory root fields plus one release-16 extension group; extensible
+	utcA0				INTEGER (-32768..32767),
+	utcA1				INTEGER (-4096..4095),	-- signed 13-bit range
+	utcA2				INTEGER (-64..63),	-- signed 7-bit range
+	utcDeltaTls			INTEGER (-128..127),
+	utcTot				INTEGER (0..65535),
+	utcWNot				INTEGER (0..8191),	-- unsigned 13-bit range
+	utcWNlsf			INTEGER (0..255),
+	utcDN				BIT STRING (SIZE(4)),	-- 4-bit string, unlike the INTEGER-typed DN fields of UTC-ModelSet1, UTC-ModelSet4 and UTC-ModelSet5-r12
+	utcDeltaTlsf		INTEGER (-128..127),
+	...,
+	[[
+		utcWNlsf-ext-r16	INTEGER (256..8191)	OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+UTC-ModelSet3 ::= SEQUENCE {	-- UTC Model-3 (per GNSS-UTC-Model): two mandatory INTEGER fields and three fields that share the GLONASS-M presence condition; extensible
+	nA					INTEGER (1..1461),	-- range starts at 1; same bounds as gloAlm-NA in AlmanacGLONASS-AlmanacSet
+	tauC				INTEGER (-2147483648..2147483647),	-- full signed 32-bit range
+	b1					INTEGER (-1024..1023)					OPTIONAL,	-- Cond GLONASS-M
+	b2					INTEGER (-512..511)						OPTIONAL,	-- Cond GLONASS-M
+	kp					BIT STRING (SIZE(2))					OPTIONAL,	-- Cond GLONASS-M
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+UTC-ModelSet4 ::= SEQUENCE {	-- UTC Model-4 (per GNSS-UTC-Model): nine mandatory INTEGER fields; the first eight have the same ranges as UTC-ModelSet1, followed by utcStandardID; extensible
+	utcA1wnt			INTEGER (-8388608..8388607),	-- signed 24-bit range
+	utcA0wnt			INTEGER (-2147483648..2147483647),	-- full signed 32-bit range
+	utcTot				INTEGER (0..255),
+	utcWNt				INTEGER (0..255),
+	utcDeltaTls			INTEGER (-128..127),
+	utcWNlsf			INTEGER (0..255),
+	utcDN				INTEGER (-128..127),
+	utcDeltaTlsf		INTEGER (-128..127),
+	utcStandardID		INTEGER (0..7),	-- eight possible values; their meaning is not visible here (see the LPP spec)
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+UTC-ModelSet5-r12 ::= SEQUENCE {	-- UTC Model-5 (per GNSS-UTC-Model, added after its extension marker): six mandatory INTEGER fields; extensible
+	utcA0-r12			INTEGER (-2147483648..2147483647),	-- full signed 32-bit range
+	utcA1-r12			INTEGER (-8388608..8388607),	-- signed 24-bit range
+	utcDeltaTls-r12		INTEGER (-128..127),
+	utcWNlsf-r12		INTEGER (0..255),
+	utcDN-r12			INTEGER (0..255),	-- unsigned here, whereas the DN fields of UTC-ModelSet1 and UTC-ModelSet4 are signed
+	utcDeltaTlsf-r12	INTEGER (-128..127),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AuxiliaryInformation ::= CHOICE {	-- Extensible choice of per-constellation satellite lists: two root alternatives plus one added in a version-bracket group
+	gnss-ID-GPS		GNSS-ID-GPS,
+	gnss-ID-GLONASS	GNSS-ID-GLONASS,
+	...,
+	[[	gnss-ID-BDS-r16		GNSS-ID-BDS-r16
+	]]
+}
+
+GNSS-ID-GPS ::= SEQUENCE	(SIZE(1..64)) OF GNSS-ID-GPS-SatElement	-- 1 to 64 satellites
+
+GNSS-ID-GPS-SatElement ::= SEQUENCE {	-- One satellite: identifier and its set of available signals
+	svID				SV-ID,
+	signalsAvailable	GNSS-SignalIDs,
+	...
+}
+
+GNSS-ID-GLONASS ::= SEQUENCE (SIZE(1..64)) OF GNSS-ID-GLONASS-SatElement	-- 1 to 64 satellites
+
+GNSS-ID-GLONASS-SatElement ::= SEQUENCE {	-- Same two components as the GPS element plus a conditionally present channel number
+	svID				SV-ID,
+	signalsAvailable	GNSS-SignalIDs,
+	channelNumber		INTEGER (-7..13)		OPTIONAL,		-- Cond FDMA
+	...
+}	
+
+GNSS-ID-BDS-r16 ::= SEQUENCE	(SIZE(1..64)) OF GNSS-ID-BDS-SatElement-r16	-- 1 to 64 satellites
+
+GNSS-ID-BDS-SatElement-r16 ::= SEQUENCE {	-- One satellite: identifier and a four-valued satellite type; carries no signal set, unlike the GPS and GLONASS elements
+	svID-r16			SV-ID,
+	satType-r16				INTEGER (0..3),	-- meaning of the four values is not visible here (see the LPP spec)
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-DifferentialCorrections-r12 ::= SEQUENCE {	-- Extensible record: a reference time and a list of per-signal-type correction sets
+	dbds-RefTime-r12			INTEGER (0..3599),	-- 3600 values; presumably seconds within an hour (confirm against the LPP spec)
+	bds-SgnTypeList-r12			BDS-SgnTypeList-r12,
+	...
+}
+
+BDS-SgnTypeList-r12 ::= SEQUENCE (SIZE (1..3)) OF BDS-SgnTypeElement-r12	-- 1 to 3 signal types
+
+BDS-SgnTypeElement-r12 ::= SEQUENCE {	-- One signal type: an optional signal identifier and the mandatory per-satellite correction list
+	gnss-SignalID				GNSS-SignalID				OPTIONAL,	-- Need ON
+	dbds-CorrectionList-r12		DBDS-CorrectionList-r12,
+	...
+}
+
+DBDS-CorrectionList-r12 ::= SEQUENCE (SIZE (1..64)) OF DBDS-CorrectionElement-r12	-- 1 to 64 satellites
+
+DBDS-CorrectionElement-r12 ::= SEQUENCE {	-- One satellite: identifier, two 4-bit indices and a signed 13-bit correction value
+	svID						SV-ID,
+	bds-UDREI-r12				INTEGER (0..15),
+	bds-RURAI-r12				INTEGER (0..15),
+	bds-ECC-DeltaT-r12			INTEGER (-4096..4095),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-GridModelParameter-r12 ::= SEQUENCE {	-- Extensible record: a reference time and a list of grid-point entries
+	bds-RefTime-r12			INTEGER (0..3599),	-- same range as dbds-RefTime-r12 in BDS-DifferentialCorrections-r12
+	gridIonList-r12			GridIonList-r12,
+	...
+}
+
+GridIonList-r12 ::= SEQUENCE (SIZE (1..320)) OF GridIonElement-r12	-- 1 to 320 entries; the upper bound equals the largest igp-ID-r12 value
+
+GridIonElement-r12 ::= SEQUENCE {	-- One grid point: its identifier and two unsigned values
+	igp-ID-r12				INTEGER (1..320),	-- range starts at 1, not 0
+	dt-r12					INTEGER (0..511),	-- unsigned 9-bit range
+	givei-r12				INTEGER (0..15) ,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-Observations-r15 ::= SEQUENCE {	-- Extensible record: an epoch time and a per-satellite observation list
+	epochTime-r15							GNSS-SystemTime,
+	gnss-ObservationList-r15				GNSS-ObservationList-r15,
+	...
+}
+
+GNSS-ObservationList-r15 ::= SEQUENCE (SIZE(1..64)) OF GNSS-RTK-SatelliteDataElement-r15	-- 1 to 64 satellites
+
+GNSS-RTK-SatelliteDataElement-r15 ::= SEQUENCE{	-- One satellite: identifier, rough range terms (two optional) and the per-signal data list
+	svID-r15								SV-ID,
+	integer-ms-r15							INTEGER (0..254)				OPTIONAL,	-- Need ON
+	rough-range-r15							INTEGER (0..1023),	-- unsigned 10-bit range
+	rough-phase-range-rate-r15				INTEGER (-8192..8191)			OPTIONAL,	-- Need ON
+	gnss-rtk-SatelliteSignalDataList-r15	GNSS-RTK-SatelliteSignalDataList-r15,
+	...
+}
+
+GNSS-RTK-SatelliteSignalDataList-r15 ::= SEQUENCE (SIZE(1..24)) OF
+														GNSS-RTK-SatelliteSignalDataElement-r15	-- 1 to 24 signals per satellite
+
+
+GNSS-RTK-SatelliteSignalDataElement-r15 ::= SEQUENCE {	-- One signal: identifier, fine range terms, lock and ambiguity indicators, and two optional measurements
+	gnss-SignalID-r15					GNSS-SignalID,
+	fine-PseudoRange-r15				INTEGER (-524288..524287),	-- signed 20-bit range
+	fine-PhaseRange-r15					INTEGER (-8388608..8388607),	-- signed 24-bit range
+	lockTimeIndicator-r15				INTEGER (0..1023),
+	halfCycleAmbiguityIndicator-r15		BIT STRING (SIZE (1)),	-- single bit carried as a BIT STRING rather than BOOLEAN
+	carrier-to-noise-ratio-r15			INTEGER (0..1023)					OPTIONAL,	-- Need ON
+	fine-PhaseRangeRate-r15				INTEGER (-16384..16383)				OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GLO-RTK-BiasInformation-r15 ::= SEQUENCE{	-- Extensible record: reference station identifier, a 1-bit indicator and four optional bias values that share the signed 16-bit range
+	referenceStationID-r15		GNSS-ReferenceStationID-r15,
+	cpbIndicator-r15			BIT STRING (SIZE(1)),	-- single bit carried as a BIT STRING rather than BOOLEAN
+	l1-ca-cpBias-r15			INTEGER (-32768..32767)			OPTIONAL,		-- Need ON
+	l1-p-cpBias-r15				INTEGER (-32768..32767)			OPTIONAL, 		-- Need ON
+	l2-ca-cpBias-r15			INTEGER (-32768..32767)			OPTIONAL, 		-- Need ON
+	l2-p-cpBias-r15				INTEGER (-32768..32767)			OPTIONAL, 		-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-MAC-CorrectionDifferences-r15 ::= SEQUENCE {	-- Extensible record: network identifiers, the master reference station, two optional frequency identifiers and the list of correction differences
+	networkID-r15						GNSS-NetworkID-r15,
+	subNetworkID-r15					GNSS-SubNetworkID-r15				OPTIONAL,	-- Need ON
+	master-ReferenceStationID-r15		GNSS-ReferenceStationID-r15,
+	l1-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	l2-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	rtkCorrectionDifferencesList-r15	RTK-CorrectionDifferencesList-r15,
+	...
+}
+
+RTK-CorrectionDifferencesList-r15 ::= SEQUENCE (SIZE (1..32)) OF
+											RTK-CorrectionDifferencesElement-r15	-- 1 to 32 entries, each naming one auxiliary reference station
+
+RTK-CorrectionDifferencesElement-r15 ::= SEQUENCE {	-- One entry: epoch time, auxiliary station identifier and the per-satellite difference list
+	epochTime-r15							GNSS-SystemTime,
+	auxiliary-referenceStationID-r15		GNSS-ReferenceStationID-r15,
+	geometric-ionospheric-corrections-differences-r15	
+											Geometric-Ionospheric-Corrections-Differences-r15,
+	...
+}
+
+Geometric-Ionospheric-Corrections-Differences-r15 ::= SEQUENCE (SIZE(1..64)) OF
+									Geometric-Ionospheric-Corrections-Differences-Element-r15	-- 1 to 64 satellites
+
+Geometric-Ionospheric-Corrections-Differences-Element-r15 ::= SEQUENCE {	-- One satellite: identifier, two small counters, an 11-bit issue-of-data string and two signed 17-bit differences
+	svID-r15										SV-ID,
+	ambiguityStatusFlag-r15							INTEGER (0..3),
+	non-synch-count-r15								INTEGER (0..7),
+	geometricCarrierPhaseCorrectionDifference-r15	INTEGER (-65536..65535),
+	iod-r15											BIT STRING (SIZE(11)),
+	ionosphericCarrierPhaseCorrectionDifference-r15	INTEGER (-65536..65535),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-Residuals-r15 ::= SEQUENCE {	-- Extensible record: epoch time, reference station, a station count, two optional frequency identifiers and the per-satellite residual list
+	epochTime-r15						GNSS-SystemTime,
+	referenceStationID-r15				GNSS-ReferenceStationID-r15,
+	n-Refs-r15							INTEGER (0..127),	-- unsigned 7-bit range
+	l1-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	l2-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	rtk-residuals-list-r15				RTK-Residuals-List-r15,
+	...
+}
+
+RTK-Residuals-List-r15 ::= SEQUENCE (SIZE(1..64)) OF RTK-Residuals-Element-r15	-- 1 to 64 satellites
+
+RTK-Residuals-Element-r15 ::= SEQUENCE {	-- One satellite: identifier and five unsigned values of differing widths
+	svID-r15			SV-ID,
+	s-oc-r15			INTEGER (0..255),	-- unsigned 8-bit range
+	s-od-r15			INTEGER (0..511),	-- unsigned 9-bit range
+	s-oh-r15			INTEGER (0..63),	-- unsigned 6-bit range
+	s-lc-r15			INTEGER (0..1023),	-- unsigned 10-bit range
+	s-ld-r15			INTEGER (0..1023),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-FKP-Gradients-r15 ::= SEQUENCE {	-- Extensible record: reference station, epoch time, two optional frequency identifiers and the per-satellite gradient list
+	referenceStationID-r15				GNSS-ReferenceStationID-r15,	-- station precedes epoch time here, the reverse of GNSS-RTK-Residuals-r15
+	epochTime-r15						GNSS-SystemTime,
+	l1-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	l2-r15								GNSS-FrequencyID-r15				OPTIONAL,	-- Need OP
+	fkp-gradients-list-r15				FKP-Gradients-List-r15,
+	...
+}
+
+FKP-Gradients-List-r15 ::= SEQUENCE (SIZE(1..64)) OF FKP-Gradients-Element-r15	-- 1 to 64 satellites
+
+FKP-Gradients-Element-r15 ::= SEQUENCE {	-- One satellite: identifier, 11-bit issue-of-data string, and north/east gradients for the geometric and ionospheric parts
+	svID-r15							SV-ID,
+	iod-r15								BIT STRING (SIZE(11)),
+	north-geometric-gradient-r15		INTEGER (-2048..2047),	-- signed 12-bit range (same for the east component)
+	east-geometric-gradient-r15			INTEGER (-2048..2047),
+	north-ionospheric-gradient-r15		INTEGER (-8192..8191),	-- signed 14-bit range (same for the east component)
+	east-ionospheric-gradient-r15		INTEGER (-8192..8191),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-OrbitCorrections-r15 ::= SEQUENCE {	-- Extensible record: common SSR header (epoch time, update interval, issue of data), a reference datum and the per-satellite orbit correction list
+	epochTime-r15						GNSS-SystemTime,
+	ssrUpdateInterval-r15				INTEGER (0..15),
+	satelliteReferenceDatum-r15			ENUMERATED { itrf, regional, ... },	-- extensible enumeration with two root values
+	iod-ssr-r15							INTEGER (0..15),
+	ssr-OrbitCorrectionList-r15			SSR-OrbitCorrectionList-r15,
+	...
+}
+
+SSR-OrbitCorrectionList-r15 ::= SEQUENCE (SIZE(1..64)) OF SSR-OrbitCorrectionSatelliteElement-r15	-- 1 to 64 satellites
+
+SSR-OrbitCorrectionSatelliteElement-r15 ::= SEQUENCE {	-- One satellite: identifier, 11-bit issue-of-data string, three mandatory deltas and three optional rate terms
+	svID-r15							SV-ID,
+	iod-r15								BIT STRING (SIZE(11)),
+	delta-radial-r15					INTEGER (-2097152..2097151),	-- signed 22-bit range
+	delta-AlongTrack-r15				INTEGER (-524288..524287),	-- signed 20-bit range (same for cross-track)
+	delta-CrossTrack-r15				INTEGER (-524288..524287),
+	dot-delta-radial-r15				INTEGER (-1048576..1048575)		OPTIONAL, -- Need ON
+	dot-delta-AlongTrack-r15			INTEGER (-262144..262143) 		OPTIONAL, -- Need ON
+	dot-delta-CrossTrack-r15			INTEGER (-262144..262143) 		OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-ClockCorrections-r15 ::= SEQUENCE {	-- Extensible record: common SSR header (epoch time, update interval, issue of data) and the per-satellite clock correction list
+	epochTime-r15						GNSS-SystemTime,
+	ssrUpdateInterval-r15				INTEGER (0..15),
+	iod-ssr-r15							INTEGER (0..15),
+	ssr-ClockCorrectionList-r15			SSR-ClockCorrectionList-r15,
+	...
+}
+
+SSR-ClockCorrectionList-r15 ::= SEQUENCE (SIZE(1..64)) OF SSR-ClockCorrectionSatelliteElement-r15	-- 1 to 64 satellites
+
+SSR-ClockCorrectionSatelliteElement-r15 ::= SEQUENCE {	-- One satellite: identifier, mandatory C0 term and optional C1 and C2 terms
+	svID-r15							SV-ID,
+	delta-Clock-C0-r15					INTEGER (-2097152..2097151),	-- signed 22-bit range
+	delta-Clock-C1-r15					INTEGER (-1048576..1048575)			OPTIONAL, -- Need ON
+	delta-Clock-C2-r15					INTEGER (-67108864..67108863)		OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-CodeBias-r15 ::= SEQUENCE {	-- Extensible record: common SSR header (epoch time, update interval, issue of data) and the per-satellite code bias list
+	epochTime-r15						GNSS-SystemTime,
+	ssrUpdateInterval-r15				INTEGER (0..15),
+	iod-ssr-r15							INTEGER (0..15),
+	ssr-CodeBiasSatList-r15				SSR-CodeBiasSatList-r15,
+	...
+}
+
+SSR-CodeBiasSatList-r15 ::= SEQUENCE (SIZE(1..64)) OF SSR-CodeBiasSatElement-r15	-- 1 to 64 satellites
+
+SSR-CodeBiasSatElement-r15 ::= SEQUENCE {	-- One satellite: identifier and its per-signal bias list
+	svID-r15							SV-ID,
+	ssr-CodeBiasSignalList-r15			SSR-CodeBiasSignalList-r15,
+	...
+}
+
+SSR-CodeBiasSignalList-r15 ::= SEQUENCE (SIZE(1..16)) OF SSR-CodeBiasSignalElement-r15	-- 1 to 16 signals per satellite
+
+SSR-CodeBiasSignalElement-r15 ::= SEQUENCE {	-- One signal: identifier and a signed 14-bit bias value
+	signal-and-tracking-mode-ID-r15		GNSS-SignalID,
+	codeBias-r15						INTEGER (-8192..8191),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-URA-r16 ::= SEQUENCE {	-- Extensible record: common SSR header (epoch time, update interval, issue of data) and the per-satellite URA list
+	epochTime-r16						GNSS-SystemTime,
+	ssrUpdateInterval-r16				INTEGER (0..15),
+	iod-ssr-r16							INTEGER (0..15),
+	ssr-URA-SatList-r16					SSR-URA-SatList-r16,
+	...
+}
+
+SSR-URA-SatList-r16 ::= SEQUENCE (SIZE(1..64)) OF SSR-URA-SatElement-r16	-- 1 to 64 satellites
+
+SSR-URA-SatElement-r16 ::= SEQUENCE {	-- One satellite: identifier and a fixed 6-bit string
+	svID-r16							SV-ID,
+	ssr-URA-r16							BIT STRING (SIZE (6)),	-- internal layout of the six bits is not visible here (see the LPP spec)
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-PhaseBias-r16 ::= SEQUENCE {	-- Extensible record: common SSR header (epoch time, update interval, issue of data) and the per-satellite phase bias list; same shape as GNSS-SSR-CodeBias-r15
+	epochTime-r16						GNSS-SystemTime,
+	ssrUpdateInterval-r16				INTEGER (0..15),
+	iod-ssr-r16							INTEGER (0..15),
+	ssr-PhaseBiasSatList-r16			SSR-PhaseBiasSatList-r16,
+	...
+}
+
+SSR-PhaseBiasSatList-r16 ::= SEQUENCE (SIZE(1..64)) OF SSR-PhaseBiasSatElement-r16	-- 1 to 64 satellites
+
+SSR-PhaseBiasSatElement-r16 ::= SEQUENCE {	-- One satellite: identifier and its per-signal bias list
+	svID-r16							SV-ID,
+	ssr-PhaseBiasSignalList-r16			SSR-PhaseBiasSignalList-r16,
+	...
+}
+
+SSR-PhaseBiasSignalList-r16 ::= SEQUENCE (SIZE(1..16)) OF SSR-PhaseBiasSignalElement-r16	-- 1 to 16 signals per satellite
+
+SSR-PhaseBiasSignalElement-r16 ::= SEQUENCE {	-- One signal: identifier, a signed 15-bit bias and two 2-bit indicators (the second optional)
+	signal-and-tracking-mode-ID-r16		GNSS-SignalID,
+	phaseBias-r16						INTEGER (-16384..16383),
+	phaseDiscontinuityIndicator-r16		INTEGER (0..3),
+	phaseBiasIntegerIndicator-r16		INTEGER (0..3)					OPTIONAL,	-- Need OP
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-STEC-Correction-r16 ::= SEQUENCE {	-- Extensible record: common SSR header, a correction point set identifier and the per-satellite STEC list
+	epochTime-r16						GNSS-SystemTime,
+	ssrUpdateInterval-r16				INTEGER (0..15),
+	iod-ssr-r16							INTEGER (0..15),
+	correctionPointSetID-r16			INTEGER (0..16383),	-- unsigned 14-bit range; same range as the like-named field of GNSS-SSR-GriddedCorrection-r16
+	stec-SatList-r16					STEC-SatList-r16,
+	...
+}
+
+STEC-SatList-r16 ::= SEQUENCE (SIZE(1..64)) OF STEC-SatElement-r16	-- 1 to 64 satellites
+
+STEC-SatElement-r16 ::= SEQUENCE {	-- One satellite: identifier, 6-bit quality string, mandatory C00 term and three optional higher-order terms
+	svID-r16							SV-ID,
+	stecQualityIndicator-r16			BIT STRING (SIZE(6)),
+	stec-C00-r16						INTEGER (-8192..8191),	-- signed 14-bit range
+	stec-C01-r16						INTEGER (-2048..2047)				OPTIONAL, -- Need ON
+	stec-C10-r16						INTEGER (-2048..2047)				OPTIONAL, -- Need ON
+	stec-C11-r16						INTEGER (-512..511)					OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-GriddedCorrection-r16 ::= SEQUENCE {	-- Extensible record: common SSR header, a conditionally present 6-bit quality string, a correction point set identifier and the grid list
+	epochTime-r16								GNSS-SystemTime,
+	ssrUpdateInterval-r16						INTEGER (0..15),
+	iod-ssr-r16									INTEGER (0..15),
+	troposphericDelayQualityIndicator-r16		BIT STRING (SIZE(6))		OPTIONAL, -- Cond Tropo
+	correctionPointSetID-r16					INTEGER (0..16383),
+	gridList-r16								GridList-r16,
+	...
+}
+
+GridList-r16 ::= SEQUENCE (SIZE(1..64)) OF GridElement-r16	-- 1 to 64 grid entries
+
+GridElement-r16 ::= SEQUENCE {	-- One grid entry; both root components are optional, so an entry may carry neither
+	tropospericDelayCorrection-r16	TropospericDelayCorrection-r16	OPTIONAL, -- Need ON
+	stec-ResidualSatList-r16		STEC-ResidualSatList-r16		OPTIONAL, -- Need ON
+	...
+}
+
+TropospericDelayCorrection-r16 ::= SEQUENCE {	-- NOTE(review): this identifier (and the field above) omits the h of "tropospheric" that troposphericDelayQualityIndicator-r16 has; generated code exposes the name, so confirm against the published spec before renaming
+	tropoHydroStaticVerticalDelay-r16		INTEGER (-256..255),	-- signed 9-bit range
+	tropoWetVerticalDelay-r16				INTEGER (-128..127),	-- signed 8-bit range
+	...
+}
+
+STEC-ResidualSatList-r16 ::= SEQUENCE (SIZE(1..64)) OF STEC-ResidualSatElement-r16	-- 1 to 64 satellites
+
+STEC-ResidualSatElement-r16 ::= SEQUENCE {	-- One satellite: identifier and a residual value offered in two alternative widths
+	svID-r16							SV-ID,
+	stecResidualCorrection-r16			CHOICE {	-- inline choice without an extension marker
+					b7-r16					INTEGER (-64..63),	-- signed 7-bit range, matching the b7 name
+					b16-r16					INTEGER (-32768..32767)	-- signed 16-bit range, matching the b16 name
+	},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-DifferentialCorrections-r16 ::= SEQUENCE {	-- Extensible record: a reference time counter and the per-satellite correction list
+	navic-RefTOWC-r16				INTEGER (0..50400),	-- same range as the like-named field of NavIC-GridModelParameter-r16
+	navic-CorrectionListAutoNav-r16	NavIC-CorrectionListAutoNav-r16,
+	...
+}
+
+NavIC-CorrectionListAutoNav-r16 ::= SEQUENCE (SIZE (1..64)) OF NavIC-CorrectionElementAutoNav-r16	-- 1 to 64 satellites
+
+NavIC-CorrectionElementAutoNav-r16 ::= SEQUENCE {	-- One satellite: identifier, four INTEGER fields and two nested correction records (EDC and CDC)
+	svID						SV-ID,	-- no -r16 suffix on this component, unlike its siblings
+	navic-Tod-r16				INTEGER (0..65535),
+	navic-iodec-r16				INTEGER (0..255),
+	navic-UDRAI-r16				INTEGER (-16..15),	-- signed 5-bit range (same for the rate index below)
+	navic-UDRArateI-r16			INTEGER (-16..15),
+	navic-EDC-r16				NavIC-EDC-r16,
+	navic-CDC-r16				NavIC-CDC-r16,
+	...
+}
+
+NavIC-EDC-r16 ::= SEQUENCE {	-- Six mandatory signed INTEGER correction terms; extensible
+	navic-AlphaEDC-r16				INTEGER (-8192..8191),	-- signed 14-bit range (same for Beta)
+	navic-BetaEDC-r16				INTEGER (-8192..8191),
+	navic-GammaEDC-r16				INTEGER (-16384..16383),	-- signed 15-bit range
+	navic-AoIcorrection-r16			INTEGER (-2048..2047),	-- signed 12-bit range (same for the next two fields)
+	navic-AoRAcorrection-r16		INTEGER (-2048..2047),
+	navic-SemiMajorcorrection-r16	INTEGER (-2048..2047),
+	...
+}
+
+NavIC-CDC-r16 ::= SEQUENCE {	-- Two mandatory signed INTEGER clock correction terms; extensible
+	navic-ClockBiasCorrection-r16	INTEGER (-4096..4095),	-- signed 13-bit range
+	navic-ClockDriftCorrection-r16	INTEGER (-128..127),	-- signed 8-bit range
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-GridModelParameter-r16 ::= SEQUENCE {	-- Extensible record: a reference time counter, a 10-bit region value and the per-region list
+	navic-RefTOWC-r16	INTEGER (0..50400),
+	regionMasked-r16	INTEGER (0..1023),	-- unsigned 10-bit range; whether it is a bit mask or a count is not visible here (see the LPP spec)
+	regionIgpList-r16	RegionIgpList-r16,
+	...
+}
+
+RegionIgpList-r16 ::= SEQUENCE (SIZE (1..16)) OF RegionIgpElement-r16	-- 1 to 16 regions; the upper bound equals the number of regionID-r16 values
+
+RegionIgpElement-r16 ::= SEQUENCE {	-- One region: its identifier followed by 15 numbered (givei, givd) pairs written out as individual mandatory fields
+	regionID-r16	INTEGER (0..15),
+	givei1-r16		INTEGER (0..15),	-- every giveiN field has the unsigned 4-bit range
+	givd1-r16		INTEGER (0..511),	-- every givdN field has the unsigned 9-bit range
+	givei2-r16		INTEGER (0..15),
+	givd2-r16		INTEGER (0..511),
+	givei3-r16		INTEGER (0..15),
+	givd3-r16		INTEGER (0..511),
+	givei4-r16		INTEGER (0..15),
+	givd4-r16		INTEGER (0..511),
+	givei5-r16		INTEGER (0..15),
+	givd5-r16		INTEGER (0..511),
+	givei6-r16		INTEGER (0..15),
+	givd6-r16		INTEGER (0..511),
+	givei7-r16		INTEGER (0..15),
+	givd7-r16		INTEGER (0..511),
+	givei8-r16		INTEGER (0..15),
+	givd8-r16		INTEGER (0..511),
+	givei9-r16		INTEGER (0..15),
+	givd9-r16		INTEGER (0..511),
+	givei10-r16		INTEGER (0..15),
+	givd10-r16		INTEGER (0..511),
+	givei11-r16		INTEGER (0..15),
+	givd11-r16		INTEGER (0..511),
+	givei12-r16		INTEGER (0..15),
+	givd12-r16		INTEGER (0..511),
+	givei13-r16		INTEGER (0..15),
+	givd13-r16		INTEGER (0..511),
+	givei14-r16		INTEGER (0..15),
+	givd14-r16		INTEGER (0..511),
+	givei15-r16		INTEGER (0..15),
+	givd15-r16		INTEGER (0..511),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-RequestAssistanceData ::= SEQUENCE {	-- Top-level request container: every component is optional; two root requests (common, generic) plus a periodic request added in a release-15 extension group
+	gnss-CommonAssistDataReq		GNSS-CommonAssistDataReq		OPTIONAL, -- Cond CommonADReq
+	gnss-GenericAssistDataReq		GNSS-GenericAssistDataReq		OPTIONAL, -- Cond GenADReq
+	...,
+	[[
+		gnss-PeriodicAssistDataReq-r15
+									GNSS-PeriodicAssistDataReq-r15	OPTIONAL -- Cond PerADReq
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-CommonAssistDataReq ::= SEQUENCE {	-- Constellation-independent requests: four optional root components, two added in a release-15 group and one in a release-16 group
+	gnss-ReferenceTimeReq				GNSS-ReferenceTimeReq				
+																OPTIONAL, -- Cond RefTimeReq
+	gnss-ReferenceLocationReq			GNSS-ReferenceLocationReq			
+																OPTIONAL, -- Cond RefLocReq
+	gnss-IonosphericModelReq			GNSS-IonosphericModelReq				
+																OPTIONAL, -- Cond IonoModReq
+	gnss-EarthOrientationParametersReq	GNSS-EarthOrientationParametersReq	
+																OPTIONAL, -- Cond EOPReq
+	...,
+	[[
+		gnss-RTK-ReferenceStationInfoReq-r15	
+										GNSS-RTK-ReferenceStationInfoReq-r15
+																OPTIONAL, -- Cond ARPReq
+		gnss-RTK-AuxiliaryStationDataReq-r15
+										GNSS-RTK-AuxiliaryStationDataReq-r15
+																OPTIONAL -- Cond AuxARPReq
+	]],
+	[[
+		gnss-SSR-CorrectionPointsReq-r16
+										GNSS-SSR-CorrectionPointsReq-r16
+																OPTIONAL -- Cond PointsReq
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-GenericAssistDataReq ::= SEQUENCE (SIZE (1..16)) OF GNSS-GenericAssistDataReqElement	-- 1 to 16 entries, each tagged with its own gnss-ID
+
+GNSS-GenericAssistDataReqElement ::= SEQUENCE {	-- Per-constellation requests: mandatory gnss-ID, ten optional root requests, then extension groups adding 2 (r12), 8 (r15) and 6 (r16) optional requests
+	gnss-ID							GNSS-ID,
+	sbas-ID							SBAS-ID							OPTIONAL, -- Cond GNSS-ID-SBAS
+	gnss-TimeModelsReq				GNSS-TimeModelListReq			OPTIONAL, -- Cond TimeModReq
+	gnss-DifferentialCorrectionsReq	GNSS-DifferentialCorrectionsReq	OPTIONAL, -- Cond DGNSS-Req
+	gnss-NavigationModelReq			GNSS-NavigationModelReq			OPTIONAL, -- Cond NavModReq
+	gnss-RealTimeIntegrityReq		GNSS-RealTimeIntegrityReq		OPTIONAL, -- Cond RTIReq
+	gnss-DataBitAssistanceReq		GNSS-DataBitAssistanceReq		OPTIONAL, -- Cond DataBitsReq
+	gnss-AcquisitionAssistanceReq	GNSS-AcquisitionAssistanceReq	OPTIONAL, -- Cond AcquAssistReq
+	gnss-AlmanacReq					GNSS-AlmanacReq					OPTIONAL, -- Cond AlmanacReq
+	gnss-UTCModelReq				GNSS-UTC-ModelReq				OPTIONAL, -- Cond UTCModReq
+	gnss-AuxiliaryInformationReq	GNSS-AuxiliaryInformationReq	OPTIONAL, -- Cond AuxInfoReq
+	...,
+	[[
+		bds-DifferentialCorrectionsReq-r12	
+									BDS-DifferentialCorrectionsReq-r12
+																	OPTIONAL,	-- Cond DBDS-Req
+		bds-GridModelReq-r12		BDS-GridModelReq-r12			OPTIONAL	-- Cond BDS-GridModReq
+	]],
+	[[
+		gnss-RTK-ObservationsReq-r15
+									GNSS-RTK-ObservationsReq-r15	OPTIONAL,	-- Cond RTK-OSR-Req
+		glo-RTK-BiasInformationReq-r15	
+									GLO-RTK-BiasInformationReq-r15	OPTIONAL,	-- Cond GLO-CPB-Req
+		gnss-RTK-MAC-CorrectionDifferencesReq-r15
+									GNSS-RTK-MAC-CorrectionDifferencesReq-r15
+																	OPTIONAL,	-- Cond MAC-Req
+		gnss-RTK-ResidualsReq-r15	GNSS-RTK-ResidualsReq-r15		OPTIONAL,	-- Cond Res-Req
+		gnss-RTK-FKP-GradientsReq-r15
+									GNSS-RTK-FKP-GradientsReq-r15	OPTIONAL,	-- Cond FKP-Req
+		gnss-SSR-OrbitCorrectionsReq-r15
+									GNSS-SSR-OrbitCorrectionsReq-r15
+																	OPTIONAL, 	-- Cond OC-Req
+		gnss-SSR-ClockCorrectionsReq-r15
+									GNSS-SSR-ClockCorrectionsReq-r15
+																	OPTIONAL, 	-- Cond CC-Req
+		gnss-SSR-CodeBiasReq-r15	GNSS-SSR-CodeBiasReq-r15		OPTIONAL 	-- Cond CB-Req
+	]],
+	[[
+		gnss-SSR-URA-Req-r16		GNSS-SSR-URA-Req-r16			OPTIONAL,	-- Cond URA-Req
+		gnss-SSR-PhaseBiasReq-r16	GNSS-SSR-PhaseBiasReq-r16		OPTIONAL,	-- Cond PB-Req
+		gnss-SSR-STEC-CorrectionReq-r16
+									GNSS-SSR-STEC-CorrectionReq-r16	OPTIONAL,	-- Cond STEC-Req
+		gnss-SSR-GriddedCorrectionReq-r16	GNSS-SSR-GriddedCorrectionReq-r16
+																	OPTIONAL,	-- Cond Grid-Req
+		navic-DifferentialCorrectionsReq-r16	
+									NavIC-DifferentialCorrectionsReq-r16
+																OPTIONAL,	-- Cond DNavIC-Req
+		navic-GridModelReq-r16		NavIC-GridModelReq-r16		OPTIONAL	-- Cond NavIC-GridModReq
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-PeriodicAssistDataReq-r15 ::= SEQUENCE {	-- Twelve OPTIONAL components, all of type GNSS-PeriodicControlParam-r15: eight in the root, four r16 ones in the extension group.
+	gnss-RTK-PeriodicObservationsReq-r15	GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pOSR
+	glo-RTK-PeriodicBiasInformationReq-r15	GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pCPB
+	gnss-RTK-MAC-PeriodicCorrectionDifferencesReq-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pMAC
+	gnss-RTK-PeriodicResidualsReq-r15		GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pRes
+	gnss-RTK-FKP-PeriodicGradientsReq-r15	GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pFKP
+	gnss-SSR-PeriodicOrbitCorrectionsReq-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pOC
+	gnss-SSR-PeriodicClockCorrectionsReq-r15
+											GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pCC
+	gnss-SSR-PeriodicCodeBiasReq-r15		GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pCB
+	...,
+	[[
+	gnss-SSR-PeriodicURA-Req-r16			GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pURA
+	gnss-SSR-PeriodicPhaseBiasReq-r16		GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pPB
+	gnss-SSR-PeriodicSTEC-CorrectionReq-r16	GNSS-PeriodicControlParam-r15	OPTIONAL, -- Cond pSTEC
+	gnss-SSR-PeriodicGriddedCorrectionReq-r16	
+											GNSS-PeriodicControlParam-r15	OPTIONAL  -- Cond pGrid
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceTimeReq ::= SEQUENCE {	-- gnss-TimeReqPrefList is mandatory (1 to 8 GNSS-ID entries); the two BOOLEAN components are OPTIONAL.
+	gnss-TimeReqPrefList	SEQUENCE (SIZE (1..8)) OF GNSS-ID,
+	gps-TOW-assistReq		BOOLEAN								OPTIONAL, -- Cond gps
+	notOfLeapSecReq			BOOLEAN								OPTIONAL, -- Cond glonass
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceLocationReq ::=	SEQUENCE {	-- Carries no components; only the extension marker is present.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-IonosphericModelReq ::=	SEQUENCE {	-- All components are OPTIONAL; klobucharModel2Req-r16 is an extension addition.
+	klobucharModelReq		BIT STRING (SIZE(2))	OPTIONAL,	-- Cond klobuchar
+	neQuickModelReq			NULL					OPTIONAL,	-- Cond nequick
+	...,
+	[[
+		klobucharModel2Req-r16	NULL				OPTIONAL	-- Cond klobuchar2
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-EarthOrientationParametersReq ::=	SEQUENCE {	-- Carries no components; only the extension marker is present.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ReferenceStationInfoReq-r15 ::= SEQUENCE {	-- Three mandatory BOOLEAN flags followed by an OPTIONAL GNSS-ReferenceStationID-r15.
+	antennaDescriptionReq-r15			BOOLEAN,
+	antennaHeightReq-r15				BOOLEAN,
+	physicalReferenceStationReq-r15		BOOLEAN,
+	stationID-r15						GNSS-ReferenceStationID-r15		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-AuxiliaryStationDataReq-r15 ::= SEQUENCE {	-- Single OPTIONAL component of type GNSS-ReferenceStationID-r15.
+	master-referenceStationID-r15		GNSS-ReferenceStationID-r15		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-CorrectionPointsReq-r16 ::=	SEQUENCE {	-- Single OPTIONAL integer component constrained to 0..16383.
+	correctionPointSetID-Req-r16			INTEGER (0..16383)			OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-TimeModelListReq ::= SEQUENCE (SIZE(1..15)) OF GNSS-TimeModelElementReq	-- Type of the gnss-TimeModelsReq component earlier in this file; 1 to 15 elements.
+
+GNSS-TimeModelElementReq ::= SEQUENCE {	-- Both components are mandatory.
+	gnss-TO-IDsReq	INTEGER (1..15),
+	deltaTreq		BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DifferentialCorrectionsReq ::=	SEQUENCE {	-- Type of the gnss-DifferentialCorrectionsReq component; both components are mandatory.
+	dgnss-SignalsReq			GNSS-SignalIDs,
+	dgnss-ValidityTimeReq		BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-NavigationModelReq ::=	CHOICE {	-- Type of the gnss-NavigationModelReq component; extensible CHOICE between two alternatives.
+	storedNavList		StoredNavListInfo,
+	reqNavList			ReqNavListInfo,
+	...
+}
+
+
+StoredNavListInfo ::= SEQUENCE {	-- Alternative storedNavList of GNSS-NavigationModelReq: three mandatory integers and an OPTIONAL satellite list.
+	gnss-WeekOrDay			INTEGER (0..4095),
+	gnss-Toe				INTEGER (0..255),
+	t-toeLimit				INTEGER (0..15),
+	satListRelatedDataList	SatListRelatedDataList	OPTIONAL,
+	...
+}
+
+SatListRelatedDataList ::= SEQUENCE (SIZE (1..64)) OF SatListRelatedDataElement	-- Type of satListRelatedDataList in StoredNavListInfo; 1 to 64 elements.
+
+SatListRelatedDataElement ::= SEQUENCE {	-- svID and iod are mandatory; the two model identifiers are OPTIONAL.
+	svID				SV-ID,
+	iod					BIT STRING (SIZE(11)),
+	clockModelID		INTEGER (1..8)			OPTIONAL,
+	orbitModelID		INTEGER (1..8)			OPTIONAL,
+	...
+}
+
+ReqNavListInfo ::=	SEQUENCE {	-- Alternative reqNavList of GNSS-NavigationModelReq: mandatory 64-bit svReqList, everything else OPTIONAL.
+	svReqList				BIT STRING (SIZE (64)),
+	clockModelID-PrefList	SEQUENCE (SIZE (1..8)) OF	INTEGER (1..8)		OPTIONAL,
+	orbitModelID-PrefList	SEQUENCE (SIZE (1..8)) OF	INTEGER (1..8)		OPTIONAL,
+	addNavparamReq			BOOLEAN				OPTIONAL,	-- Cond orbitModelID-2
+	...
+}
+
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RealTimeIntegrityReq ::=	SEQUENCE {	-- Type of the gnss-RealTimeIntegrityReq component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DataBitAssistanceReq ::=	SEQUENCE {	-- Type of the gnss-DataBitAssistanceReq component; gnss-TOD-FracReq and gnss-DataBitsReq are the only OPTIONAL components.
+	gnss-TOD-Req		INTEGER (0..3599),
+	gnss-TOD-FracReq	INTEGER (0..999)		OPTIONAL,
+	dataBitInterval		INTEGER (0..15),
+	gnss-SignalType		GNSS-SignalIDs,
+	gnss-DataBitsReq	GNSS-DataBitsReqSatList	OPTIONAL,
+	...
+}
+
+GNSS-DataBitsReqSatList ::= SEQUENCE (SIZE(1..64)) OF GNSS-DataBitsReqSatElement	-- 1 to 64 elements.
+
+GNSS-DataBitsReqSatElement ::= SEQUENCE {	-- Wraps a single mandatory SV-ID.
+	svID				SV-ID,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AcquisitionAssistanceReq ::=	SEQUENCE {	-- Type of the gnss-AcquisitionAssistanceReq component; one mandatory GNSS-SignalID.
+	gnss-SignalID-Req		GNSS-SignalID,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AlmanacReq ::= SEQUENCE {	-- Type of the gnss-AlmanacReq component; modelID is OPTIONAL with range 1..8.
+	modelID				INTEGER(1..8)	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-UTC-ModelReq ::=	SEQUENCE {	-- Type of the gnss-UTCModelReq component; same shape as GNSS-AlmanacReq.
+	modelID				INTEGER(1..8)	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AuxiliaryInformationReq ::=	SEQUENCE {	-- Type of the gnss-AuxiliaryInformationReq component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-DifferentialCorrectionsReq-r12 ::=	SEQUENCE {	-- Type of the bds-DifferentialCorrectionsReq-r12 component; one mandatory GNSS-SignalIDs.
+	dgnss-SignalsReq			GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-GridModelReq-r12 ::=	SEQUENCE {	-- Type of the bds-GridModelReq-r12 component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ObservationsReq-r15::= 	SEQUENCE {	-- Type of the gnss-RTK-ObservationsReq-r15 component; only stationID-r15 is OPTIONAL.
+	gnss-RTK-SignalsReq-r15				GNSS-SignalIDs,
+	gnss-RTK-Integer-ms-Req-r15			BOOLEAN,
+	gnss-RTK-PhaseRangeRateReq-r15		BOOLEAN,
+	gnss-RTK-CNR-Req-r15				BOOLEAN,
+	stationID-r15						GNSS-ReferenceStationID-r15		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GLO-RTK-BiasInformationReq-r15 ::= 	SEQUENCE {	-- Type of the glo-RTK-BiasInformationReq-r15 component; single OPTIONAL GNSS-ReferenceStationID-r15.
+	stationID-r15						GNSS-ReferenceStationID-r15		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-MAC-CorrectionDifferencesReq-r15 ::= 	SEQUENCE {	-- Type of the gnss-RTK-MAC-CorrectionDifferencesReq-r15 component; all three components are OPTIONAL.
+	master-ReferenceStationID-r15		GNSS-ReferenceStationID-r15				OPTIONAL,
+	aux-ReferenceStationList-r15		AUX-ReferenceStationList-r15			OPTIONAL,
+	linkCombinations-PrefList-r15		GNSS-Link-CombinationsList-r15			OPTIONAL,
+	...
+}
+
+AUX-ReferenceStationList-r15 ::= SEQUENCE (SIZE (1..32)) OF AUX-ReferenceStationID-Element-r15	-- 1 to 32 elements.
+
+AUX-ReferenceStationID-Element-r15 ::= SEQUENCE {	-- Wraps a single mandatory GNSS-ReferenceStationID-r15.
+	aux-stationID-r15		GNSS-ReferenceStationID-r15,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ResidualsReq-r15 ::= 	SEQUENCE {	-- Type of the gnss-RTK-ResidualsReq-r15 component; both components are OPTIONAL.
+	stationID-r15						GNSS-ReferenceStationID-r15		OPTIONAL,
+	linkCombinations-PrefList-r15		GNSS-Link-CombinationsList-r15	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-FKP-GradientsReq-r15 ::= 	SEQUENCE {	-- Type of the gnss-RTK-FKP-GradientsReq-r15 component; same components as GNSS-RTK-ResidualsReq-r15.
+	stationID-r15						GNSS-ReferenceStationID-r15		OPTIONAL,
+	linkCombinations-PrefList-r15		GNSS-Link-CombinationsList-r15	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-OrbitCorrectionsReq-r15 ::= SEQUENCE {	-- Type of the gnss-SSR-OrbitCorrectionsReq-r15 component; single OPTIONAL GNSS-NavListInfo-r15.
+	storedNavList-r15				GNSS-NavListInfo-r15				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-ClockCorrectionsReq-r15 ::= SEQUENCE {	-- Type of the gnss-SSR-ClockCorrectionsReq-r15 component; same shape as GNSS-SSR-OrbitCorrectionsReq-r15.
+	storedNavList-r15				GNSS-NavListInfo-r15				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-CodeBiasReq-r15 ::= SEQUENCE {	-- Type of the gnss-SSR-CodeBiasReq-r15 component; the signal map is mandatory, storedNavList-r15 is OPTIONAL.
+	signal-and-tracking-mode-ID-Map-r15		GNSS-SignalIDs,
+	storedNavList-r15						GNSS-NavListInfo-r15				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-URA-Req-r16 ::= SEQUENCE {	-- Type of the gnss-SSR-URA-Req-r16 component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-PhaseBiasReq-r16 ::= SEQUENCE {	-- Type of the gnss-SSR-PhaseBiasReq-r16 component; storedNavList-r16 reuses the r15 type GNSS-NavListInfo-r15.
+	signal-and-tracking-mode-ID-Map-r16		GNSS-SignalIDs,
+	storedNavList-r16						GNSS-NavListInfo-r15				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-STEC-CorrectionReq-r16 ::= SEQUENCE {	-- Type of the gnss-SSR-STEC-CorrectionReq-r16 component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-GriddedCorrectionReq-r16 ::= SEQUENCE {	-- Type of the gnss-SSR-GriddedCorrectionReq-r16 component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-DifferentialCorrectionsReq-r16 ::=	SEQUENCE {	-- Type of the navic-DifferentialCorrectionsReq-r16 component; one mandatory GNSS-SignalIDs.
+	dgnss-SignalsReq-r16			GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-GridModelReq-r16 ::=	SEQUENCE {	-- Type of the navic-GridModelReq-r16 component; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-ProvideLocationInformation ::= SEQUENCE {	-- All three components are OPTIONAL.
+	gnss-SignalMeasurementInformation	GNSS-SignalMeasurementInformation		OPTIONAL,
+	gnss-LocationInformation			GNSS-LocationInformation				OPTIONAL,
+	gnss-Error							A-GNSS-Error							OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SignalMeasurementInformation ::= SEQUENCE {	-- Type of gnss-SignalMeasurementInformation in A-GNSS-ProvideLocationInformation; both components are mandatory.
+	measurementReferenceTime		MeasurementReferenceTime,
+	gnss-MeasurementList			GNSS-MeasurementList,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+MeasurementReferenceTime ::= SEQUENCE {	-- Used by GNSS-SignalMeasurementInformation and GNSS-LocationInformation; networkTime is an OPTIONAL extensible CHOICE.
+	gnss-TOD-msec		INTEGER (0..3599999),
+	gnss-TOD-frac		INTEGER (0..3999)			OPTIONAL,		
+	gnss-TOD-unc		INTEGER (0..127)			OPTIONAL,		
+	gnss-TimeID			GNSS-ID,
+	networkTime			CHOICE {
+		eUTRA	SEQUENCE {
+				physCellId			INTEGER (0..503),
+				cellGlobalId		CellGlobalIdEUTRA-AndUTRA		OPTIONAL,
+				systemFrameNumber	BIT STRING (SIZE (10)),
+				...
+				},
+		uTRA	SEQUENCE {
+				mode					CHOICE {
+										fdd			SEQUENCE {
+													primary-CPICH-Info	INTEGER (0..511),
+													...
+													},
+										tdd			SEQUENCE {
+													cellParameters		INTEGER (0..127),
+													...
+													}
+										},
+				cellGlobalId			CellGlobalIdEUTRA-AndUTRA		OPTIONAL,
+				referenceSystemFrameNumber
+										INTEGER (0..4095),
+				...
+				},
+		gSM		SEQUENCE {
+				bcchCarrier			INTEGER (0..1023),
+				bsic				INTEGER (0..63),
+				cellGlobalId		CellGlobalIdGERAN					OPTIONAL,
+				referenceFrame		SEQUENCE {
+									referenceFN		INTEGER (0..65535),
+									referenceFNMSB		INTEGER (0..63)		OPTIONAL,
+									...
+									},
+				deltaGNSS-TOD		INTEGER (0 .. 127)		OPTIONAL,
+				...
+				},
+		...,	-- The alternatives below (nbIoT-r14, nr-r15) are extension additions of the CHOICE.
+		nbIoT-r14
+				SEQUENCE {
+				nbPhysCellId-r14	INTEGER (0..503),
+				nbCellGlobalId-r14	ECGI						OPTIONAL,
+				sfn-r14				BIT STRING (SIZE (10)),
+				hyperSFN-r14		BIT STRING (SIZE (10))		OPTIONAL,
+				...
+				},
+		nr-r15	SEQUENCE {
+				nrPhysCellId-r15	INTEGER (0..1007),
+				nrCellGlobalID-r15	NCGI-r15					OPTIONAL,
+				nr-sfn-r15			BIT STRING (SIZE (10)),
+				...
+				}
+		}		OPTIONAL,												
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-MeasurementList ::= SEQUENCE (SIZE(1..16)) OF GNSS-MeasurementForOneGNSS	-- Type of gnss-MeasurementList in GNSS-SignalMeasurementInformation; 1 to 16 elements.
+
+GNSS-MeasurementForOneGNSS ::= SEQUENCE {	-- Both components are mandatory.
+	gnss-ID					GNSS-ID,
+	gnss-SgnMeasList		GNSS-SgnMeasList,
+	...
+}
+
+GNSS-SgnMeasList ::= SEQUENCE (SIZE(1..8)) OF GNSS-SgnMeasElement	-- 1 to 8 elements.
+
+GNSS-SgnMeasElement ::= SEQUENCE {	-- Only gnss-CodePhaseAmbiguity is OPTIONAL.
+	gnss-SignalID			GNSS-SignalID,
+	gnss-CodePhaseAmbiguity	INTEGER (0..127)		OPTIONAL,
+	gnss-SatMeasList		GNSS-SatMeasList,
+	...
+}
+
+GNSS-SatMeasList ::= SEQUENCE (SIZE(1..64)) OF GNSS-SatMeasElement	-- 1 to 64 elements.
+
+GNSS-SatMeasElement ::= SEQUENCE {	-- The four r15 additions, all OPTIONAL, sit in one extension group after the marker.
+	svID				SV-ID,
+	cNo					INTEGER (0..63),
+	mpathDet			ENUMERATED {notMeasured (0), low (1), medium (2), high (3), ...},
+	carrierQualityInd	INTEGER (0..3)				OPTIONAL,	
+	codePhase			INTEGER (0..2097151),
+	integerCodePhase	INTEGER (0..127)			OPTIONAL,
+	codePhaseRMSError	INTEGER (0..63),						
+	doppler				INTEGER (-32768..32767)	OPTIONAL,
+	adr					INTEGER (0..33554431)		OPTIONAL,
+	...,
+	[[
+		adrMSB-r15			INTEGER (0..15)						OPTIONAL,
+		adrSign-r15			ENUMERATED {positive, negative}		OPTIONAL,
+		adrRMSerror-r15		INTEGER (0..127)					OPTIONAL,
+		delta-codePhase-r15	INTEGER (0..7)						OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-LocationInformation ::= SEQUENCE {	-- Type of gnss-LocationInformation in A-GNSS-ProvideLocationInformation; both components are mandatory.
+	measurementReferenceTime		MeasurementReferenceTime,
+	agnss-List						GNSS-ID-Bitmap,					
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-RequestLocationInformation ::= SEQUENCE {	-- One mandatory component of type GNSS-PositioningInstructions.
+	gnss-PositioningInstructions		GNSS-PositioningInstructions,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-PositioningInstructions ::= SEQUENCE {	-- Five mandatory root components; ha-GNSS-Req-r15 is an OPTIONAL extension addition.
+	gnss-Methods				GNSS-ID-Bitmap,	
+	fineTimeAssistanceMeasReq	BOOLEAN,
+	adrMeasReq					BOOLEAN,
+	multiFreqMeasReq			BOOLEAN,
+	assistanceAvailability		BOOLEAN,
+	...,
+	[[
+		ha-GNSS-Req-r15			ENUMERATED { true }		OPTIONAL	-- Cond UEB
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-ProvideCapabilities ::= SEQUENCE {	-- All root components are OPTIONAL; r14 and r15 additions are in separate extension groups.
+	gnss-SupportList			GNSS-SupportList				OPTIONAL,
+	assistanceDataSupportList	AssistanceDataSupportList		OPTIONAL,
+	locationCoordinateTypes		LocationCoordinateTypes			OPTIONAL,
+	velocityTypes				VelocityTypes					OPTIONAL,
+	...,
+	[[ periodicalReportingNotSupported-r14
+								PositioningModes				OPTIONAL,
+		idleStateForMeasurements-r14		
+								ENUMERATED { required }			OPTIONAL
+	]],
+	[[ periodicAssistanceData-r15
+								BIT STRING { solicited 	 (0),
+											 unsolicited (1)	} (SIZE (1..8))		OPTIONAL
+	]]
+}
+
+GNSS-SupportList ::= SEQUENCE (SIZE(1..16)) OF GNSS-SupportElement	-- Type of gnss-SupportList in A-GNSS-ProvideCapabilities; 1 to 16 elements.
+
+GNSS-SupportElement ::= SEQUENCE {	-- sbas-IDs and fta-MeasSupport are the only OPTIONAL root components; two OPTIONAL r15 additions follow the marker.
+	gnss-ID							GNSS-ID,
+	sbas-IDs						SBAS-IDs					OPTIONAL,	-- Cond GNSS-ID-SBAS
+	agnss-Modes						PositioningModes,
+	gnss-Signals					GNSS-SignalIDs,
+	fta-MeasSupport					SEQUENCE {
+										cellTime	AccessTypes,
+										mode		PositioningModes,
+										...
+									}							OPTIONAL,	-- Cond fta
+	adr-Support						BOOLEAN,
+	velocityMeasurementSupport		BOOLEAN,
+	...,
+	[[
+		adrEnhancementsSupport-r15	ENUMERATED { true }			OPTIONAL,
+		ha-gnss-Modes-r15			PositioningModes			OPTIONAL
+	]]
+}
+
+AssistanceDataSupportList ::= SEQUENCE {	-- Despite the List suffix this is a SEQUENCE with two mandatory components, not a SEQUENCE OF.
+	gnss-CommonAssistanceDataSupport	GNSS-CommonAssistanceDataSupport,
+	gnss-GenericAssistanceDataSupport	GNSS-GenericAssistanceDataSupport,
+	...
+}
+
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-CommonAssistanceDataSupport ::= SEQUENCE {	-- Every component is OPTIONAL; the two r15 ones are in the extension group.
+	gnss-ReferenceTimeSupport				GNSS-ReferenceTimeSupport				
+																OPTIONAL, -- Cond RefTimeSup
+	gnss-ReferenceLocationSupport			GNSS-ReferenceLocationSupport			
+																OPTIONAL, -- Cond RefLocSup
+	gnss-IonosphericModelSupport			GNSS-IonosphericModelSupport				
+																OPTIONAL, -- Cond IonoModSup
+	gnss-EarthOrientationParametersSupport	GNSS-EarthOrientationParametersSupport	
+																OPTIONAL, -- Cond EOPSup
+	...,
+	[[
+		gnss-RTK-ReferenceStationInfoSupport-r15	
+											GNSS-RTK-ReferenceStationInfoSupport-r15
+																OPTIONAL, -- Cond ARPSup
+		gnss-RTK-AuxiliaryStationDataSupport-r15
+											GNSS-RTK-AuxiliaryStationDataSupport-r15
+																OPTIONAL -- Cond AuxARPSup
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceTimeSupport ::=	SEQUENCE {	-- Type of gnss-ReferenceTimeSupport; gnss-SystemTime is mandatory, fta-Support is OPTIONAL.
+	gnss-SystemTime		GNSS-ID-Bitmap,
+	fta-Support			AccessTypes										OPTIONAL, -- Cond fta
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceLocationSupport ::=	SEQUENCE {	-- Type of gnss-ReferenceLocationSupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-IonosphericModelSupport ::=	SEQUENCE {	-- Type of gnss-IonosphericModelSupport; mandatory bitmap of 1 to 8 bits with three named positions.
+	ionoModel		BIT STRING {	klobuchar	(0),
+									neQuick		(1),
+									klobuchar2-r16	(2) } (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-EarthOrientationParametersSupport ::=	SEQUENCE {	-- Type of gnss-EarthOrientationParametersSupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ReferenceStationInfoSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-ReferenceStationInfoSupport-r15; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-AuxiliaryStationDataSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-AuxiliaryStationDataSupport-r15; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-GenericAssistanceDataSupport ::=	-- Type of gnss-GenericAssistanceDataSupport in AssistanceDataSupportList; 1 to 16 elements.
+								SEQUENCE (SIZE (1..16)) OF GNSS-GenericAssistDataSupportElement
+
+GNSS-GenericAssistDataSupportElement ::= SEQUENCE {	-- gnss-ID is the only mandatory component; r12, r15 and r16 additions are in three extension groups.
+	gnss-ID								GNSS-ID,
+	sbas-ID								SBAS-ID						OPTIONAL, -- Cond GNSSIDSBAS
+	gnss-TimeModelsSupport				GNSS-TimeModelListSupport
+																	OPTIONAL, -- Cond TimeModSup
+	gnss-DifferentialCorrectionsSupport	GNSS-DifferentialCorrectionsSupport
+																	OPTIONAL, -- Cond DGNSS-Sup
+	gnss-NavigationModelSupport			GNSS-NavigationModelSupport
+																	OPTIONAL, -- Cond NavModSup
+	gnss-RealTimeIntegritySupport		GNSS-RealTimeIntegritySupport
+																	OPTIONAL, -- Cond RTISup
+	gnss-DataBitAssistanceSupport		GNSS-DataBitAssistanceSupport
+																	OPTIONAL, -- Cond DataBitsSup
+	gnss-AcquisitionAssistanceSupport	GNSS-AcquisitionAssistanceSupport
+																	OPTIONAL, -- Cond AcquAssistSup
+	gnss-AlmanacSupport					GNSS-AlmanacSupport
+																	OPTIONAL, -- Cond AlmanacSup
+	gnss-UTC-ModelSupport				GNSS-UTC-ModelSupport
+																	OPTIONAL, -- Cond UTCModSup
+	gnss-AuxiliaryInformationSupport	GNSS-AuxiliaryInformationSupport
+																	OPTIONAL, -- Cond AuxInfoSup
+	...,
+	[[
+		bds-DifferentialCorrectionsSupport-r12
+										BDS-DifferentialCorrectionsSupport-r12
+																	OPTIONAL, -- Cond DBDS-Sup
+		bds-GridModelSupport-r12		BDS-GridModelSupport-r12
+																	OPTIONAL	-- Cond BDS-GridModSup
+	]],
+	[[
+		gnss-RTK-ObservationsSupport-r15
+										GNSS-RTK-ObservationsSupport-r15
+																	OPTIONAL,	-- Cond RTK-OSR-Sup
+		glo-RTK-BiasInformationSupport-r15	
+										GLO-RTK-BiasInformationSupport-r15	
+																	OPTIONAL,	-- Cond GLO-CPB-Sup
+		gnss-RTK-MAC-CorrectionDifferencesSupport-r15
+										GNSS-RTK-MAC-CorrectionDifferencesSupport-r15
+																	OPTIONAL,	-- Cond MAC-Sup
+		gnss-RTK-ResidualsSupport-r15	GNSS-RTK-ResidualsSupport-r15
+																	OPTIONAL,	-- Cond Res-Sup
+		gnss-RTK-FKP-GradientsSupport-r15
+										GNSS-RTK-FKP-GradientsSupport-r15
+																	OPTIONAL,	-- Cond FKP-Sup
+		gnss-SSR-OrbitCorrectionsSupport-r15
+										GNSS-SSR-OrbitCorrectionsSupport-r15
+																	OPTIONAL, 	-- Cond OC-Sup
+		gnss-SSR-ClockCorrectionsSupport-r15
+										GNSS-SSR-ClockCorrectionsSupport-r15
+																	OPTIONAL, 	-- Cond CC-Sup
+		gnss-SSR-CodeBiasSupport-r15	GNSS-SSR-CodeBiasSupport-r15
+																	OPTIONAL 	-- Cond CB-Sup
+	]],
+	[[
+		gnss-SSR-URA-Support-r16		GNSS-SSR-URA-Support-r16	OPTIONAL,	-- Cond URA-Sup
+		gnss-SSR-PhaseBiasSupport-r16	GNSS-SSR-PhaseBiasSupport-r16		
+																	OPTIONAL,	-- Cond PB-Sup
+		gnss-SSR-STEC-CorrectionSupport-r16
+										GNSS-SSR-STEC-CorrectionSupport-r16
+																	OPTIONAL,	-- Cond STEC-Sup
+		gnss-SSR-GriddedCorrectionSupport-r16
+										GNSS-SSR-GriddedCorrectionSupport-r16
+																	OPTIONAL,	-- Cond Grid-Sup
+		navic-DifferentialCorrectionsSupport-r16
+										NavIC-DifferentialCorrectionsSupport-r16
+																OPTIONAL,	-- Cond DNavIC-Sup
+		navic-GridModelSupport-r16		NavIC-GridModelSupport-r16
+																OPTIONAL	-- Cond NavIC-GridModSup
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-TimeModelListSupport ::=	SEQUENCE {	-- Type of gnss-TimeModelsSupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DifferentialCorrectionsSupport ::=	SEQUENCE {	-- Type of gnss-DifferentialCorrectionsSupport; both components are mandatory.
+	gnssSignalIDs			GNSS-SignalIDs,
+	dgnss-ValidityTimeSup	BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-NavigationModelSupport ::= SEQUENCE {	-- Type of gnss-NavigationModelSupport; two OPTIONAL bitmaps with identical named bits (bits 6 and 7 carry an r16 suffix).
+	clockModel		BIT STRING {	model-1		(0),
+									model-2		(1),
+									model-3		(2),
+									model-4		(3),
+									model-5		(4),
+									model-6		(5),
+									model-7-r16	(6),
+									model-8-r16	(7) } (SIZE (1..8))		OPTIONAL,
+	orbitModel		BIT STRING {	model-1		(0),
+									model-2		(1),
+									model-3		(2),
+									model-4		(3),
+									model-5		(4),
+									model-6		(5),
+									model-7-r16	(6),
+									model-8-r16	(7) } (SIZE (1..8))		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RealTimeIntegritySupport ::=	SEQUENCE {	-- Type of gnss-RealTimeIntegritySupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-DataBitAssistanceSupport ::=	SEQUENCE {	-- Type of gnss-DataBitAssistanceSupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AcquisitionAssistanceSupport ::=	SEQUENCE {	-- Type of gnss-AcquisitionAssistanceSupport; no root components, the two r10 flags are extension additions outside any [[ ]] group.
+	...,
+	confidenceSupport-r10					ENUMERATED { true }		OPTIONAL,
+	dopplerUncertaintyExtSupport-r10		ENUMERATED { true }		OPTIONAL
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AlmanacSupport ::=	SEQUENCE {	-- Type of gnss-AlmanacSupport; OPTIONAL bitmap of 1 to 8 bits with seven named positions.
+	almanacModel		BIT STRING {	model-1		(0),
+										model-2		(1),
+										model-3		(2),
+										model-4		(3),
+										model-5		(4),
+										model-6		(5),
+										model-7		(6) } (SIZE (1..8))		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-UTC-ModelSupport ::=	SEQUENCE {	-- Type of gnss-UTC-ModelSupport; OPTIONAL bitmap of 1 to 8 bits with five named positions.
+	utc-Model		BIT STRING {	model-1		(0),
+									model-2		(1),
+									model-3		(2),
+									model-4		(3),
+									model-5		(4) } (SIZE (1..8))		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-AuxiliaryInformationSupport ::=	SEQUENCE {	-- Type of gnss-AuxiliaryInformationSupport; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-DifferentialCorrectionsSupport-r12 ::=	SEQUENCE {	-- Type of bds-DifferentialCorrectionsSupport-r12; one mandatory GNSS-SignalIDs.
+	gnssSignalIDs			GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BDS-GridModelSupport-r12 ::=	SEQUENCE {	-- Type of bds-GridModelSupport-r12; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ObservationsSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-ObservationsSupport-r15; one mandatory GNSS-SignalIDs.
+	gnssSignalIDs-r15			GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GLO-RTK-BiasInformationSupport-r15 ::= 	SEQUENCE {	-- Type of glo-RTK-BiasInformationSupport-r15; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-MAC-CorrectionDifferencesSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-MAC-CorrectionDifferencesSupport-r15; one mandatory GNSS-Link-CombinationsList-r15.
+	link-combinations-support-r15		GNSS-Link-CombinationsList-r15,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-ResidualsSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-ResidualsSupport-r15; one mandatory GNSS-Link-CombinationsList-r15.
+	link-combinations-support-r15		GNSS-Link-CombinationsList-r15,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-RTK-FKP-GradientsSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-RTK-FKP-GradientsSupport-r15; one mandatory GNSS-Link-CombinationsList-r15.
+	link-combinations-support-r15		GNSS-Link-CombinationsList-r15,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-OrbitCorrectionsSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-SSR-OrbitCorrectionsSupport-r15; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-ClockCorrectionsSupport-r15 ::= 	SEQUENCE {	-- Type of gnss-SSR-ClockCorrectionsSupport-r15; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-CodeBiasSupport-r15 ::= SEQUENCE {	-- Type of gnss-SSR-CodeBiasSupport-r15; one mandatory GNSS-SignalIDs.
+	signal-and-tracking-mode-ID-Sup-r15		GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-URA-Support-r16 ::= SEQUENCE {	-- Type of gnss-SSR-URA-Support-r16; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-PhaseBiasSupport-r16 ::= SEQUENCE {	-- Type of gnss-SSR-PhaseBiasSupport-r16; one mandatory GNSS-SignalIDs.
+	signal-and-tracking-mode-ID-Sup-r16		GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-STEC-CorrectionSupport-r16 ::= 	SEQUENCE {	-- Type of gnss-SSR-STEC-CorrectionSupport-r16; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SSR-GriddedCorrectionSupport-r16 ::= 	SEQUENCE {	-- Type of gnss-SSR-GriddedCorrectionSupport-r16; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-DifferentialCorrectionsSupport-r16 ::=	SEQUENCE {	-- Type of navic-DifferentialCorrectionsSupport-r16; one mandatory GNSS-SignalIDs.
+	gnssSignalIDs-r16			GNSS-SignalIDs,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NavIC-GridModelSupport-r16 ::=	SEQUENCE {	-- Type of navic-GridModelSupport-r16; no components, only the extension marker.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-RequestCapabilities ::= SEQUENCE {	-- Three mandatory BOOLEAN components.
+	gnss-SupportListReq				BOOLEAN,
+	assistanceDataSupportListReq	BOOLEAN,
+	locationVelocityTypesReq		BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+A-GNSS-Error ::= CHOICE {	-- Type of gnss-Error in A-GNSS-ProvideLocationInformation; extensible CHOICE between two cause containers.
+	locationServerErrorCauses		GNSS-LocationServerErrorCauses,
+	targetDeviceErrorCauses			GNSS-TargetDeviceErrorCauses,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-LocationServerErrorCauses ::= SEQUENCE {	-- Alternative locationServerErrorCauses of A-GNSS-Error; four root cause values, four v1510 extension values.
+	cause		ENUMERATED	{	undefined,
+					undeliveredAssistanceDataIsNotSupportedByServer,
+					undeliveredAssistanceDataIsSupportedButCurrentlyNotAvailableByServer,
+					undeliveredAssistanceDataIsPartlyNotSupportedAndPartlyNotAvailableByServer,
+					...,
+					unconfirmedPeriodicAssistanceDataIsNotSupported-v1510,
+					unconfirmedPeriodicAssistanceDataIsSupportedButCurrentlyNotAvailable-v1510,
+					unconfirmedPeriodicAssistanceDataIsPartlyNotSupportedAndPartlyNotAvailable-v1510,
+					undeliveredPeriodicAssistanceDataIsCurrentlyNotAvailable-v1510
+				},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-TargetDeviceErrorCauses ::= SEQUENCE {	-- Alternative targetDeviceErrorCauses of A-GNSS-Error; cause is mandatory, the three NULL flags are OPTIONAL.
+	cause		ENUMERATED {	undefined,
+								thereWereNotEnoughSatellitesReceived,
+								assistanceDataMissing,
+								notAllRequestedMeasurementsPossible,
+								...
+							},
+	fineTimeAssistanceMeasurementsNotPossible		NULL		OPTIONAL,
+	adrMeasurementsNotPossible						NULL		OPTIONAL,
+	multiFrequencyMeasurementsNotPossible			NULL		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-FrequencyID-r15	::= SEQUENCE {	-- Wraps one mandatory integer in 0..7; used by GNSS-Link-Combinations-r15.
+	gnss-FrequencyID-r15		INTEGER (0 .. 7),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ID ::= SEQUENCE {	-- Wraps one extensible enumeration; bds and navic-v1610 are extension values after the marker.
+	gnss-id				ENUMERATED{ gps, sbas, qzss, galileo, glonass, ..., bds, navic-v1610 },
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ID-Bitmap ::= SEQUENCE {	-- Bit positions 0 to 6 name the same systems, in the same order, as the GNSS-ID enumeration.
+	gnss-ids			BIT STRING {	gps			(0),	
+										sbas		(1),
+										qzss		(2),
+										galileo		(3),
+										glonass		(4),
+										bds			(5),
+										navic-v1610	(6) } (SIZE (1..16)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-Link-CombinationsList-r15 ::= SEQUENCE (SIZE(1..8)) OF GNSS-Link-Combinations-r15	-- 1 to 8 elements.
+
+GNSS-Link-Combinations-r15 ::= SEQUENCE {	-- Pair of mandatory GNSS-FrequencyID-r15 values.
+	l1-r15		GNSS-FrequencyID-r15,
+	l2-r15		GNSS-FrequencyID-r15,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-NavListInfo-r15 ::= SEQUENCE (SIZE (1..64)) OF SatListElement-r15	-- Type of the storedNavList components in the SSR request types; 1 to 64 elements.
+
+SatListElement-r15 ::= SEQUENCE {	-- Both components are mandatory.
+	svID-r15		SV-ID,
+	iod-r15			BIT STRING (SIZE(11)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-NetworkID-r15 ::= SEQUENCE {	-- Wraps one mandatory integer in 0..255.
+	networkID-r15					INTEGER (0..255),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-PeriodicControlParam-r15 ::= SEQUENCE {	-- Type of every component of GNSS-PeriodicAssistDataReq-r15; both integers are mandatory.
+	deliveryAmount-r15			INTEGER (1..32),
+	deliveryInterval-r15		INTEGER (1..64),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-ReferenceStationID-r15 ::= SEQUENCE {	-- referenceStationID-r15 is mandatory; providerName-r15 is an OPTIONAL string of 1 to 32 characters.
+	referenceStationID-r15			INTEGER (0..65535),
+	providerName-r15				VisibleString (SIZE (1..32))		OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SignalID	::= SEQUENCE {	-- Root component covers 0..7; the OPTIONAL r15 extension component covers 8..23.
+	gnss-SignalID		INTEGER (0 .. 7),
+	...,
+	[[
+		gnss-SignalID-Ext-r15	INTEGER (8..23)			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SignalIDs	::= SEQUENCE {	-- Mandatory 8-bit string; the OPTIONAL r15 extension component adds a 16-bit string.
+	gnss-SignalIDs		BIT STRING (SIZE(8)),
+	...,
+	[[
+		gnss-SignalIDs-Ext-r15	BIT STRING (SIZE(16))			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+GNSS-SubNetworkID-r15 ::= SEQUENCE {	-- Wraps one mandatory integer in 0..15.
+	subNetworkID-r15				INTEGER (0..15),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+SBAS-ID ::= SEQUENCE {	-- Wraps one extensible enumeration with four root values.
+	sbas-id				ENUMERATED { waas, egnos, msas, gagan, ...},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+SBAS-IDs ::= SEQUENCE {	-- Bit positions 0 to 3 follow the order of the SBAS-ID enumeration.
+	sbas-IDs		BIT STRING {	waas		(0),	
+									egnos		(1),
+									msas		(2),
+									gagan		(3)	} (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+SV-ID ::= SEQUENCE {	-- Wraps one mandatory integer in 0..63.
+	satellite-id		INTEGER(0..63),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-ProvideLocationInformation ::= SEQUENCE {	-- Both components are OPTIONAL.
+	ecid-SignalMeasurementInformation	ECID-SignalMeasurementInformation		OPTIONAL,
+	ecid-Error							ECID-Error								OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-SignalMeasurementInformation ::= SEQUENCE {	-- primaryCellMeasuredResults is OPTIONAL; measuredResultsList is mandatory.
+	primaryCellMeasuredResults	MeasuredResultsElement	OPTIONAL,
+	measuredResultsList			MeasuredResultsList,
+	...
+}
+
+MeasuredResultsList ::= SEQUENCE (SIZE(1..32)) OF MeasuredResultsElement	-- Type of measuredResultsList in ECID-SignalMeasurementInformation; 1 to 32 elements.
+
+MeasuredResultsElement ::= SEQUENCE {	-- physCellId and arfcnEUTRA are the only mandatory components; three extension groups follow the marker.
+	physCellId						INTEGER (0..503),
+	cellGlobalId					CellGlobalIdEUTRA-AndUTRA	OPTIONAL,
+	arfcnEUTRA						ARFCN-ValueEUTRA,
+	systemFrameNumber				BIT STRING (SIZE (10))		OPTIONAL,
+	rsrp-Result						INTEGER (0..97)				OPTIONAL,
+	rsrq-Result						INTEGER (0..34)				OPTIONAL,
+	ue-RxTxTimeDiff					INTEGER (0..4095)			OPTIONAL,
+	...,
+	[[	arfcnEUTRA-v9a0				ARFCN-ValueEUTRA-v9a0		OPTIONAL		-- Cond EARFCN-max
+	]],
+	[[	nrsrp-Result-r14			INTEGER (0..113)			OPTIONAL,
+		nrsrq-Result-r14			INTEGER (0..74)				OPTIONAL,
+		carrierFreqOffsetNB-r14		CarrierFreqOffsetNB-r14		OPTIONAL,		-- Cond NB-IoT
+		hyperSFN-r14				BIT STRING (SIZE (10))		OPTIONAL
+	]],
+	[[
+		rsrp-Result-v1470			INTEGER (-17..-1)			OPTIONAL,
+		rsrq-Result-v1470			INTEGER (-30..46)			OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-RequestLocationInformation ::= SEQUENCE {	-- One mandatory bitmap of 1 to 8 bits with five named positions.
+	requestedMeasurements		BIT STRING {	rsrpReq		(0),
+												rsrqReq		(1),
+												ueRxTxReq	(2),
+												nrsrpReq-r14	(3),
+												nrsrqReq-r14	(4)} (SIZE(1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-ProvideCapabilities ::= SEQUENCE {	-- Mandatory bitmap in the root; r13 and r14 additions (all OPTIONAL) are in separate extension groups.
+	ecid-MeasSupported	BIT STRING {	rsrpSup		(0),
+										rsrqSup		(1),
+										ueRxTxSup	(2),
+										nrsrpSup-r14	(3),
+										nrsrqSup-r14	(4)} (SIZE(1..8)),
+	...,
+	[[	ueRxTxSupTDD-r13					ENUMERATED { true }				OPTIONAL
+	]],
+	[[	periodicalReporting-r14				ENUMERATED { supported }		OPTIONAL,
+		triggeredReporting-r14				ENUMERATED { supported }		OPTIONAL,
+		idleStateForMeasurements-r14		ENUMERATED { required }			OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-RequestCapabilities ::= SEQUENCE {	-- Carries no components; only the extension marker is present.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-Error ::= CHOICE {	-- Type of ecid-Error in ECID-ProvideLocationInformation; extensible CHOICE between two cause containers.
+	locationServerErrorCauses		ECID-LocationServerErrorCauses,
+	targetDeviceErrorCauses			ECID-TargetDeviceErrorCauses,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-LocationServerErrorCauses ::= SEQUENCE {	-- The only cause value defined is undefined; the enumeration is extensible.
+	cause		ENUMERATED	{	undefined,								
+								...
+							},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+ECID-TargetDeviceErrorCauses ::= SEQUENCE {	-- cause is mandatory; the NULL flags are OPTIONAL, the two r14 ones being extension additions.
+	cause		ENUMERATED {	undefined,
+								requestedMeasurementNotAvailable,
+								notAllrequestedMeasurementsPossible,
+								...
+							},
+	rsrpMeasurementNotPossible				NULL		OPTIONAL,
+	rsrqMeasurementNotPossible				NULL		OPTIONAL,
+	ueRxTxMeasurementNotPossible			NULL		OPTIONAL,
+	...,
+	[[
+		nrsrpMeasurementNotPossible-r14		NULL		OPTIONAL,
+		nrsrqMeasurementNotPossible-r14		NULL		OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-ProvideLocationInformation-r13 ::= SEQUENCE {	-- Both components are OPTIONAL.
+	tbs-MeasurementInformation-r13			TBS-MeasurementInformation-r13		OPTIONAL,
+	tbs-Error-r13							TBS-Error-r13						OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-MeasurementInformation-r13 ::= SEQUENCE {	-- Type of tbs-MeasurementInformation-r13; both components are OPTIONAL.
+	measurementReferenceTime-r13	UTCTime						OPTIONAL,
+	mbs-SgnMeasList-r13				MBS-BeaconMeasList-r13		OPTIONAL,	-- Cond MBS
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+MBS-BeaconMeasList-r13 ::= SEQUENCE (SIZE(1..64)) OF MBS-BeaconMeasElement-r13	-- Type of mbs-SgnMeasList-r13 in TBS-MeasurementInformation-r13; 1 to 64 elements.
+
+MBS-BeaconMeasElement-r13 ::= SEQUENCE {	-- Three mandatory integers; rssi-r14 is an OPTIONAL extension addition.
+	transmitterID-r13				INTEGER (0..32767),
+	codePhase-r13					INTEGER (0..2097151),
+	codePhaseRMSError-r13			INTEGER (0..63),
+	...,
+	[[ rssi-r14						INTEGER (-130..-30)		OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-RequestLocationInformation-r13 ::= SEQUENCE {	-- One mandatory BOOLEAN in the root; both r14 extension additions are OPTIONAL.
+	mbsSgnMeasListReq-r13				BOOLEAN,
+	...,
+	[[	mbsAssistanceAvailability-r14	BOOLEAN							OPTIONAL,	-- Need ON
+		mbsRequestedMeasurements-r14	BIT STRING {
+										rssi		(0)} (SIZE(1..8))	OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-ProvideCapabilities-r13 ::= SEQUENCE {	-- Mandatory tbs-Modes-r13 bitmap in the root; all four r14 extension additions are OPTIONAL.
+	tbs-Modes-r13			BIT STRING {	standalone		(0),
+											ue-assisted		(1),
+											ue-based		(2)} (SIZE (1..8)),
+	...,
+	[[	mbs-AssistanceDataSupportList-r14	MBS-AssistanceDataSupportList-r14		OPTIONAL,
+		periodicalReportingSupported-r14	PositioningModes						OPTIONAL,
+		mbs-ConfigSupport-r14	BIT STRING {	tb1		(0),
+												tb2		(1),
+												tb3		(2),
+												tb4		(3)} (SIZE (1..8))			OPTIONAL,
+		mbs-IdleStateForMeasurements-r14		ENUMERATED { required }				OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+MBS-AssistanceDataSupportList-r14 ::= SEQUENCE {	-- Type of mbs-AssistanceDataSupportList-r14 in TBS-ProvideCapabilities-r13; two mandatory BOOLEANs.
+	mbs-AcquisitionAssistanceDataSupport-r14	BOOLEAN,
+	mbs-AlmanacAssistanceDataSupport-r14		BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-RequestCapabilities-r13 ::= SEQUENCE {	-- Carries no components; only the extension marker is present.
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-Error-r13 ::= CHOICE {	-- Type of both tbs-Error-r13 and tbs-Error-r14; extensible CHOICE between two cause containers.
+	locationServerErrorCauses-r13		TBS-LocationServerErrorCauses-r13,
+	targetDeviceErrorCauses-r13			TBS-TargetDeviceErrorCauses-r13,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-LocationServerErrorCauses-r13 ::= SEQUENCE {	-- The two v1420 cause values are extension additions of the enumeration.
+	cause-r13		ENUMERATED	{	undefined,
+									...,
+									assistanceDataNotSupportedByServer-v1420,
+									assistanceDataSupportedButCurrentlyNotAvailableByServer-v1420
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-TargetDeviceErrorCauses-r13 ::= SEQUENCE {	-- Two root cause values; assistanceDataMissing-v1420 is an extension addition of the enumeration.
+	cause-r13		ENUMERATED {	undefined,
+									thereWereNotEnoughMBSBeaconsReceived,
+									...,
+									assistanceDataMissing-v1420
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-ProvideAssistanceData-r14 ::= SEQUENCE {	-- Both components are OPTIONAL; tbs-Error-r14 reuses the r13 type TBS-Error-r13.
+	tbs-AssistanceDataList-r14	TBS-AssistanceDataList-r14	OPTIONAL,	-- Need ON
+	tbs-Error-r14				TBS-Error-r13				OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-AssistanceDataList-r14 ::= SEQUENCE {	-- Despite the List suffix this is a SEQUENCE with a single OPTIONAL component.
+	mbs-AssistanceDataList-r14		MBS-AssistanceDataList-r14		OPTIONAL,	-- Need ON
+	...
+}
+
+MBS-AssistanceDataList-r14 ::= SEQUENCE (SIZE (1..maxMBS-r14)) OF MBS-AssistanceDataElement-r14	-- Upper bound maxMBS-r14 is not defined in this section.
+
+MBS-AssistanceDataElement-r14 ::= SEQUENCE {	-- Both components are OPTIONAL.
+	mbs-AlmanacAssistance-r14		MBS-AlmanacAssistance-r14			OPTIONAL,	-- Need ON
+	mbs-AcquisitionAssistance-r14	MBS-AcquisitionAssistance-r14		OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+MBS-AlmanacAssistance-r14 ::= SEQUENCE {	-- extensible SEQUENCE; timeCorrection-r14 is the only OPTIONAL component
+	transmitterID-r14			INTEGER (0..32767),
+	transmitterLatitude-r14		BIT STRING (SIZE (26)),	-- fixed length: exactly 26 bits
+	transmitterLongitude-r14	BIT STRING (SIZE (27)),	-- fixed length: exactly 27 bits
+	transmitterAltitude-r14		BIT STRING (SIZE (15)),	-- fixed length: exactly 15 bits
+	timeCorrection-r14			INTEGER (0..25)		OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+MBS-AcquisitionAssistance-r14 ::= SEQUENCE {	-- extensible SEQUENCE in which every root component is OPTIONAL
+	transmitterID-r14				INTEGER (0..32767)						OPTIONAL,	-- Need ON
+	mbsConfiguration-r14			ENUMERATED {tb1, tb2, tb3, tb4, ...}	OPTIONAL,	-- Need ON
+	pnCodeIndex-r14					INTEGER (1..128)						OPTIONAL,	-- Need ON
+	freq-r14						INTEGER (919750000..927250000)			OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+TBS-RequestAssistanceData-r14 ::= SEQUENCE {
+	mbs-AlmanacAssistanceDataReq-r14		BOOLEAN,
+	mbs-AcquisitionAssistanceDataReq-r14	BOOLEAN,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-ProvideLocationInformation-r13 ::= SEQUENCE {
+	sensor-MeasurementInformation-r13		Sensor-MeasurementInformation-r13		OPTIONAL,
+	sensor-Error-r13						Sensor-Error-r13						OPTIONAL,
+	...,
+	[[
+	sensor-MotionInformation-r15			Sensor-MotionInformation-r15			OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-MeasurementInformation-r13 ::= SEQUENCE {	-- two OPTIONAL root components followed by two extension addition groups
+	measurementReferenceTime-r13		UTCTime						OPTIONAL,
+	uncompensatedBarometricPressure-r13	INTEGER (30000..115000)		OPTIONAL, -- Cond Barometer
+	...,
+	[[	-- first extension addition group: uncertainty-r14
+	uncertainty-r14					SEQUENCE {
+									range-r14		INTEGER (0..1000),
+									confidence-r14	INTEGER (1..100)
+									}									OPTIONAL
+	]],
+	[[	adjustment-r16				INTEGER (-5000..5000) 				OPTIONAL	-- second extension addition group
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-MotionInformation-r15 ::= SEQUENCE {
+	refTime-r15					DisplacementTimeStamp-r15,
+	displacementInfoList-r15	DisplacementInfoList-r15,
+	...
+}
+
+DisplacementInfoList-r15 ::= SEQUENCE (SIZE (1..128)) OF DisplacementInfoListElement-r15
+
+DisplacementInfoListElement-r15 ::= SEQUENCE {
+	deltaTimeStamp-r15			DeltaTime-r15,
+	displacement-r15			Displacement-r15			OPTIONAL,
+	...
+}
+
+DisplacementTimeStamp-r15 ::= CHOICE {
+	utcTime-r15					UTC-Time-r15,
+	gnssTime-r15				MeasurementReferenceTime,
+	systemFrameNumber-r15		SFN-r15,
+	measurementSFN-r15			INTEGER(-8192..9214),
+	...
+}
+
+DeltaTime-r15 ::= CHOICE {
+	deltaTimeSec-r15			INTEGER (1..16384),
+	deltaTimeSFN-r15			INTEGER (1..4096),
+	...
+}
+
+SFN-r15 ::= SEQUENCE {
+	sfn-r15						BIT STRING (SIZE (10)),
+	hyperSFN-r15				BIT STRING (SIZE (10))		OPTIONAL,
+	...
+}
+
+Displacement-r15 ::= SEQUENCE {	-- mandatory: bearing-r15, bearingRef-r15, horizontalDistance-r15; all other components are OPTIONAL
+	bearing-r15					INTEGER (0..3599),
+	bearingUncConfidence-r15	INTEGER (0..100)				OPTIONAL,
+	bearingRef-r15				ENUMERATED { geographicNorth, magneticNorth, local },	-- closed enumeration: no extension marker
+	horizontalDistance-r15		INTEGER (0..8191),
+	horizontalDistanceUnc-r15	INTEGER (0..255)				OPTIONAL,
+	horizontalUncConfidence-r15	INTEGER (0..100)				OPTIONAL,
+	verticalDirection-r15		ENUMERATED{upward, downward}	OPTIONAL,
+	verticalDistance-r15		INTEGER(0..8191)				OPTIONAL,
+	verticalDistanceUnc-r15		INTEGER (0..255)				OPTIONAL,
+	verticalUncConfidence-r15	INTEGER (0..100)				OPTIONAL,
+	...
+}
+
+UTC-Time-r15 ::= SEQUENCE {
+	utcTime-r15					UTCTime,
+	utcTime-ms-r15				INTEGER (0..999),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-RequestLocationInformation-r13 ::= SEQUENCE {
+	uncompensatedBarometricPressureReq-r13		BOOLEAN,
+	...,
+	[[	assistanceAvailability-r14				BOOLEAN		OPTIONAL	-- Need ON
+	]],
+	[[	sensor-MotionInformationReq-r15			BOOLEAN		OPTIONAL	-- Need ON
+	]],
+	[[	adjustmentReq-r16						BOOLEAN		OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-ProvideCapabilities-r13 ::= SEQUENCE {
+	sensor-Modes-r13			BIT STRING {	standalone	(0),
+												ue-assisted	(1),
+												ue-based	(2)} (SIZE (1..8)),
+	...,
+	[[	sensor-AssistanceDataSupportList-r14	Sensor-AssistanceDataSupportList-r14	OPTIONAL,
+		periodicalReportingSupported-r14		PositioningModes						OPTIONAL,
+		idleStateForMeasurements-r14			ENUMERATED { required }					OPTIONAL
+	]],
+	[[	sensor-MotionInformationSup-r15			ENUMERATED { true }						OPTIONAL
+	]],
+	[[	adjustmentSupported-r16					ENUMERATED	{ true }					OPTIONAL
+	]]
+}
+
+Sensor-AssistanceDataSupportList-r14 ::= SEQUENCE {
+	...,
+	[[	validityPeriodSupported-v1520			ENUMERATED { true }		OPTIONAL,
+		validityAreaSupported-v1520				ENUMERATED { true }		OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-RequestCapabilities-r13 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-Error-r13 ::= CHOICE {
+	locationServerErrorCauses-r13		Sensor-LocationServerErrorCauses-r13,
+	targetDeviceErrorCauses-r13			Sensor-TargetDeviceErrorCauses-r13,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-LocationServerErrorCauses-r13 ::= SEQUENCE {
+	cause-r13		ENUMERATED	{	undefined,
+									...,
+									assistanceDataNotSupportedByServer-v1420,
+									assistanceDataSupportedButCurrentlyNotAvailableByServer-v1420
+									},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-TargetDeviceErrorCauses-r13 ::= SEQUENCE {
+	cause-r13			ENUMERATED		{	undefined,
+											...,
+											assistanceDataMissing-v1420
+										},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-ProvideAssistanceData-r14 ::= SEQUENCE {
+	sensor-AssistanceDataList-r14		Sensor-AssistanceDataList-r14		OPTIONAL,	-- Need ON
+	sensor-Error-r14					Sensor-Error-r13					OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-AssistanceDataList-r14 ::= SEQUENCE {	-- refPressure-r14 is the only mandatory component; extensible
+	refPressure-r14		INTEGER (-20000..10000),
+	refPosition-r14		EllipsoidPointWithAltitudeAndUncertaintyEllipsoid	OPTIONAL,	-- Need ON
+	refTemperature-r14	INTEGER (-64..63)									OPTIONAL,	-- Need ON
+	...,
+	[[	-- extension addition group holding two OPTIONAL inline SEQUENCEs (period-v1520, area-v1520)
+	period-v1520		SEQUENCE {
+		pressureValidityPeriod-v1520	PressureValidityPeriod-v1520,
+		referencePressureRate-v1520		INTEGER	(-128..127)					OPTIONAL,	-- Need ON
+		...
+	}																		OPTIONAL,	-- Need ON
+	area-v1520			SEQUENCE {
+		pressureValidityArea-v1520		PressureValidityArea-v1520,
+		gN-pressure-v1520				INTEGER (-1024..1023)				OPTIONAL,	-- Need ON
+		gE-pressure-v1520				INTEGER (-1024..1023)				OPTIONAL,	-- Need ON
+		...
+	}																		OPTIONAL	-- Need ON
+	]]
+}
+
+PressureValidityArea-v1520 ::= SEQUENCE {
+	centerPoint-v1520				Ellipsoid-Point,
+	validityAreaWidth-v1520			INTEGER (1..128),
+	validityAreaHeight-v1520		INTEGER (1..128),
+	...
+}
+
+PressureValidityPeriod-v1520 ::= SEQUENCE {
+	beginTime-v1520					GNSS-SystemTime,
+	beginTimeAlt-v1520				INTEGER (0..2881)						OPTIONAL,	-- Need ON
+	duration-v1520					INTEGER (1..2881),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+Sensor-RequestAssistanceData-r14 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-ProvideLocationInformation-r13 ::= SEQUENCE {
+	wlan-MeasurementInformation-r13		WLAN-MeasurementInformation-r13		OPTIONAL,
+	wlan-Error-r13						WLAN-Error-r13						OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-MeasurementInformation-r13 ::= SEQUENCE {
+	measurementReferenceTime-r13		UTCTime						OPTIONAL,
+	wlan-MeasurementList-r13			WLAN-MeasurementList-r13	OPTIONAL,
+	...
+}
+
+WLAN-MeasurementList-r13 ::= SEQUENCE (SIZE(1..maxWLAN-AP-r13)) OF WLAN-MeasurementElement-r13
+
+WLAN-MeasurementElement-r13 ::= SEQUENCE {
+	wlan-AP-Identifier-r13		WLAN-AP-Identifier-r13,
+	rssi-r13					INTEGER (-127..128)					OPTIONAL,
+	rtt-r13						WLAN-RTT-r13						OPTIONAL,
+	apChannelFrequency-r13		INTEGER (0..256)					OPTIONAL,
+	servingFlag-r13				BOOLEAN								OPTIONAL,
+	...
+}
+
+WLAN-AP-Identifier-r13 ::= SEQUENCE {
+	bssid-r13					OCTET STRING (SIZE (6)),
+	ssid-r13					OCTET STRING (SIZE (1..32))			OPTIONAL,
+	...
+}
+
+WLAN-RTT-r13 ::= SEQUENCE {	-- rttValue-r13 and rttUnits-r13 are mandatory; rttAccuracy-r13 is OPTIONAL; extensible
+	rttValue-r13	INTEGER (0..16777215),	-- upper bound is 2^24 - 1
+	rttUnits-r13	ENUMERATED {	microseconds,	-- extensible enumeration; presumably the unit of rttValue-r13 (TODO confirm)
+									hundredsofnanoseconds,
+									tensofnanoseconds,
+									nanoseconds,
+									tenthsofnanoseconds,
+									... },
+	rttAccuracy-r13	INTEGER (0..255)								OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-RequestLocationInformation-r13 ::= SEQUENCE {	-- one mandatory bit map plus one extension addition group
+	requestedMeasurements-r13	BIT STRING {	-- named bits follow; the string is 1 to 8 bits long
+											rssi		(0),
+											rtt			(1)} (SIZE(1..8)),
+	...,
+	[[	assistanceAvailability-r14	BOOLEAN			OPTIONAL	-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-ProvideCapabilities-r13 ::= SEQUENCE {	-- two mandatory bit maps; the extension addition group adds three OPTIONAL components
+	wlan-Modes-r13			BIT STRING	{	standalone		(0),
+											ue-assisted		(1),
+											ue-based		(2)}	(SIZE (1..8)),
+	wlan-MeasSupported-r13	BIT STRING	{
+											rssi-r13		(0),
+											rtt-r13			(1)}	(SIZE(1..8)),
+	...,
+	[[	wlan-AP-AD-Supported-r14
+							BIT STRING {	ap-identifier	(0),
+											ap-location		(1)}	(SIZE (1..8))
+																					OPTIONAL,
+		periodicalReportingSupported-r14	PositioningModes						OPTIONAL,
+		idleStateForMeasurements-r14
+							ENUMERATED {	required	}							OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-RequestCapabilities-r13 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-Error-r13 ::= CHOICE {
+	locationServerErrorCauses-r13		WLAN-LocationServerErrorCauses-r13,
+	targetDeviceErrorCauses-r13			WLAN-TargetDeviceErrorCauses-r13,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-LocationServerErrorCauses-r13 ::= SEQUENCE {	-- mandatory cause-r13 plus one extension addition group
+	cause-r13								ENUMERATED	{undefined,
+														...,	-- the two values below are extension additions of the enumeration
+														requestedADNotAvailable-v1420,
+														notAllrequestedADAvailable-v1420
+														},
+	...,
+	[[	apLocationDataUnavailable-r14		NULL	OPTIONAL		-- Need ON
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-TargetDeviceErrorCauses-r13 ::= SEQUENCE {
+	cause-r13								ENUMERATED {undefined,
+														requestedMeasurementsNotAvailable,
+														notAllrequestedMeasurementsPossible,
+														...
+														},
+	wlan-AP-RSSI-MeasurementNotPossible-r13				NULL		OPTIONAL,
+	wlan-AP-RTT-MeasurementNotPossible-r13				NULL		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-ProvideAssistanceData-r14 ::= SEQUENCE {
+	wlan-DataSet-r14	SEQUENCE (SIZE (1..maxWLAN-DataSets-r14)) OF WLAN-DataSet-r14
+																			OPTIONAL,	-- Need ON
+	wlan-Error-r14		WLAN-Error-r13										OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-DataSet-r14 ::= SEQUENCE {
+	wlan-AP-List-r14				SEQUENCE (SIZE (1..maxWLAN-AP-r14)) OF WLAN-AP-Data-r14,
+	supportedChannels-11a-r14		SupportedChannels-11a-r14		OPTIONAL,	-- Need ON
+	supportedChannels-11bg-r14		SupportedChannels-11bg-r14		OPTIONAL,	-- Need ON
+	...
+}
+
+SupportedChannels-11a-r14 ::= SEQUENCE {	-- 16 mandatory BOOLEAN components; no extension marker
+	ch34-r14		BOOLEAN,
+	ch36-r14		BOOLEAN,
+	ch38-r14		BOOLEAN,
+	ch40-r14		BOOLEAN,
+	ch42-r14		BOOLEAN,
+	ch44-r14		BOOLEAN,
+	ch46-r14		BOOLEAN,
+	ch48-r14		BOOLEAN,
+	ch52-r14		BOOLEAN,
+	ch56-r14		BOOLEAN,
+	ch60-r14		BOOLEAN,
+	ch64-r14		BOOLEAN,
+	ch149-r14		BOOLEAN,
+	ch153-r14		BOOLEAN,
+	ch157-r14		BOOLEAN,
+	ch161-r14		BOOLEAN
+}
+
+SupportedChannels-11bg-r14 ::= SEQUENCE {
+	ch1-r14			BOOLEAN,
+	ch2-r14			BOOLEAN,
+	ch3-r14			BOOLEAN,
+	ch4-r14			BOOLEAN,
+	ch5-r14			BOOLEAN,
+	ch6-r14			BOOLEAN,
+	ch7-r14			BOOLEAN,
+	ch8-r14			BOOLEAN,
+	ch9-r14			BOOLEAN,
+	ch10-r14		BOOLEAN,
+	ch11-r14		BOOLEAN,
+	ch12-r14		BOOLEAN,
+	ch13-r14		BOOLEAN,
+	ch14-r14		BOOLEAN
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-AP-Data-r14 ::= SEQUENCE {
+	wlan-AP-Identifier-r14				WLAN-AP-Identifier-r13,
+	wlan-AP-Location-r14				WLAN-AP-Location-r14		OPTIONAL,	-- Need ON
+	...
+}
+
+WLAN-AP-Location-r14 ::= SEQUENCE {
+	locationDataLCI-r14					LocationDataLCI-r14,
+	...
+}
+
+LocationDataLCI-r14 ::= SEQUENCE {
+	latitudeUncertainty-r14				BIT STRING (SIZE (6)),
+	latitude-r14						BIT STRING (SIZE (34)),
+	longitudeUncertainty-r14			BIT STRING (SIZE (6)),
+	longitude-r14						BIT STRING (SIZE (34)),
+	altitudeUncertainty-r14				BIT STRING (SIZE (6))		OPTIONAL,	-- Need ON
+	altitude-r14						BIT STRING (SIZE (30))		OPTIONAL,	-- Need ON
+	datum-r14							BIT STRING (SIZE (8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+WLAN-RequestAssistanceData-r14 ::= SEQUENCE {
+	requestedAD-r14			BIT STRING {	ap-identifier		(0),
+											ap-location			(1)}	(SIZE (1..8)),
+	visibleAPs-r14			SEQUENCE (SIZE (1..maxVisibleAPs-r14)) OF WLAN-AP-Identifier-r13	OPTIONAL,
+	wlan-AP-StoredData-r14	SEQUENCE (SIZE (1..maxKnownAPs-r14)) OF WLAN-AP-Identifier-r13	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-ProvideLocationInformation-r13 ::= SEQUENCE {	-- both root components are OPTIONAL; extensible
+	bt-MeasurementInformation-r13		BT-MeasurementInformation-r13	OPTIONAL,
+	bt-Error-r13						BT-Error-r13					OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-MeasurementInformation-r13 ::= SEQUENCE {
+	measurementReferenceTime-r13		UTCTime						OPTIONAL,
+	bt-MeasurementList-r13				BT-MeasurementList-r13		OPTIONAL,
+	...
+}
+
+BT-MeasurementList-r13 ::= SEQUENCE (SIZE(1..maxBT-Beacon-r13)) OF BT-MeasurementElement-r13
+
+
+BT-MeasurementElement-r13 ::= SEQUENCE {
+	btAddr-r13						BIT STRING (SIZE (48)),
+	rssi-r13						INTEGER (-128..127)				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-RequestLocationInformation-r13 ::= SEQUENCE {	-- single mandatory bit map; extensible
+	requestedMeasurements-r13	BIT STRING {	-- one named bit (rssi); the string is 1 to 8 bits long
+											rssi		(0)} (SIZE(1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-ProvideCapabilities-r13 ::= SEQUENCE {	-- two mandatory bit maps; the extension addition group adds two OPTIONAL components
+	bt-Modes-r13			BIT STRING {	standalone		(0),
+											ue-assisted		(1)}	(SIZE (1..8)),
+	bt-MeasSupported-r13	BIT STRING {	rssi-r13		(0)}	(SIZE (1..8)),
+	...,
+	[[
+	idleStateForMeasurements-r14
+							ENUMERATED {	required	}							OPTIONAL,
+	periodicalReportingSupported-r14
+							PositioningModes										OPTIONAL
+	]]
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-RequestCapabilities-r13 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-Error-r13 ::= CHOICE {
+	locationServerErrorCauses-r13		BT-LocationServerErrorCauses-r13,
+	targetDeviceErrorCauses-r13			BT-TargetDeviceErrorCauses-r13,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-LocationServerErrorCauses-r13 ::= SEQUENCE {
+	cause-r13								ENUMERATED	{undefined,	...},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+BT-TargetDeviceErrorCauses-r13 ::= SEQUENCE {
+	cause-r13								ENUMERATED {undefined,
+														requestedMeasurementsNotAvailable,
+														notAllrequestedMeasurementsPossible,
+														...
+														},
+	bt-Beacon-rssiMeasurementNotPossible-r13	NULL		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-UL-ProvideCapabilities-r16 ::= SEQUENCE {
+	nr-UL-SRS-Capability-r16		NR-UL-SRS-Capability-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-UL-RequestCapabilities-r16 ::= SEQUENCE {
+	...
+}
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-ProvideLocationInformation-r16 ::= SEQUENCE {
+	nr-ECID-SignalMeasurementInformation-r16	NR-ECID-SignalMeasurementInformation-r16 OPTIONAL,
+	nr-ECID-Error-r16							NR-ECID-Error-r16						 OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-SignalMeasurementInformation-r16 ::= SEQUENCE {
+	nr-PrimaryCellMeasuredResults-r16	NR-MeasuredResultsElement-r16,
+	nr-MeasuredResultsList-r16			NR-MeasuredResultsList-r16				OPTIONAL,
+	...
+}
+
+NR-MeasuredResultsList-r16 ::= SEQUENCE (SIZE(1..32)) OF NR-MeasuredResultsElement-r16
+
+NR-MeasuredResultsElement-r16 ::= SEQUENCE {	-- mandatory: nr-PhysCellID-r16 and nr-ARFCN-r16; all other components are OPTIONAL
+	nr-PhysCellID-r16				NR-PhysCellID-r16,
+	nr-ARFCN-r16					CHOICE {	-- non-extensible CHOICE; both alternatives have type ARFCN-ValueNR-r15
+		ssb-ARFCN-r16					ARFCN-ValueNR-r15,
+		csi-RS-pointA-r16				ARFCN-ValueNR-r15
+	},
+	nr-CellGlobalID-r16				NCGI-r15									OPTIONAL,
+	systemFrameNumber-r16			BIT STRING (SIZE (10))						OPTIONAL,	resultsSSB-Cell-r16				MeasQuantityResults-r16						OPTIONAL,	-- NOTE(review): two components (systemFrameNumber-r16, resultsSSB-Cell-r16) share this physical line
+	resultsCSI-RS-Cell-r16			MeasQuantityResults-r16						OPTIONAL,
+	resultsSSB-Indexes-r16			ResultsPerSSB-IndexList-r16					OPTIONAL,
+	resultsCSI-RS-Indexes-r16		ResultsPerCSI-RS-IndexList-r16				OPTIONAL,
+	...
+}
+
+MeasQuantityResults-r16 ::= SEQUENCE {	-- both components are OPTIONAL; no extension marker
+	nr-RSRP-r16						INTEGER (0..127)							OPTIONAL,
+	nr-RSRQ-r16						INTEGER (0..127)							OPTIONAL
+}
+
+ResultsPerSSB-IndexList-r16 ::= SEQUENCE (SIZE (1..64)) OF ResultsPerSSB-Index-r16	-- list of 1 to 64 entries
+
+ResultsPerSSB-Index-r16 ::= SEQUENCE {
+	ssb-Index-r16					INTEGER (0..63),
+	ssb-Results-r16					MeasQuantityResults-r16
+}
+
+ResultsPerCSI-RS-IndexList-r16 ::= SEQUENCE (SIZE (1..64)) OF ResultsPerCSI-RS-Index-r16	-- list of 1 to 64 entries
+
+ResultsPerCSI-RS-Index-r16 ::= SEQUENCE {
+	csi-RS-Index-r16				INTEGER (0..95),
+	csi-RS-Results-r16				MeasQuantityResults-r16
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-RequestLocationInformation-r16 ::= SEQUENCE {	-- single mandatory bit map with four named bits; extensible
+	requestedMeasurements-r16		BIT STRING {	ssrsrpReq		(0),
+													ssrsrqReq		(1),
+													csirsrpReq		(2),
+													csirsrqReq		(3)} (SIZE(1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-ProvideCapabilities-r16 ::= SEQUENCE {
+	nr-ECID-MeasSupported-r16		BIT STRING {	ssrsrpSup		(0),
+													ssrsrqSup		(1),
+													csirsrpSup		(2),
+													csirsrqSup		(3)} (SIZE(1..8)),
+	periodicalReporting-r16		ENUMERATED { supported }							OPTIONAL,
+	triggeredReporting-r16		ENUMERATED { supported }							OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-RequestCapabilities-r16 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-Error-r16 ::= CHOICE {
+	locationServerErrorCauses-r16		NR-ECID-LocationServerErrorCauses-r16,
+	targetDeviceErrorCauses-r16			NR-ECID-TargetDeviceErrorCauses-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-LocationServerErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED	{	undefined,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-ECID-TargetDeviceErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED {	undefined,
+									requestedMeasurementNotAvailable,
+									notAllrequestedMeasurementsPossible,
+									...
+								},
+	ss-RSRPMeasurementNotPossible-r16				NULL		OPTIONAL,
+	ss-RSRQMeasurementNotPossible-r16				NULL		OPTIONAL,
+	csi-RSRPMeasurementNotPossible-r16				NULL		OPTIONAL,
+	csi-RSRQMeasurementNotPossible-r16				NULL		OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-ProvideAssistanceData-r16 ::= SEQUENCE {
+	nr-DL-PRS-AssistanceData-r16		NR-DL-PRS-AssistanceData-r16		OPTIONAL,	-- Need ON
+	nr-SelectedDL-PRS-IndexList-r16		NR-SelectedDL-PRS-IndexList-r16 	OPTIONAL,	-- Need ON
+	nr-PositionCalculationAssistance-r16
+										NR-PositionCalculationAssistance-r16
+																			OPTIONAL, 	-- Cond UEB
+	nr-DL-TDOA-Error-r16				NR-DL-TDOA-Error-r16				OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-RequestAssistanceData-r16 ::= SEQUENCE {
+	nr-PhysCellID-r16				NR-PhysCellID-r16							OPTIONAL,
+	nr-AdType-r16					BIT STRING {	dl-prs 	(0),
+													posCalc (1) } (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-ProvideLocationInformation-r16 ::= SEQUENCE {	-- all three root components are OPTIONAL; extensible
+	nr-DL-TDOA-SignalMeasurementInformation-r16
+											NR-DL-TDOA-SignalMeasurementInformation-r16
+																			OPTIONAL,
+	nr-dl-tdoa-LocationInformation-r16		NR-DL-TDOA-LocationInformation-r16
+																			OPTIONAL,
+	nr-DL-TDOA-Error-r16					NR-DL-TDOA-Error-r16			OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-SignalMeasurementInformation-r16 ::= SEQUENCE {
+	dl-PRS-ReferenceInfo-r16		DL-PRS-ID-Info-r16,
+	nr-DL-TDOA-MeasList-r16			NR-DL-TDOA-MeasList-r16,
+	...
+}
+
+NR-DL-TDOA-MeasList-r16 ::= SEQUENCE (SIZE(1..nrMaxTRPs-r16)) OF NR-DL-TDOA-MeasElement-r16
+
+NR-DL-TDOA-MeasElement-r16 ::= SEQUENCE {	-- mandatory: dl-PRS-ID-r16, nr-TimeStamp-r16, nr-RSTD-r16, nr-TimingQuality-r16; extensible
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16								OPTIONAL,
+	nr-CellGlobalID-r16				NCGI-r15										OPTIONAL,
+	nr-ARFCN-r16					ARFCN-ValueNR-r15								OPTIONAL,
+	nr-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16	 					OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16						OPTIONAL,
+	nr-TimeStamp-r16				NR-TimeStamp-r16,
+	nr-RSTD-r16						CHOICE {	-- extensible CHOICE; each alternative k0 to k5 has its own INTEGER range
+			k0-r16						INTEGER (0..1970049),
+			k1-r16						INTEGER (0..985025),
+			k2-r16						INTEGER (0..492513),
+			k3-r16						INTEGER (0..246257),
+			k4-r16						INTEGER (0..123129),
+			k5-r16						INTEGER (0..61565),
+			...
+	},
+	nr-AdditionalPathList-r16		NR-AdditionalPathList-r16						OPTIONAL,
+	nr-TimingQuality-r16			NR-TimingQuality-r16,
+	nr-DL-PRS-RSRP-Result-r16		INTEGER (0..126)								OPTIONAL,
+	nr-DL-TDOA-AdditionalMeasurements-r16
+									NR-DL-TDOA-AdditionalMeasurements-r16			OPTIONAL,
+	...
+}
+
+NR-DL-TDOA-AdditionalMeasurements-r16 ::= SEQUENCE (SIZE (1..3)) OF
+													NR-DL-TDOA-AdditionalMeasurementElement-r16
+
+NR-DL-TDOA-AdditionalMeasurementElement-r16 ::= SEQUENCE {	-- mandatory: nr-TimeStamp-r16, nr-RSTD-ResultDiff-r16, nr-TimingQuality-r16; extensible
+	nr-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16	 					OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16 					OPTIONAL,
+	nr-TimeStamp-r16				NR-TimeStamp-r16,
+	nr-RSTD-ResultDiff-r16			CHOICE {	-- extensible CHOICE; each alternative k0 to k5 has its own INTEGER range
+			k0-r16						INTEGER (0..8191),
+			k1-r16						INTEGER (0..4095),
+			k2-r16						INTEGER (0..2047),
+			k3-r16						INTEGER (0..1023),
+			k4-r16						INTEGER (0..511),
+			k5-r16						INTEGER (0..255),
+			...
+	},
+	nr-TimingQuality-r16			NR-TimingQuality-r16,
+	nr-DL-PRS-RSRP-ResultDiff-r16	INTEGER (0..61)									OPTIONAL,
+	nr-AdditionalPathList-r16		NR-AdditionalPathList-r16						OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-LocationInformation-r16 ::= SEQUENCE {
+	measurementReferenceTime-r16	CHOICE {
+			systemFrameNumber-r16			NR-TimeStamp-r16,
+			utc-time-r16					UTCTime,
+			...
+			}															OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-RequestLocationInformation-r16 ::= SEQUENCE {
+	nr-DL-PRS-RstdMeasurementInfoRequest-r16	ENUMERATED { true }				OPTIONAL,-- Need ON
+	nr-RequestedMeasurements-r16				BIT STRING { prsrsrpReq (0) } (SIZE(1..8)),
+	nr-AssistanceAvailability-r16				BOOLEAN,
+	nr-DL-TDOA-ReportConfig-r16					NR-DL-TDOA-ReportConfig-r16		OPTIONAL, -- Need ON
+	additionalPaths-r16							ENUMERATED { requested }		OPTIONAL, -- Need ON
+	...
+}
+
+NR-DL-TDOA-ReportConfig-r16 ::= SEQUENCE {
+	maxDL-PRS-RSTD-MeasurementsPerTRPPair-r16	INTEGER (1..4)					OPTIONAL, -- Need ON
+	timingReportingGranularityFactor-r16 		INTEGER (0..5)					OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-ProvideCapabilities-r16 ::= SEQUENCE {
+	nr-DL-TDOA-Mode-r16						PositioningModes,
+	nr-DL-TDOA-PRS-Capability-r16			NR-DL-PRS-ResourcesCapability-r16,
+	nr-DL-TDOA-MeasurementCapability-r16	NR-DL-TDOA-MeasurementCapability-r16,
+	nr-DL-PRS-QCL-ProcessingCapability-r16	NR-DL-PRS-QCL-ProcessingCapability-r16,
+	nr-DL-PRS-ProcessingCapability-r16		NR-DL-PRS-ProcessingCapability-r16,
+	additionalPathsReport-r16				ENUMERATED { supported }					OPTIONAL,
+	periodicalReporting-r16					PositioningModes							OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-MeasurementCapability-r16 ::= SEQUENCE {
+	dl-RSTD-MeasurementPerPairOfTRP-FR1-r16			INTEGER (1..4),
+	dl-RSTD-MeasurementPerPairOfTRP-FR2-r16			INTEGER (1..4),
+	supportOfDL-PRS-RSRP-MeasFR1-r16				ENUMERATED { supported}	OPTIONAL,
+	supportOfDL-PRS-RSRP-MeasFR2-r16				ENUMERATED { supported}	OPTIONAL,
+	...
+}
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-RequestCapabilities-r16 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-Error-r16 ::= CHOICE {
+	locationServerErrorCauses-r16		NR-DL-TDOA-LocationServerErrorCauses-r16,
+	targetDeviceErrorCauses-r16			NR-DL-TDOA-TargetDeviceErrorCauses-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-LocationServerErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED	{	undefined,
+									assistanceDataNotSupportedByServer,
+									assistanceDataSupportedButCurrentlyNotAvailableByServer,
+									notProvidedAssistanceDataNotSupportedByServer,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-TDOA-TargetDeviceErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED {	undefined,
+									assistance-data-missing,
+									unableToMeasureAnyTRP,
+									attemptedButUnableToMeasureSomeNeighbourTRPs,
+									thereWereNotEnoughSignalsReceivedForUeBasedDL-TDOA,
+									locationCalculationAssistanceDataMissing,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-ProvideAssistanceData-r16 ::= SEQUENCE {
+	nr-DL-PRS-AssistanceData-r16		NR-DL-PRS-AssistanceData-r16		OPTIONAL,	-- Need ON
+	nr-SelectedDL-PRS-IndexList-r16		NR-SelectedDL-PRS-IndexList-r16		OPTIONAL,	-- Need ON
+	nr-PositionCalculationAssistance-r16
+										NR-PositionCalculationAssistance-r16
+																			OPTIONAL, -- Cond UEB
+	nr-DL-AoD-Error-r16					NR-DL-AoD-Error-r16					OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-RequestAssistanceData-r16 ::= SEQUENCE {
+	nr-PhysCellID-r16				NR-PhysCellID-r16						OPTIONAL,
+	nr-AdType-r16					BIT STRING { 	dl-prs 	(0),
+													posCalc (1) } (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-ProvideLocationInformation-r16 ::= SEQUENCE {
+	nr-DL-AoD-SignalMeasurementInformation-r16
+											NR-DL-AoD-SignalMeasurementInformation-r16
+																			OPTIONAL,
+	nr-dl-AoD-LocationInformation-r16		NR-DL-AoD-LocationInformation-r16
+																			OPTIONAL,
+	nr-DL-AoD-Error-r16						NR-DL-AoD-Error-r16				OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-SignalMeasurementInformation-r16 ::= SEQUENCE {
+	nr-DL-AoD-MeasList-r16			NR-DL-AoD-MeasList-r16,
+	...
+}
+
+NR-DL-AoD-MeasList-r16 ::= SEQUENCE (SIZE(1..nrMaxTRPs-r16)) OF NR-DL-AoD-MeasElement-r16
+
+NR-DL-AoD-MeasElement-r16 ::= SEQUENCE {
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16						OPTIONAL,
+	nr-CellGlobalID-r16				NCGI-r15								OPTIONAL,
+	nr-ARFCN-r16					ARFCN-ValueNR-r15						OPTIONAL,
+	nr-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16	 			OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16 			OPTIONAL,
+	nr-TimeStamp-r16				NR-TimeStamp-r16,
+	nr-DL-PRS-RSRP-Result-r16		INTEGER (0..126),
+	nr-DL-PRS-RxBeamIndex-r16		INTEGER (1..8)							OPTIONAL,
+	nr-DL-AoD-AdditionalMeasurements-r16
+									NR-DL-AoD-AdditionalMeasurements-r16	OPTIONAL,
+	...
+}
+
+NR-DL-AoD-AdditionalMeasurements-r16 ::= SEQUENCE (SIZE (1..7)) OF
+													NR-DL-AoD-AdditionalMeasurementElement-r16
+
+NR-DL-AoD-AdditionalMeasurementElement-r16 ::= SEQUENCE {
+	nr-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16	 			OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16 			OPTIONAL,
+	nr-TimeStamp-r16				NR-TimeStamp-r16,
+	nr-DL-PRS-RSRP-ResultDiff-r16	INTEGER (0..30),
+	nr-DL-PRS-RxBeamIndex-r16		INTEGER (1..8)							OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-LocationInformation-r16 ::= SEQUENCE {
+	measurementReferenceTime-r16	CHOICE {
+			sfn-time-r16					NR-TimeStamp-r16,
+			utc-time-r16					UTCTime,
+			...
+			}															OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-RequestLocationInformation-r16 ::= SEQUENCE {
+	nr-AssistanceAvailability-r16				BOOLEAN,
+	nr-DL-AoD-ReportConfig-r16					NR-DL-AoD-ReportConfig-r16,
+	...
+}
+
+NR-DL-AoD-ReportConfig-r16 ::= SEQUENCE {
+	maxDL-PRS-RSRP-MeasurementsPerTRP-r16		INTEGER (1..8)			OPTIONAL, -- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-ProvideCapabilities-r16 ::= SEQUENCE {
+	nr-DL-AoD-Mode-r16						PositioningModes,
+	nr-DL-AoD-PRS-Capability-r16			NR-DL-PRS-ResourcesCapability-r16,
+	nr-DL-AoD-MeasurementCapability-r16		NR-DL-AoD-MeasurementCapability-r16,
+	nr-DL-PRS-QCL-ProcessingCapability-r16	NR-DL-PRS-QCL-ProcessingCapability-r16,
+	nr-DL-PRS-ProcessingCapability-r16		NR-DL-PRS-ProcessingCapability-r16,
+	periodicalReporting-r16					PositioningModes						OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-MeasurementCapability-r16 ::= SEQUENCE {
+	maxDL-PRS-RSRP-MeasurementFR1-r16		INTEGER (1..8),
+	maxDL-PRS-RSRP-MeasurementFR2-r16		INTEGER (1..8),
+	dl-AoD-MeasCapabilityBandList-r16		SEQUENCE (SIZE (1..nrMaxBands-r16)) OF
+													DL-AoD-MeasCapabilityPerBand-r16,
+	...
+}
+
+DL-AoD-MeasCapabilityPerBand-r16 ::= SEQUENCE {
+	freqBandIndicatorNR-r16				FreqBandIndicatorNR-r16,
+	simul-NR-DL-AoD-DL-TDOA-r16			ENUMERATED { supported}	OPTIONAL,
+	simul-NR-DL-AoD-Multi-RTT-r16		ENUMERATED { supported}	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-RequestCapabilities-r16 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-Error-r16 ::= CHOICE {
+	locationServerErrorCauses-r16		NR-DL-AoD-LocationServerErrorCauses-r16,
+	targetDeviceErrorCauses-r16			NR-DL-AoD-TargetDeviceErrorCauses-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-LocationServerErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED	{	undefined,
+									assistanceDataNotSupportedByServer,
+									assistanceDataSupportedButCurrentlyNotAvailableByServer,
+									notProvidedAssistanceDataNotSupportedByServer,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-DL-AoD-TargetDeviceErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED {	undefined,
+									assistance-data-missing,
+									unableToMeasureAnyTRP,
+									attemptedButUnableToMeasureSomeNeighbourTRPs,
+									thereWereNotEnoughSignalsReceivedForUeBasedDL-AoD,
+									locationCalculationAssistanceDataMissing,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-ProvideAssistanceData-r16 ::= SEQUENCE {
+	nr-DL-PRS-AssistanceData-r16			NR-DL-PRS-AssistanceData-r16	OPTIONAL,	-- Need ON
+	nr-SelectedDL-PRS-IndexList-r16			NR-SelectedDL-PRS-IndexList-r16 OPTIONAL,	-- Need ON
+	nr-Multi-RTT-Error-r16					NR-Multi-RTT-Error-r16			OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-RequestAssistanceData-r16 ::= SEQUENCE {
+	nr-PhysCellID-r16				NR-PhysCellID-r16						OPTIONAL,
+	nr-AdType-r16					BIT STRING { 	dl-prs (0),
+													ul-srs (1) } (SIZE (1..8)),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-ProvideLocationInformation-r16 ::= SEQUENCE {
+	nr-Multi-RTT-SignalMeasurementInformation-r16
+											NR-Multi-RTT-SignalMeasurementInformation-r16
+																					OPTIONAL,
+	nr-Multi-RTT-Error-r16					NR-Multi-RTT-Error-r16					OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-SignalMeasurementInformation-r16 ::= SEQUENCE {
+	nr-Multi-RTT-MeasList-r16		NR-Multi-RTT-MeasList-r16,
+	nr-NTA-Offset-r16				ENUMERATED { nTA1, nTA2, nTA3, nTA4, ... }		OPTIONAL,
+	...
+}
+
+NR-Multi-RTT-MeasList-r16 ::= SEQUENCE (SIZE(1..nrMaxTRPs-r16)) OF NR-Multi-RTT-MeasElement-r16
+
+NR-Multi-RTT-MeasElement-r16 ::= SEQUENCE {
+	dl-PRS-ID-r16					INTEGER (0..255),
+	nr-PhysCellID-r16				NR-PhysCellID-r16								OPTIONAL,
+	nr-CellGlobalID-r16				NCGI-r15										OPTIONAL,
+	nr-ARFCN-r16					ARFCN-ValueNR-r15								OPTIONAL,
+	nr-DL-PRS-ResourceID-r16		NR-DL-PRS-ResourceID-r16						OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16		NR-DL-PRS-ResourceSetID-r16 					OPTIONAL,
+	nr-UE-RxTxTimeDiff-r16			CHOICE {
+			k0-r16						INTEGER (0..1970049),
+			k1-r16						INTEGER (0..985025),
+			k2-r16						INTEGER (0..492513),
+			k3-r16						INTEGER (0..246257),
+			k4-r16						INTEGER (0..123129),
+			k5-r16						INTEGER (0..61565),
+			...
+	},
+	nr-AdditionalPathList-r16		NR-AdditionalPathList-r16						OPTIONAL,
+	nr-TimeStamp-r16				NR-TimeStamp-r16,
+	nr-TimingQuality-r16			NR-TimingQuality-r16,
+	nr-DL-PRS-RSRP-Result-r16		INTEGER (0..126)								OPTIONAL,
+	nr-Multi-RTT-AdditionalMeasurements-r16
+									NR-Multi-RTT-AdditionalMeasurements-r16			OPTIONAL,
+	...
+}
+
+NR-Multi-RTT-AdditionalMeasurements-r16 ::= SEQUENCE (SIZE (1..3)) OF
+									NR-Multi-RTT-AdditionalMeasurementElement-r16
+
+NR-Multi-RTT-AdditionalMeasurementElement-r16 ::= SEQUENCE {
+	nr-DL-PRS-ResourceID-r16			NR-DL-PRS-ResourceID-r16					OPTIONAL,
+	nr-DL-PRS-ResourceSetID-r16			NR-DL-PRS-ResourceSetID-r16 				OPTIONAL,
+	nr-DL-PRS-RSRP-ResultDiff-r16		INTEGER (0..61)								OPTIONAL,
+	nr-UE-RxTxTimeDiffAdditional-r16	CHOICE {
+			k0-r16							INTEGER (0..8191),
+			k1-r16							INTEGER (0..4095),
+			k2-r16							INTEGER (0..2047),
+			k3-r16							INTEGER (0..1023),
+			k4-r16							INTEGER (0..511),
+			k5-r16							INTEGER (0..255),
+			...
+	},
+	nr-TimingQuality-r16				NR-TimingQuality-r16,
+	nr-AdditionalPathList-r16			NR-AdditionalPathList-r16					OPTIONAL,
+	nr-TimeStamp-r16					NR-TimeStamp-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-RequestLocationInformation-r16 ::= SEQUENCE {
+	nr-UE-RxTxTimeDiffMeasurementInfoRequest-r16
+										ENUMERATED { true }					OPTIONAL, -- Need ON
+	nr-RequestedMeasurements-r16		BIT STRING { prsrsrpReq(0)} (SIZE(1..8)),
+	nr-AssistanceAvailability-r16		BOOLEAN,
+	nr-Multi-RTT-ReportConfig-r16		NR-Multi-RTT-ReportConfig-r16,
+	additionalPaths-r16					ENUMERATED { requested }			OPTIONAL, -- Need ON
+	...
+}
+
+NR-Multi-RTT-ReportConfig-r16 ::= SEQUENCE {
+	maxDL-PRS-RxTxTimeDiffMeasPerTRP-r16 	INTEGER (1..4)					OPTIONAL, -- Need ON
+	timingReportingGranularityFactor-r16 	INTEGER (0..5)					OPTIONAL	-- Need ON
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-ProvideCapabilities-r16 ::= SEQUENCE {
+	nr-Multi-RTT-PRS-Capability-r16			NR-DL-PRS-ResourcesCapability-r16,
+	nr-Multi-RTT-MeasurementCapability-r16	NR-Multi-RTT-MeasurementCapability-r16,
+	nr-DL-PRS-QCL-ProcessingCapability-r16	NR-DL-PRS-QCL-ProcessingCapability-r16,
+	nr-DL-PRS-ProcessingCapability-r16		NR-DL-PRS-ProcessingCapability-r16,
+	nr-UL-SRS-Capability-r16				NR-UL-SRS-Capability-r16,
+	additionalPathsReport-r16				ENUMERATED { supported }					OPTIONAL,
+	periodicalReporting-r16					ENUMERATED { supported }					OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-MeasurementCapability-r16 ::= SEQUENCE {
+	maxNrOfRx-TX-MeasFR1-r16					INTEGER (1..4)	OPTIONAL,
+	maxNrOfRx-TX-MeasFR2-r16					INTEGER (1..4)	OPTIONAL,
+	supportOfRSRP-MeasFR1-r16					ENUMERATED { supported}	OPTIONAL,
+	supportOfRSRP-MeasFR2-r16					ENUMERATED { supported}	OPTIONAL,
+	srs-AssocPRS-MultiLayersFR1-r16				ENUMERATED { supported}	OPTIONAL,
+	srs-AssocPRS-MultiLayersFR2-r16				ENUMERATED { supported}	OPTIONAL,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-RequestCapabilities-r16 ::= SEQUENCE {
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-Error-r16 ::= CHOICE {
+	locationServerErrorCauses-r16		NR-Multi-RTT-LocationServerErrorCauses-r16,
+	targetDeviceErrorCauses-r16			NR-Multi-RTT-TargetDeviceErrorCauses-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-LocationServerErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED	{	undefined,
+									assistanceDataNotSupportedByServer,
+									assistanceDataSupportedButCurrentlyNotAvailableByServer,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-Multi-RTT-TargetDeviceErrorCauses-r16 ::= SEQUENCE {
+	cause-r16		ENUMERATED {	undefined,
+									dl-assistance-data-missing,
+									unableToMeasureAnyTRP,
+									attemptedButUnableToMeasureSomeNeighbourTRPs,
+									ul-srs-configuration-missing,
+									unableToTransmit-ul-srs,
+									...
+								},
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+maxEARFCN					INTEGER ::= 65535	-- Maximum value of EUTRA carrier frequency
+maxEARFCN-Plus1				INTEGER ::= 65536	-- Lowest value extended EARFCN range
+maxEARFCN2					INTEGER ::= 262143	-- Highest value extended EARFCN range
+
+maxMBS-r14					INTEGER ::= 64
+maxWLAN-AP-r13				INTEGER ::= 64
+maxKnownAPs-r14				INTEGER ::= 2048
+maxVisibleAPs-r14			INTEGER ::= 32
+maxWLAN-AP-r14				INTEGER ::= 128
+maxWLAN-DataSets-r14		INTEGER ::= 8
+
+maxBT-Beacon-r13			INTEGER ::= 32
+
+nrMaxBands-r16							INTEGER ::= 1024	-- Maximum number of supported bands in
+															-- UE capability.
+nrMaxFreqLayers-r16						INTEGER ::= 4		-- Max freq layers
+nrMaxFreqLayers-1-r16					INTEGER ::= 3
+nrMaxNumDL-PRS-ResourcesPerSet-1-r16 	INTEGER ::= 63
+nrMaxNumDL-PRS-ResourceSetsPerTRP-1-r16	INTEGER ::= 7
+nrMaxResourceIDs-r16					INTEGER ::= 64		-- Max Resource IDs
+nrMaxResourceOffsetValue-1-r16 			INTEGER ::= 511
+nrMaxResourcesPerSet-r16				INTEGER ::= 64		-- Maximum resources for one set
+nrMaxSetsPerTrpPerFreqLayer-r16			INTEGER ::= 2		-- Maximum resource sets for one TRP
+nrMaxSetsPerTrpPerFreqLayer-1-r16		INTEGER ::= 1
+nrMaxTRPs-r16							INTEGER ::= 256		-- Max TRPs per UE
+nrMaxTRPsPerFreq-r16					INTEGER ::= 64		-- Max TRPs per freq layers
+nrMaxTRPsPerFreq-1-r16					INTEGER ::= 63
+maxSimultaneousBands-r16				INTEGER ::= 4		-- Maximum number of simultaneously
+															-- measured bands
+maxBandComb-r16							INTEGER ::= 1024
+nrMaxConfiguredBands-r16				INTEGER ::= 16
+
+-- ASN1STOP
+-- ASN1START
+
+END
+
+-- ASN1STOP
+-- ASN1START
+
+LPP-Broadcast-Definitions
+
+DEFINITIONS AUTOMATIC TAGS ::=
+
+BEGIN
+
+IMPORTS
+	OTDOA-ReferenceCellInfo,
+	OTDOA-NeighbourCellInfoList,
+	NR-TRP-LocationInfo-r16,
+	NR-DL-PRS-BeamInfo-r16,
+	NR-RTD-Info-r16	
+FROM LPP-PDU-Definitions;
+
+-- ASN1STOP
+-- ASN1START
+
+AssistanceDataSIBelement-r15 ::= SEQUENCE {
+	valueTag-r15						INTEGER (0..63)					OPTIONAL, -- Need OP
+	expirationTime-r15					UTCTime							OPTIONAL, -- Need OP
+	cipheringKeyData-r15				CipheringKeyData-r15 			OPTIONAL, -- Need OP
+	segmentationInfo-r15				SegmentationInfo-r15			OPTIONAL, -- Need OP
+	assistanceDataElement-r15			OCTET STRING,
+	...
+}
+
+CipheringKeyData-r15 ::= SEQUENCE {
+	cipherSetID-r15 					INTEGER (0..65535),
+	d0-r15 								BIT STRING (SIZE (1..128)),
+	...
+}
+
+SegmentationInfo-r15 ::= SEQUENCE {
+	segmentationOption-r15				ENUMERATED {pseudo-seg, octet-string-seg},
+	assistanceDataSegmentType-r15		ENUMERATED {notLastSegment, lastSegment},
+	assistanceDataSegmentNumber-r15		INTEGER (0..63),
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+OTDOA-UE-Assisted-r15 ::= SEQUENCE {
+	otdoa-ReferenceCellInfo-r15			OTDOA-ReferenceCellInfo,
+	otdoa-NeighbourCellInfo-r15			OTDOA-NeighbourCellInfoList,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-UEB-TRP-LocationData-r16 ::= SEQUENCE {
+	nr-trp-LocationInfo-r16 			NR-TRP-LocationInfo-r16,
+	nr-dl-prs-BeamInfo-r16				NR-DL-PRS-BeamInfo-r16			OPTIONAL,	-- Need ON
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+NR-UEB-TRP-RTD-Info-r16 ::= SEQUENCE {
+	nr-rtd-Info-r16						NR-RTD-Info-r16,
+	...
+}
+
+-- ASN1STOP
+-- ASN1START
+
+END
+
+-- ASN1STOP
diff --git a/openair3/MME_APP/mme_config.c b/openair3/MME_APP/mme_config.c
index 8005b85fd48553ca55452c33d26a04da1d8cc286..d51795a91af768e402a7882cdc624b06005747cc 100644
--- a/openair3/MME_APP/mme_config.c
+++ b/openair3/MME_APP/mme_config.c
@@ -45,7 +45,7 @@
 #include "LAYER2/MAC/mac_proto.h"
 #include "PHY/phy_extern.h"
 #include "PHY/INIT/phy_init.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 #include "nfapi_vnf.h"
 #include "nfapi_pnf.h"
 
diff --git a/openair3/NGAP/ngap_gNB.c b/openair3/NGAP/ngap_gNB.c
index 59e275ad537ed0631ce347817c8ed08f6d96b1cb..bce973354b300b4b60e7204f0949450133f8fb1c 100644
--- a/openair3/NGAP/ngap_gNB.c
+++ b/openair3/NGAP/ngap_gNB.c
@@ -64,8 +64,6 @@
   #include "oaisim_amf_test_s1c.h"
 #endif
 
-ngap_gNB_config_t ngap_config;
-
 static int ngap_gNB_generate_ng_setup_request(
   ngap_gNB_instance_t *instance_p, ngap_gNB_amf_data_t *ngap_amf_data_p);
 
diff --git a/openair3/NGAP/ngap_gNB.h b/openair3/NGAP/ngap_gNB.h
index e6a24acdb4aacbe0fba174904b66fe20561ed633..2aa0080cd1c78e2d173ef051cf2205cd6f287627 100644
--- a/openair3/NGAP/ngap_gNB.h
+++ b/openair3/NGAP/ngap_gNB.h
@@ -39,18 +39,6 @@
 #ifndef NGAP_GNB_H_
 #define NGAP_GNB_H_
 
-typedef struct ngap_gNB_config_s {
-  // MME related params
-  unsigned char amf_enabled;          ///< AMF enabled ?
-  unsigned char ngap_enabled;          ///< NGAP enabled ?
-} ngap_gNB_config_t;
-
-extern ngap_gNB_config_t ngap_config;
-
-#define AMF_MODE_ENABLED       ngap_config.amf_enabled
-#define NGAP_CONF_MODE         ngap_config.ngap_enabled
-
-
 void *ngap_gNB_process_itti_msg(void*);
 void  ngap_gNB_init(void);
 void *ngap_gNB_task(void *arg);
diff --git a/targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme b/sdr/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme
similarity index 98%
rename from targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme
rename to sdr/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme
index cd74fdb397dc60eac555a7dbfa3e67f25475a858..82814a1d588270f779512dcfdb0aaa5f8209642a 100755
--- a/targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme
+++ b/sdr/ADRV9371_ZC706/SYRTEM_IQ_QuickStart.readme
@@ -22,7 +22,7 @@ git checkout 375-syrtem-sdr-platform
 or
 git checkout develop-nr (after merge request accepted)
 
-./targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh
+./sdr/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh
 -> Reading package lists... Done
 -> Building dependency tree       
 -> Reading state information... Done
diff --git a/targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh b/sdr/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh
similarity index 100%
rename from targets/ARCH/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh
rename to sdr/ADRV9371_ZC706/SYRTEM_IQ_uninstall_previous_SW.sh
diff --git a/targets/ARCH/AW2SORI/oaiori.c b/sdr/AW2SORI/oaiori.c
similarity index 99%
rename from targets/ARCH/AW2SORI/oaiori.c
rename to sdr/AW2SORI/oaiori.c
index 67de3a3c5d4162bbd93e1b36d45b62de1a1dc43b..07a90a21297c0e8c2f9715eb51a2cd8a37d3ec3f 100644
--- a/targets/ARCH/AW2SORI/oaiori.c
+++ b/sdr/AW2SORI/oaiori.c
@@ -30,7 +30,7 @@
 #include "common/utils/system.h"
 #include "ori.h"
 
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 
 typedef struct eutra_bandentry_s {
   int16_t band;
diff --git a/targets/ARCH/AW2SORI/ori.h b/sdr/AW2SORI/ori.h
similarity index 100%
rename from targets/ARCH/AW2SORI/ori.h
rename to sdr/AW2SORI/ori.h
diff --git a/targets/ARCH/BLADERF/README b/sdr/BLADERF/README
similarity index 100%
rename from targets/ARCH/BLADERF/README
rename to sdr/BLADERF/README
diff --git a/targets/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c b/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.c
similarity index 99%
rename from targets/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c
rename to sdr/BLADERF/USERSPACE/LIB/bladerf_lib.c
index af5ab5ad6d169a0bd574ff7b8de8ad3bd1b5b179..eb1873cc66f6c7218bc96bb9d3c6c00d9aab9bf4 100644
--- a/targets/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c
+++ b/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.c
@@ -30,20 +30,13 @@
 #include <inttypes.h>
 #include "bladerf_lib.h"
 #include "math.h"
+#include "PHY/sse_intrin.h"
 
 /** @addtogroup _BLADERF_PHY_RF_INTERFACE_
  * @{
  */
 
 //! Number of BladeRF devices
-#ifdef __SSE4_1__
-#  include <smmintrin.h>
-#endif
-
-#ifdef __AVX2__
-#  include <immintrin.h>
-#endif
-
 int num_devices=0;
 
 /*These items configure the underlying asynch stream used by the the sync interface.
diff --git a/targets/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.h b/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.h
similarity index 100%
rename from targets/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.h
rename to sdr/BLADERF/USERSPACE/LIB/bladerf_lib.h
diff --git a/targets/ARCH/COMMON/common_lib.c b/sdr/COMMON/common_lib.c
similarity index 100%
rename from targets/ARCH/COMMON/common_lib.c
rename to sdr/COMMON/common_lib.c
diff --git a/targets/ARCH/COMMON/common_lib.h b/sdr/COMMON/common_lib.h
similarity index 99%
rename from targets/ARCH/COMMON/common_lib.h
rename to sdr/COMMON/common_lib.h
index b07c5f319f138fcc45e8310edbac5aac4a6c855f..71607cb27f5eb85119e05af78d59e36991e8c5f9 100644
--- a/targets/ARCH/COMMON/common_lib.h
+++ b/sdr/COMMON/common_lib.h
@@ -187,7 +187,11 @@ typedef struct {
   int rx_num_channels;
   //! number of TX channels (=TX antennas)
   int tx_num_channels;
-  //! \brief RX base addresses for mmapped_dma or direct access
+  //! rx daughter card
+  char* rx_subdev;
+  //! tx daughter card
+  char* tx_subdev;
+  //! \brief RX base addresses for mmapped_dma
   int32_t *rxbase[4];
   //! \brief RX buffer size for direct access
   int rxsize;
diff --git a/targets/ARCH/COMMON/record_player.c b/sdr/COMMON/record_player.c
similarity index 100%
rename from targets/ARCH/COMMON/record_player.c
rename to sdr/COMMON/record_player.c
diff --git a/targets/ARCH/COMMON/record_player.h b/sdr/COMMON/record_player.h
similarity index 98%
rename from targets/ARCH/COMMON/record_player.h
rename to sdr/COMMON/record_player.h
index f4f17c6ee69127eecadd302da30f82f76be576b9..3418d16a22fe45055de870b9846c630de9e938cc 100644
--- a/targets/ARCH/COMMON/record_player.h
+++ b/sdr/COMMON/record_player.h
@@ -21,7 +21,7 @@
  *      contact@openairinterface.org
  */
 
-/** targets/ARCH/COMMON/record-player.h
+/** sdr/COMMON/record_player.h
  *
  * \author: bruno.mongazon-cazavet@nokia-bell-labs.com
  */
@@ -44,7 +44,7 @@ extern "C"
 #define RECPLAY_REPLAYMODE   2
 
 #define BELL_LABS_IQ_HEADER       0xabababababababab
-#define BELL_LABS_IQ_PER_SF       7680 // Up to 5MHz bw for now
+#define BELL_LABS_IQ_PER_SF       46080 // 7680 => 5MHz bw for now; 46080 => 3/4 40MHz (106 PRBs)
 #define BELL_LABS_IQ_BYTES_PER_SF (BELL_LABS_IQ_PER_SF * 4)
 #define MAX_BELL_LABS_IQ_BYTES_PER_SF  BELL_LABS_IQ_BYTES_PER_SF*10
 
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/eth_raw.c b/sdr/ETHERNET/USERSPACE/LIB/eth_raw.c
similarity index 100%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/eth_raw.c
rename to sdr/ETHERNET/USERSPACE/LIB/eth_raw.c
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c b/sdr/ETHERNET/USERSPACE/LIB/eth_udp.c
similarity index 98%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c
rename to sdr/ETHERNET/USERSPACE/LIB/eth_udp.c
index dd9414a44c54e03399451b3ece5267ab4703e98d..d3b777100778df90e708d7c08ef27e9ae3646dc4 100644
--- a/targets/ARCH/ETHERNET/USERSPACE/LIB/eth_udp.c
+++ b/sdr/ETHERNET/USERSPACE/LIB/eth_udp.c
@@ -50,6 +50,7 @@
 
 #include "common_lib.h"
 #include "ethernet_lib.h"
+#include "openair1/PHY/sse_intrin.h"
 #include "common/utils/threadPool/thread-pool.h"
 
 //#define DEBUG 1
@@ -317,15 +318,9 @@ void *trx_eth_write_udp_cmd(udpTXelem_t *udpTXelem) {
   if (TS_advance < (nsamps/2)) LOG_W(PHY,"Starting TX FH for TS %llu absslot %llu(%llu) last_rxTS %llu TS_advance %llu samples\n",(unsigned long long)timestamp,(unsigned long long)timestamp/nsamps,((unsigned long long)timestamp/nsamps)%20,(unsigned long long)last_rxTS,(unsigned long long)TS_advance);
   void *buff2;
 #if defined(__x86_64) || defined(__i386__)
-#ifdef __AVX2__
   int nsamps2 = 256>>3;
   __m256i buff_tx[nsamps2+1];
   buff2=(void*)&buff_tx[1] - APP_HEADER_SIZE_BYTES;
-#else
-  int nsamps2 = 256>>2;
-  __m128i buff_tx[nsamps2+2];
-  buff2=(void*)&buff_tx[2] - APP_HEADER_SIZE_BYTES;
-#endif
 #elif defined(__arm__) || defined(__aarch64__)
   int nsamps2 = 256>>2;
   int16x8_t buff_tx[nsamps2+2];
@@ -362,7 +357,6 @@ void *trx_eth_write_udp_cmd(udpTXelem_t *udpTXelem) {
       *(uint16_t *)(buff2 + 4) = aid;
       // bring TX data into 12 MSBs 
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
       __m256i *buff256 = (__m256i *)&(((int32_t*)buff[aid])[offset]);
       for (int j=0; j<32; j+=8) {
         buff_tx[1+j] = _mm256_slli_epi16(buff256[j],4);
@@ -374,10 +368,6 @@ void *trx_eth_write_udp_cmd(udpTXelem_t *udpTXelem) {
         buff_tx[7+j] = _mm256_slli_epi16(buff256[j+6],4);
         buff_tx[8+j] = _mm256_slli_epi16(buff256[j+7],4);
       }
-#else
-      __m128i *buff128 = (__m128i *)&buff[aid][offset];
-      for (int j=0; j<64; j++) buff_tx[2+j] = _mm_slli_epi16(buff128[j],4);
-#endif
 #elif defined(__arm__)
       int16x8_t *buff128 = (__int16x8_t*)&buff[aid][offset];
       for (int j=0; j<64; j++) buff_tx[2+j] = vshlq_n_s16(((int16x8_t *)buff128)[j],4);
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet.md b/sdr/ETHERNET/USERSPACE/LIB/ethernet.md
similarity index 99%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet.md
rename to sdr/ETHERNET/USERSPACE/LIB/ethernet.md
index 4751cfe74ad651c42e4040efaca4a1769272e0ed..78058e9324a09fd23c74d87e56146ee87a292790 100644
--- a/targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet.md
+++ b/sdr/ETHERNET/USERSPACE/LIB/ethernet.md
@@ -27,7 +27,7 @@ The files implement the OAI IF device interface which provides the transmit/rece
 
 It is contained in the eth_udp.c and eth_raw.c files. The two basic routines are
 
-* trx_eth_read_udp_IF4p5() : implements a blocking read for three particular IF4p5 packets, IF4p5_PULFFT (for OAI RCC/DU), IF4p5_PRACH and IF4p5_PDLFFT (for OAI RU). The packets are parsed and mapped to the appropriate physical channels by the OAI physical layer   
+* trx_eth_read_udp_IF4p5() : implements a blocking read for three particular IF4p5 packets, IF4p5_PULFFT (for OAI RCC/DU), IF4p5_PRACH and IF4p5_PDLFFT (for OAI RU). The packets are parsed and mapped to the appropriate physical channels by the OAI physical layer
 * trx_eth_write_udp_IF4p5 : implements a write for the three IF4p5 packets.
 
 * trx_eth_ctlsend_udp : implements the sending component for the control socket
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c b/sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.c
similarity index 100%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c
rename to sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.c
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h b/sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h
similarity index 100%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h
rename to sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h b/sdr/ETHERNET/USERSPACE/LIB/if_defs.h
similarity index 98%
rename from targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h
rename to sdr/ETHERNET/USERSPACE/LIB/if_defs.h
index fd33177ac3ce7af07db5d4e31f36e021a1429612..0df47e22052279c306f87fb98310d4965f141f7e 100644
--- a/targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h
+++ b/sdr/ETHERNET/USERSPACE/LIB/if_defs.h
@@ -19,7 +19,7 @@
  *      contact@openairinterface.org
  */
 
-/*! \file targets/ARCH/ETHERNET/USERSPACE/LIB/if_defs.h
+/*! \file sdr/ETHERNET/USERSPACE/LIB/if_defs.h
 * \brief 
 * \author S. Sandeep Kumar, Raymond Knopp
 * \date 2016
diff --git a/targets/ARCH/ETHERNET/benetel/4g/benetel.c b/sdr/ETHERNET/benetel/4g/benetel.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/benetel.c
rename to sdr/ETHERNET/benetel/4g/benetel.c
diff --git a/targets/ARCH/ETHERNET/benetel/4g/dpdk_driver.c b/sdr/ETHERNET/benetel/4g/dpdk_driver.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/dpdk_driver.c
rename to sdr/ETHERNET/benetel/4g/dpdk_driver.c
diff --git a/targets/ARCH/ETHERNET/benetel/4g/low.c b/sdr/ETHERNET/benetel/4g/low.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/low.c
rename to sdr/ETHERNET/benetel/4g/low.c
diff --git a/targets/ARCH/ETHERNET/benetel/4g/low.h b/sdr/ETHERNET/benetel/4g/low.h
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/low.h
rename to sdr/ETHERNET/benetel/4g/low.h
diff --git a/targets/ARCH/ETHERNET/benetel/4g/low_dpdk.c b/sdr/ETHERNET/benetel/4g/low_dpdk.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/low_dpdk.c
rename to sdr/ETHERNET/benetel/4g/low_dpdk.c
diff --git a/targets/ARCH/ETHERNET/benetel/4g/shared_buffers.c b/sdr/ETHERNET/benetel/4g/shared_buffers.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/shared_buffers.c
rename to sdr/ETHERNET/benetel/4g/shared_buffers.c
diff --git a/targets/ARCH/ETHERNET/benetel/4g/shared_buffers.h b/sdr/ETHERNET/benetel/4g/shared_buffers.h
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/4g/shared_buffers.h
rename to sdr/ETHERNET/benetel/4g/shared_buffers.h
diff --git a/targets/ARCH/ETHERNET/benetel/5g/benetel.c b/sdr/ETHERNET/benetel/5g/benetel.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/benetel.c
rename to sdr/ETHERNET/benetel/5g/benetel.c
diff --git a/targets/ARCH/ETHERNET/benetel/5g/dpdk_driver.c b/sdr/ETHERNET/benetel/5g/dpdk_driver.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/dpdk_driver.c
rename to sdr/ETHERNET/benetel/5g/dpdk_driver.c
diff --git a/targets/ARCH/ETHERNET/benetel/5g/low.c b/sdr/ETHERNET/benetel/5g/low.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/low.c
rename to sdr/ETHERNET/benetel/5g/low.c
diff --git a/targets/ARCH/ETHERNET/benetel/5g/low.h b/sdr/ETHERNET/benetel/5g/low.h
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/low.h
rename to sdr/ETHERNET/benetel/5g/low.h
diff --git a/targets/ARCH/ETHERNET/benetel/5g/low_dpdk.c b/sdr/ETHERNET/benetel/5g/low_dpdk.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/low_dpdk.c
rename to sdr/ETHERNET/benetel/5g/low_dpdk.c
diff --git a/targets/ARCH/ETHERNET/benetel/5g/shared_buffers.c b/sdr/ETHERNET/benetel/5g/shared_buffers.c
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/shared_buffers.c
rename to sdr/ETHERNET/benetel/5g/shared_buffers.c
diff --git a/targets/ARCH/ETHERNET/benetel/5g/shared_buffers.h b/sdr/ETHERNET/benetel/5g/shared_buffers.h
similarity index 100%
rename from targets/ARCH/ETHERNET/benetel/5g/shared_buffers.h
rename to sdr/ETHERNET/benetel/5g/shared_buffers.h
diff --git a/targets/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp b/sdr/IRIS/USERSPACE/LIB/iris_lib.cpp
similarity index 98%
rename from targets/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp
rename to sdr/IRIS/USERSPACE/LIB/iris_lib.cpp
index 47bbadfe2823544b094fe9ce916207b08a5ab869..488876a4bc453369576ab232b818fc0d276cfb47 100644
--- a/targets/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp
+++ b/sdr/IRIS/USERSPACE/LIB/iris_lib.cpp
@@ -23,13 +23,7 @@
 #include "common_lib.h"
 #include <chrono>
 
-#ifdef __SSE4_1__
-#  include <smmintrin.h>
-#endif
-
-#ifdef __AVX2__
-#  include <immintrin.h>
-#endif
+#include "openair1/PHY/sse_intrin.h"
 
 #define MOVE_DC
 #define SAMPLE_RATE_DOWN 1
@@ -152,13 +146,8 @@ trx_iris_write(openair0_device *device, openair0_timestamp timestamp, void **buf
     iris_state_t *s = (iris_state_t *) device->priv;
     int nsamps2;  // aligned to upper 32 or 16 byte boundary
 #if defined(__x86_64) || defined(__i386__)
-  #ifdef __AVX2__
     nsamps2 = (nsamps+7)>>3;
     __m256i buff_tx[2][nsamps2];
-  #else
-    nsamps2 = (nsamps+3)>>2;
-    __m128i buff_tx[2][nsamps2];
-  #endif
 #else
   #error unsupported CPU architecture, iris device cannot be built
 #endif
@@ -167,11 +156,7 @@ trx_iris_write(openair0_device *device, openair0_timestamp timestamp, void **buf
     for (int i=0; i<cc; i++) {
       for (int j=0; j<nsamps2; j++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-        buff_tx[i][j] = _mm256_slli_epi16(((__m256i *)buff[i])[j],4);
-#else
-        buff_tx[i][j] = _mm_slli_epi16(((__m128i *)buff[i])[j],4);
-#endif
+        buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4);
 #endif
       }
     }
@@ -253,13 +238,8 @@ static int trx_iris_read(openair0_device *device, openair0_timestamp *ptimestamp
     int m = s->rx_num_channels;
     int nsamps2;  // aligned to upper 32 or 16 byte boundary
 #if defined(__x86_64) || defined(__i386__)
-#ifdef __AVX2__
     nsamps2 = (nsamps+7)>>3;
     __m256i buff_tmp[2][nsamps2];
-#else
-    nsamps2 = (nsamps+3)>>2;
-    __m128i buff_tmp[2][nsamps2];
-#endif
 #endif
 
     for (r = 0; r < s->device_num; r++) {
@@ -331,11 +311,7 @@ static int trx_iris_read(openair0_device *device, openair0_timestamp *ptimestamp
         for (int i=0; i<cc; i++) {
           for (int j=0; j<nsamps2; j++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef   __AVX2__
-            ((__m256i *)buff[i])[j] = _mm256_srai_epi16(buff_tmp[i][j],4);
-#else
-            ((__m128i *)buff[i])[j] = _mm_srai_epi16(buff_tmp[i][j],4);
-#endif
+            ((__m256i *)buff[i])[j] = simde_mm256_srai_epi16(buff_tmp[i][j],4);
 #endif
           }
         }
diff --git a/targets/ARCH/LMSSDR/LimeSDR.ini b/sdr/LMSSDR/LimeSDR.ini
similarity index 100%
rename from targets/ARCH/LMSSDR/LimeSDR.ini
rename to sdr/LMSSDR/LimeSDR.ini
diff --git a/targets/ARCH/LMSSDR/LimeSDR_above_1p8GHz.ini b/sdr/LMSSDR/LimeSDR_above_1p8GHz.ini
similarity index 100%
rename from targets/ARCH/LMSSDR/LimeSDR_above_1p8GHz.ini
rename to sdr/LMSSDR/LimeSDR_above_1p8GHz.ini
diff --git a/targets/ARCH/LMSSDR/LimeSDR_above_1p8GHz_1v4.ini b/sdr/LMSSDR/LimeSDR_above_1p8GHz_1v4.ini
similarity index 100%
rename from targets/ARCH/LMSSDR/LimeSDR_above_1p8GHz_1v4.ini
rename to sdr/LMSSDR/LimeSDR_above_1p8GHz_1v4.ini
diff --git a/targets/ARCH/LMSSDR/LimeSDR_below_1p8GHz.ini b/sdr/LMSSDR/LimeSDR_below_1p8GHz.ini
similarity index 100%
rename from targets/ARCH/LMSSDR/LimeSDR_below_1p8GHz.ini
rename to sdr/LMSSDR/LimeSDR_below_1p8GHz.ini
diff --git a/targets/ARCH/LMSSDR/LimeSDR_below_1p8GHz_1v4.ini b/sdr/LMSSDR/LimeSDR_below_1p8GHz_1v4.ini
similarity index 100%
rename from targets/ARCH/LMSSDR/LimeSDR_below_1p8GHz_1v4.ini
rename to sdr/LMSSDR/LimeSDR_below_1p8GHz_1v4.ini
diff --git a/targets/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp b/sdr/LMSSDR/USERSPACE/LIB/lms_lib.cpp
similarity index 100%
rename from targets/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp
rename to sdr/LMSSDR/USERSPACE/LIB/lms_lib.cpp
diff --git a/targets/ARCH/LMSSDR/USERSPACE/LIB/sodera_lib.cpp b/sdr/LMSSDR/USERSPACE/LIB/sodera_lib.cpp
similarity index 99%
rename from targets/ARCH/LMSSDR/USERSPACE/LIB/sodera_lib.cpp
rename to sdr/LMSSDR/USERSPACE/LIB/sodera_lib.cpp
index 6f1afe47c0da98cda5b3bcea704d48d66fbd3f36..d1e46c9d88267e3ef51ecf117f86808ee15a34f7 100644
--- a/targets/ARCH/LMSSDR/USERSPACE/LIB/sodera_lib.cpp
+++ b/sdr/LMSSDR/USERSPACE/LIB/sodera_lib.cpp
@@ -46,13 +46,7 @@
 #include "Si5351C.h"
 #include "LMS_StreamBoard.h"
 
-#ifdef __SSE4_1__
-#  include <smmintrin.h>
-#endif
- 
-#ifdef __AVX2__
-#  include <immintrin.h>
-#endif
+#include "openair1/PHY/sse_intrin.h"
 
 using namespace std;
 
diff --git a/targets/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp b/sdr/USRP/USERSPACE/LIB/usrp_lib.cpp
similarity index 93%
rename from targets/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp
rename to sdr/USRP/USERSPACE/LIB/usrp_lib.cpp
index f9cc4a111aecacb740dde74d2b1f93af53e683b2..cc58389c3411d5c4384ba92ba9a6e37891e7faee 100644
--- a/targets/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp
+++ b/sdr/USRP/USERSPACE/LIB/usrp_lib.cpp
@@ -54,17 +54,7 @@
 
 #include <sys/resource.h>
 
-#ifdef __SSE4_1__
-  #include <smmintrin.h>
-#endif
-
-#ifdef __AVX2__
-  #include <immintrin.h>
-#endif
-
-#ifdef __arm__
-  #include <arm_neon.h>
-#endif
+#include "openair1/PHY/sse_intrin.h"
 
 /** @addtogroup _USRP_PHY_RF_INTERFACE_
  * @{
@@ -203,8 +193,9 @@ static int sync_to_gps(openair0_device *device) {
 
       //Set to GPS time
       uhd::time_spec_t gps_time = uhd::time_spec_t(time_t(s->usrp->get_mboard_sensor("gps_time", mboard).to_int()));
-      //s->usrp->set_time_next_pps(gps_time+1.0, mboard);
-      s->usrp->set_time_next_pps(uhd::time_spec_t(0.0));
+      s->usrp->set_time_next_pps(gps_time+1.0, mboard);
+      //s->usrp->set_time_next_pps(uhd::time_spec_t(0.0));
+      
       //Wait for it to apply
       //The wait is 2 seconds because N-Series has a known issue where
       //the time at the last PPS does not properly update at the PPS edge
@@ -215,10 +206,10 @@ static int sync_to_gps(openair0_device *device) {
       uhd::time_spec_t time_last_pps = s->usrp->get_time_last_pps(mboard);
       std::cout << "USRP time: " << (boost::format("%0.9f") % time_last_pps.get_real_secs()) << std::endl;
       std::cout << "GPSDO time: " << (boost::format("%0.9f") % gps_time.get_real_secs()) << std::endl;
-      //if (gps_time.get_real_secs() == time_last_pps.get_real_secs())
-      //    std::cout << std::endl << "SUCCESS: USRP time synchronized to GPS time" << std::endl << std::endl;
-      //else
-      //    std::cerr << std::endl << "ERROR: Failed to synchronize USRP time to GPS time" << std::endl << std::endl;
+      if (gps_time.get_real_secs() == time_last_pps.get_real_secs())
+          std::cout << std::endl << "SUCCESS: USRP time synchronized to GPS time" << std::endl << std::endl;
+      else
+          std::cerr << std::endl << "ERROR: Failed to synchronize USRP time to GPS time" << std::endl << std::endl;
     }
 
     if (num_gps_locked == num_mboards and num_mboards > 1) {
@@ -295,15 +286,18 @@ static int trx_usrp_start(openair0_device *device) {
   //s->first_rx = 1;
   s->rx_timestamp = 0;
 
-  s->usrp->set_time_next_pps(uhd::time_spec_t(0.0));
-  // wait for the pps to change
-  uhd::time_spec_t time_last_pps = s->usrp->get_time_last_pps();
-  while (time_last_pps == s->usrp->get_time_last_pps()) {
+  // wait for the next PPS edge before scheduling the stream start
+  uhd::time_spec_t last_pps = s->usrp->get_time_last_pps();
+  uhd::time_spec_t current_pps = s->usrp->get_time_last_pps();
+  while(current_pps == last_pps) {
     boost::this_thread::sleep(boost::posix_time::milliseconds(1));
+    current_pps = s->usrp->get_time_last_pps();
   }
 
+  LOG_I(HW,"current pps at %f, starting streaming at %f\n",current_pps.get_real_secs(),current_pps.get_real_secs()+1.0);
+
   uhd::stream_cmd_t cmd(uhd::stream_cmd_t::STREAM_MODE_START_CONTINUOUS);
-  cmd.time_spec = uhd::time_spec_t(1.0);
+  cmd.time_spec = uhd::time_spec_t(current_pps+1.0);
   cmd.stream_now = false; // start at constant delay
   s->rx_stream->issue_stream_cmd(cmd);
 
@@ -421,14 +415,9 @@ static int trx_usrp_write(openair0_device *device,
 
   if(usrp_tx_thread == 0){
 #if defined(__x86_64) || defined(__i386__)
-  #ifdef __AVX2__
       nsamps2 = (nsamps+7)>>3;
       __m256i buff_tx[cc<2?2:cc][nsamps2];
-  #else
-    nsamps2 = (nsamps+3)>>2;
-    __m128i buff_tx[cc<2?2:cc][nsamps2];
-  #endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     nsamps2 = (nsamps+3)>>2;
     int16x8_t buff_tx[cc<2?2:cc][nsamps2];
 #else
@@ -439,12 +428,15 @@ static int trx_usrp_write(openair0_device *device,
     for (int i=0; i<cc; i++) {
       for (int j=0; j<nsamps2; j++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
-        buff_tx[i][j] = _mm256_slli_epi16(((__m256i *)buff[i])[j],4);
-#else
-        buff_tx[i][j] = _mm_slli_epi16(((__m128i *)buff[i])[j],4);
-#endif
-#elif defined(__arm__)
+        if ((((uintptr_t) buff[i])&0x1F)==0) {
+          buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4);
+        }
+        else 
+        {
+          __m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j);
+          buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4);
+        }
+#elif defined(__arm__) || defined(__aarch64__)
         buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4);
 #endif
       }
@@ -561,14 +553,9 @@ void *trx_usrp_write_thread(void * arg){
     }*/
 
     #if defined(__x86_64) || defined(__i386__)
-      #ifdef __AVX2__
         nsamps2 = (nsamps+7)>>3;
         __m256i buff_tx[cc<2?2:cc][nsamps2];
-      #else
-        nsamps2 = (nsamps+3)>>2;
-        __m128i buff_tx[cc<2?2:cc][nsamps2];
-      #endif
-    #elif defined(__arm__)
+    #elif defined(__arm__) || defined(__aarch64__)
       nsamps2 = (nsamps+3)>>2;
       int16x8_t buff_tx[cc<2?2:cc][nsamps2];
     #else
@@ -579,12 +566,15 @@ void *trx_usrp_write_thread(void * arg){
     for (int i=0; i<cc; i++) {
       for (int j=0; j<nsamps2; j++) {
         #if defined(__x86_64__) || defined(__i386__)
-          #ifdef __AVX2__
-            buff_tx[i][j] = _mm256_slli_epi16(((__m256i *)buff[i])[j],4);
-          #else
-            buff_tx[i][j] = _mm_slli_epi16(((__m128i *)buff[i])[j],4);
-          #endif
-        #elif defined(__arm__)
+            if ((((uintptr_t) buff[i])&0x1F)==0) {
+              buff_tx[i][j] = simde_mm256_slli_epi16(((__m256i *)buff[i])[j],4);
+            }
+            else
+            {
+              __m256i tmp = simde_mm256_loadu_si256(((__m256i *)buff[i])+j);
+              buff_tx[i][j] = simde_mm256_slli_epi16(tmp,4);
+            }
+        #elif defined(__arm__) || defined(__aarch64__)
           buff_tx[i][j] = vshlq_n_s16(((int16x8_t *)buff[i])[j],4);
         #endif
       }
@@ -675,14 +665,9 @@ static int trx_usrp_read(openair0_device *device, openair0_timestamp *ptimestamp
   int samples_received=0;
   int nsamps2;  // aligned to upper 32 or 16 byte boundary
 #if defined(__x86_64) || defined(__i386__)
-#ifdef __AVX2__
   nsamps2 = (nsamps+7)>>3;
   __m256i buff_tmp[cc<2 ? 2 : cc][nsamps2];
-#else
-  nsamps2 = (nsamps+3)>>2;
-  __m128i buff_tmp[cc<2 ? 2 : cc][nsamps2];
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
   nsamps2 = (nsamps+3)>>2;
   int16x8_t buff_tmp[cc<2 ? 2 : cc][nsamps2];
 #endif
@@ -727,22 +712,18 @@ static int trx_usrp_read(openair0_device *device, openair0_timestamp *ptimestamp
 
   // bring RX data into 12 LSBs for softmodem RX
   for (int i=0; i<cc; i++) {
-
+    for (int j=0; j<nsamps2; j++) {
 #if defined(__x86_64__) || defined(__i386__)
-#ifdef __AVX2__
+      // FK: in some cases the buffer might not be 32 byte aligned, so we cannot use avx2
 
       if ((((uintptr_t) buff[i])&0x1F)==0) {
-        for (int j=0; j<nsamps2; j++) 
-           ((__m256i *)buff[i])[j] = _mm256_srai_epi16(buff_tmp[i][j],rxshift);
+        ((__m256i *)buff[i])[j] = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift);
       } else {
-        for (int j=0; j<(nsamps2<<1); j++) 
-          ((__m128i *)buff[i])[j]  = _mm_srai_epi16(((__m128i *)buff_tmp[i])[j],rxshift);
+        __m256i tmp = simde_mm256_srai_epi16(buff_tmp[i][j],rxshift);
+        simde_mm256_storeu_si256(((__m256i *)buff[i])+j, tmp);
       }
-#else    
-      for (int j=0; j<nsamps2; j++) 
-        ((__m128i *)buff[i])[j] = _mm_srai_epi16(buff_tmp[i][j],rxshift);
-#endif
-#elif defined(__arm__)
+    }
+#elif defined(__arm__) || defined(__aarch64__)
       for (int j=0; j<nsamps2; j++) 
         ((int16x8_t *)buff[i])[j] = vshrq_n_s16(buff_tmp[i][j],rxshift);
 #endif
@@ -778,7 +759,9 @@ static int trx_usrp_read(openair0_device *device, openair0_timestamp *ptimestamp
       memcpy(hdr+1, buff[0], nsamps*4);
       recPlay->currentPtr+=sizeof(iqrec_t)+nsamps*4;
       recPlay->nbSamplesBlocks++;
+#if 0 // BMC: this is too verbose      
       LOG_D(HW,"recorded %d samples, for TS %lu, shift in buffer %ld\n", nsamps, hdr->ts, recPlay->currentPtr-(uint8_t *)recPlay->ms_sample);
+#endif      
     } else
       exit_function(__FILE__, __FUNCTION__, __LINE__,"Recording reaches max iq limit\n");
   }
@@ -1048,7 +1031,7 @@ extern "C" {
     sscanf(uhd::get_version_string().c_str(),"%d.%d.%d",&vers,&subvers,&subsubvers);
     LOG_I(HW,"UHD version %s (%d.%d.%d)\n",
           uhd::get_version_string().c_str(),vers,subvers,subsubvers);
-    std::string args;
+    std::string args,tx_subdev,rx_subdev;
 
     if (openair0_cfg[0].sdr_addrs == NULL) {
       args = "type=b200";
@@ -1169,12 +1152,16 @@ extern "C" {
       LOG_I(HW,"USRP fails to sync with GPS. Exiting.\n");
       exit(EXIT_FAILURE);
     }
-  } else if (s->usrp->get_clock_source(0) == "external") {
-    if (check_ref_locked(s,0)) {
-      LOG_I(HW,"USRP locked to external reference!\n");
-    } else {
-      LOG_I(HW,"Failed to lock to external reference. Exiting.\n");
-      exit(EXIT_FAILURE);
+  } else {
+    s->usrp->set_time_next_pps(uhd::time_spec_t(0.0));
+ 
+    if (s->usrp->get_clock_source(0) == "external") {
+      if (check_ref_locked(s,0)) {
+	LOG_I(HW,"USRP locked to external reference!\n");
+      } else {
+	LOG_I(HW,"Failed to lock to external reference. Exiting.\n");
+	exit(EXIT_FAILURE);
+      }
     }
   }
 
@@ -1198,6 +1185,12 @@ extern "C" {
     LOG_I(HW,"%s() sample_rate:%u\n", __FUNCTION__, (int)openair0_cfg[0].sample_rate);
 
     switch ((int)openair0_cfg[0].sample_rate) {
+      case 184320000:
+        // from usrp_time_offset
+        //openair0_cfg[0].samples_per_packet    = 2048;
+        openair0_cfg[0].tx_sample_advance     = 15; //to be checked
+        break;
+
       case 122880000:
         // from usrp_time_offset
         //openair0_cfg[0].samples_per_packet    = 2048;
@@ -1210,8 +1203,8 @@ extern "C" {
         // from usrp_time_offset
         //openair0_cfg[0].samples_per_packet    = 2048;
         openair0_cfg[0].tx_sample_advance     = 15; //to be checked
-        openair0_cfg[0].tx_bw                 = 80e6;
-        openair0_cfg[0].rx_bw                 = 80e6;
+        //openair0_cfg[0].tx_bw                 = 80e6;
+        //openair0_cfg[0].rx_bw                 = 80e6;
         break;
 
       case 61440000:
@@ -1237,6 +1230,13 @@ extern "C" {
         openair0_cfg[0].rx_bw                 = 20e6;
         break;
 
+      case 23040000:
+        //openair0_cfg[0].samples_per_packet    = 2048;
+        openair0_cfg[0].tx_sample_advance     = 15;
+        openair0_cfg[0].tx_bw                 = 20e6;
+        openair0_cfg[0].rx_bw                 = 20e6;
+        break;
+
       case 15360000:
         //openair0_cfg[0].samples_per_packet    = 2048;
         openair0_cfg[0].tx_sample_advance     = 45;
@@ -1338,6 +1338,18 @@ extern "C" {
   openair0_cfg[0].iq_txshift = 4;//shift
   openair0_cfg[0].iq_rxrescale = 15;//rescale iqs
 
+  if(openair0_cfg[0].tx_subdev!=NULL){
+    LOG_I(HW, "openair0_cfg[0].tx_subdev == %s\n", openair0_cfg[0].tx_subdev);
+    tx_subdev = openair0_cfg[0].tx_subdev;
+    s->usrp->set_tx_subdev_spec(tx_subdev);
+  }
+
+  if(openair0_cfg[0].rx_subdev!=NULL){
+    LOG_I(HW, "openair0_cfg[0].rx_subdev == %s\n", openair0_cfg[0].rx_subdev);
+    rx_subdev = openair0_cfg[0].rx_subdev;
+    s->usrp->set_rx_subdev_spec(rx_subdev);
+  }
+
   for(int i=0; i<((int) s->usrp->get_rx_num_channels()); i++) {
     if (i<openair0_cfg[0].rx_num_channels) {
       s->usrp->set_rx_rate(openair0_cfg[0].sample_rate,i+choffset);
@@ -1383,7 +1395,7 @@ extern "C" {
   LOG_I(HW,"Actual master clock: %fMHz...\n",s->usrp->get_master_clock_rate()/1e6);
   LOG_I(HW,"Actual clock source %s...\n",s->usrp->get_clock_source(0).c_str());
   LOG_I(HW,"Actual time source %s...\n",s->usrp->get_time_source(0).c_str());
-   sleep(1);
+
   // create tx & rx streamer
   uhd::stream_args_t stream_args_rx("sc16", "sc16");
   int samples=openair0_cfg[0].sample_rate;
@@ -1437,6 +1449,7 @@ extern "C" {
     LOG_I(HW,"  Actual TX packet size: %lu\n",s->tx_stream->get_max_num_samps());
   }
 
+  std::cout << boost::format("Using Device: %s") % s->usrp->get_pp_string() << std::endl;
   LOG_I(HW,"Device timestamp: %f...\n", s->usrp->get_time_now().get_real_secs());
   device->trx_write_func = trx_usrp_write;
   device->trx_read_func  = trx_usrp_read;
diff --git a/targets/ARCH/iqplayer/DOC/iqrecordplayer_usage.md b/sdr/iqplayer/DOC/iqrecordplayer_usage.md
similarity index 98%
rename from targets/ARCH/iqplayer/DOC/iqrecordplayer_usage.md
rename to sdr/iqplayer/DOC/iqrecordplayer_usage.md
index 6ccdca3bfa5bfcb2a6ed40b898d31e19a1a70950..d2507c329184492c36d5c7929813b1c3f1f45044 100644
--- a/targets/ARCH/iqplayer/DOC/iqrecordplayer_usage.md
+++ b/sdr/iqplayer/DOC/iqrecordplayer_usage.md
@@ -134,7 +134,7 @@ options for replay mode are:
 [PHY]I lte-softmodem prach_I0 = 0.0 dB
 [PHY]I ru thread max_I0 18, min_I0 12
 [HW]W ru thread iqplayer device terminating subframes replay  after 5 iteration
-/usr/local/oai/develop/openairinterface5g/targets/ARCH/iqplayer/iqplayer_lib.c:222 trx_iqplayer_read() Exiting OAI softmodem: replay ended, triggering process termination
+/usr/local/oai/develop/openairinterface5g/sdr/iqplayer/iqplayer_lib.c:222 trx_iqplayer_read() Exiting OAI softmodem: replay ended, triggering process termination
 ```
 
 ## iq recorder and iq player implementation overview
diff --git a/targets/ARCH/iqplayer/iqplayer_lib.c b/sdr/iqplayer/iqplayer_lib.c
similarity index 95%
rename from targets/ARCH/iqplayer/iqplayer_lib.c
rename to sdr/iqplayer/iqplayer_lib.c
index f6578d6e9a31707b3b0220465bb47bf7048b1615..8fd3da5ed1f4837d5d0669d5e3c736e7cadb7be5 100644
--- a/targets/ARCH/iqplayer/iqplayer_lib.c
+++ b/sdr/iqplayer/iqplayer_lib.c
@@ -88,7 +88,7 @@ static int iqplayer_loadfile(openair0_device *device, openair0_config_t *openair
     size_t hs = read(s->fd,&fh,sizeof(fh));
 
     if (hs == sizeof(fh)) {
-      parse_iqfile_header(device, &fh);
+        parse_iqfile_header(device, &fh);
         fstat(s->fd, &sb);
         s->mapsize=sb.st_size;
         LOG_I(HW, "Loading %u subframes from %s,size=%lu bytes ...\n",s->nbSamplesBlocks, c->u_sf_filename,(uint64_t)sb.st_size);
@@ -181,10 +181,12 @@ static int trx_iqplayer_write(openair0_device *device, openair0_timestamp timest
 */
 static int trx_iqplayer_read(openair0_device *device, openair0_timestamp *ptimestamp, void **buff, int nsamps, int cc) {
   recplay_state_t *s = device->recplay_state;
-
-  if (s->curSamplesBlock==0 && s->wrap_count==0 ) 
-    s->currentTs=s->ms_sample->ts;
   
+  if (s->curSamplesBlock==0 && s->wrap_count==0) { 
+    s->currentTs=s->ms_sample->ts;
+    LOG_I(HW, "First timestamp=%lu s->nbSamplesBlocks=%u\n", s->currentTs, s->nbSamplesBlocks);
+  }
+
   if (s->curSamplesBlock == s->nbSamplesBlocks) {
     LOG_I(HW, "wrapping on iq file (%ld)\n", s->wrap_count);
     s->curSamplesBlock = 0;
@@ -202,8 +204,6 @@ static int trx_iqplayer_read(openair0_device *device, openair0_timestamp *ptimes
     if (!(device->openair0_cfg->recplay_conf->use_mmap) ) {
       close(device->recplay_state->fd);
       iqplayer_loadfile(device, device->openair0_cfg);
- //       LOG_E(HW, "Problem seeking at the beginning of IQ file %s\n",strerror(errno));
-      
     }
   }
 
@@ -223,10 +223,6 @@ static int trx_iqplayer_read(openair0_device *device, openair0_timestamp *ptimes
   iqrec_t *curHeader=(iqrec_t *)s->currentPtr;
   AssertFatal(curHeader->header==BELL_LABS_IQ_HEADER,"" );
   // the current timestamp is the stored timestamp until we wrap on input
-  // USRP shifts 1 sample time to time
-  if (s->wrap_count !=0 && device->openair0_cfg->recplay_conf->use_mmap)
-    AssertFatal( abs(curHeader->ts-s->currentTs) < 5 ,
-              "wrap_count=%li, ts %lu %lu",s->wrap_count,curHeader->ts,s->currentTs);
   AssertFatal(nsamps*4==curHeader->nbBytes,"");
   *ptimestamp = s->currentTs;
   memcpy(buff[0], curHeader+1, nsamps*4);
@@ -237,6 +233,7 @@ static int trx_iqplayer_read(openair0_device *device, openair0_timestamp *ptimes
   if (device->openair0_cfg->recplay_conf->use_mmap)
     s->currentPtr+=sizeof(iqrec_t)+s->ms_sample->nbBytes;
 
+  // BMC TODO: support 1 second or more subframe read delay
   struct timespec req;
   req.tv_sec = 0;
   req.tv_nsec = (device->openair0_cfg[0].recplay_conf->u_sf_read_delay) * 1000;
diff --git a/targets/ARCH/rfsimulator/README.md b/sdr/rfsimulator/README.md
similarity index 100%
rename from targets/ARCH/rfsimulator/README.md
rename to sdr/rfsimulator/README.md
diff --git a/targets/ARCH/rfsimulator/apply_channelmod.c b/sdr/rfsimulator/apply_channelmod.c
similarity index 99%
rename from targets/ARCH/rfsimulator/apply_channelmod.c
rename to sdr/rfsimulator/apply_channelmod.c
index 4ff5321e4d973572ce25026fc908ad35e4c8fc5b..5cb16803bbf3ee4d4b0fe28fff76d4420abdf513 100644
--- a/targets/ARCH/rfsimulator/apply_channelmod.c
+++ b/sdr/rfsimulator/apply_channelmod.c
@@ -36,7 +36,7 @@
 #include <openair1/SIMULATION/TOOLS/sim.h>
 #include <common/utils/telnetsrv/telnetsrv.h>
 #include <common/utils/load_module_shlib.h>
-#include <targets/ARCH/rfsimulator/rfsimulator.h>
+#include <sdr/rfsimulator/rfsimulator.h>
 
 /*
   Legacy study:
diff --git a/targets/ARCH/rfsimulator/new_channel_sim.c b/sdr/rfsimulator/new_channel_sim.c
similarity index 98%
rename from targets/ARCH/rfsimulator/new_channel_sim.c
rename to sdr/rfsimulator/new_channel_sim.c
index 0501babd2004df7b92b13b7c1e0adb919b57ec4f..b5a10bc61cb6161aa55d528961acfcf65d5a8231 100644
--- a/targets/ARCH/rfsimulator/new_channel_sim.c
+++ b/sdr/rfsimulator/new_channel_sim.c
@@ -34,7 +34,7 @@
 #include <common/utils/LOG/log.h>
 #include <common/config/config_userapi.h>
 #include <openair1/SIMULATION/TOOLS/sim.h>
-#include <targets/ARCH/rfsimulator/rfsimulator.h>
+#include <sdr/rfsimulator/rfsimulator.h>
 
 // Ziggurat 
 static double wn[128],fn[128];
diff --git a/targets/ARCH/rfsimulator/rfsimulator.h b/sdr/rfsimulator/rfsimulator.h
similarity index 100%
rename from targets/ARCH/rfsimulator/rfsimulator.h
rename to sdr/rfsimulator/rfsimulator.h
diff --git a/targets/ARCH/rfsimulator/simulator.c b/sdr/rfsimulator/simulator.c
similarity index 99%
rename from targets/ARCH/rfsimulator/simulator.c
rename to sdr/rfsimulator/simulator.c
index fb16796cab6a759a2931ffef4b00f2ac3f123c59..a395f3ca8561617d3b699708bf573dfaf37aee20 100644
--- a/targets/ARCH/rfsimulator/simulator.c
+++ b/sdr/rfsimulator/simulator.c
@@ -51,7 +51,7 @@
 #include "openair1/PHY/defs_UE.h"
 #define CHANNELMOD_DYNAMICLOAD
 #include <openair1/SIMULATION/TOOLS/sim.h>
-#include <targets/ARCH/rfsimulator/rfsimulator.h>
+#include <sdr/rfsimulator/rfsimulator.h>
 
 #define PORT 4043 //default TCP port for this simulator
 #define CirSize 6144000 // 100ms is enough
diff --git a/targets/ARCH/rfsimulator/stored_node.c b/sdr/rfsimulator/stored_node.c
similarity index 100%
rename from targets/ARCH/rfsimulator/stored_node.c
rename to sdr/rfsimulator/stored_node.c
diff --git a/targets/ARCH/ETHERNET/USERSPACE/LIB/Makefile.inc b/targets/ARCH/ETHERNET/USERSPACE/LIB/Makefile.inc
deleted file mode 100644
index 6e58738c4ffb4499cdec01d2f452b72f1e39377d..0000000000000000000000000000000000000000
--- a/targets/ARCH/ETHERNET/USERSPACE/LIB/Makefile.inc
+++ /dev/null
@@ -1,4 +0,0 @@
-ETHERNET_OBJ += $(OPENAIR_TARGETS)/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.o
-ETHERNET_FILE_OBJ += $(OPENAIR_TARGETS)/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.c
-ETHERNET_CFLAGS += -O2 -I$(OPENAIR_TARGETS)/ARCH/COMMON -I$(OPENAIR_TARGETS)/ARCH/ETHERNET/USERSPACE/LIB/ -I$(OPENAIR_TARGETS)/COMMON
-
diff --git a/targets/ARCH/IRIS/USERSPACE/LIB/Makefile.inc b/targets/ARCH/IRIS/USERSPACE/LIB/Makefile.inc
deleted file mode 100644
index b79e3cec746031f7e72e56af734d170bb0ce7da7..0000000000000000000000000000000000000000
--- a/targets/ARCH/IRIS/USERSPACE/LIB/Makefile.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-IRIS_OBJ += $(OPENAIR_TARGETS)/ARCH/IRIS/USERSPACE/LIB/iris_lib.o
-IRIS_FILE_OBJ += $(OPENAIR_TARGETS)/ARCH/IRIS/USERSPACE/LIB/iris_lib.cpp
-IRIS_CFLAGS += -I$(OPENAIR_TARGETS)/ARCH/COMMON -I$(OPENAIR_TARGETS)/ARCH/IRIS/USERSPACE/LIB/ -I$(OPENAIR_TARGETS)/COMMON
diff --git a/targets/DOCS/Doxyfile b/targets/DOCS/Doxyfile
index 68c76f93fb6b31f142e8d8a23fba92744df3ec43..249afce0ff4c96452de28b8122d92400c545afd9 100644
--- a/targets/DOCS/Doxyfile
+++ b/targets/DOCS/Doxyfile
@@ -792,11 +792,11 @@ INPUT                  = $(OPENAIR1_DIR)/PHY/defs.h \
                          $(OPENAIR2_DIR)/LAYER2/RLC/UM_v9.3.0/rlc_um.h \
                          $(OPENAIR2_DIR)/LAYER2/RLC/UM_v9.3.0/rlc_um_entity.h \
                          $(OPENAIR2_DIR)/NETWORK_DRIVER/MESH/proto_extern.h \
-                         $(OPENAIR_TARGETS)/ARCH/COMMON/common_lib.h \
-                         $(OPENAIR_TARGETS)/ARCH/USRP/USERSPACE/LIB/usrp_lib.cpp \
-                         $(OPENAIR_TARGETS)/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.c \
-                         $(OPENAIR_TARGETS)/ARCH/BLADERF/USERSPACE/LIB/bladerf_lib.h \
-                         $(OPENAIR_TARGETS)/ARCH/LMSSDR/USERSPACE/LIB/lms_lib.cpp
+                         $(OPENAIR_DIR)/sdr/COMMON/common_lib.h \
+                         $(OPENAIR_DIR)/sdr/USRP/USERSPACE/LIB/usrp_lib.cpp \
+                         $(OPENAIR_DIR)/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.c \
+                         $(OPENAIR_DIR)/sdr/BLADERF/USERSPACE/LIB/bladerf_lib.h \
+                         $(OPENAIR_DIR)/sdr/LMSSDR/USERSPACE/LIB/lms_lib.cpp
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf
new file mode 100644
index 0000000000000000000000000000000000000000..4303f5627008c0f21518cc1d62b4fb9823af2d0b
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band261.fr2.64PRB.prs.usrpx310.conf
@@ -0,0 +1,290 @@
+Active_gNBs = ( "gNB-Eurecom-5GNRBox");
+# Asn1_verbosity, choice in: none, info, annoying
+Asn1_verbosity = "none";
+
+gNBs =
+(
+ {
+    ////////// Identification parameters:
+    gNB_ID    =  0xe00;
+
+    cell_type =  "CELL_MACRO_GNB";
+
+    gNB_name  =  "gNB-Eurecom-5GNRBox";
+
+    // Tracking area code, 0x0000 and 0xfffe are reserved values
+    tracking_area_code  =  1;
+
+    plmn_list = ({mcc = 311; mnc = 480; mnc_length = 3;});	 
+
+    tr_s_preference     = "local_mac"
+
+    ////////// Physical parameters:
+
+    min_rxtxtime                                              = 6;
+	
+    servingCellConfigCommon = (
+    {
+# spCellConfigCommon
+
+      physCellId                                                    = 0;
+
+# downlinkConfigCommon
+    #frequencyInfoDL
+      # this is pointA + 23 PRBs@120kHz SCS (same as initial BWP)
+      absoluteFrequencySSB                                             = 2071241;
+      dl_frequencyBand                                                 = 261;
+      # this is 27.900 GHz
+      dl_absoluteFrequencyPointA                                       = 2070833;
+      #scs-SpecificCarrierList
+        dl_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        dl_subcarrierSpacing                                           = 3;
+        dl_carrierBandwidth                                            = 64;
+     #initialDownlinkBWP
+      #genericParameters
+        # this is RBstart=0,L=32 (275*(L-1))+RBstart
+        initialDLBWPlocationAndBandwidth                               = 8525;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        initialDLBWPsubcarrierSpacing                                  = 3;
+      #pdcch-ConfigCommon
+        initialDLBWPcontrolResourceSetZero                             = 12;
+        initialDLBWPsearchSpaceZero                                    = 0;
+
+  #uplinkConfigCommon 
+     #frequencyInfoUL
+      ul_frequencyBand                                              = 261;
+      #scs-SpecificCarrierList
+      ul_offstToCarrier                                             = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      ul_subcarrierSpacing                                          = 3;
+      ul_carrierBandwidth                                           = 64;
+      pMax                                                          = 20;
+     #initialUplinkBWP
+      #genericParameters
+        initialULBWPlocationAndBandwidth                            = 8525;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        initialULBWPsubcarrierSpacing                               = 3;
+      #rach-ConfigCommon
+        #rach-ConfigGeneric
+          prach_ConfigurationIndex                                  = 52;
+#prach_msg1_FDM
+#0 = one, 1=two, 2=four, 3=eight
+          prach_msg1_FDM                                            = 0;
+          prach_msg1_FrequencyStart                                 = 0;
+          zeroCorrelationZoneConfig                                 = 13;
+          preambleReceivedTargetPower                               = -118;
+#preambleTransMax (0...10) = (3,4,5,6,7,8,10,20,50,100,200)
+          preambleTransMax                                          = 6;
+#powerRampingStep
+# 0=dB0,1=dB2,2=dB4,3=dB6
+        powerRampingStep                                            = 1;
+#ra_ResponseWindow
+#1,2,4,8,10,20,40,80
+        ra_ResponseWindow                                           = 7;
+#ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR: 0=oneeighth,1=onefourth,2=half,3=one,4=two,5=four,6=eight,7=sixteen
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                = 4;
+#oneHalf (0..15) 4,8,12,16,...60,64
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB                   = 7;
+#ra_ContentionResolutionTimer
+#(0..7) 8,16,24,32,40,48,56,64
+        ra_ContentionResolutionTimer                                = 7;
+        rsrp_ThresholdSSB                                           = 19;
+#prach-RootSequenceIndex_PR
+#1 = 839, 2 = 139
+        prach_RootSequenceIndex_PR                                  = 2;
+        prach_RootSequenceIndex                                     = 1;
+        # SCS for msg1, can only be 15 or 30 kHz < 6 GHz, takes precedence over the one derived from prach-ConfigIndex
+        #  
+        msg1_SubcarrierSpacing                                      = 3,
+
+# restrictedSetConfig
+# 0=unrestricted, 1=restricted type A, 2=restricted type B
+        restrictedSetConfig                                         = 0,
+
+        msg3_DeltaPreamble                                          = 1;
+        p0_NominalWithGrant                                         =-90;
+
+# pucch-ConfigCommon setup :
+# pucchGroupHopping
+# 0 = neither, 1= group hopping, 2=sequence hopping
+        pucchGroupHopping                                           = 0;
+        hoppingId                                                   = 40;
+        p0_nominal                                                  = -90;
+# ssb_PositionsInBurst_PR
+# 1=short, 2=medium, 3=long
+      ssb_PositionsInBurst_PR                                       = 3;
+      ssb_PositionsInBurst_Bitmap                                   = 0x0001000100010001L;
+
+# ssb_periodicityServingCell
+# 0 = ms5, 1=ms10, 2=ms20, 3=ms40, 4=ms80, 5=ms160, 6=spare2, 7=spare1 
+      ssb_periodicityServingCell                                    = 1;
+
+# dmrs_TypeA_position
+# 0 = pos2, 1 = pos3
+      dmrs_TypeA_Position                                           = 0;
+
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      subcarrierSpacing                                             = 3;
+
+
+  #tdd-UL-DL-ConfigurationCommon
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      referenceSubcarrierSpacing                                    = 3;
+      # pattern1 
+      # dl_UL_TransmissionPeriodicity
+      # 0=ms0p5, 1=ms0p625, 2=ms1, 3=ms1p25, 4=ms2, 5=ms2p5, 6=ms5, 7=ms10
+      dl_UL_TransmissionPeriodicity                                 = 3;
+      nrofDownlinkSlots                                             = 7;
+      nrofDownlinkSymbols                                           = 6;
+      nrofUplinkSlots                                               = 2;
+      nrofUplinkSymbols                                             = 4;
+
+      ssPBCH_BlockPower                                             = 10;
+  }
+
+  );
+
+
+    # ------- SCTP definitions
+    SCTP :
+    {
+        # Number of streams to use in input/output
+        SCTP_INSTREAMS  = 2;
+        SCTP_OUTSTREAMS = 2;
+    };
+
+
+    ////////// MME parameters:
+    mme_ip_address      = ( { ipv4       = "192.168.18.99";
+                              ipv6       = "192:168:30::17";
+                              active     = "yes";
+                              preference = "ipv4";
+                            }
+                          );
+
+    ///X2
+    enable_x2 = "no";
+    t_reloc_prep      = 1000;      /* unit: millisecond */
+    tx2_reloc_overall = 2000;      /* unit: millisecond */
+    t_dc_prep         = 1000;      /* unit: millisecond */
+    t_dc_overall      = 2000;      /* unit: millisecond */
+    target_enb_x2_ip_address      = (
+                                     { ipv4       = "192.168.18.199";
+                                       ipv6       = "192:168:30::17";
+                                       preference = "ipv4";
+                                     }
+                                    );
+
+    NETWORK_INTERFACES :
+    {
+
+        GNB_INTERFACE_NAME_FOR_S1_MME            = "eth0";
+        GNB_IPV4_ADDRESS_FOR_S1_MME              = "192.168.18.198/24";
+        GNB_INTERFACE_NAME_FOR_S1U               = "eth0";
+        GNB_IPV4_ADDRESS_FOR_S1U                 = "192.168.18.198/24";
+        GNB_PORT_FOR_S1U                         = 2152; # Spec 2152
+        GNB_IPV4_ADDRESS_FOR_X2C                 = "192.168.18.198/24";
+        GNB_PORT_FOR_X2C                         = 36422; # Spec 36422
+    };
+  }
+);
+
+MACRLCs = (
+{
+  num_cc                      = 1;
+  tr_s_preference             = "local_L1";
+  tr_n_preference             = "local_RRC";
+  pusch_TargetSNRx10          = 200;
+  pucch_TargetSNRx10          = 200;
+}
+);
+
+prs_config = (
+{
+  NumPRSResources       = 8;
+  PRSResourceSetPeriod  = [80, 2];
+  SymbolStart           = [8,8,8,8,8,8,8,8];
+  NumPRSSymbols         = [6,6,6,6,6,6,6,6];
+  NumRB                 = 64;
+  RBOffset              = 0;
+  CombSize              = 4;
+  REOffset              = [0,0,0,0,0,0,0,0];
+  PRSResourceOffset     = [0,10,20,30,40,50,60,70];
+  PRSResourceRepetition = 1;
+  PRSResourceTimeGap    = 1;
+  NPRS_ID               = [0,1,2,3,4,5,6,7];
+  MutingPattern1        = [];
+  MutingPattern2        = [];
+  MutingBitRepetition   = 1;
+}
+);
+
+L1s = (
+{
+  num_cc = 1;
+  tr_n_preference     = "local_mac";
+  ofdm_offset_divisor = 8; #set this to UINT_MAX for offset 0
+}
+);
+
+RUs = (
+{		  
+  local_rf       = "yes"
+  nb_tx          = 1;
+  nb_rx          = 1;
+  att_tx         = 0;
+  att_rx         = 0;
+  bands          = [7];
+  sl_ahead       = 12;
+  max_pdschReferenceSignalPower = -27;
+  max_rxgain                    = 75;
+  eNB_instances  = [0];
+  sdr_addrs = "addr=192.168.10.2,second_addr=192.168.20.2,clock_source=gpsdo,time_source=gpsdo";
+  if_freq   = 50000000L;
+}
+);  
+
+THREAD_STRUCT = (
+{
+  #three options for the level of parallelism: "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
+  parallel_config    = "PARALLEL_RU_L1_TRX_SPLIT";
+  #two options for the worker: "WORKER_DISABLE" or "WORKER_ENABLE"
+  worker_config      = "WORKER_ENABLE";
+}
+);
+
+security = {
+  # preferred ciphering algorithms
+  # the first one of the list that a UE supports is chosen
+  # valid values: nea0, nea1, nea2, nea3
+  ciphering_algorithms = ( "nea0" );
+
+  # preferred integrity algorithms
+  # the first one of the list that a UE supports is chosen
+  # valid values: nia0, nia1, nia2, nia3
+  integrity_algorithms = ( "nia2", "nia0" );
+
+  # setting 'drb_ciphering' to "no" disables ciphering for DRBs, no matter
+  # what 'ciphering_algorithms' configures; same thing for 'drb_integrity'
+  drb_ciphering = "yes";
+  drb_integrity = "no";
+};
+
+log_config :
+{
+  global_log_level                      ="info";
+  hw_log_level                          ="info";
+  phy_log_level                         ="info";
+  mac_log_level                         ="info";
+  rlc_log_level                         ="info";
+  pdcp_log_level                        ="info";
+  rrc_log_level                         ="info";
+};
+
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf
new file mode 100644
index 0000000000000000000000000000000000000000..129056d1916fd30fbd4cd5c51ad6a55f29f6e6a7
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb0.sa.band78.fr1.106PRB.prs.usrpx310.conf
@@ -0,0 +1,334 @@
+Active_gNBs = ( "gNB-OAI");
+# Asn1_verbosity, choice in: none, info, annoying
+Asn1_verbosity = "none";
+
+gNBs =
+(
+ {
+    ////////// Identification parameters:
+    gNB_CU_ID = 0xe00;
+
+#     cell_type =  "CELL_MACRO_GNB";
+
+    gNB_name  =  "gNB-OAI";
+
+    // Tracking area code, 0x0000 and 0xfffe are reserved values
+    tracking_area_code  =  1;
+
+    plmn_list = ({
+                  mcc = 208;
+                  mnc = 97;
+                  mnc_length = 2;
+                  snssaiList = (
+                                {
+                                  sst = 1;
+                                  sd  = 0x010203; // 0 false, else true
+                                },
+                                                                                                                                                                   {
+                                  sst = 1;
+                                  sd  = 0x112233; // 0 false, else true
+                                }
+                               );
+                 });
+
+    nr_cellid = 12345678L
+
+#     tr_s_preference     = "local_mac"
+
+    ////////// Physical parameters:
+
+    #pusch_TargetSNRx10                                        = 200;
+    #pucch_TargetSNRx10                                        = 200;
+    ul_prbblacklist                                           = "51,52,53,54"
+    min_rxtxtime                                              = 6;
+    do_SRS                                                    = 0;
+
+    pdcch_ConfigSIB1 = (
+      {
+        controlResourceSetZero = 11;
+        searchSpaceZero        = 0;
+      }
+    );
+
+    servingCellConfigCommon = (
+    {
+ #spCellConfigCommon
+
+      physCellId                                                    = 0;
+
+# downlinkConfigCommon
+    #frequencyInfoDL
+      # this is 3301.68 MHz + 22*12*30e-3 MHz = 3309.6
+      #absoluteFrequencySSB                                          = 620640;
+      # this is 3300.60 MHz + 53*12*30e-3 MHz = 3319.68
+      absoluteFrequencySSB                                           = 621312;
+      # this is 3503.28 MHz + 22*12*30e-3 MHz = 3511.2
+      #absoluteFrequencySSB                                          = 634080;
+      # this is 3600.48 MHz
+      #absoluteFrequencySSB                                          = 640032;
+      #dl_frequencyBand                                              = 78;
+      # this is 3301.68 MHz
+      #dl_absoluteFrequencyPointA                                    = 620112;
+      # this is 3300.60 MHz
+      dl_absoluteFrequencyPointA                                     = 620040;
+      # this is 3502.56 MHz
+      #dl_absoluteFrequencyPointA                                    = 633552;
+      # this is 3600.48 MHz
+      #dl_absoluteFrequencyPointA                                    = 640032;
+      #scs-SpecificCarrierList
+      dl_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        dl_subcarrierSpacing                                         = 1;
+        dl_carrierBandwidth                                          = 106;
+     #initialDownlinkBWP
+      #genericParameters
+        # this is RBstart=0,L=106 (275*(L-1))+RBstart
+        initialDLBWPlocationAndBandwidth                             = 28875;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialDLBWPsubcarrierSpacing                                = 1;
+      #pdcch-ConfigCommon
+        initialDLBWPcontrolResourceSetZero                           = 11;
+        initialDLBWPsearchSpaceZero                                  = 0;
+
+
+
+#uplinkConfigCommon
+     #frequencyInfoUL
+      ul_frequencyBand                                               = 78;
+      #scs-SpecificCarrierList
+      ul_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      ul_subcarrierSpacing                                           = 1;
+      ul_carrierBandwidth                                            = 106;
+      pMax                                                           = 20;
+     #initialUplinkBWP
+      #genericParameters
+        initialULBWPlocationAndBandwidth                             = 28875;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialULBWPsubcarrierSpacing                                = 1;
+      #rach-ConfigCommon
+        #rach-ConfigGeneric
+          prach_ConfigurationIndex                                   = 98;
+#prach_msg1_FDM
+#0 = one, 1=two, 2=four, 3=eight
+          prach_msg1_FDM                                             = 0;
+          prach_msg1_FrequencyStart                                  = 0;
+          zeroCorrelationZoneConfig                                  = 12;
+          preambleReceivedTargetPower                                = -104;
+#preambleTransMax (0...10) = (3,4,5,6,7,8,10,20,50,100,200)
+          preambleTransMax                                           = 6;
+#powerRampingStep
+# 0=dB0,1=dB2,2=dB4,3=dB6
+        powerRampingStep                                             = 1;
+#ra_ResponseWindow
+#1,2,4,8,10,20,40,80
+        ra_ResponseWindow                                            = 4;
+#ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR
+#1=oneeighth,2=onefourth,3=half,4=one,5=two,6=four,7=eight,8=sixteen 
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                 = 3;
+#oneHalf (0..15) 4,8,12,16,...60,64
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB                    = 15;
+#ra_ContentionResolutionTimer
+#(0..7) 8,16,24,32,40,48,56,64
+        ra_ContentionResolutionTimer                                 = 7;
+        rsrp_ThresholdSSB                                            = 19;
+#prach-RootSequenceIndex_PR
+#1 = 839, 2 = 139
+        prach_RootSequenceIndex_PR                                   = 2;
+        prach_RootSequenceIndex                                      = 1;
+        # SCS for msg1, can only be 15 or 30 kHz below 6 GHz, takes precedence over the one derived from prach-ConfigIndex
+        #
+        msg1_SubcarrierSpacing                                       = 1,
+# restrictedSetConfig
+# 0=unrestricted, 1=restricted type A, 2=restricted type B
+        restrictedSetConfig                                          = 0,
+
+        msg3_DeltaPreamble                                           = 1;
+        p0_NominalWithGrant                                          =-90;
+
+# pucch-ConfigCommon setup :
+# pucchGroupHopping
+# 0 = neither, 1= group hopping, 2=sequence hopping
+        pucchGroupHopping                                            = 0;
+        hoppingId                                                    = 40;
+        p0_nominal                                                   = -90;
+# ssb_PositionsInBurst_PR
+# 1=short, 2=medium, 3=long
+      ssb_PositionsInBurst_PR                                        = 2;
+      ssb_PositionsInBurst_Bitmap                                    = 1;
+
+# ssb_periodicityServingCell
+# 0 = ms5, 1=ms10, 2=ms20, 3=ms40, 4=ms80, 5=ms160, 6=spare2, 7=spare1
+      ssb_periodicityServingCell                                     = 2;
+
+# dmrs_TypeA_position
+# 0 = pos2, 1 = pos3
+      dmrs_TypeA_Position                                            = 0;
+
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      subcarrierSpacing                                              = 1;
+
+
+#tdd-UL-DL-ConfigurationCommon
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      referenceSubcarrierSpacing                                     = 1;
+      # pattern1
+      # dl_UL_TransmissionPeriodicity
+      # 0=ms0p5, 1=ms0p625, 2=ms1, 3=ms1p25, 4=ms2, 5=ms2p5, 6=ms5, 7=ms10
+      dl_UL_TransmissionPeriodicity                                  = 6;
+      nrofDownlinkSlots                                              = 7;
+      nrofDownlinkSymbols                                            = 6;
+      nrofUplinkSlots                                                = 2;
+      nrofUplinkSymbols                                              = 4;
+
+      ssPBCH_BlockPower                                              = -25;
+  }
+
+  );
+
+
+    # ------- SCTP definitions
+    SCTP :
+    {
+        # Number of streams to use in input/output
+        SCTP_INSTREAMS  = 2;
+        SCTP_OUTSTREAMS = 2;
+    };
+
+    ////////// AMF parameters:
+        amf_ip_address      = ( { ipv4       = "CI_MME_IP_ADDR";
+                                  ipv6       = "192:168:30::17";
+                                  active     = "yes";
+                                  preference = "ipv4";
+                                                                                                                              }
+                                                                                                                                          );
+
+        NETWORK_INTERFACES :
+        {
+
+           GNB_INTERFACE_NAME_FOR_NG_AMF            = "em1";
+           GNB_IPV4_ADDRESS_FOR_NG_AMF              = "CI_GNB_IP_ADDR";
+           GNB_INTERFACE_NAME_FOR_NGU               = "em1";
+           GNB_IPV4_ADDRESS_FOR_NGU                 = "CI_GNB_IP_ADDR";
+           GNB_PORT_FOR_S1U                         = 2152; # Spec 2152
+        };
+
+  }
+);
+
+MACRLCs = (
+{
+  num_cc           = 1;
+  tr_s_preference  = "local_L1";
+  tr_n_preference  = "local_RRC";
+  pusch_TargetSNRx10 = 200;
+  pucch_TargetSNRx10 = 200;
+   ulsch_max_frame_inactivity = 1;
+}
+);
+
+prs_config = (
+{
+  NumPRSResources       = 1;
+  PRSResourceSetPeriod  = [20, 2];
+  SymbolStart           = [7];
+  NumPRSSymbols         = [6];
+  NumRB                 = 106;
+  RBOffset              = 0;
+  CombSize              = 4;
+  REOffset              = [0];
+  PRSResourceOffset     = [0];
+  PRSResourceRepetition = 1;
+  PRSResourceTimeGap    = 1;
+  NPRS_ID               = [0];
+  MutingPattern1        = [];
+  MutingPattern2        = [];
+  MutingBitRepetition   = 1;
+}
+);
+
+L1s = (
+{
+  num_cc = 1;
+  tr_n_preference = "local_mac";
+  prach_dtx_threshold = 120;
+  #pucch0_dtx_threshold = 150;
+}
+);
+
+RUs = (
+{
+  local_rf       = "yes"
+  nb_tx          = 1
+  nb_rx          = 1
+  att_tx         = 0
+  att_rx         = 0;
+  bands          = [78];
+  max_pdschReferenceSignalPower = -27;
+  max_rxgain                    = 75;
+  eNB_instances  = [0];
+  ##beamforming 1x2 matrix: 1 layer x 2 antennas
+  bf_weights = [0x00007fff, 0x0000];
+  ##beamforming 1x4 matrix: 1 layer x 4 antennas
+  #bf_weights = [0x00007fff, 0x0000,0x0000, 0x0000];
+  ## beamforming 2x2 matrix:
+  # bf_weights = [0x00007fff, 0x00000000, 0x00000000, 0x00007fff];
+  ## beamforming 4x4 matrix:
+  #bf_weights = [0x00007fff, 0x0000, 0x0000, 0x0000, 0x00000000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff];
+  sf_extension = 0
+  sdr_addrs = "mgmt_addr=172.21.19.14,addr=192.168.10.2,second_addr=192.168.20.2,clock_source=internal,time_source=internal"
+}
+);
+
+THREAD_STRUCT = (
+{
+  #three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
+  parallel_config    = "PARALLEL_SINGLE_THREAD";
+  #two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
+  worker_config      = "WORKER_ENABLE";
+}
+);
+
+rfsimulator :
+{
+  serveraddr = "server";
+  serverport = "4043";
+  options = (); #("saviq"); or/and "chanmod"
+  modelname = "AWGN";
+  IQfile = "/tmp/rfsimulator.iqs";
+};
+
+security = {
+  # preferred ciphering algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nea0, nea1, nea2, nea3
+  ciphering_algorithms = ( "nea0" );
+
+  # preferred integrity algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nia0, nia1, nia2, nia3
+  integrity_algorithms = ( "nia2", "nia0" );
+
+  # setting 'drb_ciphering' to "no" disables ciphering for DRBs, no matter
+  # what 'ciphering_algorithms' configures; same thing for 'drb_integrity'
+  drb_ciphering = "yes";
+  drb_integrity = "no";
+};
+
+log_config :
+{
+  global_log_level                      ="info";
+  hw_log_level                          ="info";
+  phy_log_level                         ="info";
+  mac_log_level                         ="info";
+  rlc_log_level                         ="info";
+  pdcp_log_level                        ="info";
+  rrc_log_level                         ="info";
+  f1ap_log_level                         ="debug";
+};
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band261.fr2.64PRB.prs.usrpx310.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band261.fr2.64PRB.prs.usrpx310.conf
new file mode 100644
index 0000000000000000000000000000000000000000..e76367686bd3ae1fa87aa1aabd98cdc56d1ddc1e
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band261.fr2.64PRB.prs.usrpx310.conf
@@ -0,0 +1,290 @@
+Active_gNBs = ( "gNB-Eurecom-5GNRBox");
+# Asn1_verbosity, choice in: none, info, annoying
+Asn1_verbosity = "none";
+
+gNBs =
+(
+ {
+    ////////// Identification parameters:
+    gNB_ID    =  0xe00;
+
+    cell_type =  "CELL_MACRO_GNB";
+
+    gNB_name  =  "gNB-Eurecom-5GNRBox";
+
+    // Tracking area code, 0x0000 and 0xfffe are reserved values
+    tracking_area_code  =  1;
+
+    plmn_list = ({mcc = 311; mnc = 480; mnc_length = 3;});	 
+
+    tr_s_preference     = "local_mac"
+
+    ////////// Physical parameters:
+
+    min_rxtxtime                                              = 6;
+	
+    servingCellConfigCommon = (
+    {
+# spCellConfigCommon
+
+      physCellId                                                    = 0;
+
+# downlinkConfigCommon
+    #frequencyInfoDL
+      # this is pointA + 23 PRBs@120kHz SCS (same as initial BWP)
+      absoluteFrequencySSB                                             = 2071241;
+      dl_frequencyBand                                                 = 261;
+      # this is 27.50004 GHz
+      dl_absoluteFrequencyPointA                                       = 2070833;
+      #scs-SpecificCarrierList
+        dl_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        dl_subcarrierSpacing                                           = 3;
+        dl_carrierBandwidth                                            = 64;
+     #initialDownlinkBWP
+      #genericParameters
+        # this is RBstart=0,L=32 (275*(L-1))+RBstart
+        initialDLBWPlocationAndBandwidth                               = 8525;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        initialDLBWPsubcarrierSpacing                                  = 3;
+      #pdcch-ConfigCommon
+        initialDLBWPcontrolResourceSetZero                             = 12;
+        initialDLBWPsearchSpaceZero                                    = 0;
+
+  #uplinkConfigCommon 
+     #frequencyInfoUL
+      ul_frequencyBand                                              = 261;
+      #scs-SpecificCarrierList
+      ul_offstToCarrier                                             = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      ul_subcarrierSpacing                                          = 3;
+      ul_carrierBandwidth                                           = 64;
+      pMax                                                          = 20;
+     #initialUplinkBWP
+      #genericParameters
+        initialULBWPlocationAndBandwidth                            = 8525;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+        initialULBWPsubcarrierSpacing                               = 3;
+      #rach-ConfigCommon
+        #rach-ConfigGeneric
+          prach_ConfigurationIndex                                  = 52;
+#prach_msg1_FDM
+#0 = one, 1=two, 2=four, 3=eight
+          prach_msg1_FDM                                            = 0;
+          prach_msg1_FrequencyStart                                 = 0;
+          zeroCorrelationZoneConfig                                 = 13;
+          preambleReceivedTargetPower                               = -118;
+#preambleTransMax (0...10) = (3,4,5,6,7,8,10,20,50,100,200)
+          preambleTransMax                                          = 6;
+#powerRampingStep
+# 0=dB0,1=dB2,2=dB4,3=dB6
+        powerRampingStep                                            = 1;
+#ra_ResponseWindow
+#1,2,4,8,10,20,40,80
+        ra_ResponseWindow                                           = 7;
+#ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR: 0=oneeighth,1=onefourth,2=half,3=one,4=two,5=four,6=eight,7=sixteen
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                = 4;
+#oneHalf (0..15) 4,8,12,16,...60,64
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB                   = 7;
+#ra_ContentionResolutionTimer
+#(0..7) 8,16,24,32,40,48,56,64
+        ra_ContentionResolutionTimer                                = 7;
+        rsrp_ThresholdSSB                                           = 19;
+#prach-RootSequenceIndex_PR
+#1 = 839, 2 = 139
+        prach_RootSequenceIndex_PR                                  = 2;
+        prach_RootSequenceIndex                                     = 1;
+        # SCS for msg1, can only be 15 or 30 kHz below 6 GHz, takes precedence over the one derived from prach-ConfigIndex
+        #  
+        msg1_SubcarrierSpacing                                      = 3,
+
+# restrictedSetConfig
+# 0=unrestricted, 1=restricted type A, 2=restricted type B
+        restrictedSetConfig                                         = 0,
+
+        msg3_DeltaPreamble                                          = 1;
+        p0_NominalWithGrant                                         =-90;
+
+# pucch-ConfigCommon setup :
+# pucchGroupHopping
+# 0 = neither, 1= group hopping, 2=sequence hopping
+        pucchGroupHopping                                           = 0;
+        hoppingId                                                   = 40;
+        p0_nominal                                                  = -90;
+# ssb_PositionsInBurst_PR
+# 1=short, 2=medium, 3=long
+      ssb_PositionsInBurst_PR                                       = 3;
+      ssb_PositionsInBurst_Bitmap                                   = 0x0002000200020002L;
+
+# ssb_periodicityServingCell
+# 0 = ms5, 1=ms10, 2=ms20, 3=ms40, 4=ms80, 5=ms160, 6=spare2, 7=spare1 
+      ssb_periodicityServingCell                                    = 1;
+
+# dmrs_TypeA_position
+# 0 = pos2, 1 = pos3
+      dmrs_TypeA_Position                                           = 0;
+
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      subcarrierSpacing                                             = 3;
+
+
+  #tdd-UL-DL-ConfigurationCommon
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120  
+      referenceSubcarrierSpacing                                    = 3;
+      # pattern1 
+      # dl_UL_TransmissionPeriodicity
+      # 0=ms0p5, 1=ms0p625, 2=ms1, 3=ms1p25, 4=ms2, 5=ms2p5, 6=ms5, 7=ms10
+      dl_UL_TransmissionPeriodicity                                 = 3;
+      nrofDownlinkSlots                                             = 7;
+      nrofDownlinkSymbols                                           = 6;
+      nrofUplinkSlots                                               = 2;
+      nrofUplinkSymbols                                             = 4;
+
+      ssPBCH_BlockPower                                             = 10;
+  }
+
+  );
+
+
+    # ------- SCTP definitions
+    SCTP :
+    {
+        # Number of streams to use in input/output
+        SCTP_INSTREAMS  = 2;
+        SCTP_OUTSTREAMS = 2;
+    };
+
+
+    ////////// MME parameters:
+    mme_ip_address      = ( { ipv4       = "192.168.18.99";
+                              ipv6       = "192:168:30::17";
+                              active     = "yes";
+                              preference = "ipv4";
+                            }
+                          );
+
+    ///X2
+    enable_x2 = "no";
+    t_reloc_prep      = 1000;      /* unit: millisecond */
+    tx2_reloc_overall = 2000;      /* unit: millisecond */
+    t_dc_prep         = 1000;      /* unit: millisecond */
+    t_dc_overall      = 2000;      /* unit: millisecond */
+    target_enb_x2_ip_address      = (
+                                     { ipv4       = "192.168.18.199";
+                                       ipv6       = "192:168:30::17";
+                                       preference = "ipv4";
+                                     }
+                                    );
+
+    NETWORK_INTERFACES :
+    {
+
+        GNB_INTERFACE_NAME_FOR_S1_MME            = "eth0";
+        GNB_IPV4_ADDRESS_FOR_S1_MME              = "192.168.18.198/24";
+        GNB_INTERFACE_NAME_FOR_S1U               = "eth0";
+        GNB_IPV4_ADDRESS_FOR_S1U                 = "192.168.18.198/24";
+        GNB_PORT_FOR_S1U                         = 2152; # Spec 2152
+        GNB_IPV4_ADDRESS_FOR_X2C                 = "192.168.18.198/24";
+        GNB_PORT_FOR_X2C                         = 36422; # Spec 36422
+    };
+  }
+);
+
+MACRLCs = (
+{
+  num_cc                      = 1;
+  tr_s_preference             = "local_L1";
+  tr_n_preference             = "local_RRC";
+  pusch_TargetSNRx10          = 200;
+  pucch_TargetSNRx10          = 200;
+}
+);
+
+prs_config = (
+{
+  NumPRSResources       = 8;
+  PRSResourceSetPeriod  = [80, 2];
+  SymbolStart           = [8,8,8,8,8,8,8,8];
+  NumPRSSymbols         = [6,6,6,6,6,6,6,6];
+  NumRB                 = 64;
+  RBOffset              = 0;
+  CombSize              = 4;
+  REOffset              = [1,1,1,1,1,1,1,1];
+  PRSResourceOffset     = [0,10,20,30,40,50,60,70];
+  PRSResourceRepetition = 1;
+  PRSResourceTimeGap    = 1;
+  NPRS_ID               = [10,11,12,13,14,15,16,17];
+  MutingPattern1        = [];
+  MutingPattern2        = [];
+  MutingBitRepetition   = 1;
+}
+);
+
+L1s = (
+{
+  num_cc = 1;
+  tr_n_preference     = "local_mac";
+  ofdm_offset_divisor = 8; #set this to UINT_MAX for offset 0
+}
+);
+
+RUs = (
+{		  
+  local_rf       = "yes"
+  nb_tx          = 1;
+  nb_rx          = 1;
+  att_tx         = 0;
+  att_rx         = 0;
+  bands          = [7];
+  sl_ahead       = 12;
+  max_pdschReferenceSignalPower = -27;
+  max_rxgain                    = 75;
+  eNB_instances  = [0];
+  sdr_addrs = "addr=192.168.10.2,second_addr=192.168.20.2,clock_source=gpsdo,time_source=gpsdo";
+  if_freq   = 50000000L;
+}
+);  
+
+THREAD_STRUCT = (
+{
+  #three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
+  parallel_config    = "PARALLEL_RU_L1_TRX_SPLIT";
+  #two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
+  worker_config      = "WORKER_ENABLE";
+}
+);
+
+security = {
+  # preferred ciphering algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nea0, nea1, nea2, nea3
+  ciphering_algorithms = ( "nea0" );
+
+  # preferred integrity algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nia0, nia1, nia2, nia3
+  integrity_algorithms = ( "nia2", "nia0" );
+
+  # setting 'drb_ciphering' to "no" disables ciphering for DRBs, no matter
+  # what 'ciphering_algorithms' configures; same thing for 'drb_integrity'
+  drb_ciphering = "yes";
+  drb_integrity = "no";
+};
+
+log_config :
+{
+  global_log_level                      ="info";
+  hw_log_level                          ="info";
+  phy_log_level                         ="info";
+  mac_log_level                         ="info";
+  rlc_log_level                         ="info";
+  pdcp_log_level                        ="info";
+  rrc_log_level                         ="info";
+};
+
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band78.fr1.106PRB.prs.usrpx310.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band78.fr1.106PRB.prs.usrpx310.conf
new file mode 100644
index 0000000000000000000000000000000000000000..025a3bfe66a92d433a2c876d5a887b64eb798586
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb1.sa.band78.fr1.106PRB.prs.usrpx310.conf
@@ -0,0 +1,334 @@
+Active_gNBs = ( "gNB-OAI");
+# Asn1_verbosity, choice in: none, info, annoying
+Asn1_verbosity = "none";
+
+gNBs =
+(
+ {
+    ////////// Identification parameters:
+    gNB_CU_ID = 0xe00;
+
+#     cell_type =  "CELL_MACRO_GNB";
+
+    gNB_name  =  "gNB-OAI";
+
+    // Tracking area code, 0x0000 and 0xfffe are reserved values
+    tracking_area_code  =  1;
+
+    plmn_list = ({
+                  mcc = 208;
+                  mnc = 97;
+                  mnc_length = 2;
+                  snssaiList = (
+                                {
+                                  sst = 1;
+                                  sd  = 0x010203; // 0 false, else true
+                                },
+                                                                                                                                                                   {
+                                  sst = 1;
+                                  sd  = 0x112233; // 0 false, else true
+                                }
+                               );
+                 });
+
+    nr_cellid = 12345678L
+
+#     tr_s_preference     = "local_mac"
+
+    ////////// Physical parameters:
+
+    #pusch_TargetSNRx10                                        = 200;
+    #pucch_TargetSNRx10                                        = 200;
+    ul_prbblacklist                                           = "51,52,53,54"
+    min_rxtxtime                                              = 6;
+    do_SRS                                                    = 0;
+
+    pdcch_ConfigSIB1 = (
+      {
+        controlResourceSetZero = 11;
+        searchSpaceZero        = 0;
+      }
+    );
+
+    servingCellConfigCommon = (
+    {
+ #spCellConfigCommon
+
+      physCellId                                                    = 0;
+
+# downlinkConfigCommon
+    #frequencyInfoDL
+      # this is 3301.68 MHz + 22*12*30e-3 MHz = 3309.6
+      #absoluteFrequencySSB                                          = 620640;
+      # this is 3300.60 MHz + 53*12*30e-3 MHz = 3319.68
+      absoluteFrequencySSB                                           = 621312;
+      # this is 3503.28 MHz + 22*12*30e-3 MHz = 3511.2
+      #absoluteFrequencySSB                                          = 634080;
+      # this is 3600.48 MHz
+      #absoluteFrequencySSB                                          = 640032;
+      #dl_frequencyBand                                              = 78;
+      # this is 3301.68 MHz
+      #dl_absoluteFrequencyPointA                                    = 620112;
+      # this is 3300.60 MHz
+      dl_absoluteFrequencyPointA                                     = 620040;
+      # this is 3503.28 MHz
+      #dl_absoluteFrequencyPointA                                    = 633552;
+      # this is 3600.48 MHz
+      #dl_absoluteFrequencyPointA                                    = 640032;
+      #scs-SpecificCarrierList
+      dl_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        dl_subcarrierSpacing                                         = 1;
+        dl_carrierBandwidth                                          = 106;
+     #initialDownlinkBWP
+      #genericParameters
+        # this is RBstart=0,L=106 (275*(L-1))+RBstart
+        initialDLBWPlocationAndBandwidth                             = 28875;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialDLBWPsubcarrierSpacing                                = 1;
+      #pdcch-ConfigCommon
+        initialDLBWPcontrolResourceSetZero                           = 11;
+        initialDLBWPsearchSpaceZero                                  = 0;
+
+
+
+#uplinkConfigCommon
+     #frequencyInfoUL
+      ul_frequencyBand                                               = 78;
+      #scs-SpecificCarrierList
+      ul_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      ul_subcarrierSpacing                                           = 1;
+      ul_carrierBandwidth                                            = 106;
+      pMax                                                           = 20;
+     #initialUplinkBWP
+      #genericParameters
+        initialULBWPlocationAndBandwidth                             = 28875;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialULBWPsubcarrierSpacing                                = 1;
+      #rach-ConfigCommon
+        #rach-ConfigGeneric
+          prach_ConfigurationIndex                                   = 98;
+#prach_msg1_FDM
+#0 = one, 1=two, 2=four, 3=eight
+          prach_msg1_FDM                                             = 0;
+          prach_msg1_FrequencyStart                                  = 0;
+          zeroCorrelationZoneConfig                                  = 12;
+          preambleReceivedTargetPower                                = -104;
+#preambleTransMax (0...10) = (3,4,5,6,7,8,10,20,50,100,200)
+          preambleTransMax                                           = 6;
+#powerRampingStep
+# 0=dB0,1=dB2,2=dB4,3=dB6
+        powerRampingStep                                             = 1;
+#ra_ResponseWindow
+#1,2,4,8,10,20,40,80
+        ra_ResponseWindow                                            = 4;
+#ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR
+#1=oneeighth,2=onefourth,3=half,4=one,5=two,6=four,7=eight,8=sixteen 
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                 = 3;
+#oneHalf (0..15) 4,8,12,16,...60,64
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB                    = 15;
+#ra_ContentionResolutionTimer
+#(0..7) 8,16,24,32,40,48,56,64
+        ra_ContentionResolutionTimer                                 = 7;
+        rsrp_ThresholdSSB                                            = 19;
+#prach-RootSequenceIndex_PR
+#1 = 839, 2 = 139
+        prach_RootSequenceIndex_PR                                   = 2;
+        prach_RootSequenceIndex                                      = 1;
+        # SCS for msg1, can only be 15 or 30 kHz below 6 GHz, takes precedence over the one derived from prach-ConfigIndex
+        #
+        msg1_SubcarrierSpacing                                       = 1,
+# restrictedSetConfig
+# 0=unrestricted, 1=restricted type A, 2=restricted type B
+        restrictedSetConfig                                          = 0,
+
+        msg3_DeltaPreamble                                           = 1;
+        p0_NominalWithGrant                                          =-90;
+
+# pucch-ConfigCommon setup :
+# pucchGroupHopping
+# 0 = neither, 1= group hopping, 2=sequence hopping
+        pucchGroupHopping                                            = 0;
+        hoppingId                                                    = 40;
+        p0_nominal                                                   = -90;
+# ssb_PositionsInBurst_PR
+# 1=short, 2=medium, 3=long
+      ssb_PositionsInBurst_PR                                        = 2;
+      ssb_PositionsInBurst_Bitmap                                    = 2;
+
+# ssb_periodicityServingCell
+# 0 = ms5, 1=ms10, 2=ms20, 3=ms40, 4=ms80, 5=ms160, 6=spare2, 7=spare1
+      ssb_periodicityServingCell                                     = 2;
+
+# dmrs_TypeA_position
+# 0 = pos2, 1 = pos3
+      dmrs_TypeA_Position                                            = 0;
+
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      subcarrierSpacing                                              = 1;
+
+
+#tdd-UL-DL-ConfigurationCommon
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      referenceSubcarrierSpacing                                     = 1;
+      # pattern1
+      # dl_UL_TransmissionPeriodicity
+      # 0=ms0p5, 1=ms0p625, 2=ms1, 3=ms1p25, 4=ms2, 5=ms2p5, 6=ms5, 7=ms10
+      dl_UL_TransmissionPeriodicity                                  = 6;
+      nrofDownlinkSlots                                              = 7;
+      nrofDownlinkSymbols                                            = 6;
+      nrofUplinkSlots                                                = 2;
+      nrofUplinkSymbols                                              = 4;
+
+      ssPBCH_BlockPower                                              = -25;
+  }
+
+  );
+
+
+    # ------- SCTP definitions
+    SCTP :
+    {
+        # Number of streams to use in input/output
+        SCTP_INSTREAMS  = 2;
+        SCTP_OUTSTREAMS = 2;
+    };
+
+    ////////// AMF parameters:
+        amf_ip_address      = ( { ipv4       = "CI_MME_IP_ADDR";
+                                  ipv6       = "192:168:30::17";
+                                  active     = "yes";
+                                  preference = "ipv4";
+                                                                                                                              }
+                                                                                                                                          );
+
+        NETWORK_INTERFACES :
+        {
+
+           GNB_INTERFACE_NAME_FOR_NG_AMF            = "em1";
+           GNB_IPV4_ADDRESS_FOR_NG_AMF              = "CI_GNB_IP_ADDR";
+           GNB_INTERFACE_NAME_FOR_NGU               = "em1";
+           GNB_IPV4_ADDRESS_FOR_NGU                 = "CI_GNB_IP_ADDR";
+           GNB_PORT_FOR_S1U                         = 2152; # Spec 2152
+        };
+
+  }
+);
+
+MACRLCs = (
+{
+  num_cc           = 1;
+  tr_s_preference  = "local_L1";
+  tr_n_preference  = "local_RRC";
+  pusch_TargetSNRx10 = 200;
+  pucch_TargetSNRx10 = 200;
+   ulsch_max_frame_inactivity = 1;
+}
+);
+
+prs_config = (
+{
+  NumPRSResources       = 1;
+  PRSResourceSetPeriod  = [20, 2];
+  SymbolStart           = [7];
+  NumPRSSymbols         = [6];
+  NumRB                 = 106;
+  RBOffset              = 0;
+  CombSize              = 4;
+  REOffset              = [0];
+  PRSResourceOffset     = [1];
+  PRSResourceRepetition = 1;
+  PRSResourceTimeGap    = 1;
+  NPRS_ID               = [1];
+  MutingPattern1        = [];
+  MutingPattern2        = [];
+  MutingBitRepetition   = 1;
+}
+);
+
+L1s = (
+{
+  num_cc = 1;
+  tr_n_preference = "local_mac";
+  prach_dtx_threshold = 120;
+  #pucch0_dtx_threshold = 150;
+}
+);
+
+RUs = (
+{
+  local_rf       = "yes"
+  nb_tx          = 1
+  nb_rx          = 1
+  att_tx         = 0
+  att_rx         = 0;
+  bands          = [78];
+  max_pdschReferenceSignalPower = -27;
+  max_rxgain                    = 75;
+  eNB_instances  = [0];
+  ##beamforming 1x2 matrix: 1 layer x 2 antennas
+  bf_weights = [0x00007fff, 0x0000];
+  ##beamforming 1x4 matrix: 1 layer x 4 antennas
+  #bf_weights = [0x00007fff, 0x0000,0x0000, 0x0000];
+  ## beamforming 2x2 matrix:
+  # bf_weights = [0x00007fff, 0x00000000, 0x00000000, 0x00007fff];
+  ## beamforming 4x4 matrix:
+  #bf_weights = [0x00007fff, 0x0000, 0x0000, 0x0000, 0x00000000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff];
+  sf_extension = 0
+  sdr_addrs = "mgmt_addr=172.21.19.14,addr=192.168.10.2,second_addr=192.168.20.2,clock_source=internal,time_source=internal"
+}
+);
+
+THREAD_STRUCT = (
+{
+  #three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
+  parallel_config    = "PARALLEL_SINGLE_THREAD";
+  #two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
+  worker_config      = "WORKER_ENABLE";
+}
+);
+
+rfsimulator :
+{
+  serveraddr = "server";
+  serverport = "4043";
+  options = (); #("saviq"); or/and "chanmod"
+  modelname = "AWGN";
+  IQfile = "/tmp/rfsimulator.iqs";
+};
+
+security = {
+  # preferred ciphering algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nea0, nea1, nea2, nea3
+  ciphering_algorithms = ( "nea0" );
+
+  # preferred integrity algorithms
+  # the first one in the list that a UE supports is chosen
+  # valid values: nia0, nia1, nia2, nia3
+  integrity_algorithms = ( "nia2", "nia0" );
+
+  # setting 'drb_ciphering' to "no" disables ciphering for DRBs, no matter
+  # what 'ciphering_algorithms' configures; same thing for 'drb_integrity'
+  drb_ciphering = "yes";
+  drb_integrity = "no";
+};
+
+log_config :
+{
+  global_log_level                      ="info";
+  hw_log_level                          ="info";
+  phy_log_level                         ="info";
+  mac_log_level                         ="info";
+  rlc_log_level                         ="info";
+  pdcp_log_level                        ="info";
+  rrc_log_level                         ="info";
+  f1ap_log_level                         ="debug";
+};
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr1.106prb.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr1.106prb.conf
new file mode 100644
index 0000000000000000000000000000000000000000..1bf92fd0d61474a8396a1c4d989ed3d2dd1e4695
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr1.106prb.conf
@@ -0,0 +1,89 @@
+PRSs =
+(
+  {
+    Active_gNBs = 1;
+    prs_config0 = (
+    {
+      gNB_id = 0;
+      NumPRSResources       = 1;
+      PRSResourceSetPeriod  = [20, 2];
+      SymbolStart           = [7];
+      NumPRSSymbols         = [6];
+      NumRB                 = 106;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [0];
+      PRSResourceOffset     = [0];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [0];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+
+    prs_config1 = (
+    {
+      gNB_id = 1;
+      NumPRSResources       = 1;
+      PRSResourceSetPeriod  = [20, 2];
+      SymbolStart           = [7];
+      NumPRSSymbols         = [6];
+      NumRB                 = 106;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [0];
+      PRSResourceOffset     = [1];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [1];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+
+    prs_config2 = (
+    {
+      gNB_id = 2;
+      NumPRSResources       = 1;
+      PRSResourceSetPeriod  = [20, 2];
+      SymbolStart           = [7];
+      NumPRSSymbols         = [6];
+      NumRB                 = 106;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [0];
+      PRSResourceOffset     = [2];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [2];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+
+    prs_config3 = (
+    {
+      gNB_id = 3;
+      NumPRSResources       = 1;
+      PRSResourceSetPeriod  = [20, 2];
+      SymbolStart           = [7];
+      NumPRSSymbols         = [6];
+      NumRB                 = 106;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [0];
+      PRSResourceOffset     = [3];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [3];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+  }
+);
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr2.64prb.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr2.64prb.conf
new file mode 100644
index 0000000000000000000000000000000000000000..cb66dfff37de2f49306bed5f4e42848f184f447d
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/ue.nr.prs.fr2.64prb.conf
@@ -0,0 +1,47 @@
+PRSs =
+(
+  {
+    Active_gNBs = 1;
+    prs_config0 = (
+    {
+      gNB_id = 0;
+      NumPRSResources       = 8;
+      PRSResourceSetPeriod  = [80, 2];
+      SymbolStart           = [8,8,8,8,8,8,8,8];
+      NumPRSSymbols         = [6,6,6,6,6,6,6,6];
+      NumRB                 = 64;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [0,0,0,0,0,0,0,0];
+      PRSResourceOffset     = [0,10,20,30,40,50,60,70];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [0,1,2,3,4,5,6,7];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+
+    prs_config1 = (
+    {
+      gNB_id = 1;
+      NumPRSResources       = 8;
+      PRSResourceSetPeriod  = [80, 2];
+      SymbolStart           = [8,8,8,8,8,8,8,8];
+      NumPRSSymbols         = [6,6,6,6,6,6,6,6];
+      NumRB                 = 64;
+      RBOffset              = 0;
+      CombSize              = 4;
+      REOffset              = [1,1,1,1,1,1,1,1];
+      PRSResourceOffset     = [0,10,20,30,40,50,60,70];
+      PRSResourceRepetition = 1;
+      PRSResourceTimeGap    = 1;
+      NPRS_ID               = [10,11,12,13,14,15,16,17];
+      MutingPattern1        = [];
+      MutingPattern2        = [];
+      MutingBitRepetition   = 1;
+    }
+    );
+  }
+);
diff --git a/targets/RT/USER/lte-enb.c b/targets/RT/USER/lte-enb.c
index cca3ec126126e32aa174bff66aa1457d17c784ba..5d01df15ef8eb546d148b732981b1b64946a5e24 100644
--- a/targets/RT/USER/lte-enb.c
+++ b/targets/RT/USER/lte-enb.c
@@ -62,7 +62,7 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
-#include "targets/ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
diff --git a/targets/RT/USER/lte-ru.c b/targets/RT/USER/lte-ru.c
index d1e9c6029c8eaadffd547eb5a58dd27839ba6816..5a103cf886e1b1d3973edb9206a74a597c842bd4 100644
--- a/targets/RT/USER/lte-ru.c
+++ b/targets/RT/USER/lte-ru.c
@@ -58,8 +58,8 @@
 #include "SCHED/sched_common.h"
 #include "common/utils/LOG/log.h"
 #include "common/utils/LOG/vcd_signal_dumper.h"
-#include "targets/ARCH/COMMON/common_lib.h"
-#include "targets/ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 
 /* these variables have to be defined before including ENB_APP/enb_paramdef.h */
 static int DEFBANDS[] = {7};
@@ -768,12 +768,8 @@ void tx_rf(RU_t *ru,
     }
 
 #if defined(__x86_64) || defined(__i386__)
-#ifdef __AVX2__
     sf_extension = (sf_extension)&0xfffffff8;
-#else
-    sf_extension = (sf_extension)&0xfffffffc;
-#endif
-#elif defined(__arm__)
+#elif defined(__arm__) || defined(__aarch64__)
     sf_extension = (sf_extension)&0xfffffffc;
 #endif
 
diff --git a/targets/RT/USER/lte-softmodem.c b/targets/RT/USER/lte-softmodem.c
index 0b826555317d08bb3e0fea64187719493c3c6dda..82acba4f84abb4920f19eca80903a150c03f8fac 100644
--- a/targets/RT/USER/lte-softmodem.c
+++ b/targets/RT/USER/lte-softmodem.c
@@ -47,8 +47,8 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
diff --git a/targets/RT/USER/lte-softmodem.h b/targets/RT/USER/lte-softmodem.h
index e1fa279195c0c2c7d4d52e7902e2156611ab54e2..774561e26803399aa8b0a45fbb4a448a121721f3 100644
--- a/targets/RT/USER/lte-softmodem.h
+++ b/targets/RT/USER/lte-softmodem.h
@@ -20,7 +20,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include "threads_t.h"
-#include "../../ARCH/COMMON/common_lib.h"
+#include "sdr/COMMON/common_lib.h"
 //#undef MALLOC
 #include "assertions.h"
 #include "PHY/types.h"
@@ -29,7 +29,6 @@
 #include "flexran_agent.h"
 #include "s1ap_eNB.h"
 #include "SIMULATION/ETH_TRANSPORT/proto.h"
-#include "targets/ARCH/COMMON/common_lib.h"
 #include "executables/softmodem-common.h"
 
 
diff --git a/targets/RT/USER/lte-uesoftmodem.c b/targets/RT/USER/lte-uesoftmodem.c
index 963044a506f100c1c49ea7530b442fe0441b0f99..24d444751f390f497355c6b355d49ebac3522181 100644
--- a/targets/RT/USER/lte-uesoftmodem.c
+++ b/targets/RT/USER/lte-uesoftmodem.c
@@ -47,8 +47,8 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/if_defs.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/if_defs.h"
 
 //#undef FRAME_LENGTH_COMPLEX_SAMPLES //there are two conflicting definitions, so we better make sure we don't use it at all
 
diff --git a/targets/RT/USER/ru_control.c b/targets/RT/USER/ru_control.c
index 93a098ead6469781d08d9a71eebbe6f2214f113f..077ccdc771db4a760bba0131d16f427c0a64e4cf 100644
--- a/targets/RT/USER/ru_control.c
+++ b/targets/RT/USER/ru_control.c
@@ -53,8 +53,8 @@
 #undef MALLOC //there are two conflicting definitions, so we better make sure we don't use it at all
 
 
-#include "../../ARCH/COMMON/common_lib.h"
-#include "../../ARCH/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
+#include "sdr/COMMON/common_lib.h"
+#include "sdr/ETHERNET/USERSPACE/LIB/ethernet_lib.h"
 
 #include "PHY/LTE_TRANSPORT/if4_tools.h"
 
diff --git a/targets/TEST/PACKET_TRACER/pt.c b/targets/TEST/PACKET_TRACER/pt.c
index 510865d33e5ab3a802544522ecdd36fbe444e564..cc3b62728c3948d07001fd0baa367cf27c05a65b 100644
--- a/targets/TEST/PACKET_TRACER/pt.c
+++ b/targets/TEST/PACKET_TRACER/pt.c
@@ -32,7 +32,6 @@
 #include "PHY/vars.h"
 #include "MAC_INTERFACE/vars.h"
 
-#include "ARCH/CBMIMO1/DEVICE_DRIVER/vars.h"
 #include "SCHED/defs.h"
 #include "SCHED/vars.h"
 #include "LAYER2/MAC/defs.h"