Commit 918d1fd1 authored by Florian Kaltenberger's avatar Florian Kaltenberger
Browse files

Merge branch 'polar-decoder-optimizations' into 'develop-nr'

Polar decoder optimizations

See merge request !413
parents 9b3fc8b3 76ebc5ad
......@@ -18,13 +18,17 @@ int main(int argc, char *argv[]) {
//Initiate timing. (Results depend on CPU Frequency. Therefore, might change due to performance variances during simulation.)
time_stats_t timeEncoder,timeDecoder;
time_stats_t polar_decoder_init,polar_rate_matching,decoding,bit_extraction,deinterleaving;
time_stats_t path_metric,sorting,update_LLR;
opp_enabled=1;
int decoder_int16=0;
int generate_optim_code=0;
cpu_freq_GHz = get_cpu_freq_GHz();
reset_meas(&timeEncoder);
reset_meas(&timeDecoder);
randominit(0);
crcTableInit();
//Default simulation values (Aim for iterations = 1000000.)
int itr, iterations = 1000, arguments, polarMessageType = 0; //0=PBCH, 1=DCI, -1=UCI
double SNRstart = -20.0, SNRstop = 0.0, SNRinc= 0.5; //dB
......@@ -36,7 +40,7 @@ int main(int argc, char *argv[]) {
double timeEncoderCumulative = 0, timeDecoderCumulative = 0;
uint8_t aggregation_level = 8, decoderListSize = 8, pathMetricAppr = 0;
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h")) != -1)
while ((arguments = getopt (argc, argv, "s:d:f:m:i:l:a:h:qg")) != -1)
switch (arguments)
{
case 's':
......@@ -67,6 +71,17 @@ int main(int argc, char *argv[]) {
pathMetricAppr = (uint8_t) atoi(optarg);
break;
case 'q':
decoder_int16=1;
break;
case 'g':
generate_optim_code=1;
iterations=1;
SNRstart=-6.0;
SNRstop =-6.0;
decoder_int16=1;
break;
case 'h':
printf("./polartest -s SNRstart -d SNRinc -f SNRstop -m [0=PBCH|1=DCI|2=UCI] -i iterations -l decoderListSize -a pathMetricAppr\n");
exit(-1);
......@@ -136,7 +151,9 @@ int main(int argc, char *argv[]) {
uint8_t *encoderOutputByte = malloc(sizeof(uint8_t) * coderLength);
double *modulatedInput = malloc (sizeof(double) * coderLength); //channel input
double *channelOutput = malloc (sizeof(double) * coderLength); //add noise
int16_t *channelOutput_int16;
if (decoder_int16 == 1) channelOutput_int16 = (int16_t*)malloc (sizeof(int16_t) * coderLength);
t_nrPolar_paramsPtr nrPolar_params = NULL, currentPtr = NULL;
nr_polar_init(&nrPolar_params, polarMessageType, testLength, aggregation_level);
currentPtr = nr_polar_params(nrPolar_params, polarMessageType, testLength, aggregation_level);
......@@ -285,6 +302,7 @@ int main(int argc, char *argv[]) {
for (int i=0; i<currentPtr->payloadBits; i++) aPrioriArray[i] = NAN;
for (SNR = SNRstart; SNR <= SNRstop; SNR += SNRinc) {
printf("SNR %f\n",SNR);
SNR_lin = pow(10, SNR/10);
for (itr = 1; itr <= iterations; itr++) {
......@@ -318,6 +336,15 @@ int main(int argc, char *argv[]) {
modulatedInput[i]=(-1)/sqrt(2);
channelOutput[i] = modulatedInput[i] + (gaussdouble(0.0,1.0) * (1/sqrt(2*SNR_lin)));
if (decoder_int16==1) {
if (channelOutput[i] > 15) channelOutput_int16[i] = 127;
else if (channelOutput[i] < -16) channelOutput_int16[i] = -128;
else channelOutput_int16[i] = (int16_t) (8*channelOutput[i]);
}
}
start_meas(&timeDecoder);
......@@ -327,12 +354,19 @@ int main(int argc, char *argv[]) {
NR_POLAR_DECODER_LISTSIZE,
aPrioriArray,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION);*/
decoderState = polar_decoder_aPriori(channelOutput,
estimatedOutput,
currentPtr,
NR_POLAR_DECODER_LISTSIZE,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION,
aPrioriArray);
if (decoder_int16==0)
decoderState = polar_decoder_aPriori(channelOutput,
estimatedOutput,
currentPtr,
NR_POLAR_DECODER_LISTSIZE,
NR_POLAR_DECODER_PATH_METRIC_APPROXIMATION,
aPrioriArray);
else
decoderState = polar_decoder_int16(channelOutput_int16,
estimatedOutput,
currentPtr);
stop_meas(&timeDecoder);
/*printf("testInput: [0]->0x%08x\n", testInput[0]);
printf("estimatedOutput: [0]->0x%08x\n", estimatedOutput[0]);*/
......
/*
* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The OpenAirInterface Software Alliance licenses this file to You under
* the OAI Public License, Version 1.1 (the "License"); you may not use this file
* except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.openairinterface.org/?page_id=698
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* contact@openairinterface.org
*/
#include "nrPolar_tools/nr_polar_defs.h"
#include "nrPolar_tools/nr_polar_pbch_defs.h"
#include "nrPolar_tools/nr_polar_uci_defs.h"
void nr_polar_init(t_nrPolar_params* polarParams, int messageType) {
uint32_t poly6 = 0x84000000; // 1000100000... -> D^6+D^5+1
uint32_t poly11 = 0x63200000; //11000100001000... -> D^11+D^10+D^9+D^5+1
//uint32_t poly16 = 0x81080000; //100000010000100... - > D^16+D^12+D^5+1
//uint32_t poly24a = 0x864cfb00; //100001100100110011111011 -> D^24+D^23+D^18+D^17+D^14+D^11+D^10+D^7+D^6+D^5+D^4+D^3+D+1
//uint32_t poly24b = 0x80006300; //100000000000000001100011 -> D^24+D^23+D^6+D^5+D+1
uint32_t poly24c = 0xB2B11700; //101100101011000100010111 -> D^24...
if (messageType == 0) { //DCI
} else if (messageType == 1) { //PBCH
polarParams->n_max = NR_POLAR_PBCH_N_MAX;
polarParams->i_il = NR_POLAR_PBCH_I_IL;
polarParams->i_seg = NR_POLAR_PBCH_I_SEG;
polarParams->n_pc = NR_POLAR_PBCH_N_PC;
polarParams->n_pc_wm = NR_POLAR_PBCH_N_PC_WM;
polarParams->i_bil = NR_POLAR_PBCH_I_BIL;
polarParams->payloadBits = NR_POLAR_PBCH_PAYLOAD_BITS;
polarParams->encoderLength = NR_POLAR_PBCH_E;
polarParams->crcParityBits = NR_POLAR_PBCH_CRC_PARITY_BITS;
polarParams->K = polarParams->payloadBits + polarParams->crcParityBits; // Number of bits to encode.
polarParams->N = nr_polar_output_length(polarParams->K, polarParams->encoderLength, polarParams->n_max);
polarParams->n = log2(polarParams->N);
polarParams->crc_generator_matrix=crc24c_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly24c;
polarParams->G_N = nr_polar_kronecker_power_matrices(polarParams->n);
//polar_encoder vectors:
polarParams->nr_polar_crc = malloc(sizeof(uint8_t) * polarParams->crcParityBits);
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K);
polarParams->nr_polar_d = malloc(sizeof(uint8_t) * polarParams->N);
//Polar Coding vectors
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
polarParams->decoder_kernel = NULL;//polar_decoder_K56_N512_E864;
} else if (messageType == 2) { //UCI
polarParams->payloadBits = NR_POLAR_PUCCH_PAYLOAD_BITS; //A depends on what they carry...
polarParams->encoderLength = NR_POLAR_PUCCH_E ; //E depends on other standards 6.3.1.4
if (polarParams->payloadBits <= 11) //Ref. 38-212, Section 6.3.1.2.2
polarParams->crcParityBits = 0; //K=A
else //Ref. 38-212, Section 6.3.1.2.1
{
if (polarParams->payloadBits < 20)
polarParams->crcParityBits = NR_POLAR_PUCCH_CRC_PARITY_BITS_SHORT;
else
polarParams->crcParityBits = NR_POLAR_PUCCH_CRC_PARITY_BITS_LONG;
if (polarParams->payloadBits >= 360 && polarParams->encoderLength >= 1088)
polarParams->i_seg = NR_POLAR_PUCCH_I_SEG_LONG; // -> C=2
else
polarParams->i_seg = NR_POLAR_PUCCH_I_SEG_SHORT; // -> C=1
}
polarParams->K = polarParams->payloadBits + polarParams->crcParityBits; // Number of bits to encode.
//K_r = K/C ; C = I_seg+1
if((polarParams->K)/(polarParams->i_seg+1)>=18 && (polarParams->K)/(polarParams->i_seg+1)<=25) //Ref. 38-212, Section 6.3.1.3.1
{
polarParams->n_max = NR_POLAR_PUCCH_N_MAX;
polarParams->i_il =NR_POLAR_PUCCH_I_IL;
polarParams->n_pc = NR_POLAR_PUCCH_N_PC_SHORT;
if( (polarParams->encoderLength - polarParams->K)/(polarParams->i_seg + 1) + 3 > 192 )
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_LONG;
else
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_SHORT;
}
if( (polarParams->K)/(polarParams->i_seg + 1) > 30 ) //Ref. 38-212, Section 6.3.1.3.1
{
polarParams->n_max = NR_POLAR_PUCCH_N_MAX;
polarParams->i_il =NR_POLAR_PUCCH_I_IL;
polarParams->n_pc = NR_POLAR_PUCCH_N_PC_LONG;
polarParams->n_pc_wm = NR_POLAR_PUCCH_N_PC_WM_LONG;
}
polarParams->i_bil = NR_POLAR_PUCCH_I_BIL; //Ref. 38-212, Section 6.3.1.4.1
polarParams->N = nr_polar_output_length(polarParams->K, polarParams->encoderLength, polarParams->n_max);
polarParams->n = log2(polarParams->N);
if((polarParams->payloadBits) <= 19)
{
polarParams->crc_generator_matrix=crc6_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly6;
}
else
{
polarParams->crc_generator_matrix=crc11_generator_matrix(polarParams->payloadBits);
polarParams->crc_polynomial = poly11;
}
polarParams->G_N = nr_polar_kronecker_power_matrices(polarParams->n);
//polar_encoder vectors:
polarParams->nr_polar_crc = malloc(sizeof(uint8_t) * polarParams->crcParityBits);
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K);
polarParams->nr_polar_d = malloc(sizeof(uint8_t) * polarParams->N);
//Polar Coding vectors
polarParams->nr_polar_u = malloc(sizeof(uint8_t) * polarParams->N); //Decoder: nr_polar_uHat
polarParams->nr_polar_cPrime = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_cHat
polarParams->nr_polar_b = malloc(sizeof(uint8_t) * polarParams->K); //Decoder: nr_polar_bHat
}
polarParams->crcCorrectionBits = NR_POLAR_CRC_ERROR_CORRECTION_BITS;
polarParams->crc256Table = malloc(sizeof(uint32_t)*256);
crcTable256Init(polarParams->crc_polynomial, polarParams->crc256Table);
polarParams->Q_0_Nminus1 = nr_polar_sequence_pattern(polarParams->n);
polarParams->interleaving_pattern = malloc(sizeof(uint16_t) * polarParams->K);
nr_polar_interleaving_pattern(polarParams->K, polarParams->i_il, polarParams->interleaving_pattern);
polarParams->rate_matching_pattern = malloc(sizeof(uint16_t) * polarParams->encoderLength);
uint16_t *J = malloc(sizeof(uint16_t) * polarParams->N);
nr_polar_rate_matching_pattern(polarParams->rate_matching_pattern, J,
nr_polar_subblock_interleaver_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
polarParams->information_bit_pattern = malloc(sizeof(uint8_t) * polarParams->N);
polarParams->Q_I_N = malloc(sizeof(int16_t) * (polarParams->K + polarParams->n_pc));
polarParams->Q_F_N = malloc(sizeof(int16_t) * (polarParams->N+1)); // Last element shows the final array index assigned a value.
polarParams->Q_PC_N = malloc(sizeof(int16_t) * (polarParams->n_pc));
for (int i=0; i<=polarParams->N; i++) polarParams->Q_F_N[i] = -1; // Empty array.
nr_polar_info_bit_pattern(polarParams->information_bit_pattern,
polarParams->Q_I_N, polarParams->Q_F_N, J, polarParams->Q_0_Nminus1,
polarParams->K, polarParams->N, polarParams->encoderLength, polarParams->n_pc);
polarParams->channel_interleaver_pattern = malloc(sizeof(uint16_t) * polarParams->encoderLength);
nr_polar_channel_interleaver_pattern(polarParams->channel_interleaver_pattern,
polarParams->i_bil, polarParams->encoderLength);
polarParams->extended_crc_generator_matrix = malloc(polarParams->K * sizeof(uint8_t *)); //G_P3
uint8_t tempECGM[polarParams->K][polarParams->crcParityBits];
for (int i = 0; i < polarParams->K; i++){
polarParams->extended_crc_generator_matrix[i] = malloc(polarParams->crcParityBits * sizeof(uint8_t));
}
for (int i=0; i<polarParams->payloadBits; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
tempECGM[i][j]=polarParams->crc_generator_matrix[i][j];
}
}
for (int i=polarParams->payloadBits; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
if( (i-polarParams->payloadBits) == j ){
tempECGM[i][j]=1;
} else {
tempECGM[i][j]=0;
}
}
}
for (int i=0; i<polarParams->K; i++) {
for (int j=0; j<polarParams->crcParityBits; j++) {
polarParams->extended_crc_generator_matrix[i][j]=tempECGM[polarParams->interleaving_pattern[i]][j];
}
}
build_decoder_tree(polarParams);
printf("decoder tree nodes %d\n",polarParams->tree.num_nodes);
free(J);
}
......@@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*-------------------------------------------------------------------------------
* For more information about the OpenAirInterface (OAI) Software Alliance:
* For more information about the OpenAirInterface (OAI) Software Alliance
* contact@openairinterface.org
*/
......@@ -38,6 +38,7 @@
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
int8_t polar_decoder(
double *input,
uint8_t *out,
......@@ -285,7 +286,6 @@ int8_t polar_decoder_aPriori(double *input,
uint8_t pathMetricAppr,
double *aPrioriPayload)
{
uint8_t ***bit = nr_alloc_uint8_t_3D_array(polarParams->N, (polarParams->n+1), 2*listSize);
uint8_t **bitUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
uint8_t **llrUpdated = nr_alloc_uint8_t_2D_array(polarParams->N, (polarParams->n+1)); //0=False, 1=True
......@@ -355,6 +355,7 @@ int8_t polar_decoder_aPriori(double *input,
uint8_t listIndex[2*listSize], copyIndex;
for (uint16_t currentBit=0; currentBit<polarParams->N; currentBit++){
updateLLR(llr, llrUpdated, bit, bitUpdated, currentListSize, currentBit, 0, polarParams->N, (polarParams->n+1), pathMetricAppr);
if (polarParams->information_bit_pattern[currentBit]==0) { //Frozen bit.
updatePathMetric(pathMetric, llr, currentListSize, 0, currentBit, pathMetricAppr); //approximation=0 --> 11b, approximation=1 --> 12
......@@ -528,6 +529,7 @@ int8_t polar_decoder_aPriori(double *input,
*/
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
......@@ -1034,3 +1036,42 @@ int8_t polar_decoder_dci(double *input,
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
int8_t polar_decoder_int16(int16_t *input,
uint8_t *out,
t_nrPolar_params *polarParams)
{
int16_t d_tilde[polarParams->N];// = malloc(sizeof(double) * polarParams->N);
nr_polar_rate_matching_int16(input, d_tilde, polarParams->rate_matching_pattern, polarParams->K, polarParams->N, polarParams->encoderLength);
for (int i=0;i<polarParams->N;i++) {
if (d_tilde[i]<-128) d_tilde[i]=-128;
else if (d_tilde[i]>127) d_tilde[i]=128;
}
memcpy((void*)&polarParams->tree.root->alpha[0],(void*)&d_tilde[0],sizeof(int16_t)*polarParams->N);
/*
* SCL polar decoder.
*/
generic_polar_decoder(polarParams,polarParams->tree.root);
//Extract the information bits (û to ĉ)
nr_polar_info_bit_extraction(polarParams->nr_polar_U, polarParams->nr_polar_CPrime, polarParams->information_bit_pattern, polarParams->N);
//Deinterleaving (ĉ to b)
nr_polar_deinterleaver(polarParams->nr_polar_CPrime, polarParams->nr_polar_B, polarParams->interleaving_pattern, polarParams->K);
//Remove the CRC (â)
for (int j = 0; j < polarParams->payloadBits; j++) polarParams->nr_polar_A[j]=polarParams->nr_polar_B[j];
nr_byte2bit_uint8_32_t(polarParams->nr_polar_A, polarParams->payloadBits, out);
return(0);
}
......@@ -31,6 +31,10 @@
*/
#include "PHY/CODING/nrPolar_tools/nr_polar_defs.h"
#include "PHY/sse_intrin.h"
#include "PHY/impl_defs_top.h"
//#define DEBUG_NEW_IMPL
void updateLLR(double ***llr,
uint8_t **llrU,
......@@ -56,8 +60,9 @@ void updateLLR(double ***llr,
computeLLR(llr, row, col, i, offset, approximation);
}
}
llrU[row][col]=1;
// printf("LLR (a %f, b %f): llr[%d][%d] %f\n",32*a,32*b,col,row,32*llr[col][row]);
}
void updateBit(uint8_t ***bit,
......@@ -68,6 +73,7 @@ void updateBit(uint8_t ***bit,
uint16_t xlen,
uint8_t ylen)
{
uint16_t offset = ( xlen/(pow(2,(ylen-col))) );
for (uint8_t i=0; i<listSize; i++) {
......@@ -101,6 +107,7 @@ void updatePathMetric(double *pathMetric,
}
}
void updatePathMetric2(double *pathMetric,
double ***llr,
uint8_t listSize,
......@@ -110,6 +117,7 @@ void updatePathMetric2(double *pathMetric,
double *tempPM = malloc(sizeof(double) * listSize);
for (int i=0; i < listSize; i++) tempPM[i]=pathMetric[i];
uint8_t bitValue = 0;
if (appr) { //eq. (12)
for (uint8_t i = 0; i < listSize; i++) {
......@@ -132,7 +140,9 @@ void updatePathMetric2(double *pathMetric,
free(tempPM);
}
}
void computeLLR(double ***llr,
uint16_t row,
......@@ -152,6 +162,7 @@ void computeLLR(double ***llr,
llr[row][col][i] = log((exp(a + b) + 1) / (exp(a) + exp(b)));
}
}
void updateCrcChecksum(uint8_t **crcChecksum,
......@@ -179,3 +190,347 @@ void updateCrcChecksum2(uint8_t **crcChecksum,
}
}
}
decoder_node_t *new_decoder_node(int first_leaf_index,int level) {
decoder_node_t *node=(decoder_node_t *)malloc(sizeof(decoder_node_t));
node->first_leaf_index=first_leaf_index;
node->level=level;
node->Nv = 1<<level;
node->leaf = 0;
node->left=(decoder_node_t *)NULL;
node->right=(decoder_node_t *)NULL;
node->all_frozen=0;
node->alpha = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
node->beta = (int16_t*)malloc16(node->Nv*sizeof(int16_t));
memset((void*)node->beta,-1,node->Nv*sizeof(int16_t));
return(node);
}
decoder_node_t *add_nodes(int level,int first_leaf_index,t_nrPolar_params *pp) {
int all_frozen_below=1;
int Nv = 1<<level;
decoder_node_t *new_node = new_decoder_node(first_leaf_index,level);
#ifdef DEBUG_NEW_IMPL
printf("New node %d order %d, level %d\n",pp->tree.num_nodes,Nv,level);
pp->tree.num_nodes++;
#endif
if (level==0) {
#ifdef DEBUG_NEW_IMPL
printf("leaf %d (%s)\n",first_leaf_index,pp->information_bit_pattern[first_leaf_index]==1 ? "information or crc" : "frozen");
#endif
new_node->leaf=1;
new_node->all_frozen = pp->information_bit_pattern[first_leaf_index]==0 ? 1 : 0;
return new_node; // this is a leaf node
}
for (int i=0;i<Nv;i++) {
if (pp->information_bit_pattern[i+first_leaf_index]>0) all_frozen_below=0;
}
if (all_frozen_below==0) new_node->left=add_nodes(level-1,first_leaf_index,pp);
else {
#ifdef DEBUG_NEW_IMPL
printf("aggregating frozen bits %d ... %d at level %d (%s)\n",first_leaf_index,first_leaf_index+Nv-1,level,((first_leaf_index/Nv)&1)==0?"left":"right");
#endif
new_node->leaf=1;
new_node->all_frozen=1;
}
if (all_frozen_below==0) new_node->right=add_nodes(level-1,first_leaf_index+(Nv/2),pp);
return(new_node);
}
void build_decoder_tree(t_nrPolar_params *pp) {
pp->tree.num_nodes=0;
pp->tree.root = add_nodes(pp->n,0,pp);
}
#if defined(__arm__) || defined(__aarch64__)
// translate 1-1 SIMD functions from SSE to NEON
#define __m128i int16x8_t
#define __m64 int8x8_t
#define _mm_abs_epi16(a) vabsq_s16(a)
#define _mm_min_epi16(a,b) vminq_s16(a,b)
#define _mm_subs_epi16(a,b) vsubq_s16(a,b)
#define _mm_abs_pi16(a) vabs_s16(a)
#define _mm_min_pi16(a,b) vmin_s16(a,b)
#define _mm_subs_pi16(a,b) vsub_s16(a,b)
#endif
void applyFtoleft(t_nrPolar_params *pp,decoder_node_t *node) {
int16_t *alpha_v=node->alpha;
int16_t *alpha_l=node->left->alpha;
int16_t *betal = node->left->beta;
int16_t a,b,absa,absb,maska,maskb,minabs;
#ifdef DEBUG_NEW_IMPL
printf("applyFtoleft %d, Nv %d (level %d,node->left (leaf %d, AF %d))\n",node->first_leaf_index,node->Nv,node->level,node->left->leaf,node->left->all_frozen);
for (int i=0;i<node->Nv;i++) printf("i%d (frozen %d): alpha_v[i] = %d\n",i,1-pp->information_bit_pattern[node->first_leaf_index+i],alpha_v[i]);
#endif
if (node->left->all_frozen == 0) {
#if defined(__AVX2__)
int avx2mod = (node->Nv/2)&15;
if (avx2mod == 0) {
__m256i a256,b256,absa256,absb256,minabs256;
int avx2len = node->Nv/2/16;
// printf("avx2len %d\n",avx2len);
for (int i=0;i<avx2len;i++) {
a256 =((__m256i*)alpha_v)[i];
b256 =((__m256i*)alpha_v)[i+avx2len];
absa256 =_mm256_abs_epi16(a256);
absb256 =_mm256_abs_epi16(b256);
minabs256 =_mm256_min_epi16(absa256,absb256);
((__m256i*)alpha_l)[i] =_mm256_sign_epi16(minabs256,_mm256_sign_epi16(a256,b256));
}
}
else if (avx2mod == 8) {
__m128i a128,b128,absa128,absb128,minabs128;
a128 =*((__m128i*)alpha_v);
b128 =((__m128i*)alpha_v)[1];
absa128 =_mm_abs_epi16(a128);
absb128 =_mm_abs_epi16(b128);
minabs128 =_mm_min_epi16(absa128,absb128);
*((__m128i*)alpha_l) =_mm_sign_epi16(minabs128,_mm_sign_epi16(a128,b128));
}
else if (avx2mod == 4) {
__m64 a64,b64,absa64,absb64,minabs64;
a64 =*((__m64*)alpha_v);
b64 =((__m64*)alpha_v)[1];
absa64 =_mm_abs_pi16(a64);
absb64 =_mm_abs_pi16(b64);
minabs64 =_mm_min_pi16(absa64,absb64);
*((__m64*)alpha_l) =_mm_sign_pi16(minabs64,_mm_sign_pi16(a64,b64));
}
else
#else
int sse4mod = (node->Nv/2)&7;
int sse4len = node->Nv/2/8;
#if defined(__arm__) || defined(__aarch64__)
int16x8_t signatimesb,comp1,comp2,negminabs128;
int16x8_t zero=vdupq_n_s16(0);
#endif