Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
oai
openairinterface5G
Commits
89849444
Commit
89849444
authored
Feb 19, 2016
by
Florian Kaltenberger
Browse files
fixing a bug in cmult_vv, removing unused code.
parent
78913e26
Changes
5
Hide whitespace changes
Inline
Side-by-side
openair1/PHY/TOOLS/cmult_mm.c
deleted
100644 → 0
View file @
78913e26
/*******************************************************************************
OpenAirInterface
Copyright(c) 1999 - 2014 Eurecom
OpenAirInterface is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OpenAirInterface is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OpenAirInterface.The full GNU General Public License is
included in this distribution in the file called "COPYING". If not,
see <http://www.gnu.org/licenses/>.
Contact Information
OpenAirInterface Admin: openair_admin@eurecom.fr
OpenAirInterface Tech : openair_tech@eurecom.fr
OpenAirInterface Dev : openair4g-devel@lists.eurecom.fr
Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE
*******************************************************************************/
#include
"defs.h"
int
mult_cpx_matrix_h
(
short
*
x1
[
2
][
2
],
short
*
x2
[
2
][
2
],
short
*
y
[
2
][
2
],
unsigned
int
N
,
unsigned
short
output_shift
,
short
hermitian
)
{
if
(
hermitian
)
{
// this computes x1^H*x2 and stores it in y
mult_cpx_vector_h
(
x2
[
0
][
0
],
x1
[
0
][
0
],
y
[
0
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
0
][
1
],
x1
[
0
][
1
],
y
[
0
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
0
][
0
],
x1
[
1
][
0
],
y
[
1
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
0
][
1
],
x1
[
1
][
1
],
y
[
1
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
1
][
0
],
x1
[
0
][
0
],
y
[
0
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
1
][
1
],
x1
[
0
][
1
],
y
[
0
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
1
][
0
],
x1
[
1
][
0
],
y
[
1
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x2
[
1
][
1
],
x1
[
1
][
1
],
y
[
1
][
1
],
N
,
output_shift
,
1
);
}
else
{
// this computes x1*x2^H and stores it in y
mult_cpx_vector_h
(
x1
[
0
][
0
],
x2
[
0
][
0
],
y
[
0
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
0
][
1
],
x2
[
0
][
1
],
y
[
0
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
0
][
0
],
x2
[
1
][
0
],
y
[
0
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
0
][
1
],
x2
[
1
][
1
],
y
[
0
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
1
][
0
],
x2
[
0
][
0
],
y
[
1
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
1
][
1
],
x2
[
0
][
1
],
y
[
1
][
0
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
1
][
0
],
x2
[
1
][
0
],
y
[
1
][
1
],
N
,
output_shift
,
1
);
mult_cpx_vector_h
(
x1
[
1
][
1
],
x2
[
1
][
1
],
y
[
1
][
1
],
N
,
output_shift
,
1
);
}
}
int
mult_cpx_matrix_vector
(
int
*
x1
[
2
][
2
],
int
*
x2
[
2
],
int
*
y
[
2
],
unsigned
int
N
,
unsigned
short
output_shift
)
{
Zero_Buffer
(
y
[
0
],
N
*
8
);
Zero_Buffer
(
y
[
1
],
N
*
8
);
// this computes x1*x2 and stores it in y (32 bit)
mult_cpx_vector_add32
((
short
*
)
x2
[
0
],(
short
*
)
x1
[
0
][
0
],(
short
*
)
y
[
0
],
N
);
mult_cpx_vector_add32
((
short
*
)
x2
[
1
],(
short
*
)
x1
[
0
][
1
],(
short
*
)
y
[
0
],
N
);
mult_cpx_vector_add32
((
short
*
)
x2
[
0
],(
short
*
)
x1
[
1
][
0
],(
short
*
)
y
[
1
],
N
);
mult_cpx_vector_add32
((
short
*
)
x2
[
1
],(
short
*
)
x1
[
1
][
1
],(
short
*
)
y
[
1
],
N
);
// shift and pack
shift_and_pack
((
short
*
)
y
[
0
],
N
,
output_shift
);
shift_and_pack
((
short
*
)
y
[
1
],
N
,
output_shift
);
}
#ifdef MAIN_MM
#include
<stdio.h>
#include
<stdlib.h>
main
()
{
short
x1_00
[
256
]
__attribute__
((
aligned
(
16
)));
short
x1_10
[
256
]
__attribute__
((
aligned
(
16
)));
short
x1_01
[
256
]
__attribute__
((
aligned
(
16
)));
short
x1_11
[
256
]
__attribute__
((
aligned
(
16
)));
short
x2_0
[
256
]
__attribute__
((
aligned
(
16
)));
short
x2_1
[
256
]
__attribute__
((
aligned
(
16
)));
short
y_0
[
256
]
__attribute__
((
aligned
(
16
)));
short
y_1
[
256
]
__attribute__
((
aligned
(
16
)));
int
*
x1
[
2
][
2
];
int
*
x2
[
2
];
int
*
y
[
2
];
int
i
,
m
,
n
;
x1
[
0
][
0
]
=
(
int
*
)
x1_00
;
x1
[
0
][
1
]
=
(
int
*
)
x1_01
;
x1
[
1
][
0
]
=
(
int
*
)
x1_10
;
x1
[
1
][
1
]
=
(
int
*
)
x1_11
;
x2
[
0
]
=
(
int
*
)
x2_0
;
x2
[
1
]
=
(
int
*
)
x2_1
;
y
[
0
]
=
(
int
*
)
y_0
;
y
[
1
]
=
(
int
*
)
y_1
;
for
(
m
=
0
;
m
<
2
;
m
++
)
{
for
(
n
=
0
;
n
<
2
;
n
++
)
{
for
(
i
=
0
;
i
<
256
;
i
+=
4
)
{
((
short
*
)
x1
[
m
][
n
])[
i
]
=
((
short
)
rand
())
/
4
;
((
short
*
)
x1
[
m
][
n
])[
i
+
1
]
=
((
short
)
rand
())
/
4
;
((
short
*
)
x1
[
m
][
n
])[
i
+
2
]
=
-
((
short
*
)
x1
[
m
][
n
])[
i
+
1
];
((
short
*
)
x1
[
m
][
n
])[
i
+
3
]
=
((
short
*
)
x1
[
m
][
n
])[
i
];
}
}
for
(
i
=
0
;
i
<
256
;
i
+=
4
)
{
((
short
*
)
x2
[
m
])[
i
]
=
((
short
)
rand
())
/
4
;
((
short
*
)
x2
[
m
])[
i
+
1
]
=
((
short
)
rand
())
/
4
;
((
short
*
)
x2
[
m
])[
i
+
2
]
=
((
short
*
)
x2
[
m
])[
i
];
((
short
*
)
x2
[
m
])[
i
+
3
]
=
((
short
*
)
x2
[
m
])[
i
+
1
];
}
Zero_Buffer
(
y
[
m
],
512
);
}
/*
input[0] = 100;
input[1] = 200;
input[2] = -200;
input[3] = 100;
input[4] = 1000;
input[5] = 2000;
input[6] = -2000;
input[7] = 1000;
input[8] = 100;
input[9] = 200;
input[10] = -200;
input[11] = 100;
input[12] = 1000;
input[13] = 2000;
input[14] = -2000;
input[15] = 1000;
input2[0] = 2;
input2[1] = 1;
input2[2] = 2;
input2[3] = 1;
input2[4] = 20;
input2[5] = 10;
input2[6] = 20;
input2[7] = 10;
input2[8] = 2;
input2[9] = 1;
input2[10] = 2;
input2[11] = 1;
input2[12] = 2000;
input2[13] = 1000;
input2[14] = 2000;
input2[15] = 1000;
x1[0][0] = (int*)input;
x1[0][1] = (int*)input;
x1[1][0] = (int*)input;
x1[1][1] = (int*)input;
x2[0] = (int*)input2;
x2[1] = (int*)input2;
y[0] = (int*)output;
y[1] = (int*)output2;
*/
mult_cpx_matrix_vector
(
x1
,
x2
,
y
,
64
,
15
);
//mult_cpx_vector_add32(x2[0],x1[0][0],y[0],64);
for
(
i
=
0
;
i
<
128
;
i
+=
2
)
printf
(
"i=%d, x1 = [%d+1i*%d %d+1i*%d; %d+1i*%d %d+1i*%d]; x2 = [%d+1i*%d; %d+1i*%d]; y = [%d+1i*%d; %d+1i*%d]; y_m= round(x1*x2./pow2(15)); y-y_m
\n
"
,
i
,
((
short
*
)
x1
[
0
][
0
])[
2
*
i
],
((
short
*
)
x1
[
0
][
0
])[
2
*
i
+
2
],
((
short
*
)
x1
[
0
][
1
])[
2
*
i
],
((
short
*
)
x1
[
0
][
1
])[
2
*
i
+
2
],
((
short
*
)
x1
[
1
][
0
])[
2
*
i
],
((
short
*
)
x1
[
1
][
0
])[
2
*
i
+
2
],
((
short
*
)
x1
[
1
][
1
])[
2
*
i
],
((
short
*
)
x1
[
1
][
1
])[
2
*
i
+
2
],
((
short
*
)
x2
[
0
])[
2
*
i
],
((
short
*
)
x2
[
0
])[
2
*
i
+
1
],
((
short
*
)
x2
[
1
])[
2
*
i
],
((
short
*
)
x2
[
1
])[
2
*
i
+
1
],
((
short
*
)
y
[
0
])[
2
*
i
],
((
short
*
)
y
[
0
])[
2
*
i
+
1
],
((
short
*
)
y
[
1
])[
2
*
i
],
((
short
*
)
y
[
1
])[
2
*
i
+
1
]);
//((int*)y[0])[i], ((int*)y[0])[i+1],
//((int*)y[1])[i], ((int*)y[1])[i+1]);
}
#endif
openair1/PHY/TOOLS/cmult_vv.c
View file @
89849444
...
...
@@ -50,7 +50,7 @@ int mult_cpx_conj_vector(int16_t *x1,
uint32_t
N
,
int
output_shift
)
{
// Multiply elementwise t
wo
complex
vectors of N elements with repeated formatted output
// Multiply elementwise t
he
complex
conjugate of x1 with x2.
// x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
...
...
@@ -90,7 +90,7 @@ int mult_cpx_conj_vector(int16_t *x1,
tmp_im
=
_mm_shufflelo_epi16
(
*
x1_128
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_shufflehi_epi16
(
tmp_im
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
tmp_im
=
_mm_sign_epi16
(
tmp_im
,
*
(
__m128i
*
)
&
conjug
[
0
]);
tmp_im
=
_mm_madd_epi16
(
tmp_im
,
*
x
1
_128
);
tmp_im
=
_mm_madd_epi16
(
tmp_im
,
*
x
2
_128
);
tmp_re
=
_mm_srai_epi32
(
tmp_re
,
output_shift
);
tmp_im
=
_mm_srai_epi32
(
tmp_im
,
output_shift
);
tmpy0
=
_mm_unpacklo_epi32
(
tmp_re
,
tmp_im
);
...
...
@@ -130,3 +130,4 @@ int mult_cpx_conj_vector(int16_t *x1,
return
(
0
);
}
openair1/PHY/TOOLS/cmult_vvh.c
deleted
100644 → 0
View file @
78913e26
/*******************************************************************************
OpenAirInterface
Copyright(c) 1999 - 2014 Eurecom
OpenAirInterface is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OpenAirInterface is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OpenAirInterface.The full GNU General Public License is
included in this distribution in the file called "COPYING". If not,
see <http://www.gnu.org/licenses/>.
Contact Information
OpenAirInterface Admin: openair_admin@eurecom.fr
OpenAirInterface Tech : openair_tech@eurecom.fr
OpenAirInterface Dev : openair4g-devel@lists.eurecom.fr
Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE
*******************************************************************************/
#include
"defs.h"
static
__m128i
shift
__attribute__
((
aligned
(
16
)));
int
mult_cpx_vector_h
(
short
*
x1
,
short
*
x2
,
short
*
y
,
unsigned
int
N
,
unsigned
short
output_shift
,
short
sign
)
{
// Multiply elementwise the complex vector x1 with the complex conjugate of the complex vecotr x2 of N elements and adds it to the vector y.
// x1 - input 1 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
// x2 - input 2 in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)|
// We assume x2 with a dinamic of 14 bit maximum
//
// y - output in the format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)|
//
// N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
//
// log2_amp - increase the output amplitude by a factor 2^log2_amp (default is 0)
// WARNING: log2_amp>0 can cause overflow!!
// sign - +1..add, -1..substract
unsigned
int
i
;
// loop counter
register
__m128i
m0
,
m1
,
m2
;
short
*
temps
;
int
*
tempd
;
__m128i
*
x1_128
;
__m128i
*
x2_128
;
__m128i
*
y_128
;
__m128i
mask
;
__m128i
temp
;
shift
=
_mm_cvtsi32_si128
(
output_shift
);
x1_128
=
(
__m128i
*
)
&
x1
[
0
];
x2_128
=
(
__m128i
*
)
&
x2
[
0
];
y_128
=
(
__m128i
*
)
&
y
[
0
];
if
(
sign
==
-
1
)
mask
=
(
__m128i
)
_mm_set_epi16
(
-
1
,
1
,
-
1
,
-
1
,
-
1
,
1
,
-
1
,
-
1
);
else
mask
=
(
__m128i
)
_mm_set_epi16
(
1
,
-
1
,
1
,
1
,
1
,
-
1
,
1
,
1
);
// we compute 2*4 cpx multiply for each loop
for
(
i
=
0
;
i
<
(
N
>>
3
);
i
++
)
{
// printf("i=%d\n",i);
// unroll 1
// temps = (short *)x1_128;
// printf("x1 : %d,%d,%d,%d,%d,%d,%d,%d\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]);
m1
=
x1_128
[
0
];
m2
=
x2_128
[
0
];
// temps = (short *)&x2_128[0];
// printf("x2 : %x,%x,%x,%x,%x,%x,%x,%x\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]);
// bring x2 in conjugate form
// the first two instructions might be replaced with a single one in SSE3
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
// temp = m2;
// temps = (short *)&temp;
// printf("x2 conj : %x,%x,%x,%x,%x,%x,%x,%x\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
//pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0]
// temp = m0;
// tempd = &temp;
// printf("m0 : %d,%d,%d,%d\n",tempd[0],tempd[1],tempd[2],tempd[3]);
m0
=
_mm_sra_epi32
(
m0
,
shift
);
// 1- shift right by shift in order to compensate for the input amplitude
// temp = m0;
// tempd = (int *)&temp;
// printf("m0 : %d,%d,%d,%d\n",tempd[0],tempd[1],tempd[2],tempd[3]);
m0
=
_mm_packs_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
m0
=
_mm_unpacklo_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
y_128
[
0
]
=
_mm_add_epi16
(
m0
,
y_128
[
0
]);
// temps = (short *)&y_128[0];
// printf("y0 : %d,%d,%d,%d,%d,%d,%d,%d\n",temps[0],temps[1],temps[2],temps[3],temps[4],temps[5],temps[6],temps[7]);
// unroll 2
m1
=
x1_128
[
1
];
m2
=
x2_128
[
1
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
//pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0]
m0
=
_mm_sra_epi32
(
m0
,
shift
);
// 1- shift right by shift in order to compensate for the input amplitude
m0
=
_mm_packs_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
m0
=
_mm_unpacklo_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
y_128
[
1
]
=
_mm_add_epi16
(
m0
,
y_128
[
1
]);
// unroll 3
m1
=
x1_128
[
2
];
m2
=
x2_128
[
2
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
//pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0]
m0
=
_mm_sra_epi32
(
m0
,
shift
);
// 1- shift right by shift in order to compensate for the input amplitude
m0
=
_mm_packs_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
m0
=
_mm_unpacklo_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
y_128
[
2
]
=
_mm_add_epi16
(
m0
,
y_128
[
2
]);
// unroll 4
m1
=
x1_128
[
3
];
m2
=
x2_128
[
3
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
//pmaddwd_r2r(mm1,mm0); // 1- compute x1[0]*x2[0]
m0
=
_mm_sra_epi32
(
m0
,
shift
);
// 1- shift right by shift in order to compensate for the input amplitude
m0
=
_mm_packs_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
m0
=
_mm_unpacklo_epi32
(
m0
,
m0
);
// 1- pack in a 128 bit register [re im re im]
y_128
[
3
]
=
_mm_add_epi16
(
m0
,
y_128
[
3
]);
x1_128
+=
4
;
x2_128
+=
4
;
y_128
+=
4
;
// printf("x1_128 = %p, x2_128 =%p, y_128=%p\n",x1_128,x2_128,y_128);
}
_mm_empty
();
_m_empty
();
return
(
0
);
}
int
mult_cpx_vector_h_add32
(
short
*
x1
,
short
*
x2
,
short
*
y
,
unsigned
int
N
,
short
sign
)
{
// Multiply elementwise the complex vector x1 with the complex conjugate of the complex vecotr x2 of N elements and adds it to the vector y.
// x1 - input 1 in 16bit format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)|
// We assume x1 with a dinamic of 15 bit maximum
//
// x2 - input 2 in 16bit format |Re0 Im0 Re0 Im0|,......,|Re(N-1) Im(N-1) Re(N-1) Im(N-1)|
// We assume x2 with a dinamic of 14 bit maximum
//
// y - output in 32bit format |Re0 Im0|,......,|Re(N-1) Im(N-1)|
//
// N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4;
//
// sign - +1..add, -1..substract
unsigned
int
i
;
// loop counter
register
__m128i
m0
,
m1
,
m2
;
short
*
temps
;
int
*
tempd
;
__m128i
*
x1_128
;
__m128i
*
x2_128
;
__m128i
*
y_128
;
__m128i
mask
;
__m128i
temp
;
x1_128
=
(
__m128i
*
)
&
x1
[
0
];
x2_128
=
(
__m128i
*
)
&
x2
[
0
];
y_128
=
(
__m128i
*
)
&
y
[
0
];
if
(
sign
==
-
1
)
mask
=
(
__m128i
)
_mm_set_epi16
(
-
1
,
1
,
-
1
,
-
1
,
-
1
,
1
,
-
1
,
-
1
);
else
mask
=
(
__m128i
)
_mm_set_epi16
(
1
,
-
1
,
1
,
1
,
1
,
-
1
,
1
,
1
);
// we compute 2*4 cpx multiply for each loop
for
(
i
=
0
;
i
<
(
N
>>
3
);
i
++
)
{
m1
=
x1_128
[
0
];
m2
=
x2_128
[
0
];
// bring x2 in conjugate form
// the first two instructions might be replaced with a single one in SSE3
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
// 1- compute x1[0]*x2[0], result is 32bit
y_128
[
0
]
=
_mm_add_epi32
(
m0
,
y_128
[
0
]);
// unroll 2
m1
=
x1_128
[
1
];
m2
=
x2_128
[
1
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
y_128
[
1
]
=
_mm_add_epi32
(
m0
,
y_128
[
1
]);
// unroll 3
m1
=
x1_128
[
2
];
m2
=
x2_128
[
2
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
y_128
[
2
]
=
_mm_add_epi32
(
m0
,
y_128
[
2
]);
// unroll 4
m1
=
x1_128
[
3
];
m2
=
x2_128
[
3
];
m2
=
_mm_shufflelo_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_shufflehi_epi16
(
m2
,
_MM_SHUFFLE
(
0
,
1
,
3
,
2
));
m2
=
_mm_mullo_epi16
(
m2
,
mask
);
m0
=
_mm_madd_epi16
(
m1
,
m2
);
y_128
[
3
]
=
_mm_add_epi32
(
m0
,
y_128
[
3
]);
x1_128
+=
4
;
x2_128
+=
4
;
y_128
+=
4
;
}
_mm_empty
();
_m_empty
();
return
(
0
);
}
#ifdef MAIN
#define L 16
main
()
{
short
input
[
256
]
__attribute__
((
aligned
(
16
)));
short
input2
[
256
]
__attribute__
((
aligned
(
16
)));
short
output
[
256
]
__attribute__
((
aligned
(
16
)));
int
i
;
Zero_Buffer
(
output
,
256
*
2
);
for
(
i
=
0
;
i
<
16
;
i
+=
2
)
printf
(
"output[%d] = %d + %d i
\n
"
,
i
,
output
[
i
],
output
[
i
+
1
]);
input
[
0
]
=
100
;
input
[
1
]
=
200
;
input
[
2
]
=
100
;
input
[
3
]
=
200
;
input
[
4
]
=
1234
;
input
[
5
]
=
-
1234
;
input
[
6
]
=
1234
;
input
[
7
]
=
-
1234
;
input
[
8
]
=
100
;
input
[
9
]
=
200
;
input
[
10
]
=
100
;
input
[
11
]
=
200
;
input
[
12
]
=
1000
;
input
[
13
]
=
2000
;
input
[
14
]
=
1000
;
input
[
15
]
=
2000
;
input2
[
0
]
=
1
;
input2
[
1
]
=
2
;
input2
[
2
]
=
1
;
input2
[
3
]
=
2
;
input2
[
4
]
=
10
;
input2
[
5
]
=
20
;
input2
[
6
]
=
10
;
input2
[
7
]
=
20
;
input2
[
8
]
=
1
;
input2
[
9
]
=
2
;
input2
[
10
]
=
1
;
input2
[
11
]
=
2
;
input2
[
12
]
=
1000
;
input2
[
13
]
=
2000
;
input2
[
14
]
=
1000
;
input2
[
15
]
=
2000
;
mult_cpx_vector_h
(
input2
,
input2
,
output
,
8
,
0
,
1
);
for
(
i
=
0
;
i
<
16
;
i
+=
2
)
printf
(
"output[%d] = %d + %d i
\n
"
,
i
,
output
[
i
],
output
[
i
+
1
]);
Zero_Buffer
(
output
,
256
*
2
);
mult_cpx_vector_h
(
input2
,
input2
,
output
,
8
,
0
,
-
1
);
for
(
i
=
0
;
i
<
16
;
i
+=
2
)
printf
(
"output[%d] = %d + %d i
\n
"
,
i
,
output
[
i
],
output
[
i
+
1
]);
}
#endif //MAIN
openair1/PHY/TOOLS/defs.h
View file @
89849444
...
...
@@ -64,6 +64,8 @@ struct complex32 {
int32_t
i
;
};
//cmult_sv.h
/*!\fn void multadd_real_vector_complex_scalar(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N)
This function performs componentwise multiplication and accumulation of a complex scalar and a real vector.
@param x Vector input (Q1.15)
...
...
@@ -95,192 +97,14 @@ void multadd_complex_vector_real_scalar(int16_t *x,
uint8_t
zero_flag
,
uint32_t
N
);
/*!\fn int32_t mult_cpx_vector(int16_t *x1,int16_t *x2,int16_t *y,uint32_t N,int32_t output_shift)
This function performs optimized componentwise multiplication of two Q1.15 vectors in repeated format.