/*
 * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The OpenAirInterface Software Alliance licenses this file to You under
 * the OAI Public License, Version 1.1  (the "License"); you may not use this file
 * except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.openairinterface.org/?page_id=698
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------------------
 * For more information about the OpenAirInterface (OAI) Software Alliance:
 *      contact@openairinterface.org
 */

#include "defs.h"

/*
 * File-scope 128-bit SIMD variables.  Both are explicitly requested to be
 * 16-byte aligned.  They have internal linkage, so nothing outside this
 * translation unit can observe them.
 * NOTE(review): `shift` is not referenced by any code visible in this file.
 */
static  __m128i alpha_128 __attribute__ ((aligned(16)));
static  __m128i shift     __attribute__ ((aligned(16)));

/*
 * Saturating addition of a two-short scalar to a vector of 16-bit values:
 * alpha[0] is added to every even-indexed short of x and alpha[1] to every
 * odd-indexed short (i.e. real / imaginary parts when the data is stored as
 * interleaved 16-bit complex samples, which is how the test driver in this
 * file uses it).  Results are clamped to [-32768, 32767].
 *
 * x      input samples; read through a __m128i pointer, so the buffer must
 *        be 16-byte aligned
 * alpha  pointer to the two shorts to add; no alignment beyond that of
 *        `short` is needed
 * y      output samples, same alignment requirement as x; may be the very
 *        same buffer as x (in-place operation)
 * N      loop control: the body runs (N >> 3) times and every pass handles
 *        four 128-bit vectors, i.e. 32 shorts.  Values of N below 8 do
 *        nothing and any remainder of N modulo 8 is ignored.
 *        NOTE(review): this touches 4*N shorts in total; confirm against the
 *        callers that this is the intended meaning of N.
 *
 * Always returns 0.
 */
int add_cpx_vector(short *x,
                   short *alpha,
                   short *y,
                   unsigned int N)
{
  const __m128i *src = (const __m128i *)x;
  __m128i *dst = (__m128i *)y;
  const unsigned int passes = N >> 3;
  unsigned int i;

  /*
   * Build the broadcast operand from the two shorts directly.  This yields
   * the same register contents as broadcasting the pair as one 32-bit word,
   * but never reads the shorts through an int pointer (which would break the
   * aliasing rules and could be a misaligned access).  Keeping it in a local
   * also leaves no shared state behind, so the function is reentrant.
   */
  const __m128i addend = _mm_setr_epi16(alpha[0], alpha[1],
                                        alpha[0], alpha[1],
                                        alpha[0], alpha[1],
                                        alpha[0], alpha[1]);

  // four saturating vector additions (16 short pairs) per pass
  for (i = 0; i < passes; i++) {
    dst[0] = _mm_adds_epi16(addend, src[0]);
    dst[1] = _mm_adds_epi16(addend, src[1]);
    dst[2] = _mm_adds_epi16(addend, src[2]);
    dst[3] = _mm_adds_epi16(addend, src[3]);

    src += 4;
    dst += 4;
  }

  return (0);
}

/*
 * Adds a 32-bit scalar to every even-indexed 32-bit lane of a vector and
 * copies the odd-indexed lanes unchanged (they get +0).  The addition is a
 * plain wrapping 32-bit add, not a saturating one.
 *
 * x      input; although typed as short*, it is processed as packed 32-bit
 *        lanes through a __m128i pointer and must be 16-byte aligned
 * alpha  value added to lanes 0 and 2 of each 128-bit vector
 * y      output, same layout and alignment as x; may be the same buffer
 * N      loop control: the body runs (N >> 3) times and every pass handles
 *        four 128-bit vectors, i.e. 16 32-bit lanes (8 even/odd pairs).
 *        Values of N below 8 do nothing; N modulo 8 is ignored.
 *
 * Always returns 0.
 */
int add_vector32_scalar(short *x,
                        int alpha,
                        short *y,
                        unsigned int N)
{
  const __m128i *src = (const __m128i *)x;
  __m128i *dst = (__m128i *)y;
  const unsigned int passes = N >> 3;
  unsigned int i;

  // Local operand instead of a file-scope scratch variable: keeps the
  // function reentrant.  Lane order is (alpha, 0, alpha, 0), lowest first.
  const __m128i addend = _mm_setr_epi32(alpha, 0, alpha, 0);

  // four 32-bit-lane vector additions per pass
  for (i = 0; i < passes; i++) {
    dst[0] = _mm_add_epi32(addend, src[0]);
    dst[1] = _mm_add_epi32(addend, src[1]);
    dst[2] = _mm_add_epi32(addend, src[2]);
    dst[3] = _mm_add_epi32(addend, src[3]);

    src += 4;
    dst += 4;
  }

  return (0);
}


/*
 * Adds a 64-bit scalar to every 64-bit lane of a vector (wrapping add).
 *
 * x   input; although typed as short*, it is processed as packed 64-bit
 *     lanes through a __m128i pointer and must be 16-byte aligned
 * a   value added to each 64-bit lane
 * y   output, same layout and alignment as x; may be the same buffer
 * N   loop control: the body runs (N >> 3) times and every pass handles
 *     four 128-bit vectors, i.e. 8 64-bit lanes.  Values of N below 8 do
 *     nothing; N modulo 8 is ignored.
 *
 * Always returns 0.
 */
int add_real_vector64_scalar(short *x,
                             long long int a,
                             short *y,
                             unsigned int N)
{
  const __m128i *src = (const __m128i *)x;
  __m128i *dst = (__m128i *)y;
  const unsigned int passes = N >> 3;
  unsigned int i;

  // Broadcast straight from the integer: same register contents as going
  // through an MMX __m64 value, without the integer-to-vector cast and
  // without involving the MMX type.  A local keeps the function reentrant.
  const __m128i addend = _mm_set1_epi64x(a);

  // four 64-bit-lane vector additions per pass
  for (i = 0; i < passes; i++) {
    dst[0] = _mm_add_epi64(addend, src[0]);
    dst[1] = _mm_add_epi64(addend, src[1]);
    dst[2] = _mm_add_epi64(addend, src[2]);
    dst[3] = _mm_add_epi64(addend, src[3]);

    src += 4;
    dst += 4;
  }

  return(0);
}


#ifdef MAIN
#include <stdio.h>

125 126
main ()
{
127 128 129 130 131 132 133 134

  short input[256] __attribute__((aligned(16)));
  short output[256] __attribute__((aligned(16)));

  int i;
  struct complex16 alpha;

  Zero_Buffer(output,256*2);
135

136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
  input[0] = 100;
  input[1] = 200;
  input[2] = 100;
  input[3] = 200;
  input[4] = 1234;
  input[5] = -1234;
  input[6] = 1234;
  input[7] = -1234;
  input[8] = 100;
  input[9] = 200;
  input[10] = 100;
  input[11] = 200;
  input[12] = 1000;
  input[13] = 2000;
  input[14] = 1000;
  input[15] = 2000;

  alpha.r = 10;
  alpha.i = -10;
155

156 157
  add_cpx_vector(input,(short*) &alpha,input,8);

158
  for (i=0; i<16; i+=2)
159 160 161 162 163
    printf("output[%d] = %d + %d i\n",i,input[i],input[i+1]);

}

#endif //MAIN