/* ul_7_5_kHz.c */
/*
 * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The OpenAirInterface Software Alliance licenses this file to You under
 * the OAI Public License, Version 1.0  (the "License"); you may not use this file
 * except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.openairinterface.org/?page_id=698
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------------------
 * For more information about the OpenAirInterface (OAI) Software Alliance:
 *      contact@openairinterface.org
 */

22 23 24 25 26 27 28 29 30 31
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "extern.h"
#include "kHz_7_5.h"
#include "prach625Hz.h"
#ifdef USER_MODE
#include <math.h>
#else
#include "rtai_math.h"
#endif
32 33
#include "PHY/sse_intrin.h"

34 35 36 37
// Sign masks for one 128-bit vector of eight 16-bit lanes, i.e. four complex
// samples stored as interleaved (first, second) = (re, im) 16-bit pairs.
// All three are 16-byte aligned so they can be loaded through SIMD vector pointers.

// Negates the first (even-index) lane of every pair and keeps the second.
short conjugate75[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1} ;
// Negates the second (odd-index) lane of every pair and keeps the first,
// i.e. complex-conjugates a vector of interleaved (re, im) samples.
short conjugate75_2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
// Negates every lane. Not referenced by apply_7_5_kHz() or remove_7_5_kHz().
short negate[8]__attribute__((aligned(16))) = {-1,-1,-1,-1,-1,-1,-1,-1};

38
/**
 * \brief Multiply one slot of transmit samples, in place, by the 7.5 kHz table.
 *
 * Every int32_t in txdata is treated as a complex sample made of two 16-bit
 * lanes (real first, imaginary second). For each sample x and table entry k
 * the function stores  sat16( (x * conj(k)) >> 15 )  back into txdata, four
 * samples per SIMD iteration.
 *
 * The table is chosen from ue->frame_parms: N_RB_UL selects the bandwidth
 * (6/15/25/50/75/100; any other value silently falls back to the 25-RB
 * table) and Ncp==0 selects the s<N>n_ table, otherwise the s<N>e_ table.
 *
 * \param ue     Supplies frame_parms (N_RB_UL, Ncp, samples_per_tti).
 * \param txdata Sample buffer; elements [slot*samples_per_tti/2,
 *               (slot+1)*samples_per_tti/2) are rewritten.
 * \param slot   Slot index used to compute the offset into txdata.
 *
 * Only whole groups of four samples are processed (len>>2 iterations); a
 * remainder of len%4 samples, if any, is left untouched.
 * NOTE(review): the vector accesses assume &txdata[slot_offset] and the table
 * are 16-byte aligned - confirm against the buffer allocation.
 * On targets that are neither x86 nor __arm__ the loop body is empty and the
 * buffer is not modified.
 */
void apply_7_5_kHz(PHY_VARS_UE *ue,int32_t*txdata,uint8_t slot)
{
  uint16_t len;
  uint32_t *kHz7_5ptr;
#if defined(__x86_64__) || defined(__i386__)
  __m128i *txptr128,*kHz7_5ptr128,mmtmp_re,mmtmp_im,mmtmp_re2,mmtmp_im2;
#elif defined(__arm__)
  int16x8_t *txptr128,*kHz7_5ptr128;
  int16x8_t tx128,rot128;
  int16x4x2_t iq;
  int32x4_t mmtmp_re,mmtmp_im;
  int32x4_t mmtmp0,mmtmp1;
#endif
  uint32_t slot_offset;
  uint32_t i;
  LTE_DL_FRAME_PARMS *frame_parms=&ue->frame_parms;

  // Pick the rotation table for this bandwidth / cyclic-prefix combination.
  switch (frame_parms->N_RB_UL) {

  case 6:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s6n_kHz_7_5 : (uint32_t*)s6e_kHz_7_5;
    break;

  case 15:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s15n_kHz_7_5 : (uint32_t*)s15e_kHz_7_5;
    break;

  case 25:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s25n_kHz_7_5 : (uint32_t*)s25e_kHz_7_5;
    break;

  case 50:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s50n_kHz_7_5 : (uint32_t*)s50e_kHz_7_5;
    break;

  case 75:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s75n_kHz_7_5 : (uint32_t*)s75e_kHz_7_5;
    break;

  case 100:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s100n_kHz_7_5 : (uint32_t*)s100e_kHz_7_5;
    break;

  default:
    // Unknown bandwidth: fall back to the 25-RB table.
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s25n_kHz_7_5 : (uint32_t*)s25e_kHz_7_5;
    break;
  }

  // One slot is half a TTI.
  slot_offset = (uint32_t)slot * frame_parms->samples_per_tti/2;
  len = frame_parms->samples_per_tti/2;

#if defined(__x86_64__) || defined(__i386__)
  txptr128 = (__m128i *)&txdata[slot_offset];
  kHz7_5ptr128 = (__m128i *)kHz7_5ptr;
#elif defined(__arm__)
  txptr128 = (int16x8_t*)&txdata[slot_offset];
  kHz7_5ptr128 = (int16x8_t*)kHz7_5ptr;
#endif

  // apply 7.5 kHz, four complex samples per iteration
  for (i=0; i<(len>>2); i++) {
#if defined(__x86_64__) || defined(__i386__)
    // Real part of complex multiplication (note: 7_5kHz signal is conjugated for this to work)
    // re = tx_re*k_re + tx_im*k_im
    mmtmp_re = _mm_madd_epi16(*txptr128,*kHz7_5ptr128);
    // Swap (re,im) of the table entry and negate the first lane: [-k_im, k_re]
    mmtmp_im = _mm_shufflelo_epi16(*kHz7_5ptr128,_MM_SHUFFLE(2,3,0,1));
    mmtmp_im = _mm_shufflehi_epi16(mmtmp_im,_MM_SHUFFLE(2,3,0,1));
    mmtmp_im = _mm_sign_epi16(mmtmp_im,*(__m128i*)&conjugate75[0]);
    // im = -tx_re*k_im + tx_im*k_re
    mmtmp_im = _mm_madd_epi16(mmtmp_im,txptr128[0]);
    // Scale the 32-bit products back down by 15 bits.
    mmtmp_re = _mm_srai_epi32(mmtmp_re,15);
    mmtmp_im = _mm_srai_epi32(mmtmp_im,15);
    // Re-interleave as (re,im) pairs and narrow to 16 bits with saturation.
    mmtmp_re2 = _mm_unpacklo_epi32(mmtmp_re,mmtmp_im);
    mmtmp_im2 = _mm_unpackhi_epi32(mmtmp_re,mmtmp_im);

    txptr128[0] = _mm_packs_epi32(mmtmp_re2,mmtmp_im2);
    txptr128++;
    kHz7_5ptr128++;
#elif defined(__arm__)
    // Same arithmetic as the x86 branch: scaled by >>15, saturated, and
    // stored as interleaved (re,im) pairs.
    tx128  = txptr128[0];
    rot128 = kHz7_5ptr128[0];

    // re = tx_re*k_re + tx_im*k_im (pairwise add of the widened products)
    mmtmp0 = vmull_s16(vget_low_s16(tx128),vget_low_s16(rot128));
    mmtmp1 = vmull_s16(vget_high_s16(tx128),vget_high_s16(rot128));
    mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
                            vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));

    // Swap (re,im) of the table entry and negate the first lane: [-k_im, k_re]
    rot128 = vmulq_s16(vrev32q_s16(rot128),vld1q_s16(conjugate75));
    // im = -tx_re*k_im + tx_im*k_re
    mmtmp0 = vmull_s16(vget_low_s16(tx128),vget_low_s16(rot128));
    mmtmp1 = vmull_s16(vget_high_s16(tx128),vget_high_s16(rot128));
    mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
                            vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));

    // Scale the 32-bit products back down by 15 bits.
    mmtmp_re = vshrq_n_s32(mmtmp_re,15);
    mmtmp_im = vshrq_n_s32(mmtmp_im,15);

    // Saturating narrow, then interleave [re0 im0 re1 im1 | re2 im2 re3 im3].
    iq = vzip_s16(vqmovn_s32(mmtmp_re),vqmovn_s32(mmtmp_im));
    txptr128[0] = vcombine_s16(iq.val[0],iq.val[1]);
    txptr128++;
    kHz7_5ptr128++;
#endif
  }
}


143
/**
 * \brief Multiply one received slot by the conjugate of what apply_7_5_kHz()
 *        uses, writing the result to the rxdata_7_5kHz buffers.
 *
 * Every int32_t sample is treated as two 16-bit lanes (real first, imaginary
 * second). For each receive antenna, each input sample x and table entry k,
 * the function stores  sat16( (x * k) >> 15 )  into the output buffer, four
 * samples per SIMD iteration. The input buffer and the table are only read.
 *
 * The table is chosen from eNB->frame_parms exactly as in apply_7_5_kHz():
 * N_RB_UL selects the bandwidth (unknown values fall back to the 25-RB table)
 * and Ncp==0 selects the s<N>n_ table, otherwise the s<N>e_ table.
 *
 * \param eNB  Supplies frame_parms, N_TA_offset, and the buffers
 *             common_vars.rxdata[0][aa] (input) and
 *             common_vars.rxdata_7_5kHz[0][aa] (output).
 * \param slot Slot index. Input is read from offset
 *             slot*samples_per_tti/2 - N_TA_offset; output is written at
 *             (slot&1)*samples_per_tti/2, so the output buffer holds one
 *             even and one odd slot.
 *
 * Only whole groups of four samples are processed (len>>2 iterations).
 * NOTE(review): slot_offset is unsigned; if N_TA_offset is larger than
 * slot*samples_per_tti/2 (e.g. slot 0 with a non-zero offset) the subtraction
 * wraps and the read goes far out of bounds - confirm callers never do this.
 * NOTE(review): the vector loads assume &rxdata[aa][slot_offset] is 16-byte
 * aligned, which also depends on N_TA_offset - confirm.
 * On targets that are neither x86 nor __arm__ the loop body is empty and no
 * output is written.
 */
void remove_7_5_kHz(PHY_VARS_eNB *eNB,uint8_t slot)
{
  int32_t **rxdata=eNB->common_vars.rxdata[0];
  int32_t **rxdata_7_5kHz=eNB->common_vars.rxdata_7_5kHz[0];
  uint16_t len;
  uint32_t *kHz7_5ptr;
#if defined(__x86_64__) || defined(__i386__)
  __m128i *rxptr128,*rxptr128_7_5kHz,*kHz7_5ptr128,kHz7_5_2,mmtmp_re,mmtmp_im,mmtmp_re2,mmtmp_im2;
#elif defined(__arm__)
  int16x8_t *rxptr128,*kHz7_5ptr128,*rxptr128_7_5kHz;
  int16x8_t rx128,rot128;
  int16x4x2_t iq;
  int32x4_t mmtmp_re,mmtmp_im;
  int32x4_t mmtmp0,mmtmp1;
#endif
  uint32_t slot_offset,slot_offset2;
  uint8_t aa;
  uint32_t i;
  LTE_DL_FRAME_PARMS *frame_parms=&eNB->frame_parms;

  // Pick the rotation table for this bandwidth / cyclic-prefix combination.
  switch (frame_parms->N_RB_UL) {

  case 6:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s6n_kHz_7_5 : (uint32_t*)s6e_kHz_7_5;
    break;

  case 15:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s15n_kHz_7_5 : (uint32_t*)s15e_kHz_7_5;
    break;

  case 25:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s25n_kHz_7_5 : (uint32_t*)s25e_kHz_7_5;
    break;

  case 50:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s50n_kHz_7_5 : (uint32_t*)s50e_kHz_7_5;
    break;

  case 75:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s75n_kHz_7_5 : (uint32_t*)s75e_kHz_7_5;
    break;

  case 100:
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s100n_kHz_7_5 : (uint32_t*)s100e_kHz_7_5;
    break;

  default:
    // Unknown bandwidth: fall back to the 25-RB table.
    kHz7_5ptr = (frame_parms->Ncp==0) ? (uint32_t*)s25n_kHz_7_5 : (uint32_t*)s25e_kHz_7_5;
    break;
  }

  // Input offset: start of the slot, moved back by N_TA_offset samples.
  slot_offset = (uint32_t)slot * frame_parms->samples_per_tti/2-eNB->N_TA_offset;
  // Output offset: first or second half of the output buffer.
  slot_offset2 = (uint32_t)(slot&1) * frame_parms->samples_per_tti/2;

  len = frame_parms->samples_per_tti/2;

  for (aa=0; aa<frame_parms->nb_antennas_rx; aa++) {

    // Restart from the beginning of the table for every antenna.
#if defined(__x86_64__) || defined(__i386__)
    rxptr128        = (__m128i *)&rxdata[aa][slot_offset];
    rxptr128_7_5kHz = (__m128i *)&rxdata_7_5kHz[aa][slot_offset2];
    kHz7_5ptr128    = (__m128i *)kHz7_5ptr;
#elif defined(__arm__)
    rxptr128        = (int16x8_t *)&rxdata[aa][slot_offset];
    rxptr128_7_5kHz = (int16x8_t *)&rxdata_7_5kHz[aa][slot_offset2];
    kHz7_5ptr128    = (int16x8_t *)kHz7_5ptr;
#endif

    // apply 7.5 kHz, four complex samples per iteration
    for (i=0; i<(len>>2); i++) {

#if defined(__x86_64__) || defined(__i386__)
      // Conjugate the table entry in a register: [k_re, -k_im]
      kHz7_5_2 = _mm_sign_epi16(*kHz7_5ptr128,*(__m128i*)&conjugate75_2[0]);
      // Real part of complex multiplication (note: 7_5kHz signal is conjugated for this to work)
      // re = rx_re*k_re - rx_im*k_im
      mmtmp_re = _mm_madd_epi16(*rxptr128,kHz7_5_2);
      // Swap lanes and negate the first one: [k_im, k_re]
      mmtmp_im = _mm_shufflelo_epi16(kHz7_5_2,_MM_SHUFFLE(2,3,0,1));
      mmtmp_im = _mm_shufflehi_epi16(mmtmp_im,_MM_SHUFFLE(2,3,0,1));
      mmtmp_im = _mm_sign_epi16(mmtmp_im,*(__m128i*)&conjugate75[0]);
      // im = rx_re*k_im + rx_im*k_re
      mmtmp_im = _mm_madd_epi16(mmtmp_im,rxptr128[0]);
      // Scale the 32-bit products back down by 15 bits.
      mmtmp_re = _mm_srai_epi32(mmtmp_re,15);
      mmtmp_im = _mm_srai_epi32(mmtmp_im,15);
      // Re-interleave as (re,im) pairs and narrow to 16 bits with saturation.
      mmtmp_re2 = _mm_unpacklo_epi32(mmtmp_re,mmtmp_im);
      mmtmp_im2 = _mm_unpackhi_epi32(mmtmp_re,mmtmp_im);

      rxptr128_7_5kHz[0] = _mm_packs_epi32(mmtmp_re2,mmtmp_im2);
      rxptr128++;
      rxptr128_7_5kHz++;
      kHz7_5ptr128++;

#elif defined(__arm__)
      // Same arithmetic as the x86 branch. The conjugated table entry lives
      // only in a register: writing it back would flip the shared table on
      // every pass (each antenna, each call).
      rx128  = rxptr128[0];
      rot128 = vmulq_s16(kHz7_5ptr128[0],vld1q_s16(conjugate75_2)); // [k_re, -k_im]

      // re = rx_re*k_re - rx_im*k_im (pairwise add of the widened products)
      mmtmp0 = vmull_s16(vget_low_s16(rx128),vget_low_s16(rot128));
      mmtmp1 = vmull_s16(vget_high_s16(rx128),vget_high_s16(rot128));
      mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
                              vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));

      // Swap lanes and negate the first one: [k_im, k_re]
      rot128 = vmulq_s16(vrev32q_s16(rot128),vld1q_s16(conjugate75));
      // im = rx_re*k_im + rx_im*k_re
      mmtmp0 = vmull_s16(vget_low_s16(rx128),vget_low_s16(rot128));
      mmtmp1 = vmull_s16(vget_high_s16(rx128),vget_high_s16(rot128));
      mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
                              vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));

      // Scale the 32-bit products back down by 15 bits.
      mmtmp_re = vshrq_n_s32(mmtmp_re,15);
      mmtmp_im = vshrq_n_s32(mmtmp_im,15);

      // Saturating narrow, then interleave [re0 im0 re1 im1 | re2 im2 re3 im3].
      iq = vzip_s16(vqmovn_s32(mmtmp_re),vqmovn_s32(mmtmp_im));
      rxptr128_7_5kHz[0] = vcombine_s16(iq.val[0],iq.val[1]);
      rxptr128_7_5kHz++;
      rxptr128++;
      kHz7_5ptr128++;

#endif
    }
  }
}
264