ulsch_demodulation.c 61.6 KB
Newer Older
1
/*******************************************************************************
2
    OpenAirInterface
ghaddab's avatar
ghaddab committed
3
    Copyright(c) 1999 - 2014 Eurecom
4

ghaddab's avatar
ghaddab committed
5
6
7
8
    OpenAirInterface is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
9
10


ghaddab's avatar
ghaddab committed
11
12
13
14
    OpenAirInterface is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
15

ghaddab's avatar
ghaddab committed
16
    You should have received a copy of the GNU General Public License
17
18
    along with OpenAirInterface.The full GNU General Public License is
   included in this distribution in the file called "COPYING". If not,
ghaddab's avatar
ghaddab committed
19
   see <http://www.gnu.org/licenses/>.
20
21

  Contact Information
ghaddab's avatar
ghaddab committed
22
23
24
  OpenAirInterface Admin: openair_admin@eurecom.fr
  OpenAirInterface Tech : openair_tech@eurecom.fr
  OpenAirInterface Dev  : openair4g-devel@eurecom.fr
25

ghaddab's avatar
ghaddab committed
26
  Address      : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE
27

ghaddab's avatar
ghaddab committed
28
 *******************************************************************************/
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

/*! \file PHY/LTE_TRANSPORT/ulsch_demodulation.c
* \brief Top-level routines for demodulating the PUSCH physical channel from 36.211 V8.6 2009-03
* \author R. Knopp
* \date 2011
* \version 0.1
* \company Eurecom
* \email: knopp@eurecom.fr, florian.kaltenberger@eurecom.fr, ankit.bhamri@eurecom.fr
* \note
* \warning
*/

#include "PHY/defs.h"
#include "PHY/extern.h"
#include "MAC_INTERFACE/defs.h"
#include "MAC_INTERFACE/extern.h"
#include "defs.h"
#include "extern.h"
//#define DEBUG_ULSCH
48
#include "PHY/sse_intrin.h"
49
50
51
52
53
54
55
56
57
58
59

//extern char* namepointer_chMag ;
//eren
//extern int **ulchmag_eren;
//eren


static short jitter[8]  __attribute__ ((aligned(16))) = {1,0,0,1,0,1,1,0};
static short jitterc[8] __attribute__ ((aligned(16))) = {0,1,1,0,1,0,0,1};

#ifndef OFDMA_ULSCH
60
61
void lte_idft(LTE_DL_FRAME_PARMS *frame_parms,uint32_t *z, uint16_t Msc_PUSCH)
{
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

  __m128i idft_in128[3][1200],idft_out128[3][1200];
  int16_t *idft_in0=(int16_t*)idft_in128[0],*idft_out0=(int16_t*)idft_out128[0];
  int16_t *idft_in1=(int16_t*)idft_in128[1],*idft_out1=(int16_t*)idft_out128[1];
  int16_t *idft_in2=(int16_t*)idft_in128[2],*idft_out2=(int16_t*)idft_out128[2];

  uint32_t *z0,*z1,*z2,*z3,*z4,*z5,*z6,*z7,*z8,*z9,*z10=NULL,*z11=NULL;
  int i,ip;

  __m128i norm128;

  //  printf("Doing lte_idft for Msc_PUSCH %d\n",Msc_PUSCH);

  if (frame_parms->Ncp == 0) { // Normal prefix
    z0 = z;
    z1 = z0+(frame_parms->N_RB_DL*12);
    z2 = z1+(frame_parms->N_RB_DL*12);
    //pilot
    z3 = z2+(2*frame_parms->N_RB_DL*12);
    z4 = z3+(frame_parms->N_RB_DL*12);
    z5 = z4+(frame_parms->N_RB_DL*12);

    z6 = z5+(frame_parms->N_RB_DL*12);
    z7 = z6+(frame_parms->N_RB_DL*12);
    z8 = z7+(frame_parms->N_RB_DL*12);
    //pilot
    z9 = z8+(2*frame_parms->N_RB_DL*12);
    z10 = z9+(frame_parms->N_RB_DL*12);
    // srs
    z11 = z10+(frame_parms->N_RB_DL*12);
92
  } else { // extended prefix
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    z0 = z;
    z1 = z0+(frame_parms->N_RB_DL*12);
    //pilot
    z2 = z1+(2*frame_parms->N_RB_DL*12);
    z3 = z2+(frame_parms->N_RB_DL*12);
    z4 = z3+(frame_parms->N_RB_DL*12);

    z5 = z4+(frame_parms->N_RB_DL*12);
    z6 = z5+(frame_parms->N_RB_DL*12);
    //pilot
    z7 = z6+(2*frame_parms->N_RB_DL*12);
    z8 = z7+(frame_parms->N_RB_DL*12);
    // srs
    z9 = z8+(frame_parms->N_RB_DL*12);
  }
108

109
  // conjugate input
110
  for (i=0; i<(Msc_PUSCH>>2); i++) {
111
112
113
114
115
116
117
118
119
120
    *&(((__m128i*)z0)[i])=_mm_sign_epi16(*&(((__m128i*)z0)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z1)[i])=_mm_sign_epi16(*&(((__m128i*)z1)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z2)[i])=_mm_sign_epi16(*&(((__m128i*)z2)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z3)[i])=_mm_sign_epi16(*&(((__m128i*)z3)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z4)[i])=_mm_sign_epi16(*&(((__m128i*)z4)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z5)[i])=_mm_sign_epi16(*&(((__m128i*)z5)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z6)[i])=_mm_sign_epi16(*&(((__m128i*)z6)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z7)[i])=_mm_sign_epi16(*&(((__m128i*)z7)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z8)[i])=_mm_sign_epi16(*&(((__m128i*)z8)[i]),*(__m128i*)&conjugate2[0]);
    *&(((__m128i*)z9)[i])=_mm_sign_epi16(*&(((__m128i*)z9)[i]),*(__m128i*)&conjugate2[0]);
121

122
123
124
125
    if (frame_parms->Ncp==0) {
      *&(((__m128i*)z10)[i])=_mm_sign_epi16(*&(((__m128i*)z10)[i]),*(__m128i*)&conjugate2[0]);
      *&(((__m128i*)z11)[i])=_mm_sign_epi16(*&(((__m128i*)z11)[i]),*(__m128i*)&conjugate2[0]);
    }
126
127
128
  }

  for (i=0,ip=0; i<Msc_PUSCH; i++,ip+=4) {
129
130
131
132
133
134
135
136
137
138
    ((uint32_t*)idft_in0)[ip+0] =  z0[i];
    ((uint32_t*)idft_in0)[ip+1] =  z1[i];
    ((uint32_t*)idft_in0)[ip+2] =  z2[i];
    ((uint32_t*)idft_in0)[ip+3] =  z3[i];
    ((uint32_t*)idft_in1)[ip+0] =  z4[i];
    ((uint32_t*)idft_in1)[ip+1] =  z5[i];
    ((uint32_t*)idft_in1)[ip+2] =  z6[i];
    ((uint32_t*)idft_in1)[ip+3] =  z7[i];
    ((uint32_t*)idft_in2)[ip+0] =  z8[i];
    ((uint32_t*)idft_in2)[ip+1] =  z9[i];
139

140
141
142
143
144
    if (frame_parms->Ncp==0) {
      ((uint32_t*)idft_in2)[ip+2] =  z10[i];
      ((uint32_t*)idft_in2)[ip+3] =  z11[i];
    }
  }
145
146


147
148
149
150
151
152
153
154
  switch (Msc_PUSCH) {
  case 12:
    dft12((int16_t *)idft_in0,(int16_t *)idft_out0);
    dft12((int16_t *)idft_in1,(int16_t *)idft_out1);
    dft12((int16_t *)idft_in2,(int16_t *)idft_out2);

    /*
    dft12f(&((__m128i *)idft_in0)[0],&((__m128i *)idft_in0)[1],&((__m128i *)idft_in0)[2],&((__m128i *)idft_in0)[3],&((__m128i *)idft_in0)[4],&((__m128i *)idft_in0)[5],&((__m128i *)idft_in0)[6],&((__m128i *)idft_in0)[7],&((__m128i *)idft_in0)[8],&((__m128i *)idft_in0)[9],&((__m128i *)idft_in0)[10],&((__m128i *)idft_in0)[11],
155
    &((__m128i *)idft_out0)[0],&((__m128i *)idft_out0)[1],&((__m128i *)idft_out0)[2],&((__m128i *)idft_out0)[3],&((__m128i *)idft_out0)[4],&((__m128i *)idft_out0)[5],&((__m128i *)idft_out0)[6],&((__m128i *)idft_out0)[7],&((__m128i *)idft_out0)[8],&((__m128i *)idft_out0)[9],&((__m128i *)idft_out0)[10],&((__m128i *)idft_out0)[11]);
156
157

    dft12f(&((__m128i *)idft_in1)[0],&((__m128i *)idft_in1)[1],&((__m128i *)idft_in1)[2],&((__m128i *)idft_in1)[3],&((__m128i *)idft_in1)[4],&((__m128i *)idft_in1)[5],&((__m128i *)idft_in1)[6],&((__m128i *)idft_in1)[7],&((__m128i *)idft_in1)[8],&((__m128i *)idft_in1)[9],&((__m128i *)idft_in1)[10],&((__m128i *)idft_in1)[11],
158
    &((__m128i *)idft_out1)[0],&((__m128i *)idft_out1)[1],&((__m128i *)idft_out1)[2],&((__m128i *)idft_out1)[3],&((__m128i *)idft_out1)[4],&((__m128i *)idft_out1)[5],&((__m128i *)idft_out1)[6],&((__m128i *)idft_out1)[7],&((__m128i *)idft_out1)[8],&((__m128i *)idft_out1)[9],&((__m128i *)idft_out1)[10],&((__m128i *)idft_out1)[11]);
159
160

    dft12f(&((__m128i *)idft_in2)[0],&((__m128i *)idft_in2)[1],&((__m128i *)idft_in2)[2],&((__m128i *)idft_in2)[3],&((__m128i *)idft_in2)[4],&((__m128i *)idft_in2)[5],&((__m128i *)idft_in2)[6],&((__m128i *)idft_in2)[7],&((__m128i *)idft_in2)[8],&((__m128i *)idft_in2)[9],&((__m128i *)idft_in2)[10],&((__m128i *)idft_in2)[11],
161
    &((__m128i *)idft_out2)[0],&((__m128i *)idft_out2)[1],&((__m128i *)idft_out2)[2],&((__m128i *)idft_out2)[3],&((__m128i *)idft_out2)[4],&((__m128i *)idft_out2)[5],&((__m128i *)idft_out2)[6],&((__m128i *)idft_out2)[7],&((__m128i *)idft_out2)[8],&((__m128i *)idft_out2)[9],&((__m128i *)idft_out2)[10],&((__m128i *)idft_out2)[11]);
162
163
164
    */

    norm128 = _mm_set1_epi16(9459);
165
166

    for (i=0; i<12; i++) {
167
168
169
170
171
172
      ((__m128i*)idft_out0)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)idft_out0)[i],norm128),1);
      ((__m128i*)idft_out1)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)idft_out1)[i],norm128),1);
      ((__m128i*)idft_out2)[i] = _mm_slli_epi16(_mm_mulhi_epi16(((__m128i*)idft_out2)[i],norm128),1);
    }

    break;
173

174
175
176
177
178
  case 24:
    dft24(idft_in0,idft_out0,1);
    dft24(idft_in1,idft_out1,1);
    dft24(idft_in2,idft_out2,1);
    break;
179

180
181
182
183
184
  case 36:
    dft36(idft_in0,idft_out0,1);
    dft36(idft_in1,idft_out1,1);
    dft36(idft_in2,idft_out2,1);
    break;
185

186
187
188
189
190
  case 48:
    dft48(idft_in0,idft_out0,1);
    dft48(idft_in1,idft_out1,1);
    dft48(idft_in2,idft_out2,1);
    break;
191

192
193
194
195
196
  case 60:
    dft60(idft_in0,idft_out0,1);
    dft60(idft_in1,idft_out1,1);
    dft60(idft_in2,idft_out2,1);
    break;
197

198
199
200
201
202
  case 72:
    dft72(idft_in0,idft_out0,1);
    dft72(idft_in1,idft_out1,1);
    dft72(idft_in2,idft_out2,1);
    break;
203

204
205
206
207
208
  case 96:
    dft96(idft_in0,idft_out0,1);
    dft96(idft_in1,idft_out1,1);
    dft96(idft_in2,idft_out2,1);
    break;
209

210
211
212
213
214
  case 108:
    dft108(idft_in0,idft_out0,1);
    dft108(idft_in1,idft_out1,1);
    dft108(idft_in2,idft_out2,1);
    break;
215

216
217
218
219
220
  case 120:
    dft120(idft_in0,idft_out0,1);
    dft120(idft_in1,idft_out1,1);
    dft120(idft_in2,idft_out2,1);
    break;
221

222
223
224
225
226
  case 144:
    dft144(idft_in0,idft_out0,1);
    dft144(idft_in1,idft_out1,1);
    dft144(idft_in2,idft_out2,1);
    break;
227

228
229
230
231
232
  case 180:
    dft180(idft_in0,idft_out0,1);
    dft180(idft_in1,idft_out1,1);
    dft180(idft_in2,idft_out2,1);
    break;
233

234
235
236
237
238
  case 192:
    dft192(idft_in0,idft_out0,1);
    dft192(idft_in1,idft_out1,1);
    dft192(idft_in2,idft_out2,1);
    break;
239

240
241
242
243
244
  case 216:
    dft216(idft_in0,idft_out0,1);
    dft216(idft_in1,idft_out1,1);
    dft216(idft_in2,idft_out2,1);
    break;
245

246
247
248
249
250
  case 240:
    dft240(idft_in0,idft_out0,1);
    dft240(idft_in1,idft_out1,1);
    dft240(idft_in2,idft_out2,1);
    break;
251

252
253
  case 288:
    dft288(idft_in0,idft_out0,1);
254
    dft288(idft_in1,idft_out1,1);
255
256
    dft288(idft_in2,idft_out2,1);
    break;
257
258

  case 300:
259
260
261
262
    dft300(idft_in0,idft_out0,1);
    dft300(idft_in1,idft_out1,1);
    dft300(idft_in2,idft_out2,1);
    break;
263

264
265
266
267
268
  case 324:
    dft324((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft324((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft324((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
269

270
271
272
273
274
  case 360:
    dft360((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft360((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft360((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
275

276
277
278
279
280
  case 384:
    dft384((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft384((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft384((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
281

282
283
284
285
286
  case 432:
    dft432((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft432((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft432((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
287

288
289
290
291
292
  case 480:
    dft480((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft480((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft480((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
293

294
295
296
297
298
  case 540:
    dft540((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft540((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft540((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
299

300
301
302
303
304
  case 576:
    dft576((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft576((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft576((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
305

306
307
308
309
310
  case 600:
    dft600((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft600((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft600((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
311

312
313
314
315
316
  case 648:
    dft648((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft648((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft648((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
317

318
319
320
321
322
  case 720:
    dft720((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft720((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft720((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
323

324
325
326
327
328
  case 864:
    dft864((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft864((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft864((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
329

330
331
332
333
334
  case 900:
    dft900((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft900((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft900((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
335

336
337
338
339
340
  case 960:
    dft960((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft960((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft960((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
341

342
343
344
345
346
  case 972:
    dft972((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft972((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft972((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
347

348
349
350
351
352
  case 1080:
    dft1080((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft1080((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft1080((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
353

354
355
356
357
358
  case 1152:
    dft1152((int16_t*)idft_in0,(int16_t*)idft_out0,1);
    dft1152((int16_t*)idft_in1,(int16_t*)idft_out1,1);
    dft1152((int16_t*)idft_in2,(int16_t*)idft_out2,1);
    break;
359

360
361
362
363
364
  case 1200:
    dft1200(idft_in0,idft_out0,1);
    dft1200(idft_in1,idft_out1,1);
    dft1200(idft_in2,idft_out2,1);
    break;
365

366
367
368
  }


369
370

  for (i=0,ip=0; i<Msc_PUSCH; i++,ip+=4) {
371
372
373
374
375
376
377
378
    z0[i]     = ((uint32_t*)idft_out0)[ip];
    /*
      printf("out0 (%d,%d),(%d,%d),(%d,%d),(%d,%d)\n",
      ((int16_t*)&idft_out0[ip])[0],((int16_t*)&idft_out0[ip])[1],
      ((int16_t*)&idft_out0[ip+1])[0],((int16_t*)&idft_out0[ip+1])[1],
      ((int16_t*)&idft_out0[ip+2])[0],((int16_t*)&idft_out0[ip+2])[1],
      ((int16_t*)&idft_out0[ip+3])[0],((int16_t*)&idft_out0[ip+3])[1]);
    */
379
380
381
382
383
384
385
386
387
388
    z1[i]     = ((uint32_t*)idft_out0)[ip+1];
    z2[i]     = ((uint32_t*)idft_out0)[ip+2];
    z3[i]     = ((uint32_t*)idft_out0)[ip+3];
    z4[i]     = ((uint32_t*)idft_out1)[ip+0];
    z5[i]     = ((uint32_t*)idft_out1)[ip+1];
    z6[i]     = ((uint32_t*)idft_out1)[ip+2];
    z7[i]     = ((uint32_t*)idft_out1)[ip+3];
    z8[i]     = ((uint32_t*)idft_out2)[ip];
    z9[i]     = ((uint32_t*)idft_out2)[ip+1];

389
    if (frame_parms->Ncp==0) {
390
      z10[i]    = ((uint32_t*)idft_out2)[ip+2];
391
392
393
      z11[i]    = ((uint32_t*)idft_out2)[ip+3];
    }
  }
394

395
  // conjugate output
396
  for (i=0; i<(Msc_PUSCH>>2); i++) {
397
398
399
400
401
402
403
404
405
406
    ((__m128i*)z0)[i]=_mm_sign_epi16(((__m128i*)z0)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z1)[i]=_mm_sign_epi16(((__m128i*)z1)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z2)[i]=_mm_sign_epi16(((__m128i*)z2)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z3)[i]=_mm_sign_epi16(((__m128i*)z3)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z4)[i]=_mm_sign_epi16(((__m128i*)z4)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z5)[i]=_mm_sign_epi16(((__m128i*)z5)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z6)[i]=_mm_sign_epi16(((__m128i*)z6)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z7)[i]=_mm_sign_epi16(((__m128i*)z7)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z8)[i]=_mm_sign_epi16(((__m128i*)z8)[i],*(__m128i*)&conjugate2[0]);
    ((__m128i*)z9)[i]=_mm_sign_epi16(((__m128i*)z9)[i],*(__m128i*)&conjugate2[0]);
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
    if (frame_parms->Ncp==0) {
      ((__m128i*)z10)[i]=_mm_sign_epi16(((__m128i*)z10)[i],*(__m128i*)&conjugate2[0]);
      ((__m128i*)z11)[i]=_mm_sign_epi16(((__m128i*)z11)[i],*(__m128i*)&conjugate2[0]);
    }
  }

}
#endif





int32_t ulsch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms,
422
423
424
425
426
427
                       int32_t **rxdataF_comp,
                       int16_t *ulsch_llr,
                       uint8_t symbol,
                       uint16_t nb_rb,
                       int16_t **llrp)
{
428
429
430
431
432
433
434

  __m128i *rxF=(__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
  int32_t i;
  __m128i **llrp128 = (__m128i **)llrp;

  //  printf("qpsk llr for symbol %d (pos %d), llr offset %d\n",symbol,(symbol*frame_parms->N_RB_DL*12),llr128U-(__m128i*)ulsch_llr);

435
  for (i=0; i<(nb_rb*3); i++) {
436
437
438
439
440
441
442
443
444
445
446
447
448
449
    //printf("%d,%d,%d,%d,%d,%d,%d,%d\n",((int16_t *)rxF)[0],((int16_t *)rxF)[1],((int16_t *)rxF)[2],((int16_t *)rxF)[3],((int16_t *)rxF)[4],((int16_t *)rxF)[5],((int16_t *)rxF)[6],((int16_t *)rxF)[7]);
    *(*llrp128) = *rxF;
    rxF++;
    (*llrp128)++;
  }

  _mm_empty();
  _m_empty();

  return(0);

}

void ulsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
450
451
452
453
454
455
456
                     int32_t **rxdataF_comp,
                     int16_t *ulsch_llr,
                     int32_t **ul_ch_mag,
                     uint8_t symbol,
                     uint16_t nb_rb,
                     int16_t **llrp)
{
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

  __m128i *rxF=(__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
  __m128i *ch_mag;
  __m128i mmtmpU0;
  __m128i **llrp128=(__m128i **)llrp;

  int32_t i;
  //  uint8_t symbol_mod;

  //  printf("ulsch_rx.c: ulsch_16qam_llr: symbol %d\n",symbol);

  //  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;

  ch_mag =(__m128i*)&ul_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];


473
  for (i=0; i<(nb_rb*3); i++) {
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495


    mmtmpU0 = _mm_abs_epi16(rxF[i]);
    //    print_shorts("tmp0",&tmp0);

    mmtmpU0 = _mm_subs_epi16(ch_mag[i],mmtmpU0);


    (*llrp128)[0] = _mm_unpacklo_epi32(rxF[i],mmtmpU0);
    (*llrp128)[1] = _mm_unpackhi_epi32(rxF[i],mmtmpU0);
    (*llrp128)+=2;

    //    print_bytes("rxF[i]",&rxF[i]);
    //    print_bytes("rxF[i+1]",&rxF[i+1]);
  }

  _mm_empty();
  _m_empty();

}

void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
496
497
498
499
500
501
502
503
                     int32_t **rxdataF_comp,
                     int16_t *ulsch_llr,
                     int32_t **ul_ch_mag,
                     int32_t **ul_ch_magb,
                     uint8_t symbol,
                     uint16_t nb_rb,
                     int16_t **llrp)
{
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520

  __m128i *rxF=(__m128i*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)];
  __m128i *ch_mag,*ch_magb;
  int32_t i;
  __m128i mmtmpU1,mmtmpU2;
  int32_t **llrp32=(int32_t **)llrp;

  //  uint8_t symbol_mod;



  //  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;

  ch_mag =(__m128i*)&ul_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)];
  ch_magb =(__m128i*)&ul_ch_magb[0][(symbol*frame_parms->N_RB_DL*12)];

  //  printf("symbol %d: mag %d, magb %d\n",symbol,_mm_extract_epi16(ch_mag[0],0),_mm_extract_epi16(ch_magb[0],0));
521
  for (i=0; i<(nb_rb*3); i++) {
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552



    mmtmpU1 = _mm_abs_epi16(rxF[i]);

    mmtmpU1  = _mm_subs_epi16(ch_mag[i],mmtmpU1);

    mmtmpU2 = _mm_abs_epi16(mmtmpU1);
    mmtmpU2 = _mm_subs_epi16(ch_magb[i],mmtmpU2);

    (*llrp32)[0]  = _mm_extract_epi32(rxF[i],0);
    (*llrp32)[1]  = _mm_extract_epi32(mmtmpU1,0);
    (*llrp32)[2]  = _mm_extract_epi32(mmtmpU2,0);
    (*llrp32)[3]  = _mm_extract_epi32(rxF[i],1);
    (*llrp32)[4]  = _mm_extract_epi32(mmtmpU1,1);
    (*llrp32)[5]  = _mm_extract_epi32(mmtmpU2,1);
    (*llrp32)[6]  = _mm_extract_epi32(rxF[i],2);
    (*llrp32)[7]  = _mm_extract_epi32(mmtmpU1,2);
    (*llrp32)[8]  = _mm_extract_epi32(mmtmpU2,2);
    (*llrp32)[9]  = _mm_extract_epi32(rxF[i],3);
    (*llrp32)[10] = _mm_extract_epi32(mmtmpU1,3);
    (*llrp32)[11] = _mm_extract_epi32(mmtmpU2,3);
    (*llrp32)+=12;
  }

  _mm_empty();
  _m_empty();

}

void ulsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms,
553
554
555
556
557
558
                         int32_t **rxdataF_comp,
                         int32_t **ul_ch_mag,
                         int32_t **ul_ch_magb,
                         uint8_t symbol,
                         uint16_t nb_rb)
{
559
560
561
562
563
564
565
566
567



  __m128i *rxdataF_comp128_0,*ul_ch_mag128_0,*ul_ch_mag128_0b;
  __m128i *rxdataF_comp128_1,*ul_ch_mag128_1,*ul_ch_mag128_1b;

  int32_t i;

  if (frame_parms->nb_antennas_rx>1) {
568
569
570
571
572
573
    rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
    rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_0      = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_1      = (__m128i *)&ul_ch_mag[1][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_0b     = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_1b     = (__m128i *)&ul_ch_magb[1][symbol*frame_parms->N_RB_DL*12];
574
575

    // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
576
    for (i=0; i<nb_rb*3; i++) {
577
578
579
580
581
      rxdataF_comp128_0[i] = _mm_adds_epi16(_mm_srai_epi16(rxdataF_comp128_0[i],1),_mm_srai_epi16(rxdataF_comp128_1[i],1));
      ul_ch_mag128_0[i]    = _mm_adds_epi16(_mm_srai_epi16(ul_ch_mag128_0[i],1),_mm_srai_epi16(ul_ch_mag128_1[i],1));
      ul_ch_mag128_0b[i]   = _mm_adds_epi16(_mm_srai_epi16(ul_ch_mag128_0b[i],1),_mm_srai_epi16(ul_ch_mag128_1b[i],1));
      rxdataF_comp128_0[i] = _mm_add_epi16(rxdataF_comp128_0[i],(*(__m128i*)&jitterc[0]));
    }
582

583
584
585
586
587
588
589
590
591
592
    // remove any bias (DC component after IDFT)
    //    ((uint32_t*)rxdataF_comp128_0)[0]=0;
  }

  _mm_empty();
  _m_empty();

}

void ulsch_extract_rbs_single(int32_t **rxdataF,
593
594
595
596
597
598
599
                              int32_t **rxdataF_ext,
                              uint32_t first_rb,
                              uint32_t nb_rb,
                              uint8_t l,
                              uint8_t Ns,
                              LTE_DL_FRAME_PARMS *frame_parms)
{
600
601
602
603
604


  uint16_t nb_rb1,nb_rb2;
  uint8_t aarx;
  int32_t *rxF,*rxF_ext;
605

606
607
608
  //uint8_t symbol = l+Ns*frame_parms->symbols_per_tti/2;
  uint8_t symbol = l+((7-frame_parms->Ncp)*(Ns&1)); ///symbol within sub-frame

609
  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
610
611
612
613
614
615
616
617
618
619


    nb_rb1 = cmin(cmax((int)(frame_parms->N_RB_UL) - (int)(2*first_rb),(int)0),(int)(2*nb_rb));    // 2 times no. RBs before the DC
    nb_rb2 = 2*nb_rb - nb_rb1;                                   // 2 times no. RBs after the DC

#ifdef DEBUG_ULSCH
    msg("ulsch_extract_rbs_single: 2*nb_rb1 = %d, 2*nb_rb2 = %d\n",nb_rb1,nb_rb2);
#endif

    rxF_ext   = &rxdataF_ext[aarx][(symbol*frame_parms->N_RB_UL*12)];
620

621
622
623
624
    if (nb_rb1) {
      rxF = &rxdataF[aarx][(first_rb*12 + frame_parms->first_carrier_offset + symbol*frame_parms->ofdm_symbol_size)];
      memcpy(rxF_ext, rxF, nb_rb1*6*sizeof(int));
      rxF_ext += nb_rb1*6;
knopp's avatar
   
knopp committed
625

626
      if (nb_rb2)  {
627
628
629
630
631
632
633
634
635
        //#ifdef OFDMA_ULSCH
        //  rxF = &rxdataF[aarx][(1 + symbol*frame_parms->ofdm_symbol_size)*2];
        //#else
        rxF = &rxdataF[aarx][(symbol*frame_parms->ofdm_symbol_size)];
        //#endif
        memcpy(rxF_ext, rxF, nb_rb2*6*sizeof(int));
        rxF_ext += nb_rb2*6;
      }
    } else { //there is only data in the second half
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
      //#ifdef OFDMA_ULSCH
      //      rxF = &rxdataF[aarx][(1 + 6*(2*first_rb - frame_parms->N_RB_UL) + symbol*frame_parms->ofdm_symbol_size)*2];
      //#else
      rxF = &rxdataF[aarx][(6*(2*first_rb - frame_parms->N_RB_UL) + symbol*frame_parms->ofdm_symbol_size)];
      //#endif
      memcpy(rxF_ext, rxF, nb_rb2*6*sizeof(int));
      rxF_ext += nb_rb2*6;
    }
  }

  _mm_empty();
  _m_empty();

}

void ulsch_correct_ext(int32_t **rxdataF_ext,
652
653
654
655
656
                       int32_t **rxdataF_ext2,
                       uint16_t symbol,
                       LTE_DL_FRAME_PARMS *frame_parms,
                       uint16_t nb_rb)
{
657
658
659
660

  int32_t i,j,aarx;
  int32_t *rxF_ext2,*rxF_ext;

661
  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
662
663
664
    rxF_ext2 = &rxdataF_ext2[aarx][symbol*12*frame_parms->N_RB_UL];
    rxF_ext  = &rxdataF_ext[aarx][2*symbol*12*frame_parms->N_RB_UL];

665
666
    for (i=0,j=0; i<12*nb_rb; i++,j+=2) {
      rxF_ext2[i] = rxF_ext[j];
667
668
669
670
671
672
673
    }
  }
}



void ulsch_channel_compensation(int32_t **rxdataF_ext,
674
675
676
677
678
679
680
681
682
683
684
                                int32_t **ul_ch_estimates_ext,
                                int32_t **ul_ch_mag,
                                int32_t **ul_ch_magb,
                                int32_t **rxdataF_comp,
                                LTE_DL_FRAME_PARMS *frame_parms,
                                uint8_t symbol,
                                uint8_t Qm,
                                uint16_t nb_rb,
                                uint8_t output_shift)
{

685
686
687
688
689
690
691
692
693
694
695
  uint16_t rb;
  __m128i *ul_ch128,*ul_ch_mag128,*ul_ch_mag128b,*rxdataF128,*rxdataF_comp128;
  uint8_t aarx;//,symbol_mod;
  __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
#ifdef OFDMA_ULSCH
  __m128i QAM_amp128U,QAM_amp128bU;
#endif
  //  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;

  //    printf("comp: symbol %d\n",symbol);

696
697
#ifdef ULSCH_OFDMA

698
699
700
701
702
703
  if (Qm == 4)
    QAM_amp128U = _mm_set1_epi16(QAM16_n1);
  else if (Qm == 6) {
    QAM_amp128U  = _mm_set1_epi16(QAM64_n1);
    QAM_amp128bU = _mm_set1_epi16(QAM64_n2);
  }
704

705
#endif
706
707
708

  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {

709
710
711
712
713
714
715
    ul_ch128          = (__m128i *)&ul_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128      = (__m128i *)&ul_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128b     = (__m128i *)&ul_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12];
    rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
    rxdataF_comp128   = (__m128i *)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12];


716
    for (rb=0; rb<nb_rb; rb++) {
717
718
      //            printf("comp: symbol %d rb %d\n",symbol,rb);
#ifdef OFDMA_ULSCH
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
      if (Qm>2) {
        // get channel amplitude if not QPSK

        mmtmpU0 = _mm_madd_epi16(ul_ch128[0],ul_ch128[0]);

        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);

        mmtmpU1 = _mm_madd_epi16(ul_ch128[1],ul_ch128[1]);
        mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
        mmtmpU0 = _mm_packs_epi32(mmtmpU0,mmtmpU1);

        ul_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b[0] = ul_ch_mag128[0];
        ul_ch_mag128[0] = _mm_mulhi_epi16(ul_ch_mag128[0],QAM_amp128U);
        ul_ch_mag128[0] = _mm_slli_epi16(ul_ch_mag128[0],2);  // 2 to compensate the scale channel estimate
        ul_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b[1] = ul_ch_mag128[1];
        ul_ch_mag128[1] = _mm_mulhi_epi16(ul_ch_mag128[1],QAM_amp128U);
        ul_ch_mag128[1] = _mm_slli_epi16(ul_ch_mag128[1],2);  // 2 to compensate the scale channel estimate

        mmtmpU0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
        mmtmpU1 = _mm_packs_epi32(mmtmpU0,mmtmpU0);

        ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpU1,mmtmpU1);
        ul_ch_mag128b[2] = ul_ch_mag128[2];

        ul_ch_mag128[2] = _mm_mulhi_epi16(ul_ch_mag128[2],QAM_amp128U);
        ul_ch_mag128[2] = _mm_slli_epi16(ul_ch_mag128[2],2); // 2 to compensate the scale channel estimate


        ul_ch_mag128b[0] = _mm_mulhi_epi16(ul_ch_mag128b[0],QAM_amp128bU);
        ul_ch_mag128b[0] = _mm_slli_epi16(ul_ch_mag128b[0],2); // 2 to compensate the scale channel estimate


        ul_ch_mag128b[1] = _mm_mulhi_epi16(ul_ch_mag128b[1],QAM_amp128bU);
        ul_ch_mag128b[1] = _mm_slli_epi16(ul_ch_mag128b[1],2); // 2 to compensate the scale channel estimate

        ul_ch_mag128b[2] = _mm_mulhi_epi16(ul_ch_mag128b[2],QAM_amp128bU);
        ul_ch_mag128b[2] = _mm_slli_epi16(ul_ch_mag128b[2],2);// 2 to compensate the scale channel estimate

760
      }
761

762
763
#else

764
      mmtmpU0 = _mm_madd_epi16(ul_ch128[0],ul_ch128[0]);
765

766
767
768
769
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_madd_epi16(ul_ch128[1],ul_ch128[1]);

      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
770

771
      mmtmpU0 = _mm_packs_epi32(mmtmpU0,mmtmpU1);
772

773
774
      ul_ch_mag128[0] = _mm_unpacklo_epi16(mmtmpU0,mmtmpU0);
      ul_ch_mag128[1] = _mm_unpackhi_epi16(mmtmpU0,mmtmpU0);
775

776
777
778
779
780
781
782
783
      mmtmpU0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);

      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_packs_epi32(mmtmpU0,mmtmpU0);
      ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpU1,mmtmpU1);

      // printf("comp: symbol %d rb %d => %d,%d,%d (output_shift %d)\n",symbol,rb,*((int16_t*)&ul_ch_mag128[0]),*((int16_t*)&ul_ch_mag128[1]),*((int16_t*)&ul_ch_mag128[2]),output_shift);
#endif
784
785
786

      // multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128[0],rxdataF128[0]);
787
788
      //        print_ints("re",&mmtmpU0);

789
790
791
792
793
794
795
796
797
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128[0],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)&conjugate[0]);

      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[0]);
      //      print_ints("im",&mmtmpU1);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
798
      //  print_ints("re(shift)",&mmtmpU0);
799
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
800
      //  print_ints("im(shift)",&mmtmpU1);
801
802
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
803
804
      //        print_ints("c0",&mmtmpU2);
      //  print_ints("c1",&mmtmpU3);
805
806
      rxdataF_comp128[0] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
      /*
807
808
809
              print_shorts("rx:",&rxdataF128[0]);
              print_shorts("ch:",&ul_ch128[0]);
              print_shorts("pack:",&rxdataF_comp128[0]);
810
811
812
813
814
815
816
817
818
819
820
821
822
      */
      // multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128[1],rxdataF128[1]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128[1],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[1]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
823

824
      rxdataF_comp128[1] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
825
826
827
      //        print_shorts("rx:",rxdataF128[1]);
      //        print_shorts("ch:",ul_ch128[1]);
      //        print_shorts("pack:",rxdataF_comp128[1]);
828
829
830
831
832
833
834
835
836
837
838
839
      //       multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128[2],rxdataF128[2]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[2]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
840

841
      rxdataF_comp128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
842
843
      //        print_shorts("rx:",rxdataF128[2]);
      //        print_shorts("ch:",ul_ch128[2]);
844
      //        print_shorts("pack:",rxdataF_comp128[2]);
845

846
847
848
849
850
851
852
853
854
855
      // Add a jitter to compensate for the saturation in "packs" resulting in a bias on the DC after IDFT
      rxdataF_comp128[0] = _mm_add_epi16(rxdataF_comp128[0],(*(__m128i*)&jitter[0]));
      rxdataF_comp128[1] = _mm_add_epi16(rxdataF_comp128[1],(*(__m128i*)&jitter[0]));
      rxdataF_comp128[2] = _mm_add_epi16(rxdataF_comp128[2],(*(__m128i*)&jitter[0]));

      ul_ch128+=3;
      ul_ch_mag128+=3;
      ul_ch_mag128b+=3;
      rxdataF128+=3;
      rxdataF_comp128+=3;
856

857
858
859
860
861
862
863
    }
  }


  _mm_empty();
  _m_empty();

864
}
865
866
867
868
869
870
871
872





__m128i QAM_amp128U_0,QAM_amp128bU_0,QAM_amp128U_1,QAM_amp128bU_1;

void ulsch_channel_compensation_alamouti(int32_t **rxdataF_ext,                 // For Distributed Alamouti Combining
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
    int32_t **ul_ch_estimates_ext_0,
    int32_t **ul_ch_estimates_ext_1,
    int32_t **ul_ch_mag_0,
    int32_t **ul_ch_magb_0,
    int32_t **ul_ch_mag_1,
    int32_t **ul_ch_magb_1,
    int32_t **rxdataF_comp_0,
    int32_t **rxdataF_comp_1,
    LTE_DL_FRAME_PARMS *frame_parms,
    uint8_t symbol,
    uint8_t Qm,
    uint16_t nb_rb,
    uint8_t output_shift)
{

888
889
890
891
892
893
894
895
896
  uint16_t rb;
  __m128i *ul_ch128_0,*ul_ch128_1,*ul_ch_mag128_0,*ul_ch_mag128_1,*ul_ch_mag128b_0,*ul_ch_mag128b_1,*rxdataF128,*rxdataF_comp128_0,*rxdataF_comp128_1;
  uint8_t aarx;//,symbol_mod;
  __m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;

  //  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;

  //    printf("comp: symbol %d\n",symbol);

897
898

  if (Qm == 4) {
899
900
    QAM_amp128U_0 = _mm_set1_epi16(QAM16_n1);
    QAM_amp128U_1 = _mm_set1_epi16(QAM16_n1);
901
  } else if (Qm == 6) {
902
903
904
905
906
907
    QAM_amp128U_0  = _mm_set1_epi16(QAM64_n1);
    QAM_amp128bU_0 = _mm_set1_epi16(QAM64_n2);

    QAM_amp128U_1  = _mm_set1_epi16(QAM64_n1);
    QAM_amp128bU_1 = _mm_set1_epi16(QAM64_n2);
  }
908
909
910

  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {

911
912
913
914
915
916
917
918
919
920
921
    ul_ch128_0          = (__m128i *)&ul_ch_estimates_ext_0[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_0      = (__m128i *)&ul_ch_mag_0[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128b_0     = (__m128i *)&ul_ch_magb_0[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch128_1          = (__m128i *)&ul_ch_estimates_ext_1[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128_1      = (__m128i *)&ul_ch_mag_1[aarx][symbol*frame_parms->N_RB_DL*12];
    ul_ch_mag128b_1     = (__m128i *)&ul_ch_magb_1[aarx][symbol*frame_parms->N_RB_DL*12];
    rxdataF128        = (__m128i *)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12];
    rxdataF_comp128_0   = (__m128i *)&rxdataF_comp_0[aarx][symbol*frame_parms->N_RB_DL*12];
    rxdataF_comp128_1   = (__m128i *)&rxdataF_comp_1[aarx][symbol*frame_parms->N_RB_DL*12];


922
    for (rb=0; rb<nb_rb; rb++) {
923
      //      printf("comp: symbol %d rb %d\n",symbol,rb);
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
      if (Qm>2) {
        // get channel amplitude if not QPSK

        mmtmpU0 = _mm_madd_epi16(ul_ch128_0[0],ul_ch128_0[0]);

        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);

        mmtmpU1 = _mm_madd_epi16(ul_ch128_0[1],ul_ch128_0[1]);
        mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
        mmtmpU0 = _mm_packs_epi32(mmtmpU0,mmtmpU1);

        ul_ch_mag128_0[0] = _mm_unpacklo_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b_0[0] = ul_ch_mag128_0[0];
        ul_ch_mag128_0[0] = _mm_mulhi_epi16(ul_ch_mag128_0[0],QAM_amp128U_0);
        ul_ch_mag128_0[0] = _mm_slli_epi16(ul_ch_mag128_0[0],2); // 2 to compensate the scale channel estimate

        ul_ch_mag128_0[1] = _mm_unpackhi_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b_0[1] = ul_ch_mag128_0[1];
        ul_ch_mag128_0[1] = _mm_mulhi_epi16(ul_ch_mag128_0[1],QAM_amp128U_0);
        ul_ch_mag128_0[1] = _mm_slli_epi16(ul_ch_mag128_0[1],2); // 2 to scale compensate the scale channel estimate

        mmtmpU0 = _mm_madd_epi16(ul_ch128_0[2],ul_ch128_0[2]);
        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
        mmtmpU1 = _mm_packs_epi32(mmtmpU0,mmtmpU0);

        ul_ch_mag128_0[2] = _mm_unpacklo_epi16(mmtmpU1,mmtmpU1);
        ul_ch_mag128b_0[2] = ul_ch_mag128_0[2];

        ul_ch_mag128_0[2] = _mm_mulhi_epi16(ul_ch_mag128_0[2],QAM_amp128U_0);
        ul_ch_mag128_0[2] = _mm_slli_epi16(ul_ch_mag128_0[2],2);  //  2 to scale compensate the scale channel estimat


        ul_ch_mag128b_0[0] = _mm_mulhi_epi16(ul_ch_mag128b_0[0],QAM_amp128bU_0);
        ul_ch_mag128b_0[0] = _mm_slli_epi16(ul_ch_mag128b_0[0],2);  //  2 to scale compensate the scale channel estima


        ul_ch_mag128b_0[1] = _mm_mulhi_epi16(ul_ch_mag128b_0[1],QAM_amp128bU_0);
        ul_ch_mag128b_0[1] = _mm_slli_epi16(ul_ch_mag128b_0[1],2);   //  2 to scale compensate the scale channel estima

        ul_ch_mag128b_0[2] = _mm_mulhi_epi16(ul_ch_mag128b_0[2],QAM_amp128bU_0);
        ul_ch_mag128b_0[2] = _mm_slli_epi16(ul_ch_mag128b_0[2],2);   //  2 to scale compensate the scale channel estima




        mmtmpU0 = _mm_madd_epi16(ul_ch128_1[0],ul_ch128_1[0]);

        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);

        mmtmpU1 = _mm_madd_epi16(ul_ch128_1[1],ul_ch128_1[1]);
        mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
        mmtmpU0 = _mm_packs_epi32(mmtmpU0,mmtmpU1);

        ul_ch_mag128_1[0] = _mm_unpacklo_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b_1[0] = ul_ch_mag128_1[0];
        ul_ch_mag128_1[0] = _mm_mulhi_epi16(ul_ch_mag128_1[0],QAM_amp128U_1);
        ul_ch_mag128_1[0] = _mm_slli_epi16(ul_ch_mag128_1[0],2); // 2 to compensate the scale channel estimate

        ul_ch_mag128_1[1] = _mm_unpackhi_epi16(mmtmpU0,mmtmpU0);
        ul_ch_mag128b_1[1] = ul_ch_mag128_1[1];
        ul_ch_mag128_1[1] = _mm_mulhi_epi16(ul_ch_mag128_1[1],QAM_amp128U_1);
        ul_ch_mag128_1[1] = _mm_slli_epi16(ul_ch_mag128_1[1],2); // 2 to scale compensate the scale channel estimate

        mmtmpU0 = _mm_madd_epi16(ul_ch128_1[2],ul_ch128_1[2]);
        mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
        mmtmpU1 = _mm_packs_epi32(mmtmpU0,mmtmpU0);

        ul_ch_mag128_1[2] = _mm_unpacklo_epi16(mmtmpU1,mmtmpU1);
        ul_ch_mag128b_1[2] = ul_ch_mag128_1[2];

        ul_ch_mag128_1[2] = _mm_mulhi_epi16(ul_ch_mag128_1[2],QAM_amp128U_0);
        ul_ch_mag128_1[2] = _mm_slli_epi16(ul_ch_mag128_1[2],2);  //  2 to scale compensate the scale channel estimat


        ul_ch_mag128b_1[0] = _mm_mulhi_epi16(ul_ch_mag128b_1[0],QAM_amp128bU_1);
        ul_ch_mag128b_1[0] = _mm_slli_epi16(ul_ch_mag128b_1[0],2);  //  2 to scale compensate the scale channel estima


        ul_ch_mag128b_1[1] = _mm_mulhi_epi16(ul_ch_mag128b_1[1],QAM_amp128bU_1);
        ul_ch_mag128b_1[1] = _mm_slli_epi16(ul_ch_mag128b_1[1],2);   //  2 to scale compensate the scale channel estima

        ul_ch_mag128b_1[2] = _mm_mulhi_epi16(ul_ch_mag128b_1[2],QAM_amp128bU_1);
        ul_ch_mag128b_1[2] = _mm_slli_epi16(ul_ch_mag128b_1[2],2);   //  2 to scale compensate the scale channel estima
1007
      }
1008

1009
1010
1011
1012
1013

      /************************For Computing (y)*(h0*)********************************************/

      // multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128_0[0],rxdataF128[0]);
1014
1015
      //  print_ints("re",&mmtmpU0);

1016
1017
1018
1019
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128_0[0],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)&conjugate[0]);
1020
      //  print_ints("im",&mmtmpU1);
1021
1022
1023
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[0]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
1024
      //  print_ints("re(shift)",&mmtmpU0);
1025
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
1026
      //  print_ints("im(shift)",&mmtmpU1);
1027
1028
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1029
1030
      //        print_ints("c0",&mmtmpU2);
      //  print_ints("c1",&mmtmpU3);
1031
      rxdataF_comp128_0[0] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1032
1033
1034
1035
      //        print_shorts("rx:",rxdataF128[0]);
      //        print_shorts("ch:",ul_ch128_0[0]);
      //        print_shorts("pack:",rxdataF_comp128_0[0]);

1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
      // multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128_0[1],rxdataF128[1]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128_0[1],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[1]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1048

1049
      rxdataF_comp128_0[1] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1050
1051
1052
      //        print_shorts("rx:",rxdataF128[1]);
      //        print_shorts("ch:",ul_ch128_0[1]);
      //        print_shorts("pack:",rxdataF_comp128_0[1]);
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
      //       multiply by conjugated channel
      mmtmpU0 = _mm_madd_epi16(ul_ch128_0[2],rxdataF128[2]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(ul_ch128_0[2],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,rxdataF128[2]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1065

1066
      rxdataF_comp128_0[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1067
1068
      //        print_shorts("rx:",rxdataF128[2]);
      //        print_shorts("ch:",ul_ch128_0[2]);
1069
      //        print_shorts("pack:",rxdataF_comp128_0[2]);
1070

1071
1072
1073
1074
1075
1076



      /*************************For Computing (y*)*(h1)************************************/
      // multiply by conjugated signal
      mmtmpU0 = _mm_madd_epi16(ul_ch128_1[0],rxdataF128[0]);
1077
1078
      //  print_ints("re",&mmtmpU0);

1079
1080
1081
1082
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(rxdataF128[0],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)&conjugate[0]);
1083
      //  print_ints("im",&mmtmpU1);
1084
1085
1086
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,ul_ch128_1[0]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
1087
      //  print_ints("re(shift)",&mmtmpU0);
1088
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
1089
      //  print_ints("im(shift)",&mmtmpU1);
1090
1091
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1092
1093
      //        print_ints("c0",&mmtmpU2);
      //  print_ints("c1",&mmtmpU3);
1094
      rxdataF_comp128_1[0] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1095
1096
1097
      //        print_shorts("rx:",rxdataF128[0]);
      //        print_shorts("ch_conjugate:",ul_ch128_1[0]);
      //        print_shorts("pack:",rxdataF_comp128_1[0]);
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111


      // multiply by conjugated signal
      mmtmpU0 = _mm_madd_epi16(ul_ch128_1[1],rxdataF128[1]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(rxdataF128[1],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,ul_ch128_1[1]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1112

1113
      rxdataF_comp128_1[1] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1114
1115
1116
      //        print_shorts("rx:",rxdataF128[1]);
      //        print_shorts("ch_conjugate:",ul_ch128_1[1]);
      //        print_shorts("pack:",rxdataF_comp128_1[1]);
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130


      //       multiply by conjugated signal
      mmtmpU0 = _mm_madd_epi16(ul_ch128_1[2],rxdataF128[2]);
      // mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
      mmtmpU1 = _mm_shufflelo_epi16(rxdataF128[2],_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_shufflehi_epi16(mmtmpU1,_MM_SHUFFLE(2,3,0,1));
      mmtmpU1 = _mm_sign_epi16(mmtmpU1,*(__m128i*)conjugate);
      mmtmpU1 = _mm_madd_epi16(mmtmpU1,ul_ch128_1[2]);
      // mmtmpU1 contains imag part of 4 consecutive outputs (32-bit)
      mmtmpU0 = _mm_srai_epi32(mmtmpU0,output_shift);
      mmtmpU1 = _mm_srai_epi32(mmtmpU1,output_shift);
      mmtmpU2 = _mm_unpacklo_epi32(mmtmpU0,mmtmpU1);
      mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
1131

1132
      rxdataF_comp128_1[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
1133
1134
      //        print_shorts("rx:",rxdataF128[2]);
      //        print_shorts("ch_conjugate:",ul_ch128_0[2]);
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
      //        print_shorts("pack:",rxdataF_comp128_1[2]);



      ul_ch128_0+=3;
      ul_ch_mag128_0+=3;
      ul_ch_mag128b_0+=3;
      ul_ch128_1+=3;
      ul_ch_mag128_1+=3;
      ul_ch_mag128b_1+=3;
      rxdataF128+=3;
      rxdataF_comp128_0+=3;
      rxdataF_comp128_1+=3;
1148

1149
1150
1151
1152
1153
1154
1155
    }
  }


  _mm_empty();
  _m_empty();

1156
}
1157
1158
1159
1160
1161




void ulsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms,// For Distributed Alamouti Receiver Combining
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
                    int32_t **rxdataF_comp,
                    int32_t **rxdataF_comp_0,
                    int32_t **rxdataF_comp_1,
                    int32_t **ul_ch_mag,
                    int32_t **ul_ch_magb,
                    int32_t **ul_ch_mag_0,
                    int32_t **ul_ch_magb_0,
                    int32_t **ul_ch_mag_1,
                    int32_t **ul_ch_magb_1,
                    uint8_t symbol,
                    uint16_t nb_rb)
{
1174
1175
1176
1177
1178
1179
1180

  int16_t *rxF,*rxF0,*rxF1;
  __m128i *ch_mag,*ch_magb,*ch_mag0,*ch_mag1,*ch_mag0b,*ch_mag1b;
  uint8_t rb,re,aarx;
  int32_t jj=(symbol*frame_parms->N_RB_DL*12);


1181
  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192

    rxF      = (int16_t*)&rxdataF_comp[aarx][jj];
    rxF0     = (int16_t*)&rxdataF_comp_0[aarx][jj];   // Contains (y)*(h0*)
    rxF1     = (int16_t*)&rxdataF_comp_1[aarx][jj];   // Contains (y*)*(h1)
    ch_mag   = (__m128i *)&ul_ch_mag[aarx][jj];
    ch_mag0 = (__m128i *)&ul_ch_mag_0[aarx][jj];
    ch_mag1 = (__m128i *)&ul_ch_mag_1[aarx][jj];
    ch_magb = (__m128i *)&ul_ch_magb[aarx][jj];
    ch_mag0b = (__m128i *)&ul_ch_magb_0[aarx][jj];
    ch_mag1b = (__m128i *)&ul_ch_magb_1[aarx][jj];

1193
1194
1195
1196
1197
    for (rb=0; rb<nb_rb; rb++) {

      for (re=0; re<12; re+=2) {

        // Alamouti RX combining
1198

1199
1200
        rxF[0] = rxF0[0] + rxF1[2];                   // re((y0)*(h0*))+ re((y1*)*(h1)) = re(x0)
        rxF[1] = rxF0[1] + rxF1[3];                   // im((y0)*(h0*))+ im((y1*)*(h1)) = im(x0)
1201

1202
1203
        rxF[2] = rxF0[2] - rxF1[0];                   // re((y1)*(h0*))- re((y0*)*(h1)) = re(x1)
        rxF[3] = rxF0[3] - rxF1[1];                   // im((y1)*(h0*))- im((y0*)*(h1)) = im(x1)
1204

1205
1206
1207
        rxF+=4;
        rxF0+=4;
        rxF1+=4;
1208
      }
1209

1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
      // compute levels for 16QAM or 64 QAM llr unit
      ch_mag[0] = _mm_adds_epi16(ch_mag0[0],ch_mag1[0]);
      ch_mag[1] = _mm_adds_epi16(ch_mag0[1],ch_mag1[1]);
      ch_mag[2] = _mm_adds_epi16(ch_mag0[2],ch_mag1[2]);
      ch_magb[0] = _mm_adds_epi16(ch_mag0b[0],ch_mag1b[0]);
      ch_magb[1] = _mm_adds_epi16(ch_mag0b[1],ch_mag1b[1]);
      ch_magb[2] = _mm_adds_epi16(ch_mag0b[2],ch_mag1b[2]);

      ch_mag+=3;
      ch_mag0+=3;
      ch_mag1+=3;
      ch_magb+=3;
      ch_mag0b+=3;
      ch_mag1b+=3;
    }
  }

  _mm_empty();
  _m_empty();
1229

1230
1231
1232
1233
1234
1235
1236
1237
1238
}





__m128i avg128U;

void ulsch_channel_level(int32_t **drs_ch_estimates_ext,
1239
1240
1241
1242
                         LTE_DL_FRAME_PARMS *frame_parms,
                         int32_t *avg,
                         uint16_t nb_rb)
{
1243
1244
1245
1246
1247

  int16_t rb;
  uint8_t aarx;
  __m128i *ul_ch128;

1248
1249

  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
1250
    //clear average level
1251
    avg128U = _mm_setzero_si128();
1252
1253
    ul_ch128=(__m128i *)drs_ch_estimates_ext[aarx];

1254
1255
    for (rb=0; rb<nb_rb; rb++) {

1256
1257
1258
      avg128U = _mm_add_epi32(avg128U,_mm_madd_epi16(ul_ch128[0],ul_ch128[0]));
      avg128U = _mm_add_epi32(avg128U,_mm_madd_epi16(ul_ch128[1],ul_ch128[1]));
      avg128U = _mm_add_epi32(avg128U,_mm_madd_epi16(ul_ch128[2],ul_ch128[2]));
1259
1260
1261

      ul_ch128+=3;

1262
      if (rb==0) {
1263
1264
1265
        //  print_shorts("ul_ch128",&ul_ch128[0]);
        //  print_shorts("ul_ch128",&ul_ch128[1]);
        //  print_shorts("ul_ch128",&ul_ch128[2]);
1266
      }
1267

1268
    }
1269
1270
1271
1272
1273
1274

    avg[aarx] = (((int*)&avg128U)[0] +
                 ((int*)&avg128U)[1] +
                 ((int*)&avg128U)[2] +
                 ((int*)&avg128U)[3])/(nb_rb*12);

1275
1276
    //    printf("Channel level : %d\n",avg[aarx]);
  }
1277

1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
  _mm_empty();
  _m_empty();

}

int32_t avgU[2];
int32_t avgU_0[2],avgU_1[2]; // For the Distributed Alamouti Scheme
/* --> moved to LTE_eNB_PUSCH structure
int32_t ulsch_power[2];
int32_t ulsch_power_0[2],ulsch_power_1[2];// For the distributed Alamouti Scheme
*/

void rx_ulsch(PHY_VARS_eNB *phy_vars_eNB,
1291
1292
1293
1294
1295
1296
1297
1298
1299
              uint32_t sched_subframe,
              uint8_t eNB_id,  // this is the effective sector id
              uint8_t UE_id,
              LTE_eNB_ULSCH_t **ulsch,
              uint8_t cooperation_flag)
{

  // flagMag = 0;
  LTE_eNB_COMMON *eNB_common_vars = &phy_vars_eNB->lte_eNB_common_vars;
1300
1301
1302
1303
1304
1305
  LTE_eNB_PUSCH *eNB_pusch_vars = phy_vars_eNB->lte_eNB_pusch_vars[UE_id];
  LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_eNB->lte_frame_parms;

  uint32_t l,i;
  int32_t avgs;
  uint8_t log2_maxh=0,aarx;
1306
1307


1308
1309
  int32_t avgs_0,avgs_1;
  uint32_t log2_maxh_0=0,log2_maxh_1=0;
1310

1311
1312

  //  uint8_t harq_pid = ( ulsch->RRCConnRequest_flag== 0) ? subframe2harq_pid_tdd(frame_parms->tdd_config,subframe) : 0;
1313
  uint8_t harq_pid;
knopp's avatar
   
knopp committed
1314
  uint8_t Qm;
1315
1316
  uint16_t rx_power_correction;
  int16_t *llrp;
knopp's avatar
   
knopp committed
1317
  int subframe = phy_vars_eNB->proc[sched_subframe].subframe_rx;
knopp's avatar
   
knopp committed
1318

knopp's avatar
   
knopp committed
1319
  harq_pid = subframe2harq_pid(frame_parms,phy_vars_eNB->proc[sched_subframe].frame_rx,subframe);
knopp's avatar
   
knopp committed
1320
  Qm = get_Qm_ul(ulsch[UE_id]->harq_processes[harq_pid]->mcs);
1321
#ifdef DEBUG_ULSCH
1322
1323
  msg("rx_ulsch: eNB_id %d, harq_pid %d, nb_rb %d first_rb %d, cooperation %d\n",eNB_id,harq_pid,ulsch[UE_id]->harq_processes[harq_pid]->nb_rb,ulsch[UE_id]->harq_processes[harq_pid]->first_rb,
      cooperation_flag);
1324
1325
#endif //DEBUG_ULSCH

knopp's avatar
   
knopp committed
1326
  rx_power_correction = 1;
1327

1328
1329
  for (l=0; l<(frame_parms->symbols_per_tti-ulsch[UE_id]->harq_processes[harq_pid]->srs_active); l++) {

1330
1331
#ifdef DEBUG_ULSCH
    msg("rx_ulsch : symbol %d (first_rb %d,nb_rb %d), rxdataF %p, rxdataF_ext %p\n",l,
1332
1333
1334
1335
        ulsch[UE_id]->harq_processes[harq_pid]->first_rb,
        ulsch[UE_id]->harq_processes[harq_pid]->nb_rb,
        eNB_common_vars->rxdataF[eNB_id],
        eNB_pusch_vars->rxdataF_ext[eNB_id]);