diff --git a/openair1/PHY/NR_TRANSPORT/pucch_rx.c b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
index eee3778c6a970c2bc1167d42680d2140b5a56370..b7534322cbbeac61e68fae224f8af492c22d3139 100644
--- a/openair1/PHY/NR_TRANSPORT/pucch_rx.c
+++ b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
@@ -43,7 +43,6 @@
 #include "PHY/sse_intrin.h"
 #include "PHY/NR_UE_TRANSPORT/pucch_nr.h"
 #include <openair1/PHY/CODING/nrSmallBlock/nr_small_block_defs.h>
-#include "openair1/PHY/LTE_TRANSPORT/transport_proto.h" // for lte_gold_generic()
 #include "PHY/NR_TRANSPORT/nr_transport_common_proto.h"
 #include "PHY/NR_TRANSPORT/nr_transport_proto.h"
 #include "PHY/NR_REFSIG/nr_refsig.h"
@@ -1146,26 +1145,21 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB,
 
     // first compute DMRS component
 
-    uint32_t x1 = 0, x2 = 0, sGold = 0;
-    uint8_t *sGold8 = (uint8_t *)&sGold;
     const int scramble = pucch_pdu->dmrs_scrambling_id * 2;
     // fixme: when MR2754 will be merged, use the gold sequence cache instead of regenerate each time
-    x2 = ((1ULL << 17) * ((NR_NUMBER_OF_SYMBOLS_PER_SLOT * slot + pucch_pdu->start_symbol_index + symb + 1) * (scramble + 1))
-          + scramble)
-         % (1U << 31); // c_init calculation according to TS38.211 subclause
+    uint32_t x2 =
+        ((1ULL << 17) * ((NR_NUMBER_OF_SYMBOLS_PER_SLOT * slot + pucch_pdu->start_symbol_index + symb + 1) * (scramble + 1))
+         + scramble)
+        % (1U << 31); // c_init calculation according to TS38.211 subclause 6.4.1.3.2.1
 #ifdef DEBUG_NR_PUCCH_RX
     printf("slot %d, start_symbol_index %d, symbol %d, dmrs_scrambling_id %d\n",
            slot,pucch_pdu->start_symbol_index,symb,pucch_pdu->dmrs_scrambling_id);
 #endif
-    int reset = 1;
-    for (int i=0; i<=(pucch_pdu->prb_start>>2); i++) {
-      sGold = lte_gold_generic(&x1, &x2, reset);
-      reset = 0;
-    }
-
-    for (int group = 0; group < ngroup; group++) {
+    uint32_t *sGold = gold_cache(x2, pucch_pdu->prb_start / 4 + ngroup / 2);
+    for (int group = 0, goldIdx = pucch_pdu->prb_start / 4; group < ngroup; group++) {
       // each group has 8*nc_group_size elements, compute 1 complex correlation with DMRS per group
       // non-coherent combining across groups
+      uint8_t *sGold8 = (uint8_t *)&sGold[goldIdx];
       simde__m64 dmrs_re = byte2m64_re[sGold8[(group & 1) << 1]];
       int16_t *dmrs_re16 = (int16_t *)&dmrs_re;
       simde__m64 dmrs_im = byte2m64_im[sGold8[(group & 1) << 1]];
@@ -1238,22 +1232,22 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB,
       } //aa    
 
       if ((group & 1) == 1)
-        sGold = lte_gold_generic(&x1, &x2, 0);
+        goldIdx++;
     } // group
   } // symb
 
-  uint32_t x1, x2, sGold = 0;
   // unscrambling
-  x2 = ((pucch_pdu->rnti)<<15)+pucch_pdu->data_scrambling_id;
-  sGold = lte_gold_generic(&x1, &x2, 1);
-  uint8_t *sGold8 = (uint8_t *)&sGold;
+  uint32_t x2 = ((pucch_pdu->rnti) << 15) + pucch_pdu->data_scrambling_id;
 #ifdef DEBUG_NR_PUCCH_RX
   printf("x2 %x\n", x2);
 #endif
+  uint32_t *sGold = gold_cache(x2, pucch_pdu->nr_of_symbols * prb_size_ext / 2);
+  int goldIdx = 0;
   for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) {
     simde__m64 c_re[4], c_im[4];
     int re_off=0;
     for (int prb=0;prb<prb_size_ext;prb+=2,re_off+=16) {
+      uint8_t *sGold8 = (uint8_t *)(sGold + goldIdx);
       for (int z = 0; z < 4; z++) {
         c_re[z] = byte2m64_re[sGold8[z]];
         c_im[z] = byte2m64_im[sGold8[z]];
@@ -1333,7 +1327,7 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB,
                r_re_ext[aa][symb][re_off+15],r_im_ext[aa][symb][re_off+15]);
 #endif      
       }
-      sGold = lte_gold_generic(&x1, &x2, 0);
+      goldIdx++;
 #ifdef DEBUG_NR_PUCCH_RX
       printf("\n");
 #endif
diff --git a/openair1/PHY/NR_UE_TRANSPORT/pucch_nr.c b/openair1/PHY/NR_UE_TRANSPORT/pucch_nr.c
index b89c7cbb957f26e7ae0d9838ecc86106a6518f8f..c73af09eef799ac31dab4736191745ea6eeba23a 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/pucch_nr.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/pucch_nr.c
@@ -40,6 +40,7 @@
 #include <openair1/PHY/CODING/nrSmallBlock/nr_small_block_defs.h>
 #include "common/utils/LOG/log.h"
 #include "common/utils/LOG/vcd_signal_dumper.h"
+#include "openair1/PHY/NR_REFSIG/nr_refsig.h"
 
 #include "T.h"
 //#define NR_UNIT_TEST 1
@@ -484,46 +485,32 @@ void nr_generate_pucch1(const PHY_VARS_NR_UE *ue,
   }
 }
 
-static inline void nr_pucch2_3_4_scrambling(uint16_t M_bit,uint16_t rnti,uint16_t n_id,uint64_t *B64,uint8_t *btilde) {
-  uint32_t x1 = 0, x2 = 0, s = 0;
-  int i;
-  uint8_t c;
+static inline void nr_pucch2_3_4_scrambling(uint16_t M_bit, uint16_t rnti, uint16_t n_id, uint64_t *B64, uint8_t *btilde)
+{
   // c_init=nRNTI*2^15+n_id according to TS 38.211 Subclause 6.3.2.6.1
-  //x2 = (rnti) + ((uint32_t)(1+nr_slot_tx)<<16)*(1+(fp->Nid_cell<<1));
-  x2 = ((rnti)<<15)+n_id;
+  const int roundedSz = (M_bit + 31) / 32;
+  uint32_t *seq = gold_cache((rnti << 15) + n_id, roundedSz);
 #ifdef DEBUG_NR_PUCCH_TX
   printf("\t\t [nr_pucch2_3_4_scrambling] gold sequence s=%x, M_bit %d\n",s,M_bit);
 #endif
 
-  uint8_t *btildep=btilde;
-  int M_bit2=M_bit > 31 ? 32 : (M_bit&31), M_bit3=M_bit;
-  uint32_t B;
-  for (int iprime=0;iprime<=(M_bit>>5);iprime++,btildep+=32) {
-    s = lte_gold_generic(&x1, &x2, (iprime==0) ? 1 : 0);
-    B=((uint32_t*)B64)[iprime];
-    for (int n=0;n<M_bit2;n+=8)
-      LOG_D(PHY,"PUCCH2 encoded %d : %d,%d,%d,%d,%d,%d,%d,%d\n",n,
-	    (B>>n)&1,
-	    (B>>(n+1))&1,
-	    (B>>(n+2))&1,
-	    (B>>(n+3))&1,
-	    (B>>(n+4))&1,
-	    (B>>(n+5))&1,
-	    (B>>(n+6))&1,
-	    (B>>(n+7))&1
-	    );
-    for (i=0; i<M_bit2; i++) {
-      c = (uint8_t)((s>>i)&1);
+  uint8_t *btildep = btilde;
+  uint32_t *B32 = (uint32_t *)B64;
+
+  for (int iprime = 0; iprime < roundedSz; iprime++, btildep += 32) {
+    const uint32_t s = seq[iprime];
+    const uint32_t B = B32[iprime];
+    LOG_D(PHY, "PUCCH2 encoded: %02x\n", B);
+    int M_bit2 = iprime == M_bit / 32 ? M_bit % 32 : 32;
+    for (int i = 0; i < M_bit2; i++) {
+      uint8_t c = (uint8_t)((s >> i) & 1);
       btildep[i] = (((B>>i)&1) ^ c);
 #ifdef DEBUG_NR_PUCCH_TX
       printf("\t\t\t btilde[%d]=%x from unscrambled bit %d and scrambling %d (%x)\n",i+(iprime<<5),btilde[i],((B>>i)&1),c,s>>i);
 #endif
     }
-    M_bit3-=32;
-    M_bit2=M_bit3 > 31 ? 32 : (M_bit3&31);
   }
 
-
 #ifdef DEBUG_NR_PUCCH_TX
   printf("\t\t [nr_pucch2_3_4_scrambling] scrambling M_bit=%d bits\n", M_bit);
 #endif
@@ -721,9 +708,7 @@ void nr_generate_pucch2(const PHY_VARS_NR_UE *ue,
    * Implementing TS 38.211 Subclause 6.3.2.5.3 Mapping to physical resources
    */
   // int32_t *txptr;
-  uint32_t x1 = 0, x2 = 0, s = 0;
-  int i=0;
-  int m=0;
+  int outSample = 0;
   uint8_t  startingSymbolIndex = pucch_pdu->start_symbol_index;
   uint16_t startingPRB = pucch_pdu->prb_start + pucch_pdu->bwp_start;
 
@@ -732,14 +717,10 @@ void nr_generate_pucch2(const PHY_VARS_NR_UE *ue,
     uint64_t temp_x2 = 1ll << 17;
     temp_x2 *= 14UL * nr_slot_tx + l + startingSymbolIndex + 1;
     temp_x2 *= 2UL * pucch_pdu->dmrs_scrambling_id + 1;
-    x2 = (temp_x2 + 2UL * pucch_pdu->dmrs_scrambling_id) % (1UL << 31);
-
-    int reset = 1;
-    for (int ii=0; ii<=(startingPRB>>2); ii++) {
-      s = lte_gold_generic(&x1, &x2, reset);
-      reset = 0;
-    }
-    m = 0;
+    temp_x2 = (temp_x2 + 2ULL * pucch_pdu->dmrs_scrambling_id) % (1UL << 31);
+    uint idxGold = startingPRB >> 2;
+    uint32_t *seq = gold_cache(temp_x2, idxGold + pucch_pdu->prb_size);
+    int m = 0;
     for (int rb=0; rb<pucch_pdu->prb_size; rb++) {
       //startingPRB = startingPRB + rb;
       const bool nb_rb_is_even = frame_parms->N_RB_DL & 1;
@@ -773,7 +754,7 @@ void nr_generate_pucch2(const PHY_VARS_NR_UE *ue,
         }
 
         if (n%3 != 1) { // mapping PUCCH according to TS38.211 subclause 6.3.2.5.3
-          txdataF[0][re_offset] = d[i + k];
+          txdataF[0][re_offset] = d[outSample + k];
 #ifdef DEBUG_NR_PUCCH_TX
           printf(
               "\t [nr_generate_pucch2] (n=%d,i=%d) mapping PUCCH to RE \t amp=%d \tofdm_symbol_size=%d \tN_RB_DL=%d "
@@ -795,8 +776,8 @@ void nr_generate_pucch2(const PHY_VARS_NR_UE *ue,
         }
 
         if (n%3 == 1) { // mapping DM-RS signal according to TS38.211 subclause 6.4.1.3.2
-          txdataF[0][re_offset].r = (int16_t)(baseVal * (1 - (2 * ((uint8_t)((s >> (2 * m)) & 1)))));
-          txdataF[0][re_offset].i = (int16_t)(baseVal * (1 - (2 * ((uint8_t)((s >> (2 * m + 1)) & 1)))));
+          txdataF[0][re_offset].r = (int16_t)(baseVal * (1 - (2 * ((uint8_t)((seq[idxGold] >> (2 * m)) & 1)))));
+          txdataF[0][re_offset].i = (int16_t)(baseVal * (1 - (2 * ((uint8_t)((seq[idxGold] >> (2 * m + 1)) & 1)))));
           m++;
 #ifdef DEBUG_NR_PUCCH_TX
           printf(
@@ -821,10 +802,10 @@ void nr_generate_pucch2(const PHY_VARS_NR_UE *ue,
         re_offset++;
       }
 
-      i+=8;
+      outSample += 8;
 
-      if ((m&((1<<4)-1))==0) {
-        s = lte_gold_generic(&x1, &x2, 0);
+      if (m % 16 == 0) {
+        idxGold++;
         m = 0;
       }
     }