diff --git a/ci-scripts/constants.py b/ci-scripts/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e7e84fb32af9a694992f8b54f3f9bbb43278c66
--- /dev/null
+++ b/ci-scripts/constants.py
@@ -0,0 +1,71 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Version string of the CI python scripts
+#-----------------------------------------------------------
+Version = '0.2'
+
+#-----------------------------------------------------------
+# Constants: process status codes, UE status values, X2 handover request states
+#-----------------------------------------------------------
+ALL_PROCESSES_OK = 0  # overall success value
+ENB_PROCESS_FAILED = -1  # per component: *_FAILED / error codes are negative, *_OK codes are positive
+ENB_PROCESS_OK = +1
+ENB_PROCESS_SEG_FAULT = -11  # -11..-15: more specific eNB failure causes
+ENB_PROCESS_ASSERTION = -12
+ENB_PROCESS_REALTIME_ISSUE = -13
+ENB_PROCESS_NOLOGFILE_TO_ANALYZE = -14
+ENB_PROCESS_SLAVE_RRU_NOT_SYNCED = -15
+HSS_PROCESS_FAILED = -2
+HSS_PROCESS_OK = +2
+MME_PROCESS_FAILED = -3
+MME_PROCESS_OK = +3
+SPGW_PROCESS_FAILED = -4
+SPGW_PROCESS_OK = +4
+UE_IP_ADDRESS_ISSUE = -5
+OAI_UE_PROCESS_NOLOGFILE_TO_ANALYZE = -20  # -20..-25: OAI UE failure causes
+OAI_UE_PROCESS_COULD_NOT_SYNC = -21
+OAI_UE_PROCESS_ASSERTION = -22
+OAI_UE_PROCESS_FAILED = -23
+OAI_UE_PROCESS_NO_TUNNEL_INTERFACE = -24
+OAI_UE_PROCESS_SEG_FAULT = -25
+OAI_UE_PROCESS_OK = +6
+
+UE_STATUS_DETACHED = 0  # UE_STATUS_*: UE attachment status values
+UE_STATUS_DETACHING = 1
+UE_STATUS_ATTACHING = 2
+UE_STATUS_ATTACHED = 3
+
+X2_HO_REQ_STATE__IDLE = 0  # X2_HO_REQ_STATE__*: presumably the steps of an X2 handover request - confirm with the users of these values
+X2_HO_REQ_STATE__TARGET_RECEIVES_REQ = 1
+X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE = 2
+X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ = 3
+X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK = 10  # the only SOURCE_* value; numbered apart from the TARGET_* ones (1-3)
diff --git a/ci-scripts/epc.py b/ci-scripts/epc.py
new file mode 100644
index 0000000000000000000000000000000000000000..09d500f203d185fc00fd0778721a1b5e542d4a38
--- /dev/null
+++ b/ci-scripts/epc.py
@@ -0,0 +1,499 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Import
+#-----------------------------------------------------------
+import sys              # arg
+import re               # reg
+import logging
+import os
+import time
+import signal
+
+from multiprocessing import Process, Lock, SimpleQueue
+
+#-----------------------------------------------------------
+# OAI Testing modules
+#-----------------------------------------------------------
+import sshconnection as SSH 
+import helpreadme as HELP
+import constants as CONST
+import html
+
+#-----------------------------------------------------------
+# Class Declaration
+#-----------------------------------------------------------
+class EPCManagement():
+
+	def __init__(self):
+		"""Create a manager with every connection setting left blank."""
+		# Remote EPC host credentials, checkout location and EPC flavour;
+		# the Initialize* methods refuse to run while any of them is empty.
+		self.IPAddress = self.UserName = self.Password = ''
+		self.SourceCodePath = self.Type = ''
+		# Capture file name, filled in by InitializeHSS for the CUPS flavour.
+		self.PcapFileName = ''
+		self.htmlObj = None  # optional report object, see SetHtmlObj
+		self.testCase_id = self.MmeIPAddress = ''
+		# Prefix of the docker container names (<prefix>-oai-hss, ...).
+		self.containerPrefix = 'prod'
+
+#-----------------------------------------------------------
+# Setters and Getters on Public Members
+#-----------------------------------------------------------
+
+	def SetIPAddress(self, ipaddress):  # address handed to SSHConnection.open() by every remote action
+		self.IPAddress = ipaddress
+	def GetIPAddress(self):
+		return self.IPAddress
+	def SetUserName(self, username):  # SSH login on the EPC host
+		self.UserName = username
+	def GetUserName(self):
+		return self.UserName
+	def SetPassword(self, password):  # SSH password; also piped to 'sudo -S' in the remote commands
+		self.Password = password
+	def GetPassword(self):
+		return self.Password
+	def SetSourceCodePath(self, sourcecodepath):  # remote directory; scripts and logs live in <path>/scripts
+		self.SourceCodePath = sourcecodepath
+	def GetSourceCodePath(self):
+		return self.SourceCodePath
+	def SetType(self, kind):  # tested case-insensitively against 'OAI-Rel14-Docker', 'OAI-Rel14-CUPS', 'OAI', 'ltebox'
+		self.Type = kind
+	def GetType(self):
+		return self.Type
+	def SetHtmlObj(self, obj):  # obj must provide CreateHtmlTestRow(); None disables HTML reporting
+		self.htmlObj = obj
+	def SetTestCase_id(self, idx):  # string embedded in the log and pcap file names
+		self.testCase_id = idx
+	def GetMmeIPAddress(self):  # value computed by SetMmeIPAddress(); '' until that has run
+		return self.MmeIPAddress
+	def SetContainerPrefix(self, prefix):  # containers are addressed as <prefix>-oai-hss, <prefix>-oai-mme, ...
+		self.containerPrefix = prefix
+
+#-----------------------------------------------------------
+# EPC management functions
+#-----------------------------------------------------------
+
+	def InitializeHSS(self):  # Start the HSS flavour selected by self.Type on the EPC host; sys.exit() if a setting is empty.
+		if self.IPAddress == '' or self.UserName == '' or self.Password == '' or self.SourceCodePath == '' or self.Type == '':
+			HELP.GenericHelp(CONST.Version)
+			HELP.EPCSrvHelp(self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type)
+			sys.exit('Insufficient EPC Parameters')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 Cassandra-based HSS in Docker')
+			mySSH.command('if [ -d ' + self.SourceCodePath + '/scripts ]; then echo ' + self.Password + ' | sudo -S rm -Rf ' + self.SourceCodePath + '/scripts ; fi', r'\$', 5)
+			mySSH.command('mkdir -p ' + self.SourceCodePath + '/scripts', r'\$', 5)
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-hss /bin/bash -c "nohup tshark -i eth0 -i eth1 -w /tmp/hss_check_run.pcap 2>&1 > /dev/null"', r'\$', 5)
+			time.sleep(5)  # presumably lets tshark start capturing before the HSS is launched - confirm
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-hss /bin/bash -c "nohup ./bin/oai_hss -j ./etc/hss_rel14.json --reloadkey true > hss_check_run.log 2>&1"', r'\$', 5)
+		elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 Cassandra-based HSS')
+			mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+			logging.debug('\u001B[1m Launching tshark on all interfaces \u001B[0m')
+			self.PcapFileName = 'epc_' + self.testCase_id + '.pcap'
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm -f ' + self.PcapFileName, r'\$', 5)
+			mySSH.command('echo $USER; nohup sudo tshark -f "tcp port not 22 and port not 53" -i any -w ' + self.SourceCodePath + '/scripts/' + self.PcapFileName + ' > /tmp/tshark.log 2>&1 &', self.UserName, 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S mkdir -p logs', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm -f hss_' + self.testCase_id + '.log logs/hss*.*', r'\$', 5)
+			mySSH.command('echo "oai_hss -j /usr/local/etc/oai/hss_rel14.json" > ./my-hss.sh', r'\$', 5)
+			mySSH.command('chmod 755 ./my-hss.sh', r'\$', 5)
+			mySSH.command('sudo daemon --unsafe --name=hss_daemon --chdir=' + self.SourceCodePath + '/scripts -o ' + self.SourceCodePath + '/scripts/hss_' + self.testCase_id + '.log ./my-hss.sh', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE):  # prefix match: must stay after the OAI-Rel14-* tests, which it would also satisfy
+			logging.debug('Using the OAI EPC HSS')
+			mySSH.command('cd ' + self.SourceCodePath, r'\$', 5)
+			mySSH.command('source oaienv', r'\$', 5)
+			mySSH.command('cd scripts', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./run_hss 2>&1 | stdbuf -o0 awk \'{ print strftime("[%Y/%m/%d %H:%M:%S] ",systime()) $0 }\' | stdbuf -o0 tee -a hss_' + self.testCase_id + '.log &', 'Core state: 2 -> 3', 35)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			logging.debug('Using the ltebox simulated HSS')
+			mySSH.command('if [ -d ' + self.SourceCodePath + '/scripts ]; then echo ' + self.Password + ' | sudo -S rm -Rf ' + self.SourceCodePath + '/scripts ; fi', r'\$', 5)
+			mySSH.command('mkdir -p ' + self.SourceCodePath + '/scripts', r'\$', 5)
+			mySSH.command('cd /opt/hss_sim0609', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm -f hss.log', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S echo "Starting sudo session" && sudo su -c "screen -dm -S simulated_hss ./starthss"', r'\$', 5)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+		if self.htmlObj is not None:  # NOTE(review): the row says 'OK' even after the "should not occur" branch
+			self.htmlObj.CreateHtmlTestRow(self.Type, 'OK', CONST.ALL_PROCESSES_OK)
+
+	def InitializeMME(self):  # Start the MME flavour selected by self.Type on the EPC host; sys.exit() if a setting is empty.
+		if self.IPAddress == '' or self.UserName == '' or self.Password == '' or self.SourceCodePath == '' or self.Type == '':
+			HELP.GenericHelp(CONST.Version)
+			HELP.EPCSrvHelp(self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type)
+			sys.exit('Insufficient EPC Parameters')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 MME in Docker')
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-mme /bin/bash -c "nohup tshark -i eth0 -i lo:s10 -w /tmp/mme_check_run.pcap 2>&1 > /dev/null"', r'\$', 5)
+			time.sleep(5)  # presumably lets tshark start capturing before the MME is launched - confirm
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-mme /bin/bash -c "nohup ./bin/oai_mme -c ./etc/mme.conf > mme_check_run.log 2>&1"', r'\$', 5)
+		elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 MME')
+			mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm -f mme_' + self.testCase_id + '.log', r'\$', 5)
+			mySSH.command('echo "./run_mme --config-file /usr/local/etc/oai/mme.conf --set-virt-if" > ./my-mme.sh', r'\$', 5)
+			mySSH.command('chmod 755 ./my-mme.sh', r'\$', 5)
+			mySSH.command('sudo daemon --unsafe --name=mme_daemon --chdir=' + self.SourceCodePath + '/scripts -o ' + self.SourceCodePath + '/scripts/mme_' + self.testCase_id + '.log ./my-mme.sh', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE):  # prefix match: must stay after the OAI-Rel14-* tests, which it would also satisfy
+			mySSH.command('cd ' + self.SourceCodePath, r'\$', 5)
+			mySSH.command('source oaienv', r'\$', 5)
+			mySSH.command('cd scripts', r'\$', 5)
+			mySSH.command('stdbuf -o0 hostname', r'\$', 5)
+			result = re.search(r'hostname\\r\\n(?P<host_name>[a-zA-Z0-9\-\_]+)\\r\\n', mySSH.getBefore())
+			if result is None:
+				logging.debug('\u001B[1;37;41m Hostname Not Found! \u001B[0m')
+				sys.exit(1)
+			# The captured host name is not used afterwards; only the failed-match exit above has an effect.
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./run_mme 2>&1 | stdbuf -o0 tee -a mme_' + self.testCase_id + '.log &', 'MME app initialization complete', 100)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cd /opt/ltebox/tools', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./start_mme', r'\$', 5)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+		if self.htmlObj is not None:  # NOTE(review): the row says 'OK' even after the "should not occur" branch
+			self.htmlObj.CreateHtmlTestRow(self.Type, 'OK', CONST.ALL_PROCESSES_OK)
+
+	def SetMmeIPAddress(self):  # Resolve self.MmeIPAddress (takes no value): container IP for Docker, else the EPC host IP.
+		# Not an error if we don't need an EPC
+		if '' in (self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type) or self.IPAddress == 'none':
+			return
+		# Only in case of Docker containers, MME IP address is not the EPC HOST IP address
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH = SSH.SSHConnection()
+			mySSH.open(self.IPAddress, self.UserName, self.Password)
+			mySSH.command('docker inspect --format="MME_IP_ADDR = {{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}" ' + self.containerPrefix + '-oai-mme', r'\$', 5)
+			result = re.search(r'MME_IP_ADDR = (?P<mme_ip_addr>[0-9\.]+)', mySSH.getBefore())
+			if result is not None:
+				self.MmeIPAddress = result.group('mme_ip_addr')
+				logging.debug('MME IP Address is ' + self.MmeIPAddress)
+			else:  # the lookup used to fail silently; self.MmeIPAddress still keeps its previous value
+				logging.error('Could not retrieve the IP address of the ' + self.containerPrefix + '-oai-mme container')
+			mySSH.close()
+		else:
+			self.MmeIPAddress = self.IPAddress
+
+	def InitializeSPGW(self):  # Start the SPGW flavour selected by self.Type on the EPC host; sys.exit() if a setting is empty.
+		if self.IPAddress == '' or self.UserName == '' or self.Password == '' or self.SourceCodePath == '' or self.Type == '':
+			HELP.GenericHelp(CONST.Version)
+			HELP.EPCSrvHelp(self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type)
+			sys.exit('Insufficient EPC Parameters')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 SPGW-CUPS in Docker')
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "nohup tshark -i eth0 -i lo:p5c -i lo:s5c -w /tmp/spgwc_check_run.pcap 2>&1 > /dev/null"', r'\$', 5)
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "nohup tshark -i eth0 -w /tmp/spgwu_check_run.pcap 2>&1 > /dev/null"', r'\$', 5)
+			time.sleep(5)
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "nohup ./bin/oai_spgwc -o -c ./etc/spgw_c.conf > spgwc_check_run.log 2>&1"', r'\$', 5)
+			time.sleep(5)  # the control-plane part (spgwc) is started 5 s before the user-plane part (spgwu)
+			mySSH.command('docker exec -d ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "nohup ./bin/oai_spgwu -o -c ./etc/spgw_u.conf > spgwu_check_run.log 2>&1"', r'\$', 5)
+		elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+			logging.debug('Using the OAI EPC Release 14 SPGW-CUPS')
+			mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm -f spgwc_' + self.testCase_id + '.log spgwu_' + self.testCase_id + '.log', r'\$', 5)
+			mySSH.command('echo "spgwc -c /usr/local/etc/oai/spgw_c.conf" > ./my-spgwc.sh', r'\$', 5)
+			mySSH.command('chmod 755 ./my-spgwc.sh', r'\$', 5)
+			mySSH.command('sudo daemon --unsafe --name=spgwc_daemon --chdir=' + self.SourceCodePath + '/scripts -o ' + self.SourceCodePath + '/scripts/spgwc_' + self.testCase_id + '.log ./my-spgwc.sh', r'\$', 5)
+			time.sleep(5)
+			mySSH.command('echo "spgwu -c /usr/local/etc/oai/spgw_u.conf" > ./my-spgwu.sh', r'\$', 5)
+			mySSH.command('chmod 755 ./my-spgwu.sh', r'\$', 5)
+			mySSH.command('sudo daemon --unsafe --name=spgwu_daemon --chdir=' + self.SourceCodePath + '/scripts -o ' + self.SourceCodePath + '/scripts/spgwu_' + self.testCase_id + '.log ./my-spgwu.sh', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE):  # prefix match: must stay after the OAI-Rel14-* tests, which it would also satisfy
+			mySSH.command('cd ' + self.SourceCodePath, r'\$', 5)
+			mySSH.command('source oaienv', r'\$', 5)
+			mySSH.command('cd scripts', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./run_spgw 2>&1 | stdbuf -o0 tee -a spgw_' + self.testCase_id + '.log &', 'Initializing SPGW-APP task interface: DONE', 30)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cd /opt/ltebox/tools', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./start_xGw', r'\$', 5)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+		if self.htmlObj is not None:  # NOTE(review): the row says 'OK' even after the "should not occur" branch
+			self.htmlObj.CreateHtmlTestRow(self.Type, 'OK', CONST.ALL_PROCESSES_OK)
+
+	def CheckHSSProcess(self, status_queue):  # Put CONST.HSS_PROCESS_OK or CONST.HSS_PROCESS_FAILED on status_queue.
+		try:
+			mySSH = SSH.SSHConnection()
+			mySSH.open(self.IPAddress, self.UserName, self.Password)
+			if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-hss /bin/bash -c "ps aux | grep oai_hss"', r'\$', 5)
+			else:
+				mySSH.command('stdbuf -o0 ps -aux | grep --color=never hss | grep -v grep', r'\$', 5)
+			result = None  # an unrecognised self.Type is now reported as FAILED instead of raising on an unbound name
+			if re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE) or re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+				result = re.search('oai_hss -j', mySSH.getBefore())
+			elif re.match('OAI', self.Type, re.IGNORECASE):
+				result = re.search(r'\/bin\/bash .\/run_', mySSH.getBefore())
+			elif re.match('ltebox', self.Type, re.IGNORECASE):
+				result = re.search('hss_sim s6as diam_hss', mySSH.getBefore())
+			else:
+				logging.error('This should not happen!')
+			if result is None:
+				logging.debug('\u001B[1;37;41m HSS Process Not Found! \u001B[0m')
+			status_queue.put(CONST.HSS_PROCESS_FAILED if result is None else CONST.HSS_PROCESS_OK)
+			mySSH.close()
+		except:
+			# NOTE(review): bare except kept on purpose - it also traps SystemExit/KeyboardInterrupt before signalling the parent.
+			os.kill(os.getppid(),signal.SIGUSR1)
+
+	def CheckMMEProcess(self, status_queue):  # Put CONST.MME_PROCESS_OK or CONST.MME_PROCESS_FAILED on status_queue.
+		try:
+			mySSH = SSH.SSHConnection()
+			mySSH.open(self.IPAddress, self.UserName, self.Password)
+			if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-mme /bin/bash -c "ps aux | grep oai_mme"', r'\$', 5)
+			else:
+				mySSH.command('stdbuf -o0 ps -aux | grep --color=never mme | grep -v grep', r'\$', 5)
+			result = None  # an unrecognised self.Type is now reported as FAILED instead of raising on an unbound name
+			if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+				result = re.search('oai_mme -c ', mySSH.getBefore())
+			elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+				result = re.search('mme -c', mySSH.getBefore())
+			elif re.match('OAI', self.Type, re.IGNORECASE):
+				result = re.search(r'\/bin\/bash .\/run_', mySSH.getBefore())
+			elif re.match('ltebox', self.Type, re.IGNORECASE):
+				result = re.search('mme', mySSH.getBefore())
+			else:
+				logging.error('This should not happen!')
+			if result is None:
+				logging.debug('\u001B[1;37;41m MME Process Not Found! \u001B[0m')
+			status_queue.put(CONST.MME_PROCESS_FAILED if result is None else CONST.MME_PROCESS_OK)
+			mySSH.close()
+		except:
+			# NOTE(review): bare except kept on purpose - it also traps SystemExit/KeyboardInterrupt before signalling the parent.
+			os.kill(os.getppid(),signal.SIGUSR1)
+
+	def CheckSPGWProcess(self, status_queue):  # Put CONST.SPGW_PROCESS_OK or CONST.SPGW_PROCESS_FAILED on status_queue.
+		try:
+			mySSH = SSH.SSHConnection()
+			mySSH.open(self.IPAddress, self.UserName, self.Password)
+			result = None  # an unrecognised self.Type is now reported as FAILED instead of raising on an unbound name
+			if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "ps aux | grep oai_spgwc"', r'\$', 5)
+				result = re.search('oai_spgwc -o -c ', mySSH.getBefore())
+				if result is not None:  # spgwu is only looked for once spgwc was found, so both must run to report OK
+					mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "ps aux | grep oai_spgwu"', r'\$', 5)
+					result = re.search('oai_spgwu -o -c ', mySSH.getBefore())
+			elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+				mySSH.command('stdbuf -o0 ps -aux | grep --color=never spgw | grep -v grep', r'\$', 5)
+				result = re.search('spgwu -c ', mySSH.getBefore())
+			elif re.match('OAI', self.Type, re.IGNORECASE):
+				mySSH.command('stdbuf -o0 ps -aux | grep --color=never spgw | grep -v grep', r'\$', 5)
+				result = re.search(r'\/bin\/bash .\/run_', mySSH.getBefore())
+			elif re.match('ltebox', self.Type, re.IGNORECASE):
+				mySSH.command('stdbuf -o0 ps -aux | grep --color=never xGw | grep -v grep', r'\$', 5)
+				result = re.search('xGw', mySSH.getBefore())
+			else:
+				logging.error('This should not happen!')
+			if result is None:
+				logging.debug('\u001B[1;37;41m SPGW Process Not Found! \u001B[0m')
+			status_queue.put(CONST.SPGW_PROCESS_FAILED if result is None else CONST.SPGW_PROCESS_OK)
+			mySSH.close()
+		except:
+			# NOTE(review): bare except kept on purpose - it also traps SystemExit/KeyboardInterrupt before signalling the parent.
+			os.kill(os.getppid(),signal.SIGUSR1)
+
+	def TerminateHSS(self):  # Stop the HSS: SIGINT, then SIGKILL if still listed 2 s later (ltebox: SIGKILL directly).
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-hss /bin/bash -c "killall --signal SIGINT oai_hss tshark"', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-hss /bin/bash -c "ps aux | grep oai_hss"', r'\$', 5)
+			result = re.search('oai_hss -j ', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-hss /bin/bash -c "killall --signal SIGKILL oai_hss"', r'\$', 5)
+		elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT oai_hss || true', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('stdbuf -o0  ps -aux | grep hss | grep -v grep', r'\$', 5)
+			result = re.search('oai_hss -j', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL oai_hss || true', r'\$', 5)
+			mySSH.command('rm -f ' + self.SourceCodePath + '/scripts/my-hss.sh', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE):  # prefix match: must stay after the OAI-Rel14-* tests, which it would also satisfy
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT run_hss oai_hss || true', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('stdbuf -o0  ps -aux | grep hss | grep -v grep', r'\$', 5)
+			result = re.search(r'\/bin\/bash .\/run_', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL run_hss oai_hss || true', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cd ' + self.SourceCodePath, r'\$', 5)
+			mySSH.command('cd scripts', r'\$', 5)
+			time.sleep(1)
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL hss_sim', r'\$', 5)
+		else:
+			logging.error('This should not happen!')
+		mySSH.close()
+		if self.htmlObj is not None:  # the row is always 'OK': the outcome of the kill commands is not checked
+			self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+
+	def TerminateMME(self):  # Stop the MME: SIGINT, then SIGKILL if still listed 2 s later (ltebox: ./stop_mme).
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-mme /bin/bash -c "killall --signal SIGINT oai_mme tshark"', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-mme /bin/bash -c "ps aux | grep oai_mme"', r'\$', 5)
+			result = re.search('oai_mme -c ', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-mme /bin/bash -c "killall --signal SIGKILL oai_mme"', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE) or re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):  # 2nd test is redundant: 'OAI' already prefixes it
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT run_mme mme || true', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('stdbuf -o0 ps -aux | grep mme | grep -v grep', r'\$', 5)
+			result = re.search('mme -c', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL run_mme mme || true', r'\$', 5)
+			mySSH.command('rm -f ' + self.SourceCodePath + '/scripts/my-mme.sh', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cd /opt/ltebox/tools', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./stop_mme', r'\$', 5)
+		else:
+			logging.error('This should not happen!')
+		mySSH.close()
+		if self.htmlObj is not None:  # the row is always 'OK': the outcome of the kill commands is not checked
+			self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+
+	def TerminateSPGW(self):  # Stop the SPGW: SIGINT, then SIGKILL if still listed 2 s later (ltebox: ./stop_xGw).
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "killall --signal SIGINT oai_spgwc tshark"', r'\$', 5)
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "killall --signal SIGINT oai_spgwu tshark"', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "ps aux | grep oai_spgwc"', r'\$', 5)
+			result = re.search('oai_spgwc -o -c ', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwc /bin/bash -c "killall --signal SIGKILL oai_spgwc"', r'\$', 5)
+			mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "ps aux | grep oai_spgwu"', r'\$', 5)
+			result = re.search('oai_spgwu -o -c ', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('docker exec -it ' + self.containerPrefix + '-oai-spgwu-tiny /bin/bash -c "killall --signal SIGKILL oai_spgwu"', r'\$', 5)
+		elif re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT spgwc spgwu || true', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('stdbuf -o0 ps -aux | grep spgw | grep -v grep', r'\$', 5)
+			result = re.search('spgwc -c |spgwu -c ', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL spgwc spgwu || true', r'\$', 5)
+			mySSH.command('rm -f ' + self.SourceCodePath + '/scripts/my-spgw*.sh', r'\$', 5)
+			mySSH.command('stdbuf -o0 ps -aux | grep tshark | grep -v grep', r'\$', 5)
+			result = re.search('-w ', mySSH.getBefore())  # a tshark started with '-w <file>' is still capturing
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT tshark || true', r'\$', 5)
+				mySSH.command('echo ' + self.Password + ' | sudo -S chmod 666 ' + self.SourceCodePath + '/scripts/*.pcap', r'\$', 5)
+		elif re.match('OAI', self.Type, re.IGNORECASE):  # prefix match: must stay after the OAI-Rel14-* tests, which it would also satisfy
+			mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGINT run_spgw spgw || true', r'\$', 5)
+			time.sleep(2)
+			mySSH.command('stdbuf -o0 ps -aux | grep spgw | grep -v grep', r'\$', 5)
+			result = re.search(r'\/bin\/bash .\/run_', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + self.Password + ' | sudo -S killall --signal SIGKILL run_spgw spgw || true', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cd /opt/ltebox/tools', r'\$', 5)
+			mySSH.command('echo ' + self.Password + ' | sudo -S ./stop_xGw', r'\$', 5)
+		else:
+			logging.error('This should not happen!')
+		mySSH.close()
+		if self.htmlObj is not None:  # the row is always 'OK': the outcome of the kill commands is not checked
+			self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+
+	def LogCollectHSS(self):  # Build hss.log.zip in <SourceCodePath>/scripts on the EPC host from the HSS logs/captures.
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+		mySSH.command('rm -f hss.log.zip', r'\$', 5)
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-hss:/openair-hss/hss_check_run.log .', r'\$', 60)
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-hss:/tmp/hss_check_run.pcap .', r'\$', 60)
+			mySSH.command('zip hss.log.zip hss_check_run.*', r'\$', 60)
+		elif re.match('OAI', self.Type, re.IGNORECASE) or re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):  # 2nd test is redundant: 'OAI' already prefixes it
+			mySSH.command('zip hss.log.zip hss*.log', r'\$', 60)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm hss*.log', r'\$', 5)
+			if re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):  # CUPS also archives logs/hss*.* and the pcap files
+				mySSH.command('zip hss.log.zip logs/hss*.* *.pcap', r'\$', 60)
+				mySSH.command('echo ' + self.Password + ' | sudo -S rm -f logs/hss*.* *.pcap', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cp /opt/hss_sim0609/hss.log .', r'\$', 60)
+			mySSH.command('zip hss.log.zip hss.log', r'\$', 60)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+
+	def LogCollectMME(self):  # Build mme.log.zip in <SourceCodePath>/scripts on the EPC host from the MME logs/captures.
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+		mySSH.command('rm -f mme.log.zip', r'\$', 5)
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-mme:/openair-mme/mme_check_run.log .', r'\$', 60)
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-mme:/tmp/mme_check_run.pcap .', r'\$', 60)
+			mySSH.command('zip mme.log.zip mme_check_run.*', r'\$', 60)
+		elif re.match('OAI', self.Type, re.IGNORECASE) or re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):  # 2nd test is redundant: 'OAI' already prefixes it
+			mySSH.command('zip mme.log.zip mme*.log', r'\$', 60)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm mme*.log', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cp /opt/ltebox/var/log/*Log.0 .', r'\$', 5)
+			mySSH.command('zip mme.log.zip mmeLog.0 s1apcLog.0 s1apsLog.0 s11cLog.0 libLog.0 s1apCodecLog.0', r'\$', 60)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+
+	def LogCollectSPGW(self):  # Build spgw.log.zip in <SourceCodePath>/scripts on the EPC host from the SPGW logs/captures.
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.IPAddress, self.UserName, self.Password)  # prompt patterns below are raw strings: plain '\$' is an invalid escape
+		mySSH.command('cd ' + self.SourceCodePath + '/scripts', r'\$', 5)
+		mySSH.command('rm -f spgw.log.zip', r'\$', 5)
+		if re.match('OAI-Rel14-Docker', self.Type, re.IGNORECASE):
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-spgwc:/openair-spgwc/spgwc_check_run.log .', r'\$', 60)
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-spgwu-tiny:/openair-spgwu-tiny/spgwu_check_run.log .', r'\$', 60)
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-spgwc:/tmp/spgwc_check_run.pcap .', r'\$', 60)
+			mySSH.command('docker cp ' + self.containerPrefix + '-oai-spgwu-tiny:/tmp/spgwu_check_run.pcap .', r'\$', 60)
+			mySSH.command('zip spgw.log.zip spgw*_check_run.*', r'\$', 60)
+		elif re.match('OAI', self.Type, re.IGNORECASE) or re.match('OAI-Rel14-CUPS', self.Type, re.IGNORECASE):  # 2nd test is redundant: 'OAI' already prefixes it
+			mySSH.command('zip spgw.log.zip spgw*.log', r'\$', 60)
+			mySSH.command('echo ' + self.Password + ' | sudo -S rm spgw*.log', r'\$', 5)
+		elif re.match('ltebox', self.Type, re.IGNORECASE):
+			mySSH.command('cp /opt/ltebox/var/log/xGwLog.0 .', r'\$', 5)
+			mySSH.command('zip spgw.log.zip xGwLog.0', r'\$', 60)
+		else:
+			logging.error('This option should not occur!')
+		mySSH.close()
+
diff --git a/ci-scripts/helpreadme.py b/ci-scripts/helpreadme.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b58c77510d72ed314e3e90c876d11abb55e49aa
--- /dev/null
+++ b/ci-scripts/helpreadme.py
@@ -0,0 +1,81 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Functions Declaration
+#-----------------------------------------------------------
+
+def GenericHelp(vers):
+	print('----------------------------------------------------------------------------------------------------------------------')
+	print('main.py Ver: ' + vers)
+	print('----------------------------------------------------------------------------------------------------------------------')
+	print('python main.py [options]')
+	print('  --help  Show this help.')
+	print('  --mode=[Mode]')
+	print('      TesteNB')
+	print('      InitiateHtml, FinalizeHtml')
+	print('      TerminateeNB, TerminateUE, TerminateHSS, TerminateMME, TerminateSPGW')
+	print('      LogCollectBuild, LogCollecteNB, LogCollectHSS, LogCollectMME, LogCollectSPGW, LogCollectPing, LogCollectIperf')
+
+def GitSrvHelp(repository,branch,commit,mergeallow,targetbranch):
+	print('  --ranRepository=[OAI RAN Repository URL]                                      -- ' + repository)
+	print('  --ranBranch=[OAI RAN Repository Branch]                                       -- ' + branch)
+	print('  --ranCommitID=[OAI RAN Repository Commit SHA-1]                               -- ' + commit)
+	print('  --ranAllowMerge=[Allow Merge Request (with target branch) (true or false)]    -- ' + mergeallow)
+	print('  --ranTargetBranch=[Target Branch in case of a Merge Request]                  -- ' + targetbranch)
+
+def eNBSrvHelp(ipaddr, username, password, sourcepath):
+	print('  --eNBIPAddress=[eNB\'s IP Address]                       -- ' + ipaddr)
+	print('  --eNBUserName=[eNB\'s Login User Name]                   -- ' + username)
+	print('  --eNBPassword=[eNB\'s Login Password]                    -- ' + password)
+	print('  --eNBSourceCodePath=[eNB\'s Source Code Path]            -- ' + sourcepath)
+
+def OAIUESrvHelp(ipaddr, username, password, sourcepath):
+	print('  --UEIPAddress=[UE\'s IP Address]                         -- ' + ipaddr)
+	print('  --UEUserName=[UE\'s Login User Name]                     -- ' + username)
+	print('  --UEPassword=[UE\'s Login Password]                      -- ' + password)
+	print('  --UESourceCodePath=[UE\'s Source Code Path]              -- ' + sourcepath)
+		
+def EPCSrvHelp(ipaddr, username, password, sourcepath, epctype):
+	print('  --EPCIPAddress=[EPC\'s IP Address]                       -- ' + ipaddr)
+	print('  --EPCUserName=[EPC\'s Login User Name]                   -- ' + username)
+	print('  --EPCPassword=[EPC\'s Login Password]                    -- ' + password)
+	print('  --EPCSourceCodePath=[EPC\'s Source Code Path]            -- ' + sourcepath)
+	print('  --EPCType=[EPC\'s Type: OAI or ltebox or OAI-Rel14-CUPS] -- ' + epctype)
+
+def ADBSrvHelp(ipaddr, username, password):
+	print('  --ADBIPAddress=[ADB\'s IP Address]                       -- ' + ipaddr)
+	print('  --ADBUserName=[ADB\'s Login User Name]                   -- ' + username)
+	print('  --ADBPassword=[ADB\'s Login Password]                    -- ' + password)
+
+def XmlHelp(filename):
+	print('  --XMLTestFile=[XML Test File to be run]                  -- ' + filename)
+	print('	Note: multiple xml files can be specified (--XMLFile=File1 ... --XMLTestFile=FileN) when HTML headers are created ("InitiateHtml" mode)')
+
diff --git a/ci-scripts/html.py b/ci-scripts/html.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a6e3f8fdb45b9c5aacb5e9a98ec361a32c4ef47
--- /dev/null
+++ b/ci-scripts/html.py
@@ -0,0 +1,479 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Import
+#-----------------------------------------------------------
+import sys              # arg
+import re               # reg
+import logging
+import os
+import time
+import subprocess
+from multiprocessing import Process, Lock, SimpleQueue
+
+import constants as CONST
+
+#-----------------------------------------------------------
+# Class Declaration
+#-----------------------------------------------------------
+class HTMLManagement():
+
+	def __init__(self):
+	
+		self.htmlFile = ''
+		self.htmlHeaderCreated = False
+		self.htmlFooterCreated = False
+
+		self.ranRepository = ''
+		self.ranBranch = ''
+		self.ranCommitID = ''
+		self.ranAllowMerge = False
+		self.ranTargetBranch = ''
+
+		self.nbTestXMLfiles = 0
+		self.htmlTabRefs = []
+		self.htmlTabNames = []
+		self.htmlTabIcons = []
+		self.testXMLfiles = []
+
+		self.htmleNBFailureMsg = ''
+		self.htmlUEFailureMsg = ''
+
+		self.startTime = int(round(time.time() * 1000))
+		self.testCase_id = ''
+		self.desc = ''
+
+		self.OsVersion = ['', '']
+		self.KernelVersion = ['', '']
+		self.UhdVersion = ['', '']
+		self.UsrpBoard = ['', '']
+		self.CpuNb = ['', '']
+		self.CpuModel = ['', '']
+		self.CpuMHz = ['', '']
+
+#-----------------------------------------------------------
+# Setters and Getters
+#-----------------------------------------------------------
+	def SethtmlUEFailureMsg(self,huefa):
+		self.htmlUEFailureMsg = huefa
+	def GethtmlUEFailureMsg(self):
+		return self.htmlUEFailureMsg
+	def SetHmleNBFailureMsg(self, msg):
+		self.htmleNBFailureMsg = msg
+
+	def Setdesc(self, dsc):
+		self.desc = dsc
+
+	def SetstartTime(self, sttime):
+		self.startTime = sttime
+
+	def SettestCase_id(self, tcid):
+		self.testCase_id = tcid
+	def GettestCase_id(self):
+		return self.testCase_id
+
+	def SetranRepository(self, repository):
+		self.ranRepository = repository
+	def SetranAllowMerge(self, merge):
+		self.ranAllowMerge = merge
+	def SetranBranch(self, branch):
+		self.ranBranch = branch
+	def SetranCommitID(self, commitid):
+		self.ranCommitID = commitid
+	def SetranTargetBranch(self, tbranch):
+		self.ranTargetBranch = tbranch
+
+	def SethtmlUEConnected(self, nbUEs):
+		if nbUEs > 0:
+			self.htmlUEConnected = nbUEs
+		else:
+			self.htmlUEConnected = 1
+	def SethtmlNb_Smartphones(self, nbUEs):
+		self.htmlNb_Smartphones = nbUEs
+	def SethtmlNb_CATM_Modules(self, nbUEs):
+		self.htmlNb_CATM_Modules = nbUEs
+
+	def SetnbTestXMLfiles(self, nb):
+		self.nbTestXMLfiles = nb
+	def GetnbTestXMLfiles(self):
+		return self.nbTestXMLfiles
+
+	def SettestXMLfiles(self, xmlFile):
+		self.testXMLfiles.append(xmlFile)
+	def SethtmlTabRefs(self, tabRef):
+		self.htmlTabRefs.append(tabRef)
+	def SethtmlTabNames(self, tabName):
+		self.htmlTabNames.append(tabName)
+	def SethtmlTabIcons(self, tabIcon):
+		self.htmlTabIcons.append(tabIcon)
+
+	def SetOsVersion(self, version, idx):
+		self.OsVersion[idx] = version
+	def SetKernelVersion(self, version, idx):
+		self.KernelVersion[idx] = version
+	def SetUhdVersion(self, version, idx):
+		self.UhdVersion[idx] = version
+	def SetUsrpBoard(self, version, idx):
+		self.UsrpBoard[idx] = version
+	def SetCpuNb(self, nb, idx):
+		self.CpuNb[idx] = nb
+	def SetCpuModel(self, model, idx):
+		self.CpuModel[idx] = model
+	def SetCpuMHz(self, freq, idx):
+		self.CpuMHz[idx] = freq
+
+#-----------------------------------------------------------
+# HTML structure creation functions
+#-----------------------------------------------------------
+
+
+	def CreateHtmlHeader(self, ADBIPAddress):
+		if (not self.htmlHeaderCreated):
+			logging.debug('\u001B[1m----------------------------------------\u001B[0m')
+			logging.debug('\u001B[1m  Creating HTML header \u001B[0m')
+			logging.debug('\u001B[1m----------------------------------------\u001B[0m')
+			self.htmlFile = open('test_results.html', 'w')
+			self.htmlFile.write('<!DOCTYPE html>\n')
+			self.htmlFile.write('<html class="no-js" lang="en-US">\n')
+			self.htmlFile.write('<head>\n')
+			self.htmlFile.write('  <meta name="viewport" content="width=device-width, initial-scale=1">\n')
+			self.htmlFile.write('  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">\n')
+			self.htmlFile.write('  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>\n')
+			self.htmlFile.write('  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script>\n')
+			self.htmlFile.write('  <title>Test Results for TEMPLATE_JOB_NAME job build #TEMPLATE_BUILD_ID</title>\n')
+			self.htmlFile.write('</head>\n')
+			self.htmlFile.write('<body><div class="container">\n')
+			self.htmlFile.write('  <br>\n')
+			self.htmlFile.write('  <table style="border-collapse: collapse; border: none;">\n')
+			self.htmlFile.write('    <tr style="border-collapse: collapse; border: none;">\n')
+			self.htmlFile.write('      <td style="border-collapse: collapse; border: none;">\n')
+			self.htmlFile.write('        <a href="http://www.openairinterface.org/">\n')
+			self.htmlFile.write('           <img src="http://www.openairinterface.org/wp-content/uploads/2016/03/cropped-oai_final_logo2.png" alt="" border="none" height=50 width=150>\n')
+			self.htmlFile.write('           </img>\n')
+			self.htmlFile.write('        </a>\n')
+			self.htmlFile.write('      </td>\n')
+			self.htmlFile.write('      <td style="border-collapse: collapse; border: none; vertical-align: center;">\n')
+			self.htmlFile.write('        <b><font size = "6">Job Summary -- Job: TEMPLATE_JOB_NAME -- Build-ID: TEMPLATE_BUILD_ID</font></b>\n')
+			self.htmlFile.write('      </td>\n')
+			self.htmlFile.write('    </tr>\n')
+			self.htmlFile.write('  </table>\n')
+			self.htmlFile.write('  <br>\n')
+			self.htmlFile.write('  <div class="alert alert-info"><strong> <span class="glyphicon glyphicon-dashboard"></span> TEMPLATE_STAGE_NAME</strong></div>\n')
+			self.htmlFile.write('  <table border = "1">\n')
+			self.htmlFile.write('     <tr>\n')
+			self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-time"></span> Build Start Time (UTC) </td>\n')
+			self.htmlFile.write('       <td>TEMPLATE_BUILD_TIME</td>\n')
+			self.htmlFile.write('     </tr>\n')
+			self.htmlFile.write('     <tr>\n')
+			self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-cloud-upload"></span> GIT Repository </td>\n')
+			self.htmlFile.write('       <td><a href="' + self.ranRepository + '">' + self.ranRepository + '</a></td>\n')
+			self.htmlFile.write('     </tr>\n')
+			self.htmlFile.write('     <tr>\n')
+			self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-wrench"></span> Job Trigger </td>\n')
+			if (self.ranAllowMerge):
+				self.htmlFile.write('       <td>Merge-Request</td>\n')
+			else:
+				self.htmlFile.write('       <td>Push to Branch</td>\n')
+			self.htmlFile.write('     </tr>\n')
+			self.htmlFile.write('     <tr>\n')
+			if (self.ranAllowMerge):
+				self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-log-out"></span> Source Branch </td>\n')
+			else:
+				self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-tree-deciduous"></span> Branch</td>\n')
+			self.htmlFile.write('       <td>' + self.ranBranch + '</td>\n')
+			self.htmlFile.write('     </tr>\n')
+			self.htmlFile.write('     <tr>\n')
+			if (self.ranAllowMerge):
+				self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-tag"></span> Source Commit ID </td>\n')
+			else:
+				self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-tag"></span> Commit ID </td>\n')
+			self.htmlFile.write('       <td>' + self.ranCommitID + '</td>\n')
+			self.htmlFile.write('     </tr>\n')
+			if self.ranAllowMerge != '':
+				commit_message = subprocess.check_output("git log -n1 --pretty=format:\"%s\" " + self.ranCommitID, shell=True, universal_newlines=True)
+				commit_message = commit_message.strip()
+				self.htmlFile.write('     <tr>\n')
+				if (self.ranAllowMerge):
+					self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-comment"></span> Source Commit Message </td>\n')
+				else:
+					self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-comment"></span> Commit Message </td>\n')
+				self.htmlFile.write('       <td>' + commit_message + '</td>\n')
+				self.htmlFile.write('     </tr>\n')
+			if (self.ranAllowMerge):
+				self.htmlFile.write('     <tr>\n')
+				self.htmlFile.write('       <td bgcolor = "lightcyan" > <span class="glyphicon glyphicon-log-in"></span> Target Branch </td>\n')
+				if (self.ranTargetBranch == ''):
+					self.htmlFile.write('       <td>develop</td>\n')
+				else:
+					self.htmlFile.write('       <td>' + self.ranTargetBranch + '</td>\n')
+				self.htmlFile.write('     </tr>\n')
+			self.htmlFile.write('  </table>\n')
+
+			if (ADBIPAddress != 'none'):
+				self.htmlFile.write('  <h2><span class="glyphicon glyphicon-phone"></span> <span class="glyphicon glyphicon-menu-right"></span> ' + str(self.htmlNb_Smartphones) + ' UE(s) is(are) connected to ADB bench server</h2>\n')
+				self.htmlFile.write('  <h2><span class="glyphicon glyphicon-phone"></span> <span class="glyphicon glyphicon-menu-right"></span> ' + str(self.htmlNb_CATM_Modules) + ' CAT-M UE(s) is(are) connected to bench server</h2>\n')
+			else:
+				self.htmlUEConnected = 1
+				self.htmlFile.write('  <h2><span class="glyphicon glyphicon-phone"></span> <span class="glyphicon glyphicon-menu-right"></span> 1 OAI UE(s) is(are) connected to CI bench</h2>\n')
+			self.htmlFile.write('  <br>\n')
+			self.htmlFile.write('  <ul class="nav nav-pills">\n')
+			count = 0
+			while (count < self.nbTestXMLfiles):
+				pillMsg = '    <li><a data-toggle="pill" href="#'
+				pillMsg += self.htmlTabRefs[count]
+				pillMsg += '">'
+				pillMsg += '__STATE_' + self.htmlTabNames[count] + '__'
+				pillMsg += self.htmlTabNames[count]
+				pillMsg += ' <span class="glyphicon glyphicon-'
+				pillMsg += self.htmlTabIcons[count]
+				pillMsg += '"></span></a></li>\n'
+				self.htmlFile.write(pillMsg)
+				count += 1
+			self.htmlFile.write('  </ul>\n')
+			self.htmlFile.write('  <div class="tab-content">\n')
+			self.htmlFile.close()
+
+	def CreateHtmlTabHeader(self):
+		if (not self.htmlHeaderCreated):
+			if (not os.path.isfile('test_results.html')):
+				self.CreateHtmlHeader('none')
+			self.htmlFile = open('test_results.html', 'a')
+			if (self.nbTestXMLfiles == 1):
+				self.htmlFile.write('  <div id="' + self.htmlTabRefs[0] + '" class="tab-pane fade">\n')
+				self.htmlFile.write('  <h3>Test Summary for <span class="glyphicon glyphicon-file"></span> ' + self.testXMLfiles[0] + '</h3>\n')
+			else:
+				self.htmlFile.write('  <div id="build-tab" class="tab-pane fade">\n')
+			self.htmlFile.write('  <table class="table" border = "1">\n')
+			self.htmlFile.write('      <tr bgcolor = "#33CCFF" >\n')
+			self.htmlFile.write('        <th>Relative Time (ms)</th>\n')
+			self.htmlFile.write('        <th>Test Id</th>\n')
+			self.htmlFile.write('        <th>Test Desc</th>\n')
+			self.htmlFile.write('        <th>Test Options</th>\n')
+			self.htmlFile.write('        <th>Test Status</th>\n')
+
+			i = 0
+			while (i < self.htmlUEConnected):
+				self.htmlFile.write('        <th>UE' + str(i) + ' Status</th>\n')
+				i += 1
+			self.htmlFile.write('      </tr>\n')
+			self.htmlFile.close()
+		self.htmlHeaderCreated = True
+
+	def CreateHtmlTabFooter(self, passStatus):
+		if ((not self.htmlFooterCreated) and (self.htmlHeaderCreated)):
+			self.htmlFile = open('test_results.html', 'a')
+			self.htmlFile.write('      <tr>\n')
+			self.htmlFile.write('        <th bgcolor = "#33CCFF" colspan=3>Final Tab Status</th>\n')
+			if passStatus:
+				self.htmlFile.write('        <th bgcolor = "green" colspan=' + str(2 + self.htmlUEConnected) + '><font color="white">PASS <span class="glyphicon glyphicon-ok"></span> </font></th>\n')
+			else:
+				self.htmlFile.write('        <th bgcolor = "red" colspan=' + str(2 + self.htmlUEConnected) + '><font color="white">FAIL <span class="glyphicon glyphicon-remove"></span> </font></th>\n')
+			self.htmlFile.write('      </tr>\n')
+			self.htmlFile.write('  </table>\n')
+			self.htmlFile.write('  </div>\n')
+			self.htmlFile.close()
+			time.sleep(1)
+			if passStatus:
+				cmd = "sed -i -e 's/__STATE_" + self.htmlTabNames[0] + "__//' test_results.html"
+				subprocess.run(cmd, shell=True)
+			else:
+				cmd = "sed -i -e 's/__STATE_" + self.htmlTabNames[0] + "__/<span class=\"glyphicon glyphicon-remove\"><\/span>/' test_results.html"
+				subprocess.run(cmd, shell=True)
+		self.htmlFooterCreated = False
+
+	def CreateHtmlFooter(self, passStatus):
+		if (os.path.isfile('test_results.html')):
+			self.htmlFile = open('test_results.html', 'a')
+			self.htmlFile.write('</div>\n')
+			self.htmlFile.write('  <p></p>\n')
+			self.htmlFile.write('  <table class="table table-condensed">\n')
+
+			machines = [ 'eNB', 'UE' ]
+			for machine in machines:
+				if machine == 'eNB':
+					idx = 0
+				else:
+					idx = 1
+				if self.OsVersion[idx] == '':
+					continue
+
+				self.htmlFile.write('      <tr>\n')
+				self.htmlFile.write('        <th colspan=8>' + str('eNB') + ' Server Characteristics</th>\n')
+				self.htmlFile.write('      </tr>\n')
+				self.htmlFile.write('      <tr>\n')
+				self.htmlFile.write('        <td>OS Version</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.OsVersion[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td>Kernel Version</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.KernelVersion[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td>UHD Version</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.UhdVersion[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td>USRP Board</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.UsrpBoard[idx] + '</span></td>\n')
+				self.htmlFile.write('      </tr>\n')
+				self.htmlFile.write('      <tr>\n')
+				self.htmlFile.write('        <td>Nb CPUs</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.CpuNb[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td>CPU Model Name</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.CpuModel[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td>CPU Frequency</td>\n')
+				self.htmlFile.write('        <td><span class="label label-default">' + self.CpuMHz[idx] + '</span></td>\n')
+				self.htmlFile.write('        <td></td>\n')
+				self.htmlFile.write('        <td></td>\n')
+				self.htmlFile.write('      </tr>\n')
+
+			self.htmlFile.write('      <tr>\n')
+			self.htmlFile.write('        <th colspan=5 bgcolor = "#33CCFF">Final Status</th>\n')
+			if passStatus:
+				self.htmlFile.write('        <th colspan=3 bgcolor="green"><font color="white">PASS <span class="glyphicon glyphicon-ok"></span></font></th>\n')
+			else:
+				self.htmlFile.write('        <th colspan=3 bgcolor="red"><font color="white">FAIL <span class="glyphicon glyphicon-remove"></span> </font></th>\n')
+			self.htmlFile.write('      </tr>\n')
+			self.htmlFile.write('  </table>\n')
+			self.htmlFile.write('  <p></p>\n')
+			self.htmlFile.write('  <div class="well well-lg">End of Test Report -- Copyright <span class="glyphicon glyphicon-copyright-mark"></span> 2018 <a href="http://www.openairinterface.org/">OpenAirInterface</a>. All Rights Reserved.</div>\n')
+			self.htmlFile.write('</div></body>\n')
+			self.htmlFile.write('</html>\n')
+			self.htmlFile.close()
+
+	def CreateHtmlRetrySeparator(self, cntnumfails):
+		if ((not self.htmlFooterCreated) and (self.htmlHeaderCreated)):
+			self.htmlFile = open('test_results.html', 'a')
+			self.htmlFile.write('      <tr bgcolor = "#F0F0F0" >\n')
+			self.htmlFile.write('        <td colspan=' + str(5+self.htmlUEConnected) + '><b> ---- Try Run #' + str(cntnumfails) + ' ---- </b></td>\n')
+			self.htmlFile.write('      </tr>\n')
+			self.htmlFile.close()
+
+	def CreateHtmlTestRow(self, options, status, processesStatus, machine='eNB'):
+		if (self.htmlFooterCreated or (not self.htmlHeaderCreated)):
+			return
+		self.htmlFile = open('test_results.html', 'a')
+		currentTime = int(round(time.time() * 1000)) - self.startTime
+		self.htmlFile.write('      <tr>\n')
+		self.htmlFile.write('        <td bgcolor = "lightcyan" >' + format(currentTime / 1000, '.1f') + '</td>\n')
+		self.htmlFile.write('        <td bgcolor = "lightcyan" >' + self.testCase_id  + '</td>\n')
+		self.htmlFile.write('        <td>' + self.desc  + '</td>\n')
+		self.htmlFile.write('        <td>' + str(options)  + '</td>\n')
+		if (str(status) == 'OK'):
+			self.htmlFile.write('        <td bgcolor = "lightgreen" >' + str(status)  + '</td>\n')
+		elif (str(status) == 'KO'):
+			if (processesStatus == 0):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >' + str(status)  + '</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_FAILED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - eNB process not found</td>\n')
+			elif (processesStatus == CONST.OAI_UE_PROCESS_FAILED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - OAI UE process not found</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_SEG_FAULT) or (processesStatus == CONST.OAI_UE_PROCESS_SEG_FAULT):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - ' + machine + ' process ended in Segmentation Fault</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_ASSERTION) or (processesStatus == CONST.OAI_UE_PROCESS_ASSERTION):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - ' + machine + ' process ended in Assertion</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_REALTIME_ISSUE):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - ' + machine + ' process faced Real Time issue(s)</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_NOLOGFILE_TO_ANALYZE) or (processesStatus == CONST.OAI_UE_PROCESS_NOLOGFILE_TO_ANALYZE):
+				self.htmlFile.write('        <td bgcolor = "orange" >OK?</td>\n')
+			elif (processesStatus == CONST.ENB_PROCESS_SLAVE_RRU_NOT_SYNCED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - ' + machine + ' Slave RRU could not synch</td>\n')
+			elif (processesStatus == CONST.OAI_UE_PROCESS_COULD_NOT_SYNC):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - UE could not sync</td>\n')
+			elif (processesStatus == CONST.HSS_PROCESS_FAILED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - HSS process not found</td>\n')
+			elif (processesStatus == CONST.MME_PROCESS_FAILED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - MME process not found</td>\n')
+			elif (processesStatus == CONST.SPGW_PROCESS_FAILED):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - SPGW process not found</td>\n')
+			elif (processesStatus == CONST.UE_IP_ADDRESS_ISSUE):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >KO - Could not retrieve UE IP address</td>\n')
+			else:
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >' + str(status)  + '</td>\n')
+		else:
+			self.htmlFile.write('        <td bgcolor = "orange" >' + str(status)  + '</td>\n')
+		if (len(str(self.htmleNBFailureMsg)) > 2):
+			cellBgColor = 'white'
+			result = re.search('ended with|faced real time issues', self.htmleNBFailureMsg)
+			if result is not None:
+				cellBgColor = 'red'
+			else:
+				result = re.search('showed|Reestablishment|Could not copy eNB logfile', self.htmleNBFailureMsg)
+				if result is not None:
+					cellBgColor = 'orange'
+			self.htmlFile.write('        <td bgcolor = "' + cellBgColor + '" colspan=' + str(self.htmlUEConnected) + '><pre style="background-color:' + cellBgColor + '">' + self.htmleNBFailureMsg + '</pre></td>\n')
+			self.htmleNBFailureMsg = ''
+		elif (len(str(self.htmlUEFailureMsg)) > 2):
+			cellBgColor = 'white'
+			result = re.search('ended with|faced real time issues', self.htmlUEFailureMsg)
+			if result is not None:
+				cellBgColor = 'red'
+			else:
+				result = re.search('showed|Could not copy UE logfile|oaitun_ue1 interface is either NOT mounted or NOT configured', self.htmlUEFailureMsg)
+				if result is not None:
+					cellBgColor = 'orange'
+			self.htmlFile.write('        <td bgcolor = "' + cellBgColor + '" colspan=' + str(self.htmlUEConnected) + '><pre style="background-color:' + cellBgColor + '">' + self.htmlUEFailureMsg + '</pre></td>\n')
+			self.htmlUEFailureMsg = ''
+		else:
+			i = 0
+			while (i < self.htmlUEConnected):
+				self.htmlFile.write('        <td>-</td>\n')
+				i += 1
+		self.htmlFile.write('      </tr>\n')
+		self.htmlFile.close()
+
+	def CreateHtmlTestRowQueue(self, options, status, ue_status, ue_queue):
+		if ((not self.htmlFooterCreated) and (self.htmlHeaderCreated)):
+			self.htmlFile = open('test_results.html', 'a')
+			currentTime = int(round(time.time() * 1000)) - self.startTime
+			addOrangeBK = False
+			self.htmlFile.write('      <tr>\n')
+			self.htmlFile.write('        <td bgcolor = "lightcyan" >' + format(currentTime / 1000, '.1f') + '</td>\n')
+			self.htmlFile.write('        <td bgcolor = "lightcyan" >' + self.testCase_id  + '</td>\n')
+			self.htmlFile.write('        <td>' + self.desc  + '</td>\n')
+			self.htmlFile.write('        <td>' + str(options)  + '</td>\n')
+			if (str(status) == 'OK'):
+				self.htmlFile.write('        <td bgcolor = "lightgreen" >' + str(status)  + '</td>\n')
+			elif (str(status) == 'KO'):
+				self.htmlFile.write('        <td bgcolor = "lightcoral" >' + str(status)  + '</td>\n')
+			else:
+				addOrangeBK = True
+				self.htmlFile.write('        <td bgcolor = "orange" >' + str(status)  + '</td>\n')
+			i = 0
+			while (i < self.htmlUEConnected):
+				if (i < ue_status):
+					if (not ue_queue.empty()):
+						if (addOrangeBK):
+							self.htmlFile.write('        <td bgcolor = "orange" >' + str(ue_queue.get()).replace('white', 'orange') + '</td>\n')
+						else:
+							self.htmlFile.write('        <td>' + str(ue_queue.get()) + '</td>\n')
+					else:
+						self.htmlFile.write('        <td>-</td>\n')
+				else:
+					self.htmlFile.write('        <td>-</td>\n')
+				i += 1
+			self.htmlFile.write('      </tr>\n')
+			self.htmlFile.close()
+
diff --git a/ci-scripts/ran.py b/ci-scripts/ran.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b5add0adb5b968f8197e8bc91a283d7c32f9a7a
--- /dev/null
+++ b/ci-scripts/ran.py
@@ -0,0 +1,1059 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Import
+#-----------------------------------------------------------
+import sys              # arg
+import re               # reg
+import logging
+import os
+import time
+from multiprocessing import Process, Lock, SimpleQueue
+
+#-----------------------------------------------------------
+# OAI Testing modules
+#-----------------------------------------------------------
+import sshconnection as SSH
+import epc 
+import helpreadme as HELP
+import constants as CONST
+import html
+
+#-----------------------------------------------------------
+# Class Declaration
+#-----------------------------------------------------------
+class RANManagement():
+
+	def __init__(self):
+		"""Set every configuration and run-time field to its empty/default value."""
+		self.prematureExit = False
+		self.ranRepository = ''
+		self.ranBranch = ''
+		self.ranAllowMerge = False
+		self.ranCommitID = ''
+		self.ranTargetBranch = ''
+		self.eNBIPAddress = ''
+		self.eNBUserName = ''
+		self.eNBPassword = ''
+		self.eNBSourceCodePath = ''
+		self.eNB1IPAddress = ''
+		self.eNB1UserName = ''
+		self.eNB1Password = ''
+		self.eNB1SourceCodePath = ''
+		self.eNB2IPAddress = ''
+		self.eNB2UserName = ''
+		self.eNB2Password = ''
+		self.eNB2SourceCodePath = ''
+		self.Build_eNB_args = ''
+		self.backgroundBuild = False
+		self.backgroundBuildTestId = ['', '', '']
+		self.Build_eNB_forced_workspace_cleanup = False
+		self.Initialize_eNB_args = ''
+		self.air_interface = 'lte'
+		self.eNB_instance = ''
+		self.eNB_serverId = ''
+		self.eNBLogFiles = ['', '', '']
+		self.eNBOptions = ['', '', '']
+		self.eNBmbmsEnables = [False, False, False]
+		self.eNBstatuses = [-1, -1, -1]
+		self.pStatus = CONST.ALL_PROCESSES_OK  # read by InitializeeNB; the default avoids AttributeError when SetpStatus was never called
+		self.flexranCtrlInstalled = False
+		self.flexranCtrlStarted = False
+		self.testCase_id = ''
+		self.epcPcapFile = ''
+		self.htmlObj = self.epcObj = None  # collaborators injected later through SetHtmlObj / SetEpcObj
+
+#-----------------------------------------------------------
+# Setters and Getters on Public members
+#-----------------------------------------------------------
+
+	def SetHtmlObj(self, obj):  # HTML reporter; this class calls its CreateHtmlTestRow / GettestCase_id / SettestCase_id
+		self.htmlObj = obj
+	def SetEpcObj(self, obj):  # EPC object; queried for its IP address, user name, password and MME IP address
+		self.epcObj = obj
+
+	def SetflexranCtrlInstalled(self,fxrctin):  # both flexran flags must be true for InitializeeNB to write FLEXRAN_ENABLED = "yes"
+		self.flexranCtrlInstalled = fxrctin
+	def GetflexranCtrlInstalled(self):
+		return self.flexranCtrlInstalled
+	def SetflexranCtrlStarted(self,fxrctst):
+		self.flexranCtrlStarted = fxrctst
+	def GetflexranCtrlStarted(self):
+		return self.flexranCtrlStarted
+	def SetpStatus(self, pSt):  # InitializeeNB reports KO and exits when this value is negative
+		self.pStatus = pSt
+	def SetranRepository(self, repository):  # repository URL; BuildeNB appends '.git' when it is missing
+		self.ranRepository = repository
+	def GetranRepository(self):
+		return self.ranRepository
+	def SetranBranch(self, branch):
+		self.ranBranch = branch
+	def GetranBranch(self):
+		return self.ranBranch
+	def SetranCommitID(self, commitid):  # commit checked out by BuildeNB and recorded in LAST_BUILD_INFO.txt
+		self.ranCommitID = commitid
+	def GetranCommitID(self):
+		return self.ranCommitID
+	def SeteNB_serverId(self, enbsrvid):  # '0', '1' or '2' (string): selects the eNB / eNB1 / eNB2 credentials
+		self.eNB_serverId = enbsrvid
+	def GeteNB_serverId(self):
+		return self.eNB_serverId
+	def SeteNBIPAddress(self, enbip):
+		self.eNBIPAddress = enbip
+	def GeteNBIPAddress(self):
+		return self.eNBIPAddress
+	def SeteNBUserName(self, enbusr):
+		self.eNBUserName = enbusr
+	def GeteNBUserName(self):
+		return self.eNBUserName
+	def SeteNBPassword(self, enbpw):
+		self.eNBPassword = enbpw
+	def GeteNBPassword(self):
+		return self.eNBPassword
+	def SeteNBSourceCodePath(self, enbcodepath):
+		self.eNBSourceCodePath = enbcodepath
+	def GeteNBSourceCodePath(self):
+		return self.eNBSourceCodePath
+	def SetranAllowMerge(self, merge):  # when true, BuildeNB may merge origin/<target branch> before building
+		self.ranAllowMerge = merge
+	def GetranAllowMerge(self):
+		return self.ranAllowMerge
+	def SetranTargetBranch(self, tbranch):  # empty string means 'develop' in the build-info checks
+		self.ranTargetBranch = tbranch
+	def GetranTargetBranch(self):
+		return self.ranTargetBranch
+	def SetBuild_eNB_args(self, enbbuildarg):  # appended to ./build_oai; containing '--gNB' switches air_interface to 'nr'
+		self.Build_eNB_args = enbbuildarg
+	def GetBuild_eNB_args(self):
+		return self.Build_eNB_args
+	def SetInitialize_eNB_args(self, initenbarg):  # '-O <path>.conf' optionally followed by extra softmodem options
+		self.Initialize_eNB_args = initenbarg
+	def GetInitialize_eNB_args(self):
+		return self.Initialize_eNB_args
+	def SetbackgroundBuild(self, bkbuild):
+		self.backgroundBuild = bkbuild
+	def GetbackgroundBuild(self):
+		return self.backgroundBuild
+	def SetbackgroundBuildTestId(self, bkbuildid):  # replaces the whole list that is indexed by eNB instance
+		self.backgroundBuildTestId = bkbuildid
+	def GetbackgroundBuildTestId(self):
+		return self.backgroundBuildTestId
+	def SetBuild_eNB_forced_workspace_cleanup(self, fcdwspclean):  # when true, BuildeNB removes the source directory first
+		self.Build_eNB_forced_workspace_cleanup = fcdwspclean
+	def GetBuild_eNB_forced_workspace_cleanup(self):
+		return self.Build_eNB_forced_workspace_cleanup
+	def Setair_interface(self, airif):  # 'lte' or 'nr'; used as prefix of the '-softmodem' binary name
+		self.air_interface = airif
+	def Getair_interface(self):
+		return self.air_interface
+	def SeteNB_instance(self, enbinst):  # converted with int() to index the per-instance lists
+		self.eNB_instance = enbinst
+	def GeteNB_instance(self):
+		return self.eNB_instance
+
+	def SeteNBLogFile(self, enblog, idx):  # idx is the eNB instance index into eNBLogFiles
+		self.eNBLogFiles[idx] = enblog
+	def GeteNBLogFile(self, idx):
+		return self.eNBLogFiles[idx]
+
+	def GeteNBmbmsEnable(self, idx):  # flag set by InitializeeNB when the config file has enable_enb_m2 set to yes
+		return self.eNBmbmsEnables[idx]
+
+	def SeteNB1IPAddress(self,enb1ip):  # eNB1 settings are the ones selected by eNB_serverId == '1'
+		self.eNB1IPAddress = enb1ip
+	def GeteNB1IPAddress(self):
+		return self.eNB1IPAddress
+	def SeteNB1UserName(self, enb1usr):
+		self.eNB1UserName = enb1usr
+	def GeteNB1UserName(self):
+		return self.eNB1UserName
+	def SeteNB1Password(self, enb1pw):
+		self.eNB1Password = enb1pw
+	def GeteNB1Password(self):
+		return self.eNB1Password
+	def SeteNB1SourceCodePath(self, enb1codepath):
+		self.eNB1SourceCodePath = enb1codepath
+	def GeteNB1SourceCodePath(self):
+		return self.eNB1SourceCodePath
+
+	def SeteNB2IPAddress(self, enb2ip):  # eNB2 settings are the ones selected by eNB_serverId == '2'
+		self.eNB2IPAddress = enb2ip
+	def GeteNB2IPAddress(self):
+		return self.eNB2IPAddress
+	def SeteNB2UserName(self, enb2usr):
+		self.eNB2UserName = enb2usr
+	def GeteNB2UserName(self):
+		return self.eNB2UserName
+	def SeteNB2Password(self, enb2pw):
+		self.eNB2Password = enb2pw
+	def GeteNB2Password(self):
+		return self.eNB2Password
+	def SeteNB2SourceCodePath(self, enb2codepath):
+		self.eNB2SourceCodePath = enb2codepath
+	def GeteNB2SourceCodePath(self):
+		return self.eNB2SourceCodePath
+
+	def SetprematureExit(self, premex):  # InitializeeNB sets this flag to True when the eNB never reports sync
+		self.prematureExit = premex
+	def GetprematureExit(self):
+		return self.prematureExit
+
+#-----------------------------------------------------------
+# RAN management functions
+#-----------------------------------------------------------
+
+	def BuildeNB(self):
+		"""Fetch the RAN sources on the selected server and build the eNB/gNB softmodem.
+
+		A foreground build is skipped when LAST_BUILD_INFO.txt already matches the
+		requested commit and merge settings. With backgroundBuild set, the build is
+		started as a daemon and this method returns immediately; otherwise it waits
+		for build_oai and then calls checkBuildeNB.
+		"""
+		if self.ranRepository == '' or self.ranBranch == '' or self.ranCommitID == '':
+			HELP.GenericHelp(CONST.Version)
+			sys.exit('Insufficient Parameter')
+		# An unknown eNB_serverId maps to empty credentials, so the check below exits
+		# with 'Insufficient Parameter' instead of raising UnboundLocalError.
+		serverCreds = {
+			'0': (self.eNBIPAddress, self.eNBUserName, self.eNBPassword, self.eNBSourceCodePath),
+			'1': (self.eNB1IPAddress, self.eNB1UserName, self.eNB1Password, self.eNB1SourceCodePath),
+			'2': (self.eNB2IPAddress, self.eNB2UserName, self.eNB2Password, self.eNB2SourceCodePath),
+		}
+		lIpAddr, lUserName, lPassWord, lSourcePath = serverCreds.get(self.eNB_serverId, ('', '', '', ''))
+		if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
+			HELP.GenericHelp(CONST.Version)
+			sys.exit('Insufficient Parameter')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(lIpAddr, lUserName, lPassWord)
+		# Check if we build an 5G-NR gNB or an LTE eNB
+		result = re.search('--gNB', self.Build_eNB_args)
+		if result is not None:
+			self.air_interface = 'nr'
+		else:
+			self.air_interface = 'lte'
+		# Workaround for some servers, we need to erase completely the workspace
+		if self.Build_eNB_forced_workspace_cleanup:
+			mySSH.command('echo ' + lPassWord + ' | sudo -S rm -Rf ' + lSourcePath, r'\$', 15)
+		if self.htmlObj is not None:
+			self.testCase_id = self.htmlObj.GettestCase_id()
+		else:
+			self.testCase_id = '000000'
+		# on RedHat/CentOS .git extension is mandatory
+		result = re.search(r'([a-zA-Z0-9\:\-\.\/])+\.git', self.ranRepository)
+		if result is not None:
+			full_ran_repo_name = self.ranRepository
+		else:
+			full_ran_repo_name = self.ranRepository + '.git'
+		mySSH.command('mkdir -p ' + lSourcePath, r'\$', 5)
+		mySSH.command('cd ' + lSourcePath, r'\$', 5)
+		mySSH.command('if [ ! -e .git ]; then stdbuf -o0 git clone ' + full_ran_repo_name + ' .; else stdbuf -o0 git fetch --prune; fi', r'\$', 600)
+		# Raphael: here add a check if git clone or git fetch went smoothly
+		mySSH.command('git config user.email "jenkins@openairinterface.org"', r'\$', 5)
+		mySSH.command('git config user.name "OAI Jenkins"', r'\$', 5)
+		# Checking the BUILD INFO file
+		if not self.backgroundBuild:
+			mySSH.command('ls *.txt', r'\$', 5)
+			result = re.search('LAST_BUILD_INFO', mySSH.getBefore())
+			if result is not None:
+				mismatch = False
+				mySSH.command('grep SRC_COMMIT LAST_BUILD_INFO.txt', r'\$', 2)
+				result = re.search(self.ranCommitID, mySSH.getBefore())
+				if result is None:
+					mismatch = True
+				mySSH.command('grep MERGED_W_TGT_BRANCH LAST_BUILD_INFO.txt', r'\$', 2)
+				if (self.ranAllowMerge):
+					result = re.search('YES', mySSH.getBefore())
+					if result is None:
+						mismatch = True
+					mySSH.command('grep TGT_BRANCH LAST_BUILD_INFO.txt', r'\$', 2)
+					if self.ranTargetBranch == '':
+						result = re.search('develop', mySSH.getBefore())
+					else:
+						result = re.search(self.ranTargetBranch, mySSH.getBefore())
+					if result is None:
+						mismatch = True
+				else:
+					result = re.search('NO', mySSH.getBefore())
+					if result is None:
+						mismatch = True
+				if not mismatch:
+					mySSH.close()
+					if self.htmlObj is not None:
+						self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
+					return
+
+		mySSH.command('echo ' + lPassWord + ' | sudo -S git clean -x -d -ff', r'\$', 30)
+		# if the commit ID is provided use it to point to it
+		if self.ranCommitID != '':
+			mySSH.command('git checkout -f ' + self.ranCommitID, r'\$', 5)
+		# if the branch is not develop, then it is a merge request and we need to do 
+		# the potential merge. Note that merge conflicts should already been checked earlier
+		if (self.ranAllowMerge):
+			if self.ranTargetBranch == '':
+				if (self.ranBranch != 'develop') and (self.ranBranch != 'origin/develop'):
+					mySSH.command('git merge --ff origin/develop -m "Temporary merge for CI"', r'\$', 5)
+			else:
+				logging.debug('Merging with the target branch: ' + self.ranTargetBranch)
+				mySSH.command('git merge --ff origin/' + self.ranTargetBranch + ' -m "Temporary merge for CI"', r'\$', 5)
+		mySSH.command('source oaienv', r'\$', 5)
+		mySSH.command('cd cmake_targets', r'\$', 5)
+		mySSH.command('mkdir -p log', r'\$', 5)
+		mySSH.command('chmod 777 log', r'\$', 5)
+		# no need to remove in log (git clean did the trick)
+		if self.backgroundBuild:
+			mySSH.command('echo "./build_oai ' + self.Build_eNB_args + '" > ./my-lte-softmodem-build.sh', r'\$', 5)
+			mySSH.command('chmod 775 ./my-lte-softmodem-build.sh', r'\$', 5)
+			mySSH.command('echo ' + lPassWord + ' | sudo -S -E daemon --inherit --unsafe --name=build_enb_daemon --chdir=' + lSourcePath + '/cmake_targets -o ' + lSourcePath + '/cmake_targets/compile_oai_enb.log ./my-lte-softmodem-build.sh', r'\$', 5)
+			mySSH.close()
+			if self.htmlObj is not None:
+				self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
+			self.backgroundBuildTestId[int(self.eNB_instance)] = self.testCase_id
+			return
+		mySSH.command('stdbuf -o0 ./build_oai ' + self.Build_eNB_args + ' 2>&1 | stdbuf -o0 tee compile_oai_enb.log', 'Bypassing the Tests|build have failed', 1500)
+		mySSH.close()
+		self.checkBuildeNB(lIpAddr, lUserName, lPassWord, lSourcePath, self.testCase_id)
+
+	def WaitBuildeNBisFinished(self):
+		"""Poll the build server until no build_oai process is left, then check the build.
+
+		Polls every 30 seconds, at most 40 times, and then hands over to checkBuildeNB
+		with the test-case id stored for this eNB instance by the background build.
+		"""
+		# An unknown eNB_serverId maps to empty credentials, so the check below exits
+		# with 'Insufficient Parameter' instead of raising UnboundLocalError.
+		serverCreds = {
+			'0': (self.eNBIPAddress, self.eNBUserName, self.eNBPassword, self.eNBSourceCodePath),
+			'1': (self.eNB1IPAddress, self.eNB1UserName, self.eNB1Password, self.eNB1SourceCodePath),
+			'2': (self.eNB2IPAddress, self.eNB2UserName, self.eNB2Password, self.eNB2SourceCodePath),
+		}
+		lIpAddr, lUserName, lPassWord, lSourcePath = serverCreds.get(self.eNB_serverId, ('', '', '', ''))
+		if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
+			HELP.GenericHelp(CONST.Version)
+			sys.exit('Insufficient Parameter')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(lIpAddr, lUserName, lPassWord)
+		# 40 polls with a 30 s sleep: the background build gets about 20 minutes.
+		count = 40
+		buildOAIprocess = True
+		while (count > 0) and buildOAIprocess:
+			mySSH.command('ps aux | grep --color=never build_ | grep -v grep', r'\$', 3)
+			result = re.search('build_oai', mySSH.getBefore())
+			if result is None:
+				buildOAIprocess = False
+			else:
+				count -= 1
+				time.sleep(30)
+		mySSH.close()
+		# The build outcome is checked even when the polling budget ran out.
+		self.checkBuildeNB(lIpAddr, lUserName, lPassWord, lSourcePath, self.backgroundBuildTestId[int(self.eNB_instance)])
+
+	def checkBuildeNB(self, lIpAddr, lUserName, lPassWord, lSourcePath, testcaseId):
+		"""Check that the softmodem binary was built, archive the build logs and report.
+
+		Writes LAST_BUILD_INFO.txt on success; logs from a secondary server are copied
+		to the main eNB server. A failed build ends the run with sys.exit(1).
+		"""
+		if self.htmlObj is not None:
+			self.htmlObj.SettestCase_id(testcaseId)
+		mySSH = SSH.SSHConnection()
+		mySSH.open(lIpAddr, lUserName, lPassWord)
+		mySSH.command('cd ' + lSourcePath + '/cmake_targets', r'\$', 3)
+		# NOTE(review): the listing is issued twice; presumably so getBefore() holds a clean output - confirm
+		mySSH.command('ls ran_build/build', r'\$', 3)
+		mySSH.command('ls ran_build/build', r'\$', 3)
+		nodeB_prefix = 'g' if self.air_interface == 'nr' else 'e'
+		buildStatus = re.search(self.air_interface + '-softmodem', mySSH.getBefore()) is not None
+		if buildStatus:
+			# Generating a BUILD INFO file
+			mySSH.command('echo "SRC_BRANCH: ' + self.ranBranch + '" > ../LAST_BUILD_INFO.txt', r'\$', 2)
+			mySSH.command('echo "SRC_COMMIT: ' + self.ranCommitID + '" >> ../LAST_BUILD_INFO.txt', r'\$', 2)
+			if (self.ranAllowMerge):
+				mySSH.command('echo "MERGED_W_TGT_BRANCH: YES" >> ../LAST_BUILD_INFO.txt', r'\$', 2)
+				if self.ranTargetBranch == '':
+					mySSH.command('echo "TGT_BRANCH: develop" >> ../LAST_BUILD_INFO.txt', r'\$', 2)
+				else:
+					mySSH.command('echo "TGT_BRANCH: ' + self.ranTargetBranch + '" >> ../LAST_BUILD_INFO.txt', r'\$', 2)
+			else:
+				mySSH.command('echo "MERGED_W_TGT_BRANCH: NO" >> ../LAST_BUILD_INFO.txt', r'\$', 2)
+		mySSH.command('mkdir -p build_log_' + testcaseId, r'\$', 5)
+		mySSH.command('mv log/* ' + 'build_log_' + testcaseId, r'\$', 5)
+		mySSH.command('mv compile_oai_enb.log ' + 'build_log_' + testcaseId, r'\$', 5)
+		if self.eNB_serverId != '0':
+			mySSH.command('cd cmake_targets', r'\$', 5)
+			mySSH.command('if [ -e tmp_build' + testcaseId + '.zip ]; then rm -f tmp_build' + testcaseId + '.zip; fi', r'\$', 5)
+			mySSH.command('zip -r -qq tmp_build' + testcaseId + '.zip build_log_' + testcaseId, r'\$', 5)
+			mySSH.close()
+			if (os.path.isfile('./tmp_build' + testcaseId + '.zip')):
+				os.remove('./tmp_build' + testcaseId + '.zip')
+			mySSH.copyin(lIpAddr, lUserName, lPassWord, lSourcePath + '/cmake_targets/tmp_build' + testcaseId + '.zip', '.')
+			if (os.path.isfile('./tmp_build' + testcaseId + '.zip')):
+				mySSH.copyout(self.eNBIPAddress, self.eNBUserName, self.eNBPassword, './tmp_build' + testcaseId + '.zip', self.eNBSourceCodePath + '/cmake_targets/.')
+				os.remove('./tmp_build' + testcaseId + '.zip')
+				mySSH.open(self.eNBIPAddress, self.eNBUserName, self.eNBPassword)
+				mySSH.command('cd ' + self.eNBSourceCodePath + '/cmake_targets', r'\$', 5)
+				mySSH.command('unzip -qq -DD tmp_build' + testcaseId + '.zip', r'\$', 5)
+				mySSH.command('rm -f tmp_build' + testcaseId + '.zip', r'\$', 5)
+				mySSH.close()
+		else:
+			mySSH.close()
+
+		if buildStatus:
+			logging.info('\u001B[1m Building OAI ' + nodeB_prefix + 'NB Pass\u001B[0m')
+			if self.htmlObj is not None:
+				self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
+		else:
+			logging.error('\u001B[1m Building OAI ' + nodeB_prefix + 'NB Failed\u001B[0m')
+			if self.htmlObj is not None:
+				self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'KO', CONST.ALL_PROCESSES_OK)
+				self.htmlObj.CreateHtmlTabFooter(False)
+			sys.exit(1)
+
+	def InitializeeNB(self):
+		"""Start the eNB/gNB softmodem on the selected server and wait until it is up.
+
+		The configuration file named in Initialize_eNB_args is copied to a 'ci-' file
+		and its CI_* placeholders are replaced before launch. When T_stdout is requested,
+		tshark (on the EPC, if one is set) and the T tracer recorder are started too. On
+		timeout the row is reported KO and prematureExit is set; a negative pStatus exits.
+		"""
+		# An unknown eNB_serverId maps to empty credentials, so the check below exits
+		# with 'Insufficient Parameter' instead of raising UnboundLocalError.
+		serverCreds = {
+			'0': (self.eNBIPAddress, self.eNBUserName, self.eNBPassword, self.eNBSourceCodePath),
+			'1': (self.eNB1IPAddress, self.eNB1UserName, self.eNB1Password, self.eNB1SourceCodePath),
+			'2': (self.eNB2IPAddress, self.eNB2UserName, self.eNB2Password, self.eNB2SourceCodePath),
+		}
+		lIpAddr, lUserName, lPassWord, lSourcePath = serverCreds.get(self.eNB_serverId, ('', '', '', ''))
+		if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
+			HELP.GenericHelp(CONST.Version)
+			sys.exit('Insufficient Parameter')
+
+		if self.htmlObj is not None:
+			self.testCase_id = self.htmlObj.GettestCase_id()
+		else:
+			self.testCase_id = '000000'
+		mySSH = SSH.SSHConnection()
+		# Negative pStatus values are the failure codes of constants.py (e.g. ENB_PROCESS_FAILED)
+		if (self.pStatus < 0):
+			if self.htmlObj is not None:
+				self.htmlObj.CreateHtmlTestRow(self.Initialize_eNB_args, 'KO', self.pStatus)
+				self.htmlObj.CreateHtmlTabFooter(False)
+			sys.exit(1)
+		# If tracer options is on, running tshark on EPC side and capture traffic b/ EPC and eNB
+		result = re.search('T_stdout', str(self.Initialize_eNB_args))
+		if (result is not None) and (self.epcObj is not None):
+			localEpcIpAddr = self.epcObj.GetIPAddress()
+			localEpcUserName = self.epcObj.GetUserName()
+			localEpcPassword = self.epcObj.GetPassword()
+			mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
+			mySSH.command('ip addr show | awk -f /tmp/active_net_interfaces.awk | egrep -v "lo|tun"', r'\$', 5)
+			result = re.search(r'interfaceToUse=(?P<eth_interface>[a-zA-Z0-9\-\_]+)done', mySSH.getBefore())
+			if result is not None:
+				eth_interface = result.group('eth_interface')
+				logging.debug('\u001B[1m Launching tshark on interface ' + eth_interface + '\u001B[0m')
+				self.epcPcapFile = 'enb_' + self.testCase_id + '_s1log.pcap'
+				mySSH.command('echo ' + localEpcPassword + ' | sudo -S rm -f /tmp/' + self.epcPcapFile , r'\$', 5)
+				mySSH.command('echo $USER; nohup sudo tshark -f "host ' + lIpAddr +'" -i ' + eth_interface + ' -w /tmp/' + self.epcPcapFile + ' > /tmp/tshark.log 2>&1 &', localEpcUserName, 5)
+			mySSH.close()
+		mySSH.open(lIpAddr, lUserName, lPassWord)
+		mySSH.command('cd ' + lSourcePath, r'\$', 5)
+		# Initialize_eNB_args usually start with -O and followed by the location in repository
+		full_config_file = self.Initialize_eNB_args.replace('-O ','')
+		extra_options = ''
+		extIdx = full_config_file.find('.conf')
+		if (extIdx > 0):
+			extra_options = full_config_file[extIdx + 5:]
+			# if tracer options is on, compiling and running T Tracer
+			result = re.search('T_stdout', str(extra_options))
+			if result is not None:
+				logging.debug('\u001B[1m Compiling and launching T Tracer\u001B[0m')
+				mySSH.command('cd common/utils/T/tracer', r'\$', 5)
+				mySSH.command('make', r'\$', 10)
+				mySSH.command('echo $USER; nohup ./record -d ../T_messages.txt -o ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '_record.raw -ON -off VCD -off HEAVY -off LEGACY_GROUP_TRACE -off LEGACY_GROUP_DEBUG > ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '_record.log 2>&1 &', lUserName, 5)
+				mySSH.command('cd ' + lSourcePath, r'\$', 5)
+			full_config_file = full_config_file[:extIdx + 5]
+			config_path, config_file = os.path.split(full_config_file)
+		else:
+			sys.exit('Insufficient Parameter')
+		ci_full_config_file = config_path + '/ci-' + config_file
+		rruCheck = False
+		result = re.search('^rru|^rcc|^du.band', str(config_file))
+		if result is not None:
+			rruCheck = True
+		# do not reset board twice in IF4.5 case
+		result = re.search('^rru|^enb|^du.band', str(config_file))
+		if result is not None:
+			mySSH.command('echo ' + lPassWord + ' | sudo -S uhd_find_devices', r'\$', 60)
+			result = re.search('type: b200', mySSH.getBefore())
+			if result is not None:
+				logging.debug('Found a B2xx device --> resetting it')
+				mySSH.command('echo ' + lPassWord + ' | sudo -S b2xx_fx3_utils --reset-device', r'\$', 10)
+				# Reloading FGPA bin firmware
+				mySSH.command('echo ' + lPassWord + ' | sudo -S uhd_find_devices', r'\$', 60)
+		# Make a copy and adapt to EPC / eNB IP addresses
+		mySSH.command('cp ' + full_config_file + ' ' + ci_full_config_file, r'\$', 5)
+		if self.epcObj is not None:
+			localMmeIpAddr = self.epcObj.GetMmeIPAddress()
+			mySSH.command('sed -i -e \'s/CI_MME_IP_ADDR/' + localMmeIpAddr + '/\' ' + ci_full_config_file, r'\$', 2)
+		mySSH.command('sed -i -e \'s/CI_ENB_IP_ADDR/' + lIpAddr + '/\' ' + ci_full_config_file, r'\$', 2)
+		mySSH.command('sed -i -e \'s/CI_RCC_IP_ADDR/' + self.eNBIPAddress + '/\' ' + ci_full_config_file, r'\$', 2)
+		mySSH.command('sed -i -e \'s/CI_RRU1_IP_ADDR/' + self.eNB1IPAddress + '/\' ' + ci_full_config_file, r'\$', 2)
+		mySSH.command('sed -i -e \'s/CI_RRU2_IP_ADDR/' + self.eNB2IPAddress + '/\' ' + ci_full_config_file, r'\$', 2)
+		if self.flexranCtrlInstalled and self.flexranCtrlStarted:
+			mySSH.command('sed -i -e \'s/FLEXRAN_ENABLED.*;/FLEXRAN_ENABLED        = "yes";/\' ' + ci_full_config_file, r'\$', 2)
+		else:
+			mySSH.command('sed -i -e \'s/FLEXRAN_ENABLED.*;/FLEXRAN_ENABLED        = "no";/\' ' + ci_full_config_file, r'\$', 2)
+		self.eNBmbmsEnables[int(self.eNB_instance)] = False
+		mySSH.command('grep enable_enb_m2 ' + ci_full_config_file, r'\$', 2)
+		result = re.search('yes', mySSH.getBefore())
+		if result is not None:
+			self.eNBmbmsEnables[int(self.eNB_instance)] = True
+			logging.debug('\u001B[1m MBMS is enabled on this eNB\u001B[0m')
+		result = re.search('noS1', str(self.Initialize_eNB_args))
+		eNBinNoS1 = False
+		if result is not None:
+			eNBinNoS1 = True
+			logging.debug('\u001B[1m eNB is in noS1 configuration \u001B[0m')
+		# Launch eNB with the modified config file
+		mySSH.command('source oaienv', r'\$', 5)
+		mySSH.command('cd cmake_targets', r'\$', 5)
+		mySSH.command('echo "ulimit -c unlimited && ./ran_build/build/' + self.air_interface + '-softmodem -O ' + lSourcePath + '/' + ci_full_config_file + extra_options + '" > ./my-lte-softmodem-run' + str(self.eNB_instance) + '.sh', r'\$', 5)
+		mySSH.command('chmod 775 ./my-lte-softmodem-run' + str(self.eNB_instance) + '.sh', r'\$', 5)
+		mySSH.command('echo ' + lPassWord + ' | sudo -S rm -Rf enb_' + self.testCase_id + '.log', r'\$', 5)
+		mySSH.command('hostnamectl', r'\$', 5)
+		result = re.search('CentOS Linux 7', mySSH.getBefore())
+		if result is not None:
+			mySSH.command('echo $USER; nohup sudo ./my-lte-softmodem-run' + str(self.eNB_instance) + '.sh > ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '.log 2>&1 &', lUserName, 10)
+		else:
+			mySSH.command('echo ' + lPassWord + ' | sudo -S -E daemon --inherit --unsafe --name=enb' + str(self.eNB_instance) + '_daemon --chdir=' + lSourcePath + '/cmake_targets -o ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '.log ./my-lte-softmodem-run' + str(self.eNB_instance) + '.sh', r'\$', 5)
+		self.eNBLogFiles[int(self.eNB_instance)] = 'enb_' + self.testCase_id + '.log'
+		if extra_options != '':
+			self.eNBOptions[int(self.eNB_instance)] = extra_options
+		time.sleep(6)
+		doLoop = True
+		loopCounter = 20
+		enbDidSync = False
+		while (doLoop):
+			loopCounter = loopCounter - 1
+			if (loopCounter == 0):
+				# In case of T tracer recording, we may need to kill it
+				result = re.search('T_stdout', str(self.Initialize_eNB_args))
+				if result is not None:
+					mySSH.command('killall --signal SIGKILL record', r'\$', 5)
+				mySSH.close()
+				doLoop = False
+				logging.error('\u001B[1;37;41m eNB logging system did not show got sync! \u001B[0m')
+				if self.htmlObj is not None:
+					self.htmlObj.CreateHtmlTestRow('-O ' + config_file + extra_options, 'KO', CONST.ALL_PROCESSES_OK)
+				# In case of T tracer recording, we need to kill tshark on EPC side
+				result = re.search('T_stdout', str(self.Initialize_eNB_args))
+				if (result is not None) and (self.epcObj is not None):
+					localEpcIpAddr = self.epcObj.GetIPAddress()
+					localEpcUserName = self.epcObj.GetUserName()
+					localEpcPassword = self.epcObj.GetPassword()
+					mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
+					logging.debug('\u001B[1m Stopping tshark \u001B[0m')
+					mySSH.command('echo ' + localEpcPassword + ' | sudo -S killall --signal SIGKILL tshark', r'\$', 5)
+					if self.epcPcapFile  != '':
+						time.sleep(0.5)
+						mySSH.command('echo ' + localEpcPassword + ' | sudo -S chmod 666 /tmp/' + self.epcPcapFile, r'\$', 5)
+					mySSH.close()
+					time.sleep(1)
+					if self.epcPcapFile != '':
+						copyin_res = mySSH.copyin(localEpcIpAddr, localEpcUserName, localEpcPassword, '/tmp/' + self.epcPcapFile, '.')
+						if (copyin_res == 0):
+							mySSH.copyout(lIpAddr, lUserName, lPassWord, self.epcPcapFile, lSourcePath + '/cmake_targets/.')
+				self.prematureExit = True
+				return
+			else:
+				mySSH.command('stdbuf -o0 cat enb_' + self.testCase_id + '.log | egrep --text --color=never -i "wait|sync|Starting"', r'\$', 4)
+				if rruCheck:
+					result = re.search('wait RUs', mySSH.getBefore())
+				else:
+					result = re.search('got sync|Starting F1AP at CU', mySSH.getBefore())
+				if result is None:
+					time.sleep(6)
+				else:
+					doLoop = False
+					enbDidSync = True
+					time.sleep(10)
+
+		if enbDidSync and eNBinNoS1:
+			mySSH.command('ifconfig oaitun_enb1', r'\$', 4)
+			mySSH.command('ifconfig oaitun_enb1', r'\$', 4)
+			result = re.search('inet addr:1|inet 1', mySSH.getBefore())
+			if result is not None:
+				logging.debug('\u001B[1m oaitun_enb1 interface is mounted and configured\u001B[0m')
+			else:
+				logging.error('\u001B[1m oaitun_enb1 interface is either NOT mounted or NOT configured\u001B[0m')
+			if self.eNBmbmsEnables[int(self.eNB_instance)]:
+				mySSH.command('ifconfig oaitun_enm1', r'\$', 4)
+				result = re.search('inet addr', mySSH.getBefore())
+				if result is not None:
+					logging.debug('\u001B[1m oaitun_enm1 interface is mounted and configured\u001B[0m')
+				else:
+					logging.error('\u001B[1m oaitun_enm1 interface is either NOT mounted or NOT configured\u001B[0m')
+		if enbDidSync:
+			self.eNBstatuses[int(self.eNB_instance)] = int(self.eNB_serverId)
+
+		mySSH.close()
+		if self.htmlObj is not None:
+			self.htmlObj.CreateHtmlTestRow('-O ' + config_file + extra_options, 'OK', CONST.ALL_PROCESSES_OK)
+		logging.debug('\u001B[1m Initialize eNB Completed\u001B[0m')
+
+	def CheckeNBProcess(self, status_queue):
+		"""Report on status_queue whether a softmodem process runs on the instance-0 server.
+
+		Puts CONST.ENB_PROCESS_OK or CONST.ENB_PROCESS_FAILED on status_queue. On any
+		error the parent process is sent SIGUSR1 instead.
+		"""
+		# `signal` is not imported at module level; without this import the handler
+		# below would itself fail with NameError instead of notifying the parent.
+		import signal
+		try:
+			# At least the instance 0 SHALL be on!
+			# eNBstatuses[0] holds the server id recorded by InitializeeNB; any other
+			# value (e.g. the initial -1) falls back to the main eNB server.
+			serverCreds = {
+				1: (self.eNB1IPAddress, self.eNB1UserName, self.eNB1Password),
+				2: (self.eNB2IPAddress, self.eNB2UserName, self.eNB2Password),
+			}
+			lIpAddr, lUserName, lPassWord = serverCreds.get(self.eNBstatuses[0], (self.eNBIPAddress, self.eNBUserName, self.eNBPassword))
+			mySSH = SSH.SSHConnection()
+			mySSH.open(lIpAddr, lUserName, lPassWord)
+			mySSH.command('stdbuf -o0 ps -aux | grep --color=never ' + self.air_interface + '-softmodem | grep -v grep', r'\$', 5)
+			result = re.search(self.air_interface + '-softmodem', mySSH.getBefore())
+			if result is None:
+				logging.debug('\u001B[1;37;41m eNB Process Not Found! \u001B[0m')
+				status_queue.put(CONST.ENB_PROCESS_FAILED)
+			else:
+				status_queue.put(CONST.ENB_PROCESS_OK)
+			mySSH.close()
+		except:
+			# NOTE(review): kept as a bare except; presumably it must also catch SystemExit raised by the SSH helper - confirm
+			os.kill(os.getppid(),signal.SIGUSR1)
+
+	def TerminateeNB(self):
+		if self.eNB_serverId == '0':
+			lIpAddr = self.eNBIPAddress
+			lUserName = self.eNBUserName
+			lPassWord = self.eNBPassword
+			lSourcePath = self.eNBSourceCodePath
+		elif self.eNB_serverId == '1':
+			lIpAddr = self.eNB1IPAddress
+			lUserName = self.eNB1UserName
+			lPassWord = self.eNB1Password
+			lSourcePath = self.eNB1SourceCodePath
+		elif self.eNB_serverId == '2':
+			lIpAddr = self.eNB2IPAddress
+			lUserName = self.eNB2UserName
+			lPassWord = self.eNB2Password
+			lSourcePath = self.eNB2SourceCodePath
+		if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
+			HELP.GenericHelp(CONST.Version)
+			sys.exit('Insufficient Parameter')
+		mySSH = SSH.SSHConnection()
+		mySSH.open(lIpAddr, lUserName, lPassWord)
+		mySSH.command('cd ' + lSourcePath + '/cmake_targets', '\$', 5)
+		if self.air_interface == 'lte':
+			nodeB_prefix = 'e'
+		else:
+			nodeB_prefix = 'g'
+		mySSH.command('stdbuf -o0  ps -aux | grep --color=never softmodem | grep -v grep', '\$', 5)
+		result = re.search('-softmodem', mySSH.getBefore())
+		if result is not None:
+			mySSH.command('echo ' + lPassWord + ' | sudo -S daemon --name=enb' + str(self.eNB_instance) + '_daemon --stop', '\$', 5)
+			mySSH.command('echo ' + lPassWord + ' | sudo -S killall --signal SIGINT -r .*-softmodem || true', '\$', 5)
+			time.sleep(10)
+			mySSH.command('stdbuf -o0  ps -aux | grep --color=never softmodem | grep -v grep', '\$', 5)
+			result = re.search('-softmodem', mySSH.getBefore())
+			if result is not None:
+				mySSH.command('echo ' + lPassWord + ' | sudo -S killall --signal SIGKILL -r .*-softmodem || true', '\$', 5)
+				time.sleep(5)
+		mySSH.command('rm -f my-lte-softmodem-run' + str(self.eNB_instance) + '.sh', '\$', 5)
+		mySSH.close()
+		# If tracer options is on, stopping tshark on EPC side
+		result = re.search('T_stdout', str(self.Initialize_eNB_args))
+		if (result is not None) and (self.epcObj is not None):
+			localEpcIpAddr = self.epcObj.GetIPAddress()
+			localEpcUserName = self.epcObj.GetUserName()
+			localEpcPassword = self.epcObj.GetPassword()
+			mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
+			logging.debug('\u001B[1m Stopping tshark \u001B[0m')
+			mySSH.command('echo ' + localEpcPassword + ' | sudo -S killall --signal SIGKILL tshark', '\$', 5)
+			time.sleep(1)
+			if self.epcPcapFile != '':
+				mySSH.command('echo ' + localEpcPassword + ' | sudo -S chmod 666 /tmp/' + self.epcPcapFile, '\$', 5)
+				mySSH.copyin(localEpcIpAddr, localEpcUserName, localEpcPassword, '/tmp/' + self.epcPcapFile, '.')
+				mySSH.copyout(lIpAddr, lUserName, lPassWord, self.epcPcapFile, lSourcePath + '/cmake_targets/.')
+			mySSH.close()
+			logging.debug('\u001B[1m Replaying RAW record file\u001B[0m')
+			mySSH.open(lIpAddr, lUserName, lPassWord)
+			mySSH.command('cd ' + lSourcePath + '/common/utils/T/tracer/', '\$', 5)
+			enbLogFile = self.eNBLogFiles[int(self.eNB_instance)]
+			raw_record_file = enbLogFile.replace('.log', '_record.raw')
+			replay_log_file = enbLogFile.replace('.log', '_replay.log')
+			extracted_txt_file = enbLogFile.replace('.log', '_extracted_messages.txt')
+			extracted_log_file = enbLogFile.replace('.log', '_extracted_messages.log')
+			mySSH.command('./extract_config -i ' + lSourcePath + '/cmake_targets/' + raw_record_file + ' > ' + lSourcePath + '/cmake_targets/' + extracted_txt_file, '\$', 5)
+			mySSH.command('echo $USER; nohup ./replay -i ' + lSourcePath + '/cmake_targets/' + raw_record_file + ' > ' + lSourcePath + '/cmake_targets/' + replay_log_file + ' 2>&1 &', lUserName, 5)
+			mySSH.command('./textlog -d ' +  lSourcePath + '/cmake_targets/' + extracted_txt_file + ' -no-gui -ON -full > ' + lSourcePath + '/cmake_targets/' + extracted_log_file, '\$', 5)
+			mySSH.close()
+			mySSH.copyin(lIpAddr, lUserName, lPassWord, lSourcePath + '/cmake_targets/' + extracted_log_file, '.')
+			logging.debug('\u001B[1m Analyzing eNB replay logfile \u001B[0m')
+			logStatus = self.AnalyzeLogFile_eNB(extracted_log_file)
+			if self.htmlObj is not None:
+				self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+			self.eNBLogFiles[int(self.eNB_instance)] = ''
+		else:
+			analyzeFile = False
+			if self.eNBLogFiles[int(self.eNB_instance)] != '':
+				analyzeFile = True
+				fileToAnalyze = self.eNBLogFiles[int(self.eNB_instance)]
+				self.eNBLogFiles[int(self.eNB_instance)] = ''
+			if analyzeFile:
+				copyin_res = mySSH.copyin(lIpAddr, lUserName, lPassWord, lSourcePath + '/cmake_targets/' + fileToAnalyze, '.')
+				if (copyin_res == -1):
+					logging.debug('\u001B[1;37;41m Could not copy ' + nodeB_prefix + 'NB logfile to analyze it! \u001B[0m')
+					if self.htmlObj is not None:
+						self.htmlObj.SetHmleNBFailureMsg('Could not copy ' + nodeB_prefix + 'NB logfile to analyze it!')
+						self.htmlObj.CreateHtmlTestRow('N/A', 'KO', CONST.ENB_PROCESS_NOLOGFILE_TO_ANALYZE)
+					self.eNBmbmsEnables[int(self.eNB_instance)] = False
+					return
+				if self.eNB_serverId != '0':
+					mySSH.copyout(self.eNBIPAddress, self.eNBUserName, self.eNBPassword, './' + fileToAnalyze, self.eNBSourceCodePath + '/cmake_targets/')
+				logging.debug('\u001B[1m Analyzing ' + nodeB_prefix + 'NB logfile \u001B[0m ' + fileToAnalyze)
+				logStatus = self.AnalyzeLogFile_eNB(fileToAnalyze)
+				if (logStatus < 0):
+					if self.htmlObj is not None:
+						self.htmlObj.CreateHtmlTestRow('N/A', 'KO', logStatus)
+					self.preamtureExit = True
+					self.eNBmbmsEnables[int(self.eNB_instance)] = False
+					return
+				else:
+					if self.htmlObj is not None:
+						self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+			else:
+				if self.htmlObj is not None:
+					self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
+		self.eNBmbmsEnables[int(self.eNB_instance)] = False
+		self.eNBstatuses[int(self.eNB_instance)] = -1
+
+	def LogCollecteNB(self):
+		mySSH = SSH.SSHConnection()
+		mySSH.open(self.eNBIPAddress, self.eNBUserName, self.eNBPassword)
+		mySSH.command('cd ' + self.eNBSourceCodePath, '\$', 5)
+		mySSH.command('cd cmake_targets', '\$', 5)
+		mySSH.command('echo ' + self.eNBPassword + ' | sudo -S rm -f enb.log.zip', '\$', 5)
+		mySSH.command('echo ' + self.eNBPassword + ' | sudo -S zip enb.log.zip enb*.log core* enb_*record.raw enb_*.pcap enb_*txt', '\$', 60)
+		mySSH.command('echo ' + self.eNBPassword + ' | sudo -S rm enb*.log core* enb_*record.raw enb_*.pcap enb_*txt', '\$', 5)
+		mySSH.close()
+
+	def AnalyzeLogFile_eNB(self, eNBlogFile):
+		if (not os.path.isfile('./' + eNBlogFile)):
+			return -1
+		enb_log_file = open('./' + eNBlogFile, 'r')
+		exitSignalReceived = False
+		foundAssertion = False
+		msgAssertion = ''
+		msgLine = 0
+		foundSegFault = False
+		foundRealTimeIssue = False
+		rrcSetupComplete = 0
+		rrcReleaseRequest = 0
+		rrcReconfigRequest = 0
+		rrcReconfigComplete = 0
+		rrcReestablishRequest = 0
+		rrcReestablishComplete = 0
+		rrcReestablishReject = 0
+		rlcDiscardBuffer = 0
+		rachCanceledProcedure = 0
+		uciStatMsgCount = 0
+		pdcpFailure = 0
+		ulschFailure = 0
+		ulschReceiveOK = 0
+		gnbRxTxWakeUpFailure = 0
+		cdrxActivationMessageCount = 0
+		dropNotEnoughRBs = 0
+		mbmsRequestMsg = 0
+		htmleNBFailureMsg = ''
+		isRRU = False
+		isSlave = False
+		slaveReceivesFrameResyncCmd = False
+		X2HO_state = CONST.X2_HO_REQ_STATE__IDLE
+		X2HO_inNbProcedures = 0
+		X2HO_outNbProcedures = 0
+		for line in enb_log_file.readlines():
+			if X2HO_state == CONST.X2_HO_REQ_STATE__IDLE:
+				result = re.search('target eNB Receives X2 HO Req X2AP_HANDOVER_REQ', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__TARGET_RECEIVES_REQ
+				result = re.search('source eNB receives the X2 HO ACK X2AP_HANDOVER_REQ_ACK', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK
+			if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_RECEIVES_REQ:
+				result = re.search('Received LTE_RRCConnectionReconfigurationComplete from UE', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE
+			if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE:
+				result = re.search('issue rrc_eNB_send_PATH_SWITCH_REQ', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ
+			if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ:
+				result = re.search('received path switch ack S1AP_PATH_SWITCH_REQ_ACK', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__IDLE
+					X2HO_inNbProcedures += 1
+			if X2HO_state == CONST.X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK:
+				result = re.search('source eNB receives the X2 UE CONTEXT RELEASE X2AP_UE_CONTEXT_RELEASE', str(line))
+				if result is not None:
+					X2HO_state = CONST.X2_HO_REQ_STATE__IDLE
+					X2HO_outNbProcedures += 1
+
+			if self.eNBOptions[int(self.eNB_instance)] != '':
+				res1 = re.search('max_rxgain (?P<requested_option>[0-9]+)', self.eNBOptions[int(self.eNB_instance)])
+				res2 = re.search('max_rxgain (?P<applied_option>[0-9]+)',  str(line))
+				if res1 is not None and res2 is not None:
+					requested_option = int(res1.group('requested_option'))
+					applied_option = int(res2.group('applied_option'))
+					if requested_option == applied_option:
+						htmleNBFailureMsg += '<span class="glyphicon glyphicon-ok-circle"></span> Command line option(s) correctly applied <span class="glyphicon glyphicon-arrow-right"></span> ' + self.eNBOptions[int(self.eNB_instance)] + '\n\n'
+					else:
+						htmleNBFailureMsg += '<span class="glyphicon glyphicon-ban-circle"></span> Command line option(s) NOT applied <span class="glyphicon glyphicon-arrow-right"></span> ' + self.eNBOptions[int(self.eNB_instance)] + '\n\n'
+			result = re.search('Exiting OAI softmodem', str(line))
+			if result is not None:
+				exitSignalReceived = True
+			result = re.search('[Ss]egmentation [Ff]ault', str(line))
+			if result is not None and not exitSignalReceived:
+				foundSegFault = True
+			result = re.search('[Cc]ore [dD]ump', str(line))
+			if result is not None and not exitSignalReceived:
+				foundSegFault = True
+			result = re.search('./ran_build/build/lte-softmodem', str(line))
+			if result is not None and not exitSignalReceived:
+				foundSegFault = True
+			result = re.search('[Aa]ssertion', str(line))
+			if result is not None and not exitSignalReceived:
+				foundAssertion = True
+			result = re.search('LLL', str(line))
+			if result is not None and not exitSignalReceived:
+				foundRealTimeIssue = True
+			if foundAssertion and (msgLine < 3):
+				msgLine += 1
+				msgAssertion += str(line)
+			result = re.search('Setting function for RU', str(line))
+			if result is not None:
+				isRRU = True
+			if isRRU:
+				result = re.search('RU 0 is_slave=yes', str(line))
+				if result is not None:
+					isSlave = True
+				if isSlave:
+					result = re.search('Received RRU_frame_resynch command', str(line))
+					if result is not None:
+						slaveReceivesFrameResyncCmd = True
+			result = re.search('LTE_RRCConnectionSetupComplete from UE', str(line))
+			if result is not None:
+				rrcSetupComplete += 1
+			result = re.search('Generate LTE_RRCConnectionRelease|Generate RRCConnectionRelease', str(line))
+			if result is not None:
+				rrcReleaseRequest += 1
+			result = re.search('Generate LTE_RRCConnectionReconfiguration', str(line))
+			if result is not None:
+				rrcReconfigRequest += 1
+			result = re.search('LTE_RRCConnectionReconfigurationComplete from UE rnti', str(line))
+			if result is not None:
+				rrcReconfigComplete += 1
+			result = re.search('LTE_RRCConnectionReestablishmentRequest', str(line))
+			if result is not None:
+				rrcReestablishRequest += 1
+			result = re.search('LTE_RRCConnectionReestablishmentComplete', str(line))
+			if result is not None:
+				rrcReestablishComplete += 1
+			result = re.search('LTE_RRCConnectionReestablishmentReject', str(line))
+			if result is not None:
+				rrcReestablishReject += 1
+			result = re.search('CDRX configuration activated after RRC Connection', str(line))
+			if result is not None:
+				cdrxActivationMessageCount += 1
+			result = re.search('uci->stat', str(line))
+			if result is not None:
+				uciStatMsgCount += 1
+			result = re.search('PDCP.*Out of Resources.*reason', str(line))
+			if result is not None:
+				pdcpFailure += 1
+			result = re.search('could not wakeup gNB rxtx process', str(line))
+			if result is not None:
+				gnbRxTxWakeUpFailure += 1
+			result = re.search('ULSCH in error in round|ULSCH 0 in error', str(line))
+			if result is not None:
+				ulschFailure += 1
+			result = re.search('ULSCH received ok', str(line))
+			if result is not None:
+				ulschReceiveOK += 1
+			result = re.search('BAD all_segments_received', str(line))
+			if result is not None:
+				rlcDiscardBuffer += 1
+			result = re.search('Canceled RA procedure for UE rnti', str(line))
+			if result is not None:
+				rachCanceledProcedure += 1
+			result = re.search('dropping, not enough RBs', str(line))
+			if result is not None:
+				dropNotEnoughRBs += 1
+			if self.eNBmbmsEnables[int(self.eNB_instance)]:
+				result = re.search('MBMS USER-PLANE.*Requesting.*bytes from RLC', str(line))
+				if result is not None:
+					mbmsRequestMsg += 1
+		enb_log_file.close()
+		logging.debug('   File analysis completed')
+		if self.air_interface == 'lte':
+			nodeB_prefix = 'e'
+		else:
+			nodeB_prefix = 'g'
+		if self.air_interface == 'nr':
+			if ulschReceiveOK > 0:
+				statMsg = nodeB_prefix + 'NB showed ' + str(ulschReceiveOK) + ' "ULSCH received ok" message(s)'
+				logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+				htmleNBFailureMsg += statMsg + '\n'
+			if gnbRxTxWakeUpFailure > 0:
+				statMsg = nodeB_prefix + 'NB showed ' + str(gnbRxTxWakeUpFailure) + ' "could not wakeup gNB rxtx process" message(s)'
+				logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+				htmleNBFailureMsg += statMsg + '\n'
+		if uciStatMsgCount > 0:
+			statMsg = nodeB_prefix + 'NB showed ' + str(uciStatMsgCount) + ' "uci->stat" message(s)'
+			logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+			htmleNBFailureMsg += statMsg + '\n'
+		if pdcpFailure > 0:
+			statMsg = nodeB_prefix + 'NB showed ' + str(pdcpFailure) + ' "PDCP Out of Resources" message(s)'
+			logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+			htmleNBFailureMsg += statMsg + '\n'
+		if ulschFailure > 0:
+			statMsg = nodeB_prefix + 'NB showed ' + str(ulschFailure) + ' "ULSCH in error in round" message(s)'
+			logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+			htmleNBFailureMsg += statMsg + '\n'
+		if dropNotEnoughRBs > 0:
+			statMsg = 'eNB showed ' + str(dropNotEnoughRBs) + ' "dropping, not enough RBs" message(s)'
+			logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m')
+			htmleNBFailureMsg += statMsg + '\n'
+		if rrcSetupComplete > 0:
+			rrcMsg = nodeB_prefix + 'NB completed ' + str(rrcSetupComplete) + ' RRC Connection Setup(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+			rrcMsg = ' -- ' + str(rrcSetupComplete) + ' were completed'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if rrcReleaseRequest > 0:
+			rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReleaseRequest) + ' RRC Connection Release(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if rrcReconfigRequest > 0 or rrcReconfigComplete > 0:
+			rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReconfigRequest) + ' RRC Connection Reconfiguration(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+			rrcMsg = ' -- ' + str(rrcReconfigComplete) + ' were completed'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if rrcReestablishRequest > 0 or rrcReestablishComplete > 0 or rrcReestablishReject > 0:
+			rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReestablishRequest) + ' RRC Connection Reestablishment(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+			rrcMsg = ' -- ' + str(rrcReestablishComplete) + ' were completed'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+			rrcMsg = ' -- ' + str(rrcReestablishReject) + ' were rejected'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if self.eNBmbmsEnables[int(self.eNB_instance)]:
+			if mbmsRequestMsg > 0:
+				rrcMsg = 'eNB requested ' + str(mbmsRequestMsg) + ' times the RLC for MBMS USER-PLANE'
+				logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+				htmleNBFailureMsg += rrcMsg + '\n'
+		if X2HO_inNbProcedures > 0:
+			rrcMsg = 'eNB completed ' + str(X2HO_inNbProcedures) + ' X2 Handover Connection procedure(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if X2HO_outNbProcedures > 0:
+			rrcMsg = 'eNB completed ' + str(X2HO_outNbProcedures) + ' X2 Handover Release procedure(s)'
+			logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rrcMsg + '\n'
+		if self.eNBOptions[int(self.eNB_instance)] != '':
+			res1 = re.search('drx_Config_present prSetup', self.eNBOptions[int(self.eNB_instance)])
+			if res1 is not None:
+				if cdrxActivationMessageCount > 0:
+					rrcMsg = 'eNB activated the CDRX Configuration for ' + str(cdrxActivationMessageCount) + ' time(s)'
+					logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m')
+					htmleNBFailureMsg += rrcMsg + '\n'
+				else:
+					rrcMsg = 'eNB did NOT ACTIVATE the CDRX Configuration'
+					logging.debug('\u001B[1;37;43m ' + rrcMsg + ' \u001B[0m')
+					htmleNBFailureMsg += rrcMsg + '\n'
+		if rachCanceledProcedure > 0:
+			rachMsg = nodeB_prefix + 'NB cancelled ' + str(rachCanceledProcedure) + ' RA procedure(s)'
+			logging.debug('\u001B[1;30;43m ' + rachMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rachMsg + '\n'
+		if isRRU:
+			if isSlave:
+				if slaveReceivesFrameResyncCmd:
+					rruMsg = 'Slave RRU received the RRU_frame_resynch command from RAU'
+					logging.debug('\u001B[1;30;43m ' + rruMsg + ' \u001B[0m')
+					htmleNBFailureMsg += rruMsg + '\n'
+				else:
+					rruMsg = 'Slave RRU DID NOT receive the RRU_frame_resynch command from RAU'
+					logging.debug('\u001B[1;37;41m ' + rruMsg + ' \u001B[0m')
+					htmleNBFailureMsg += rruMsg + '\n'
+					self.prematureExit(True)
+					return CONST.ENB_PROCESS_SLAVE_RRU_NOT_SYNCED
+		if foundSegFault:
+			logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB ended with a Segmentation Fault! \u001B[0m')
+			if self.htmlObj is not None:
+				self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg)
+			return CONST.ENB_PROCESS_SEG_FAULT
+		if foundAssertion:
+			logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB ended with an assertion! \u001B[0m')
+			htmleNBFailureMsg += msgAssertion
+			if self.htmlObj is not None:
+				self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg)
+			return CONST.ENB_PROCESS_ASSERTION
+		if foundRealTimeIssue:
+			logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB faced real time issues! \u001B[0m')
+			htmleNBFailureMsg += nodeB_prefix + 'NB faced real time issues!\n'
+			#return CONST.ENB_PROCESS_REALTIME_ISSUE
+		if rlcDiscardBuffer > 0:
+			rlcMsg = nodeB_prefix + 'NB RLC discarded ' + str(rlcDiscardBuffer) + ' buffer(s)'
+			logging.debug('\u001B[1;37;41m ' + rlcMsg + ' \u001B[0m')
+			htmleNBFailureMsg += rlcMsg + '\n'
+			if self.htmlObj is not None:
+				self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg)
+			return CONST.ENB_PROCESS_REALTIME_ISSUE
+		if self.htmlObj is not None:
+			self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg)
+		return 0
diff --git a/ci-scripts/sshconnection.py b/ci-scripts/sshconnection.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba0f900f2940482589e3e9711c942031af88cd9a
--- /dev/null
+++ b/ci-scripts/sshconnection.py
@@ -0,0 +1,221 @@
+#/*
+# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The OpenAirInterface Software Alliance licenses this file to You under
+# * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+# * except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# *      http://www.openairinterface.org/?page_id=698
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# *-------------------------------------------------------------------------------
+# * For more information about the OpenAirInterface (OAI) Software Alliance:
+# *      contact@openairinterface.org
+# */
+#---------------------------------------------------------------------
+# Python for CI of OAI-eNB + COTS-UE
+#
+#   Required Python Version
+#     Python 3.x
+#
+#   Required Python Package
+#     pexpect
+#---------------------------------------------------------------------
+
+#-----------------------------------------------------------
+# Import
+#-----------------------------------------------------------
+import pexpect          # pexpect
+import logging
+import time             # sleep
+import re
+import sys
+
+#-----------------------------------------------------------
+# Class Declaration
+#-----------------------------------------------------------
+class SSHConnection():
+	def __init__(self):
+		self.ssh = ''
+		self.picocom_closure = False
+
+	def disablePicocomClosure(self):
+		self.picocom_closure = False
+
+	def enablePicocomClosure(self):
+		self.picocom_closure = True
+
+	def open(self, ipaddress, username, password):
+		extraSshOptions = ''
+		count = 0
+		connect_status = False
+		if ipaddress == '192.168.18.197':
+			extraSshOptions = ' -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'
+		while count < 4:
+			self.ssh = pexpect.spawn('ssh', [username + '@' + ipaddress + extraSshOptions], timeout = 5)
+			self.sshresponse = self.ssh.expect(['Are you sure you want to continue connecting (yes/no)?', 'password:', 'Last login', pexpect.EOF, pexpect.TIMEOUT])
+			if self.sshresponse == 0:
+				self.ssh.sendline('yes')
+				self.sshresponse = self.ssh.expect(['password:', username + '@'])
+				if self.sshresponse == 0:
+					self.ssh.sendline(password)
+				self.sshresponse = self.ssh.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if self.sshresponse == 0:
+					count = 10
+					connect_status = True
+				else:
+					logging.debug('self.sshresponse = ' + str(self.sshresponse))
+			elif self.sshresponse == 1:
+				self.ssh.sendline(password)
+				self.sshresponse = self.ssh.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if self.sshresponse == 0:
+					count = 10
+					connect_status = True
+				else:
+					logging.debug('self.sshresponse = ' + str(self.sshresponse))
+			elif self.sshresponse == 2:
+				# Checking if we are really on the remote client defined by its IP address
+				self.command('stdbuf -o0 ifconfig | egrep --color=never "inet addr:|inet "', '\$', 5)
+				result = re.search(str(ipaddress), str(self.ssh.before))
+				if result is None:
+					self.close()
+				else:
+					count = 10
+					connect_status = True
+			else:
+				# debug output
+				logging.debug(str(self.ssh.before))
+				logging.debug('self.sshresponse = ' + str(self.sshresponse))
+			# adding a tempo when failure
+			if not connect_status:
+				time.sleep(1)
+			count += 1
+		if connect_status:
+			pass
+		else:
+			sys.exit('SSH Connection Failed')
+
+	def command(self, commandline, expectedline, timeout):
+		logging.debug(commandline)
+		self.ssh.timeout = timeout
+		self.ssh.sendline(commandline)
+		self.sshresponse = self.ssh.expect([expectedline, pexpect.EOF, pexpect.TIMEOUT])
+		if self.sshresponse == 0:
+			return 0
+		elif self.sshresponse == 1:
+			logging.debug('\u001B[1;37;41m Unexpected EOF \u001B[0m')
+			logging.debug('Expected Line : ' + expectedline)
+			logging.debug(str(self.ssh.before))
+			sys.exit(self.sshresponse)
+		elif self.sshresponse == 2:
+			logging.debug('\u001B[1;37;41m Unexpected TIMEOUT \u001B[0m')
+			logging.debug('Expected Line : ' + expectedline)
+			result = re.search('ping |iperf |picocom', str(commandline))
+			if result is None:
+				logging.debug(str(self.ssh.before))
+				sys.exit(self.sshresponse)
+			else:
+				return -1
+		else:
+			logging.debug('\u001B[1;37;41m Unexpected Others \u001B[0m')
+			logging.debug('Expected Line : ' + expectedline)
+			sys.exit(self.sshresponse)
+
+	def close(self):
+		self.ssh.timeout = 5
+		self.ssh.sendline('exit')
+		self.sshresponse = self.ssh.expect([pexpect.EOF, pexpect.TIMEOUT])
+		if self.sshresponse == 0:
+			pass
+		elif self.sshresponse == 1:
+			if not self.picocom_closure:
+				logging.debug('\u001B[1;37;41m Unexpected TIMEOUT during closing\u001B[0m')
+		else:
+			logging.debug('\u001B[1;37;41m Unexpected Others during closing\u001B[0m')
+
+	def copyin(self, ipaddress, username, password, source, destination):
+		count = 0
+		copy_status = False
+		logging.debug('scp '+ username + '@' + ipaddress + ':' + source + ' ' + destination)
+		while count < 10:
+			scp_spawn = pexpect.spawn('scp '+ username + '@' + ipaddress + ':' + source + ' ' + destination, timeout = 100)
+			scp_response = scp_spawn.expect(['Are you sure you want to continue connecting (yes/no)?', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+			if scp_response == 0:
+				scp_spawn.sendline('yes')
+				scp_spawn.expect('password:')
+				scp_spawn.sendline(password)
+				scp_response = scp_spawn.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if scp_response == 0:
+					count = 10
+					copy_status = True
+				else:
+					logging.debug('1 - scp_response = ' + str(scp_response))
+			elif scp_response == 1:
+				scp_spawn.sendline(password)
+				scp_response = scp_spawn.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if scp_response == 0 or scp_response == 3:
+					count = 10
+					copy_status = True
+				else:
+					logging.debug('2 - scp_response = ' + str(scp_response))
+			elif scp_response == 2:
+				count = 10
+				copy_status = True
+			else:
+				logging.debug('3 - scp_response = ' + str(scp_response))
+			# adding a tempo when failure
+			if not copy_status:
+				time.sleep(1)
+			count += 1
+		if copy_status:
+			return 0
+		else:
+			return -1
+
+	def copyout(self, ipaddress, username, password, source, destination):
+		count = 0
+		copy_status = False
+		logging.debug('scp ' + source + ' ' + username + '@' + ipaddress + ':' + destination)
+		while count < 4:
+			scp_spawn = pexpect.spawn('scp ' + source + ' ' + username + '@' + ipaddress + ':' + destination, timeout = 100)
+			scp_response = scp_spawn.expect(['Are you sure you want to continue connecting (yes/no)?', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+			if scp_response == 0:
+				scp_spawn.sendline('yes')
+				scp_spawn.expect('password:')
+				scp_spawn.sendline(password)
+				scp_response = scp_spawn.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if scp_response == 0:
+					count = 10
+					copy_status = True
+				else:
+					logging.debug('1 - scp_response = ' + str(scp_response))
+			elif scp_response == 1:
+				scp_spawn.sendline(password)
+				scp_response = scp_spawn.expect(['\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
+				if scp_response == 0 or scp_response == 3:
+					count = 10
+					copy_status = True
+				else:
+					logging.debug('2 - scp_response = ' + str(scp_response))
+			elif scp_response == 2:
+				count = 10
+				copy_status = True
+			else:
+				logging.debug('3 - scp_response = ' + str(scp_response))
+			# adding a tempo when failure
+			if not copy_status:
+				time.sleep(1)
+			count += 1
+		if copy_status:
+			pass
+		else:
+			sys.exit('SCP failed')
+
+	def getBefore(self):
+		return str(self.ssh.before)
diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt
index bb8f9024f95e0ef1b227361564bde8fa04a70c42..dcd063ff5c115617d9f8c98d85732b5d2835a86e 100644
--- a/cmake_targets/CMakeLists.txt
+++ b/cmake_targets/CMakeLists.txt
@@ -2825,6 +2825,7 @@ target_link_libraries(smallblocktest
   m pthread ${ATLAS_LIBRARIES} dl
   )
 
+<<<<<<< HEAD
 ###################################################
 # For CUDA library 
 ###################################################
@@ -2833,10 +2834,15 @@ CUDA_ADD_LIBRARY(LDPC_CU
   )
 CUDA_ADD_CUFFT_TO_TARGET(LDPC_CU)
 cuda_add_executable(ldpctest
+=======
+add_executable(ldpctest  
+  ${PHY_NR_CODINGIF}
+>>>>>>> origin/develop
   ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c
   ${T_SOURCE}
   ${SHLIB_LOADER_SOURCES}
   )
+<<<<<<< HEAD
 
 target_link_libraries(ldpctest -ldl
   -Wl,--start-group 
@@ -2851,6 +2857,8 @@ target_link_libraries(ldpctest -ldl
   # ${T_SOURCE}
   # ${SHLIB_LOADER_SOURCES}
   # )
+=======
+>>>>>>> origin/develop
 add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc ) 
 
 target_link_libraries(ldpctest
diff --git a/common/utils/T/tracer/hacks/pilot_timeplot.sh b/common/utils/T/tracer/hacks/pilot_timeplot.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0d9c4694a627e97bd4d9570de6b21627893971a4
--- /dev/null
+++ b/common/utils/T/tracer/hacks/pilot_timeplot.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# use UP and DOWN arrow keys to scroll the view displayed by timeplot
+# signal_timeplot SIG: send SIG to every process whose 'ps aux' line mentions timeplot, but neither grep nor sh
+signal_timeplot() {
+  kill "-$1" $(ps aux | grep timeplot | grep -v grep | grep -v sh | tr -s ' ' : | cut -f 2 -d :)
+}
+
+while read -n 1 key
+do
+  case "$key" in
+    'B' ) signal_timeplot SIGUSR1 ;;  # last character of the DOWN arrow sequence (ESC [ B)
+    'A' ) signal_timeplot SIGUSR2 ;;  # last character of the UP arrow sequence (ESC [ A)
+  esac
+done
diff --git a/doc/L2NFAPI_NOS1.md b/doc/L2NFAPI_NOS1.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb4c32ec084bf2258657bfa0bb96d01253825807
--- /dev/null
+++ b/doc/L2NFAPI_NOS1.md
@@ -0,0 +1,349 @@
+<table style="border-collapse: collapse; border: none;">
+  <tr style="border-collapse: collapse; border: none;">
+    <td style="border-collapse: collapse; border: none;">
+      <a href="http://www.openairinterface.org/">
+         <img src="./images/oai_final_logo.png" alt="" border=3 height=50 width=150>
+         </img>
+      </a>
+    </td>
+    <td style="border-collapse: collapse; border: none; vertical-align: center;">
+      <b><font size = "5">L2 nFAPI Simulator (no S1 Mode / 2-host deployment)</font></b>
+    </td>
+  </tr>
+</table>
+
+## Table of Contents ##
+
+1.   [Environment](#1-environment)
+2.   [Retrieve the OAI eNB-UE source code](#2-retrieve-the-oai-enb-ue-source-code)
+3.   [Setup of the USIM information in UE folder](#3-setup-of-the-usim-information-in-ue-folder)
+4.   [Setup of the Configuration files](#4-setup-of-the-configuration-files)
+     1.   [The eNB Configuration file](#41-the-enb-configuration-file)
+     2.   [The UE Configuration file](#42-the-ue-configuration-file)
+5.   [Build OAI UE and eNodeB](#5-build-oai-ue-and-enodeb)
+6.   [Start the eNB](#6-start-the-enb)
+7.   [Start the UE](#7-start-the-ue)
+8.   [Test with ping](#8-test-with-ping)
+9.   [Limitations](#9-limitations)
+
+# 1. Environment #
+
+You may not have access to an EPC, or you may not want the hassle of deploying one.
+
+2 servers are used in this deployment. You can use Virtual Machines instead of each server; like it is done in the CI process.
+
+*  Machine B contains the OAI eNB executable (`lte-softmodem`)
+*  Machine C contains the OAI UE(s) executable (`lte-uesoftmodem`)
+
+Example of L2 nFAPI Simulator testing environment:
+
+<img src="./images/L2-sim-noS1-2-host-deployment.png" alt="" border=3>
+
+Note that the IP addresses are indicative and need to be adapted to your environment.
+
+# 2. Retrieve the OAI eNB-UE source code #
+
+At the time of writing, the tag used in the `develop` branch to do this documentation was `2020.w16`.
+
+The tutorial should be valid for the `master` branch tags such as `v1.2.0` or `v1.2.1`. But you may face issues that could be fixed in newer `develop` tags.
+
+Please try to use the same commit ID on both eNB/UE hosts.
+
+```bash
+$ ssh sudousername@machineB
+git clone https://gitlab.eurecom.fr/oai/openairinterface5g.git enb_folder
+cd enb_folder
+git checkout develop
+```
+
+```bash
+$ ssh sudousername@machineC
+git clone https://gitlab.eurecom.fr/oai/openairinterface5g.git ue_folder
+cd ue_folder
+git checkout develop
+```
+
+# 3. Setup of the USIM information in UE folder #
+
+```bash
+$ ssh sudousername@machineC
+cd ue_folder
+# Edit openair3/NAS/TOOLS/ue_eurecom_test_sfr.conf with your preferred editor
+```
+
+Edit the USIM information within this file in order to match the HSS database. They **HAVE TO** match:
+
+*  PLMN+MSIN and IMSI of users table of HSS database **SHALL** be the same.
+*  OPC of this file and OPC of users table of HSS database **SHALL** be the same.
+*  USIM_API_K of this file and the key of users table of HSS database **SHALL** be the same.
+
+When testing multiple UEs, it is necessary to add the information of the other UEs as described below for 2 users. Only the UE0 (first UE) information is written in the original file.
+
+```
+UE0:
+{
+    USER: {
+        IMEI="356113022094149";
+        MANUFACTURER="EURECOM";
+        MODEL="LTE Android PC";
+        PIN="0000";
+    };
+
+    SIM: {
+        MSIN="0000000001";  // <-- Modify here
+        USIM_API_K="8baf473f2f8fd09487cccbd7097c6862";
+        OPC="e734f8734007d6c5ce7a0508809e7e9c";
+        MSISDN="33611123456";
+    };
+...
+};
+// Copy the UE0 and edit
+UE1: // <- Edit here
+{
+    USER: {
+        IMEI="356113022094149";
+        MANUFACTURER="EURECOM";
+        MODEL="LTE Android PC";
+        PIN="0000";
+    };
+
+    SIM: {
+        MSIN="0000000002";  // <-- Modify here
+        USIM_API_K="8baf473f2f8fd09487cccbd7097c6862";
+        OPC="e734f8734007d6c5ce7a0508809e7e9c";
+        MSISDN="33611123456";
+    };
+...
+};
+```
+
+You can repeat the operation for as many users as you want to test with.
+
+# 4. Setup of the Configuration files #
+
+**CAUTION: both proposed configuration files reside in the ci-scripts realm. You can copy them but you CANNOT push any modification on these 2 files as part of an MR without informing the CI team.**
+
+## 4.1. The eNB Configuration file ##
+
+```bash
+$ ssh sudousername@machineB
+cd enb_folder
+# Edit ci-scripts/conf_files/rcc.band7.tm1.nfapi.conf with your preferred editor
+```
+
+First verify the nFAPI interface setup on the physical ethernet interface of machineB and put the proper IP addresses for both hosts.
+
+```
+MACRLCs = (
+        {
+        num_cc = 1;
+        local_s_if_name  = "ens3";             // <-- HERE
+        remote_s_address = "192.168.122.169";  // <-- HERE
+        local_s_address  = "192.168.122.31";   // <-- HERE
+        local_s_portc    = 50001;
+        remote_s_portc   = 50000;
+        local_s_portd    = 50011;
+        remote_s_portd   = 50010;
+        tr_s_preference = "nfapi";
+        tr_n_preference = "local_RRC";
+        }
+);
+```
+
+If you are testing more than 16 UEs, a proper setting on the RUs is necessary. **Note that this part is NOT present in the original configuration file**.
+
+```
+RUs = (
+    {
+       local_rf       = "yes"
+         nb_tx          = 1
+         nb_rx          = 1
+         att_tx         = 20
+         att_rx         = 0;
+         bands          = [38];
+         max_pdschReferenceSignalPower = -23;
+         max_rxgain                    = 116;
+         eNB_instances  = [0];
+    }
+);
+```
+
+Last, the S1 interface shall be properly set.
+
+```
+    ////////// MME parameters:
+    mme_ip_address      = ( { ipv4       = "CI_MME_IP_ADDR"; // replace with 192.168.122.195
+                              ipv6       = "192:168:30::17";
+                              active     = "yes";
+                              preference = "ipv4";
+                            }
+                          );
+
+    NETWORK_INTERFACES :
+    {
+        ENB_INTERFACE_NAME_FOR_S1_MME            = "ens3";            // replace with the proper interface name
+        ENB_IPV4_ADDRESS_FOR_S1_MME              = "CI_ENB_IP_ADDR";  // replace with 192.168.122.31
+        ENB_INTERFACE_NAME_FOR_S1U               = "ens3";            // replace with the proper interface name
+        ENB_IPV4_ADDRESS_FOR_S1U                 = "CI_ENB_IP_ADDR";  // replace with 192.168.122.31
+        ENB_PORT_FOR_S1U                         = 2152; # Spec 2152
+        ENB_IPV4_ADDRESS_FOR_X2C                 = "CI_ENB_IP_ADDR";  // replace with 192.168.122.31
+        ENB_PORT_FOR_X2C                         = 36422; # Spec 36422
+
+    };
+```
+
+## 4.2. The UE Configuration file ##
+
+```bash
+$ ssh sudousername@machineC
+cd ue_folder
+# Edit ci-scripts/conf_files/ue.nfapi.conf with your preferred editor
+```
+
+Verify the nFAPI interface setup on the physical ethernet interface of machineC and put the proper IP addresses for both hosts.
+
+```
+L1s = (
+        {
+        num_cc = 1;
+        tr_n_preference = "nfapi";
+        local_n_if_name  = "ens3";            // <- HERE
+        remote_n_address = "192.168.122.31";  // <- HERE
+        local_n_address  = "192.168.122.169"; // <- HERE
+        local_n_portc    = 50000;
+        remote_n_portc   = 50001;
+        local_n_portd    = 50010;
+        remote_n_portd   = 50011;
+        }
+);
+```
+
+# 5. Build OAI UE and eNodeB #
+
+See [Build documentation](./BUILD.md).
+
+# 6. Start the eNB #
+
+In the first terminal (the one you used to build the eNB):
+
+```bash
+$ ssh sudousername@machineB
+cd enb_folder/cmake_targets
+sudo -E ./ran_build/build/lte-softmodem -O ../ci-scripts/conf_files/rcc.band7.tm1.nfapi.conf --noS1 > enb.log 2>&1
+sleep 10
+ifconfig
+ens3      Link encap:Ethernet  HWaddr XX:XX:XX:XX:XX:XX
+          inet addr:192.168.122.31  Bcast:192.168.122.255  Mask:255.255.255.0
+....
+oaitun_enb1 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.1.1  P-t-P:192.172.0.2  Mask:255.255.255.0
+....
+```
+
+If you don't use redirection, you can test but many logs are printed on the console and this may affect performance of the L2-nFAPI simulator.
+
+We do recommend the redirection in steady mode once your setup is correct.
+
+# 7. Start the UE #
+
+In the second terminal (the one you used to build the UE):
+
+```bash
+$ ssh sudousername@machineC
+cd ue_folder/cmake_targets
+# Test 64 UEs, 1 thread in FDD mode
+sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 64 --nums_ue_thread 1 --nokrnmod 1 > ue.log 2>&1
+# Test 64 UEs, 1 thread in TDD mode
+sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 64 --nums_ue_thread 1 --nokrnmod 1 -T 1 > ue.log 2>&1
+# The "-T 1" option means TDD config
+```
+
+-   The number of UEs can be set by using the `--num-ues` option and the maximum UE number is 255 (with the `--mu*` options, otherwise 16).
+-   The number of threads can be set with the `--nums_ue_thread` option. This number **SHALL NOT** be greater than the number of UEs.
+    * At the time of writing, it seems to be enough to run on a single thread.
+-   The `--nokrnmod 1` option makes use of the preferred and supported tunnel interface.
+-   How many UEs can be tested depends on the hardware (server, PC, etc.) performance in your environment.
+
+For example, running with 4 UEs:
+
+```bash
+$ ssh sudousername@machineC
+cd ue_folder/cmake_targets
+sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 4 --nums_ue_thread 1 --nokrnmod 1 > ue.log 2>&1
+sleep 10
+ifconfig
+ens3      Link encap:Ethernet  HWaddr XX:XX:XX:XX:XX:XX
+          inet addr:192.168.122.169  Bcast:192.168.122.255  Mask:255.255.255.0
+....
+oaitun_ue1 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.1.2  P-t-P:192.172.0.2  Mask:255.255.255.0
+....
+oaitun_ue2 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.1.3  P-t-P:192.172.0.3  Mask:255.255.255.0
+....
+oaitun_ue3 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.1.4  P-t-P:192.172.0.4  Mask:255.255.255.0
+....
+oaitun_ue4 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.1.5  P-t-P:192.172.0.5  Mask:255.255.255.0
+....
+oaitun_uem1 Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00
+          inet addr:10.0.2.2  P-t-P:10.0.2.2  Mask:255.255.255.0
+....
+....
+```
+
+Having the 4 oaitun_ue tunnel interfaces up and with an allocated address means the connection with EPC went alright.
+
+# 8. Test with ping #
+
+In a third terminal, after around 10 seconds, the UE(s) shall be connected to the eNB: Check with ifconfig
+
+```bash
+$ ssh sudousername@machineB
+# Ping UE1 IP address based on the EPC pool used: in this example:
+ping -I oaitun_enb1 -c 20 10.0.1.2
+# Ping UE4 IP address based on the EPC pool used: in this example:
+ping -I oaitun_enb1 -c 20 10.0.1.5
+```
+
+Ping from the UE side:
+
+```bash
+$ ssh sudousername@machineC
+ping -I oaitun_ue1 -c 20 10.0.1.1
+ping -I oaitun_ue3 -c 20 10.0.1.1
+```
+
+iperf operations can also be performed.
+
+DL traffic:
+
+```bash
+$ ssh sudousername@machineC
+iperf -B 10.0.1.2 -u -s -i 1 -fm -p 5002
+$ ssh sudousername@machineB
+iperf -c 10.0.1.2 -u -t 30 -b 3M -i 1 -fm -B 10.0.1.1 -p 5002
+```
+
+UL traffic:
+
+```bash
+$ ssh sudousername@machineB
+iperf -B 10.0.1.1 -u -s -i 1 -fm -p 5002
+$ ssh sudousername@machineC
+iperf -c 10.0.1.1 -u -t 30 -b 2M -i 1 -fm -B 10.0.1.2 -p 5002
+```
+
+# 9. Limitations #
+
+
+----
+
+[oai wiki home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home)
+
+[oai softmodem features](FEATURE_SET.md)
+
+[oai softmodem build procedure](BUILD.md)
+
+[L2 nfapi simulator](L2NFAPI.md)
diff --git a/doc/SystemX-tutorial-design.md b/doc/SystemX-tutorial-design.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e4496f7de6c22811d99984777fde9008bc8e282
--- /dev/null
+++ b/doc/SystemX-tutorial-design.md
@@ -0,0 +1,336 @@
+# OpenAirInterface for SystemX
+
+# Terminology
+
+**This document uses the 5G terminology**
+
+**Central Unit (CU):** It is a logical node that includes the gNB
+functions like Transfer of user data, Mobility control, Radio access
+network sharing, Positioning, Session Management etc., except those
+functions allocated exclusively to the DU. CU controls the operation of
+DUs over front-haul (Fs) interface. A central unit (CU) may also be
+known as BBU/REC/RCC/C-RAN/V-RAN/VNF
+
+**Distributed Unit (DU):** This logical node includes a subset of the
+gNB functions, depending on the functional split option. Its operation
+is controlled by the CU. Distributed Unit (DU) also known with other
+names like RRH/RRU/RE/RU/PNF.
+
+In OpenAir code, the terminology is often RU and BBU.
+
+# OpenAirUsage
+
+## EPC and general environment
+
+### OAI EPC
+
+Use the stable OAI EPC, that can run in one machine (VM or standalone)
+
+Draft description:
+<https://open-cells.com/index.php/2017/08/22/all-in-one-openairinterface-august-22nd/>
+
+## Standalone 4G
+
+EPC+eNB on one machine, the UE can be commercial or OAI UE.
+
+### USRP B210
+
+Main current issue: traffic is good only on coaxial link between UE and
+eNB (probably power management issue).
+
+### Simulated RF
+
+Running eNB+UE both OAI can be done over a virtual RF link.
+
+The UE current status is that threads synchronization is implicit in
+some cases. As the RF simulator is very quick, a “sleep()” is required
+in the UE main loop
+
+(line 1744, targets/RT/USER/lte-ue.c).
+
+Running also the UE in the same machine is possible with simulated RF.
+
+Running in same machine is simpler, offers about infinite speed for
+virtual RF samples transmission.
+
+A specific configuration is required because the EPC Sgi interface has
+the same IP tunnel end point as the UE.
+
+So, we have to create a network namespace for the UE and to route data
+in/out of the namespace.
+
+```bash
+ip netns delete aNameSpace 2> /dev/null
+
+ip link delete v-eth1 2> /dev/null
+
+ip netns add aNameSpace
+
+ip link add v-eth1 type veth peer name v-peer1
+
+ip link set v-peer1 netns aNameSpace
+
+ip addr add 10.200.1.1/24 dev v-eth1
+
+ip link set v-eth1 up
+
+iptables -t nat -A POSTROUTING -s 10.200.1.0/255.255.255.0 -o enp0s31f6 \
+-j MASQUERADE
+
+iptables -A FORWARD -i enp0s31f6 -o v-eth1 -j ACCEPT
+
+iptables -A FORWARD -o enp0s31f6 -i v-eth1 -j ACCEPT
+
+ip netns exec aNameSpace ip link set dev lo up
+
+ip netns exec aNameSpace ip addr add 10.200.1.2/24 dev v-peer1
+
+ip netns exec aNameSpace ip link set v-peer1 up
+
+ip netns exec aNameSpace bash
+```
+
+After the last command, the Linux shell is in the new namespace, ready
+to run the UE.
+
+To generate user plane traffic, the traffic generator has to run in the same
+namespace
+
+```bash
+ip netns exec aNameSpace bash
+```
+
+The traffic generator has to specify the interface:
+
+```bash
+route add default oaitun_ue1
+```
+
+or specify the outgoing route in the traffic generator (like option “-I”
+in ping command).
+
+## Split 6 DL 4G
+
+The contract specifies reusing the existing uplink if4p5 and developing,
+in this work, the downlink “functional split 6”.
+
+The customer required after signature to develop also the uplink
+functional split 6. This is accepted, as long as the whole work is
+research with no delivery completeness warranty.
+
+### Simulation
+
+To be able to verify the new features and to help in all future
+developments, Open Cells added and improved the Rf board simulator
+during this contract.
+
+We added the channel modeling simulation, which can simulate various
+3GPP defined channels.
+
+### Main loop
+
+The main loop, in the RF simulator case, is in
+
+ `targets/RT/USER/lte-ru.c and targets/RT/USER/lte-enb.c`
+
+As this piece of SW is very complex and doesn’t meet our goals
+(functional split 6), a cleaned version replaces these 2 files in
+executables/ocp-main.c (openair1/SCHED/prach\_procedures.c is also
+replaced by this new file, as it only launches the actual RACH work in a
+way not compatible with our FS6).
+
+The main loop cadences the I/Q samples reception, signal processing and
+I/Q samples sending.
+
+The main loop uses extensively function pointers to call the right
+processing function depending on the split case.
+
+A lot of redundant OAI global variables contain the same semantic data: time, frame, subframe.
+The reworked main loop takes care of a unique variable that comes directly from hardware: the RF board sampling number.
+
+To use OAI, we need to set all OAI variables that derive from this timestamp value. The function setAllfromTS() implements this.
+
+### Splitted main level
+
+When FS6 is activated, a main loop for the DU (du_fs6()) and a main loop for the CU replace the unique eNB main loop.
+
+Each of these main loops calls initialization of OAI LTE data and the FS6 transport layer initialization.
+
+Then, it runs an infinite loop on: set time, call UL and DL. The time comes from the RF board, so the DU sends the time to the CU.
+
+This is enough for RF board dialog, but the FS6 is higher in SW layers,
+we need to cut higher functions inside downlink and uplink procedures.
+
+As much as possible, the FS6 code is in the directory OPENAIR_DIR/executables. When a given OAI piece of code is small or needs complex changes, it is reworked in the file main-fs6.c. The function naming keeps the OAI function name, adding the suffix _fromsplit() or _tosplit().
+
+When this organization would lead to large code copy, it is better to insert modifications in OAI code. This is done in two files: 
+
+- openair1/SCHED/phy_procedures_lte_eNb.c: to send signaling channels computation results
+    - the function sendFs6Ulharq() centralizes all signaling channels forwarding to CU
+- openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c: to deal with FS6 user plane split
+    - sendFs6Ul() is used once to forward user plane to CU
+
+
+### DownLink
+
+The main procedure is phy\_procedures\_eNB\_TX()
+
+This is building the common channels (beacon, multi-UE signaling).
+
+The FS6 split breaks this function into pieces:
+
+*   The multi-UE signals, built by common\_signal\_procedures(),
+    subframe2harq\_pid(), generate\_dci\_top(), subframe2harq\_pid()
+    *   These functions run in the DU, nevertheless all  context has to be sent 
+    (it is also needed partially for UL splitting)
+    * Run in the DU also to meet the requirement of pushing
+        in DU the data encoded with large redundancy (&gt;3 redundancy)
+        
+*   the per UE data: pdsch\_procedures() needs further splitting:
+
+    *   dlsch\_encoding\_all() that makes the encoding: turbo code
+        and lte\_rate\_matching\_turbo() that will be in the DU (some
+        unlikely cases can reach redundancy up to x3, when MCS is very
+        low (negative SINR cases)).
+
+        *   dlsch\_encoding() output needs to be transmitted between the
+            DU and the CU for functional split 6.
+            * dlsch\_scrambling() that will go in the DU
+            * dlsch\_modulation() that will go in the DU
+   
+   The DU user plane data is made of expanded bits in OAI at FS6 split level. A pair of functions compacts these bits back into 8 bits/byte before sending the data and expands them again at data reception in the DU (functions: fs6Dl(un)pack()).
+
+### Uplink
+
+The uplink requires configuration that is part of the DL transmission.
+
+It interprets the signalling to extract the RACH and the per UE data
+channels.
+
+Ocp-main.c:rxtx() calls directly the entry procedure
+phy\_procedures\_eNB\_uespec\_RX() calls:
+
+*   rx\_ulsch() that demodulates and extracts soft bits per UE.
+
+    *   This function runs in the DU
+    *   the output data will be processed in the CU, so it needs to be
+        transmitted to the CU
+*   ulsch\_decoding() that does lte\_rate\_matching\_turbo\_rx()
+    sub\_block\_deinterleaving\_turbo() 
+    then turbo decode that is in the CU
+*   fill\_ulsch\_cqi\_indication()  fill\_crc\_indication() , fill\_rx\_indication()
+          *   DU performs the signal processing of each channel data, prepares and sends the computed result to the CU
+
+* Random access channel detection runs in the DU
+      * the DU reports to the CU only the detected temporary identifier for RACH response
+
+
+### signaling data in each direction (UL and DL)
+
+
+*   each LTE channel needs to be propagated between CU and DU
+    * the simplest are the almost static data such as PSS/SSS, that need only static eNB parameters and primary information (frame numbering)
+    * all the other channels require data transmission CU to DU and DU to CU
+    * the general design pushes all the low level processing for these channels in the DU
+    * the CU interface transports only signal processing results (UL) or configuration to create the RF signal (DL case)
+* HARQ is detected in the DU, then only the ACK or NACK is reported to CU
+
+* the CU has to control the power and MCS (modulation and coding scheme)
+    * the DU performs the signal processing and reports only the decoded data like the CQI
+  * as the DU performs the modulation, scrambling and puncturing, each data packet is associated with the LTE parameters required for these features
+       * in DL, the CU associates the control parameters and the user plane data
+       * in UL, the CU sends upfront the scheduled UL data to the DU.  So, the DU has the required knowledge to decode the next subframes in time.
+
+### UDP transport layer
+
+A general UDP transport layer is in executables/transport\_split.c
+
+Linux offers a UDP socket builtin timeout, that we use.
+
+In and out buffers are memory zones that contain compacted
+(concatenated) UDP chunks.
+
+For output, sendSubFrame() sends each UDP chunk
+
+For input, receiveSubFrame() collects all UDP chunks for a group (a
+subframe in OAI LTE case). It returns in the following cases:
+
+-   all chunks are received
+-   a timeout expired
+-   a chunk from the next subframe already arrived
+
+### Functional split 6 usage
+
+The ocp cleaned main has to be used: run ocp-softmodem instead of
+lte-softmodem.
+
+The functionality and parameters are the same, enhanced with FS6 mode.
+
+The command-line option “--split73” enables the fs6 (also called split 7.3) mode and selects whether to run as CU or DU.
+
+Example:
+
+```bash
+./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim  --log_config.phy_log_level debug --split73 cu:127.0.0.1
+```
+
+Run the CU init of the split 6 eNB, that will call du on 127.0.0.1 address
+
+```bash
+./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim  --log_config.phy_log_level debug --split73 du:127.0.0.1
+```
+
+will run the du, calling the cu on 127.0.0.1
+
+If the CU and the DU are not on the same machine, the remote address of each side needs to be specified as per this example
+
+```bash
+./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim  --log_config.phy_log_level debug --split73 du:192.168.1.55
+```
+
+runs the functional split 6 DU
+
+```bash
+./lte-uesoftmodem -C 2685000000 -r 50 --rfsim --rfsimulator.serveraddr 192.168.1.1 -d
+```
+
+Runs the UE (to have the UE signal scope, compile it with make uescope)
+
+CU+DU+UE can run with option `--noS1` to avoid using an EPC and/or with `--rfsim` to simulate the RF board
+
+
+## 5G and F1
+
+Today 5G achievement is limited to physical layer.
+
+The available modulation is 40MHz, which requires one X310 or N300 for the
+gNB and an X310 or N300 for the nrUE.
+
+### Usage with X310
+
+Linux configuration:
+<https://files.ettus.com/manual/page_usrp_x3x0_config.html>
+
+We included most of this configuration in the OAI source code.
+
+It remains to set the NIC (network interface card) MTU to 9000 (jumbo
+frames).
+
+### Running 5G
+
+Usage with RFsimulator:
+
+**gNB**
+
+```bash
+sudo RFSIMULATOR=server ./nr-softmodem -O \
+../../../targets/PROJECTS/GENERIC-LTE-EPC/CONF/gnb.band78.tm1.106PRB.usrpn300.conf \
+--parallel-config PARALLEL_SINGLE_THREAD
+```
+
+**nrUE**
+
+```bash
+sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem --numerology 1 -r 106 -C \
+3510000000 -d
+```
diff --git a/doc/images/L2-sim-S1-3-host-deployment.png b/doc/images/L2-sim-S1-3-host-deployment.png
new file mode 100644
index 0000000000000000000000000000000000000000..4049ebd7a8ace60df576a988f32442526af7f13e
Binary files /dev/null and b/doc/images/L2-sim-S1-3-host-deployment.png differ
diff --git a/doc/images/L2-sim-noS1-2-host-deployment.png b/doc/images/L2-sim-noS1-2-host-deployment.png
new file mode 100644
index 0000000000000000000000000000000000000000..02d29b7b3cdfb05dc912548e001fb11aa1d96fc8
Binary files /dev/null and b/doc/images/L2-sim-noS1-2-host-deployment.png differ
diff --git a/executables/main-fs6.c b/executables/main-fs6.c
new file mode 100644
index 0000000000000000000000000000000000000000..ffa44bd978758cc13923499970ff8743e938543f
--- /dev/null
+++ b/executables/main-fs6.c
@@ -0,0 +1,1608 @@
+/*
+* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The OpenAirInterface Software Alliance licenses this file to You under
+* the OAI Public License, Version 1.1  (the "License"); you may not use this file
+* except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.openairinterface.org/?page_id=698
+*
+* Author and copyright: Laurent Thomas, open-cells.com
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*-------------------------------------------------------------------------------
+* For more information about the OpenAirInterface (OAI) Software Alliance:
+*      contact@openairinterface.org
+*/
+
+#include <stdint.h>
+#include <common/utils/LOG/log.h>
+#include <common/utils/system.h>
+#include <common/config/config_userapi.h>
+#include <targets/RT/USER/lte-softmodem.h>
+#include <openair1/PHY/defs_eNB.h>
+#include <openair1/PHY/phy_extern.h>
+#include <nfapi/oai_integration/vendor_ext.h>
+#include <openair1/SCHED/fapi_l1.h>
+#include <openair1/PHY/INIT/phy_init.h>
+#include <openair2/LAYER2/MAC/mac_extern.h>
+#include <openair1/PHY/LTE_REFSIG/lte_refsig.h>
+#include <nfapi/oai_integration/nfapi_pnf.h>
+#include <executables/split_headers.h>
+#include <nfapi/oai_integration/vendor_ext.h>
+#include <openair1/PHY/INIT/lte_init.c>
+#include <openair1/PHY/LTE_ESTIMATION/lte_estimation.h>
+#include <executables/split_headers.h>
+#include <openair1/PHY/CODING/coding_extern.h>
+#include <threadPool/thread-pool.h>
+#include <emmintrin.h>
+
+#define FS6_BUF_SIZE 1000*1000
+static UDPsock_t sockFS6;
+
+int sum(uint8_t *b, int s) { /* Return the sum of the first s bytes of b (0 when s <= 0) */
+  int sum=0;
+  /* plain byte-wise accumulation into an int; no modulo or folding is applied */
+  for (int i=0; i < s; i++)
+    sum+=b[i];
+
+  return sum;
+}
+
+static inline int cmpintRev(const void *a, const void *b) { /* qsort() comparator: orders uint64_t values from largest to smallest */
+  uint64_t *aa=(uint64_t *)a;
+  uint64_t *bb=(uint64_t *)b;
+  return (*bb>*aa)-(*bb<*aa); /* sign-only result (1/0/-1): casting the 64-bit difference to int could truncate to the wrong sign or to 0 */
+}
+
+static inline void printMeas2(char *txt, Meas *M, int period, bool MaxMin) { /* Print the statistics held in M, labelled with txt, but only when M->iterations is a multiple of period */
+  if (M->iterations%period == 0 ) { /* NOTE(review): period==0 or M->iterations==0 would divide by zero in this function; the callers visible in this part of the file increment iterations first */
+    char txt2[512]; /* NOTE(review): filled by an unbounded sprintf, so a very long txt could overflow it */
+    sprintf(txt2,"%s avg=%" PRIu64 " iterations=%" PRIu64 " %s=%"
+            PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 "\n",
+            txt,
+            M->sum/M->iterations, /* average of the accumulated samples */
+            M->iterations,
+            MaxMin?"max":"min", /* label of the ten values that follow */
+            M->maxArray[1],M->maxArray[2], M->maxArray[3],M->maxArray[4], M->maxArray[5], /* slots 1..10 only: slot 0 is not printed */
+            M->maxArray[6],M->maxArray[7], M->maxArray[8],M->maxArray[9],M->maxArray[10]);
+#if T_TRACER
+    LOG_W(PHY,"%s",txt2);
+#else
+    printf("%s",txt2);
+#endif
+  }
+}
+
+static inline void updateTimesReset(uint64_t start, Meas *M, int period, bool MaxMin, char *txt) { /* Record the time elapsed since the rdtsc() stamp `start` into M, report it through printMeas2(), and reset M every `period` samples */
+  if (start!=0) { /* start==0 is treated as "nothing to measure": M is left untouched */
+    uint64_t end=rdtsc();
+    long long diff=(end-start)/(cpuf*1000); /* tick delta scaled by cpuf*1000 - presumably microseconds, TODO confirm the unit of cpuf */
+    M->maxArray[0]=diff; /* slot 0 is the insertion slot: the new sample overwrites it before the sort */
+    M->sum+=diff;
+    M->iterations++;
+    /* Sort the 11 slots. With cmpintRev (min tracking) the largest value lands in slot 0, so slots 1..10 keep the ten smallest samples; cmpint is defined elsewhere and presumably sorts the other way round for max tracking. */
+    if ( MaxMin)
+      qsort(M->maxArray, 11, sizeof(uint64_t), cmpint);
+    else
+      qsort(M->maxArray, 11, sizeof(uint64_t), cmpintRev);
+
+    printMeas2(txt,M,period, MaxMin);
+    /* Start a fresh measurement window once the iteration count reaches a multiple of period */
+    if (M->iterations%period == 0 ) {
+      bzero(M,sizeof(*M));
+      /* min tracking: pre-fill every slot with a large value so that real samples replace it */
+      if (!MaxMin)
+        for (int i=0; i<11; i++)
+          M->maxArray[i]=INT_MAX;
+    }
+  }
+}
+
+static inline void measTransportTime(uint64_t DuSend, uint64_t CuMicroSec, Meas *M, int period, bool MaxMin, char *txt) { /* Same bookkeeping as updateTimesReset(), but the recorded sample is the time elapsed since DuSend minus CuMicroSec */
+  if (DuSend!=0) { /* DuSend==0 is treated as "nothing to measure": M is left untouched */
+    uint64_t end=rdtsc();
+    long long diff=(end-DuSend)/(cpuf*1000)-CuMicroSec; /* NOTE(review): goes negative when CuMicroSec exceeds the scaled elapsed time, yet maxArray is sorted and printed as uint64_t - confirm */
+    M->maxArray[0]=diff; /* slot 0 is the insertion slot: the new sample overwrites it before the sort */
+    M->sum+=diff;
+    M->iterations++;
+    /* Sort the 11 slots so that slots 1..10 keep the ten retained extremes (cmpint is defined elsewhere; cmpintRev puts the largest value in slot 0) */
+    if ( MaxMin)
+      qsort(M->maxArray, 11, sizeof(uint64_t), cmpint);
+    else
+      qsort(M->maxArray, 11, sizeof(uint64_t), cmpintRev);
+
+    printMeas2(txt,M,period, MaxMin);
+    /* Start a fresh measurement window once the iteration count reaches a multiple of period */
+    if (M->iterations%period == 0 ) {
+      bzero(M,sizeof(*M));
+      /* min tracking: pre-fill every slot with a large value so that real samples replace it */
+      if (!MaxMin)
+        for (int i=0; i<11; i++)
+          M->maxArray[i]=INT_MAX;
+    }
+  }
+}
+
+#define ceil16_bytes(a) ((((a+15)/16)*16)/8)
+
+static void fs6Dlunpack(void *out, void *in, int szUnpacked) { /* Expand the packed bits of `in` into one byte (0 or 1) per bit in `out`; szUnpacked is the unpacked size, i.e. the number of bits */
+  static uint64_t *lut=NULL; /* 256-entry table, built on the first call and never freed: lut[v] holds the 8 expanded bytes of byte value v */
+
+  if (!lut) { /* NOTE(review): the lazy initialisation has no locking and the malloc result is not checked */
+    lut=(uint64_t *) malloc(sizeof(*lut)*256);
+
+    for (int i=0; i <256; i++)
+      for (int j=0; j<8; j++)
+        ((uint8_t *)(lut+i))[7-j]=(i>>j)&1; /* bit j of i goes to byte 7-j: the most significant bit comes first in memory */
+  }
+
+  int64_t *out_64 = (int64_t *)out;
+  int sz=ceil16_bytes(szUnpacked); /* number of packed input bytes: szUnpacked rounded up to a multiple of 16 bits, divided by 8 */
+
+  for (int i=0; i<sz; i++)
+    out_64[i]=lut[((uint8_t *)in)[i]]; /* 8 output bytes per input byte, so `out` must hold sz*8 bytes */
+
+  return;
+}
+
+
+static void fs6Dlpack(void *out, void *in, int szUnpacked) { /* Compact `in` (one byte per bit) into packed bits in `out`: each iteration turns 16 input bytes into one uint16_t */
+  __m128i zeros=_mm_set1_epi8(0);
+  __m128i shuffle=_mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); /* reverses the byte order inside each 8-byte half, so the first input byte becomes the top bit of its packed byte */
+  const int loop=ceil16_bytes(szUnpacked)/sizeof(uint16_t); /* number of 16-bit output words, i.e. szUnpacked rounded up to a multiple of 16 input bytes */
+  __m128i *iter=(__m128i *)in; /* NOTE(review): `in` is dereferenced as __m128i, so it presumably has to be 16-byte aligned - confirm with callers */
+  /* An input byte counts as 1 when it is > 0 as a signed byte; movemask then gathers one bit per byte */
+  for (int i=0; i < loop; i++) {
+    __m128i tmp=_mm_shuffle_epi8(_mm_cmpgt_epi8(*iter++,zeros),shuffle);
+    ((uint16_t *)out)[i]=(uint16_t)_mm_movemask_epi8(tmp);
+  }
+}
+
+void prach_eNB_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) { /* Run PRACH detection (ocp_rx_prach) with the fs6_ul_t header located in bufferZone as result storage; bufSize is not used in this function */
+  fs6_ul_t *header=(fs6_ul_t *) commonUDPdata(bufferZone);
+  /* Nothing to do when is_prach_subframe() reports no PRACH for proc->frame_prach / proc->subframe_prach */
+  if (is_prach_subframe(&eNB->frame_parms, proc->frame_prach,proc->subframe_prach)<=0)
+    return;
+
+  RU_t *ru;
+  int aa=0;
+  int ru_aa;
+  /* Point the eNB PRACH input buffers (regular ones and the 4 per-CE-level "br" ones) at the receive buffers of each RU */
+  for (int i=0; i<eNB->num_RU; i++) {
+    ru=eNB->RU_list[i];
+    /* NOTE(review): aa restarts at 0 for every RU, so with several RUs the later ones overwrite the earlier entries - confirm this is intended */
+    for (ru_aa=0,aa=0; ru_aa<ru->nb_rx; ru_aa++,aa++) {
+      eNB->prach_vars.rxsigF[0][aa] = eNB->RU_list[i]->prach_rxsigF[ru_aa];
+      int ce_level;
+
+      for (ce_level=0; ce_level<4; ce_level++)
+        eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[i]->prach_rxsigF_br[ce_level][ru_aa];
+    }
+  }
+  /* The header arrays are handed to ocp_rx_prach and logged right after, so they are presumably filled by it - TODO confirm */
+  ocp_rx_prach(eNB,
+               proc,
+               eNB->RU_list[0],
+               header->max_preamble,
+               header->max_preamble_energy,
+               header->max_preamble_delay,
+               header->avg_preamble_energy,
+               proc->frame_prach,
+               0,
+               false
+              );
+  // run PRACH detection for CE-level 0 only for now when br_flag is set
+  /* fixme: seems not operational and may overwrite regular LTE prach detection
+   * OAI code can call is sequence
+  rx_prach(eNB,
+           eNB->RU_list[0],
+           header->max_preamble,
+           header->max_preamble_energy,
+           header->max_preamble_delay,
+           header->avg_preamble_energy,
+           frame,
+           0,
+           true
+          );
+  */
+  LOG_D(PHY,"RACH detection index 0: max preamble: %u, energy: %u, delay: %u, avg energy: %u\n",
+        header->max_preamble[0],
+        header->max_preamble_energy[0],
+        header->max_preamble_delay[0],
+        header->avg_preamble_energy[0]
+       );
+  return;
+}
+
+/*
+ * Turn the PRACH detection results carried in the UL split header into a
+ * RACH indication.
+ *
+ * The results (max_preamble, max_preamble_energy, max_preamble_delay,
+ * avg_preamble_energy) are read from the fs6_ul_t header located by
+ * commonUDPdata(bufferZone). Three outcomes are possible:
+ *  - br_flag==1: fills eNB->preamble_list_br; br_flag is hard-coded to 0
+ *    below, so this branch is never executed as written;
+ *  - prach_energy_counter==100 and max_preamble_energy[0] above
+ *    prach_I0 + prach_DTX_threshold: fills eNB->UL_INFO.rach_ind with one
+ *    preamble while holding eNB->UL_INFO_mutex, and in PNF mode sends it with
+ *    oai_nfapi_rach_ind() and resets number_of_preambles to 0;
+ *  - otherwise: updates the noise estimate prach_I0 and increments
+ *    prach_energy_counter until it reaches 100.
+ *
+ * bufSize is not used.
+ */
+void prach_eNB_fromsplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) {
+  fs6_ul_t *header=(fs6_ul_t *) commonUDPdata(bufferZone);
+  uint16_t *max_preamble=header->max_preamble;
+  uint16_t *max_preamble_energy=header->max_preamble_energy;
+  uint16_t *max_preamble_delay=header->max_preamble_delay;
+  uint16_t *avg_preamble_energy=header->avg_preamble_energy;
+  int subframe=proc->subframe_prach;
+  int frame=proc->frame_prach;
+  // Fixme: not clear why we call twice with "br" and without
+  // br_flag is constant 0: the "br" branch below is currently dead code
+  int br_flag=0;
+
+  if (br_flag==1) {
+    int             prach_mask;
+    prach_mask = is_prach_subframe (&eNB->frame_parms, proc->frame_prach_br, proc->subframe_prach_br);
+    eNB->UL_INFO.rach_ind_br.rach_indication_body.preamble_list = eNB->preamble_list_br;
+    // only entry 0 / CE level 0 is handled, the loop over CE levels is disabled
+    int             ind = 0;
+    int             ce_level = 0;
+    /* Save for later, it doesn't work
+       for (int ind=0,ce_level=0;ce_level<4;ce_level++) {
+
+       if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1)&&
+       (prach_mask&(1<<(1+ce_level)) > 0) && // prach is active and CE level has finished its repetitions
+       (eNB->prach_vars_br.repetition_number[ce_level]==
+       eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) {
+
+    */
+
+    if (eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[0] == 1) {
+      if ((eNB->prach_energy_counter == 100) && (max_preamble_energy[0] > eNB->measurements.prach_I0 + eNB->prach_DTX_threshold_emtc[0])) {
+        eNB->UL_INFO.rach_ind_br.rach_indication_body.number_of_preambles++;
+        eNB->preamble_list_br[ind].preamble_rel8.timing_advance = max_preamble_delay[ind];      //
+        eNB->preamble_list_br[ind].preamble_rel8.preamble = max_preamble[ind];
+        // note: fid is implicitly 0 here, this is the rule for eMTC RA-RNTI from 36.321, Section 5.1.4
+        eNB->preamble_list_br[ind].preamble_rel8.rnti = 1 + subframe + (60*(eNB->prach_vars_br.first_frame[ce_level] % 40));
+        eNB->preamble_list_br[ind].instance_length = 0; //don't know exactly what this is
+        eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type = 1 + ce_level;    // CE Level
+        LOG_I (PHY, "Filling NFAPI indication for RACH %d CELevel %d (mask %x) : TA %d, Preamble %d, rnti %x, rach_resource_type %d\n",
+               ind,
+               ce_level,
+               prach_mask,
+               eNB->preamble_list_br[ind].preamble_rel8.timing_advance,
+               eNB->preamble_list_br[ind].preamble_rel8.preamble, eNB->preamble_list_br[ind].preamble_rel8.rnti, eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type);
+      }
+    }
+
+    /*
+      ind++;
+      }
+      } */// ce_level
+  } else if ((eNB->prach_energy_counter == 100) &&
+             (max_preamble_energy[0] > eNB->measurements.prach_I0+eNB->prach_DTX_threshold)) {
+    // the energy is logged as integer part and one decimal (value/10 and value%10)
+    LOG_I(PHY,"[eNB %d/%d][RAPROC] Frame %d, subframe %d Initiating RA procedure with preamble %d, energy %d.%d dB, delay %d\n",
+          eNB->Mod_id,
+          eNB->CC_id,
+          frame,
+          subframe,
+          max_preamble[0],
+          max_preamble_energy[0]/10,
+          max_preamble_energy[0]%10,
+          max_preamble_delay[0]);
+    // UL_INFO is filled under its mutex
+    pthread_mutex_lock(&eNB->UL_INFO_mutex);
+    eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles  = 1;
+    eNB->UL_INFO.rach_ind.rach_indication_body.preamble_list        = &eNB->preamble_list[0];
+    eNB->UL_INFO.rach_ind.rach_indication_body.tl.tag               = NFAPI_RACH_INDICATION_BODY_TAG;
+    eNB->UL_INFO.rach_ind.header.message_id                         = NFAPI_RACH_INDICATION;
+    eNB->UL_INFO.rach_ind.sfn_sf                                    = frame<<4 | subframe;
+    eNB->preamble_list[0].preamble_rel8.tl.tag                = NFAPI_PREAMBLE_REL8_TAG;
+    eNB->preamble_list[0].preamble_rel8.timing_advance        = max_preamble_delay[0];
+    eNB->preamble_list[0].preamble_rel8.preamble              = max_preamble[0];
+    eNB->preamble_list[0].preamble_rel8.rnti                  = 1+subframe;  // note: fid is implicitly 0 here
+    eNB->preamble_list[0].preamble_rel13.rach_resource_type   = 0;
+    eNB->preamble_list[0].instance_length                     = 0; //don't know exactly what this is
+
+    if (NFAPI_MODE==NFAPI_MODE_PNF) {  // If NFAPI PNF then we need to send the message to the VNF
+      LOG_D(PHY,"Filling NFAPI indication for RACH : SFN_SF:%d TA %d, Preamble %d, rnti %x, rach_resource_type %d\n",
+            NFAPI_SFNSF2DEC(eNB->UL_INFO.rach_ind.sfn_sf),
+            eNB->preamble_list[0].preamble_rel8.timing_advance,
+            eNB->preamble_list[0].preamble_rel8.preamble,
+            eNB->preamble_list[0].preamble_rel8.rnti,
+            eNB->preamble_list[0].preamble_rel13.rach_resource_type);
+      oai_nfapi_rach_ind(&eNB->UL_INFO.rach_ind);
+      // the indication has been sent, nothing is left pending in UL_INFO
+      eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 0;
+    }
+
+    pthread_mutex_unlock(&eNB->UL_INFO_mutex);
+  } // max_preamble_energy[0] > prach_I0 + prach_DTX_threshold
+  else {
+    // no detection: prach_I0 = prach_I0*900/1024 + avg_preamble_energy[0]*124/1024
+    eNB->measurements.prach_I0 = ((eNB->measurements.prach_I0*900)>>10) + ((avg_preamble_energy[0]*124)>>10);
+
+    // detection is only enabled once this counter has reached 100
+    if (eNB->prach_energy_counter < 100)
+      eNB->prach_energy_counter++;
+  }
+}
+
+/*
+ * Append one UL control element (HARQ or SR indication) to the UL buffer
+ * eNB->FS6bufferZone.
+ *
+ * The element is stored in a block of type fs6ULcch. The element receives:
+ * type, UEid, frame, subframe, a copy of *uci (or ue_id=0xFFFF when uci is
+ * NULL), 4 bytes of harq_ack (left untouched when harq_ack is NULL),
+ * tdd_mapping_mode, tdd_multiplexing_mask,
+ * eNB->measurements.n0_subband_power_dB[0][0], rnti and stat. The block's
+ * nb_active_ue and contentBytes are then increased accordingly.
+ *
+ * NOTE(review): current_fsf is never assigned in this function, so
+ * "current_fsf != fsf" is true for every fsf other than -1 and each call
+ * creates a new block; the "else" path (reuse of the last block) looks
+ * unreachable in practice -- confirm whether "current_fsf=fsf" is missing.
+ */
+void sendFs6Ulharq(enum pckType type, int UEid, PHY_VARS_eNB *eNB, LTE_eNB_UCI *uci, int frame, int subframe, uint8_t *harq_ack, uint8_t tdd_mapping_mode, uint16_t tdd_multiplexing_mask,
+                   uint16_t rnti,
+                   int32_t stat) {
+  static int current_fsf=-1;
+  int fsf=frame*16+subframe;
+  uint8_t *bufferZone=eNB->FS6bufferZone;
+  commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone;
+  // move to the end
+  uint8_t *firstFreeByte=bufferZone;
+  int curBlock=0;
+
+  if ( current_fsf != fsf ) {
+    // skip every existing block (block IDs must be 0,1,2,...) to reach the free space
+    for (int i=0; i < FirstUDPheader->nbBlocks; i++) {
+      AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,"");
+      firstFreeByte+=alignedSize(firstFreeByte);
+      curBlock++;
+    }
+
+    // create a new, empty fs6ULcch block; the block count lives in the first header
+    commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte;
+    FirstUDPheader->nbBlocks++;
+    newUDPheader->blockID=curBlock;
+    newUDPheader->contentBytes=sizeof(fs6_ul_t)+sizeof(fs6_ul_uespec_uci_t);
+    hULUEuci(newUDPheader)->type=fs6ULcch;
+    hULUEuci(newUDPheader)->nb_active_ue=0;
+  } else
+    // stop on the last existing block, which is then extended below
+    for (int i=0; i < FirstUDPheader->nbBlocks-1; i++) {
+      AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,"");
+      firstFreeByte+=alignedSize(firstFreeByte);
+      curBlock++;
+    }
+
+  LOG_D(PHY,"FS6 du, block: %d: adding ul harq/sr: %d, rnti: %d, ueid: %d\n",
+        curBlock, type, rnti, UEid);
+  commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte;
+  // the elements follow the block's UCI header; go to the first unused one
+  fs6_ul_uespec_uci_element_t *tmp=(fs6_ul_uespec_uci_element_t *)(hULUEuci(newUDPheader)+1);
+  tmp+=hULUEuci(newUDPheader)->nb_active_ue;
+  tmp->type=type;
+  tmp->UEid=UEid;
+  tmp->frame=frame;
+  tmp->subframe=subframe;
+
+  if (uci != NULL)
+    memcpy(&tmp->uci, uci, sizeof(*uci));
+  else
+    // 0xFFFF marks "no uci attached"; recvFs6Ul tests this value before copying
+    tmp->uci.ue_id=0xFFFF;
+
+  if (harq_ack != NULL)
+    memcpy(tmp->harq_ack, harq_ack, 4);
+
+  tmp->tdd_mapping_mode=tdd_mapping_mode;
+  tmp->tdd_multiplexing_mask=tdd_multiplexing_mask;
+  tmp->n0_subband_power_dB=eNB->measurements.n0_subband_power_dB[0][0];
+  tmp->rnti=rnti;
+  tmp->stat=stat;
+  // account for the new element in the block
+  hULUEuci(newUDPheader)->nb_active_ue++;
+  newUDPheader->contentBytes+=sizeof(fs6_ul_uespec_uci_element_t);
+}
+
+
+/*
+ * Append one ULSCH data block (type fs6ULsch) to the UL buffer
+ * eNB->FS6bufferZone.
+ *
+ * The new block is placed after all existing blocks. Its UE header receives
+ * UE_id, harq_pid, the two ulsch_power values of the UE, the HARQ process
+ * fields cqi_crc_status, O_ACK, o_ACK, o and G, the timing advance returned
+ * by lte_est_timing_advance_pusch(), segmentID, dataLen (as segLen) and
+ * r_offset. The dataLen bytes of data are copied right behind that header.
+ */
+void sendFs6Ul(PHY_VARS_eNB *eNB, int UE_id, int harq_pid, int segmentID, int16_t *data, int dataLen, int r_offset) {
+  commonUDP_t *firstHeader=(commonUDP_t *) eNB->FS6bufferZone;
+  uint8_t *cursor=eNB->FS6bufferZone;
+  int blockIdx;
+
+  // walk over the blocks already present; their IDs must be 0,1,2,...
+  for (blockIdx=0; blockIdx < firstHeader->nbBlocks; blockIdx++) {
+    AssertFatal( ((commonUDP_t *) cursor)->blockID==blockIdx,"");
+    cursor+=alignedSize(cursor);
+  }
+
+  // common part of the new block
+  commonUDP_t *newUDPheader=(commonUDP_t *) cursor;
+  firstHeader->nbBlocks++;
+  newUDPheader->blockID=blockIdx;
+  newUDPheader->contentBytes=sizeof(fs6_ul_t)+sizeof(fs6_ul_uespec_t) + dataLen;
+  // UE specific part
+  LTE_UL_eNB_HARQ_t *harq=eNB->ulsch[UE_id]->harq_processes[harq_pid];
+  hULUE(newUDPheader)->type=fs6ULsch;
+  hULUE(newUDPheader)->UE_id=UE_id;
+  hULUE(newUDPheader)->harq_id=harq_pid;
+  memcpy(hULUE(newUDPheader)->ulsch_power,
+         eNB->pusch_vars[UE_id]->ulsch_power,
+         sizeof(int)*2);
+  hULUE(newUDPheader)->cqi_crc_status=harq->cqi_crc_status;
+  hULUE(newUDPheader)->O_ACK=harq->O_ACK;
+  memcpy(hULUE(newUDPheader)->o_ACK, harq->o_ACK, sizeof(harq->o_ACK));
+  hULUE(newUDPheader)->ta=lte_est_timing_advance_pusch(eNB, UE_id);
+  hULUE(newUDPheader)->segment=segmentID;
+  memcpy(hULUE(newUDPheader)->o, harq->o, sizeof(harq->o));
+  // payload goes immediately after the UE header
+  memcpy(hULUE(newUDPheader)+1, data, dataLen);
+  hULUE(newUDPheader)->segLen=dataLen;
+  hULUE(newUDPheader)->r_offset=r_offset;
+  hULUE(newUDPheader)->G=harq->G;
+}
+
+/*
+ * PUSCH reception for the current RX subframe: for every UE with an active
+ * and not yet handled ULSCH HARQ process scheduled for this frame/subframe
+ * (or with repetition_number > 1), mark the RBs in eNB->rb_mask_ul, compute
+ * the cyclic shift, then run rx_ulsch() and ulsch_decoding().
+ *
+ * bufferZone is stored in eNB->FS6bufferZone before rx_ulsch() is called.
+ * bufSize is not used.
+ *
+ * Entries of eNB->ulsch[] that are NULL are skipped before any of their
+ * fields is read (the previous code tested the pointer only after having
+ * dereferenced it).
+ */
+void pusch_procedures_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) {
+  uint32_t harq_pid;
+  LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms;
+  const int subframe = proc->subframe_rx;
+  const int frame    = proc->frame_rx;
+
+  for (int i = 0; i < NUMBER_OF_UE_MAX; i++) {
+    LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i];
+
+    // nothing to do for an unallocated ULSCH context
+    if (ulsch == NULL)
+      continue;
+
+    // UEs with ue_type above NOCE always use HARQ process 0
+    if (ulsch->ue_type > NOCE)
+      harq_pid = 0;
+    else
+      harq_pid= subframe2harq_pid(&eNB->frame_parms,frame,subframe);
+
+    LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid];
+
+    if (ulsch->rnti>0)
+      LOG_D(PHY,"eNB->ulsch[%d]->harq_processes[harq_pid:%d] SFN/SF:%04d%d: PUSCH procedures, UE %d/%x ulsch_harq[status:%d SFN/SF:%04d%d active: %d handled:%d]\n",
+            i, harq_pid, frame,subframe,i,ulsch->rnti,
+            ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->status, ulsch_harq->handled);
+
+    if ((ulsch->rnti>0) &&
+        (ulsch_harq->status == ACTIVE) &&
+        ((ulsch_harq->frame == frame)	    || (ulsch_harq->repetition_number >1) ) &&
+        ((ulsch_harq->subframe == subframe) || (ulsch_harq->repetition_number >1) ) &&
+        (ulsch_harq->handled == 0)) {
+      // UE has ULSCH scheduling: flag its RBs, 32 RBs per mask word
+      // NOTE(review): the bound is inclusive, so nb_rb+1 RBs are flagged
+      for (int rb=0;
+           rb<=ulsch_harq->nb_rb;
+           rb++) {
+        int rb2 = rb+ulsch_harq->first_rb;
+        eNB->rb_mask_ul[rb2>>5] |= (1<<(rb2&31));
+      }
+
+      LOG_D(PHY,"[eNB %d] frame %d, subframe %d: Scheduling ULSCH Reception for UE %d \n",
+            eNB->Mod_id, frame, subframe, i);
+      uint8_t nPRS= fp->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[subframe<<1];
+      ulsch->cyclicShift = (ulsch_harq->n_DMRS2 +
+                            fp->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
+                            nPRS)%12;
+      AssertFatal(ulsch_harq->TBS>0,"illegal TBS %d\n",ulsch_harq->TBS);
+      LOG_D(PHY,
+            "[eNB %d][PUSCH %d] Frame %d Subframe %d Demodulating PUSCH: dci_alloc %d, rar_alloc %d, round %d, first_rb %d, nb_rb %d, Qm %d, TBS %d, rv %d, cyclic_shift %d (n_DMRS2 %d, cyclicShift_common %d, ), O_ACK %d, beta_cqi %d \n",
+            eNB->Mod_id,harq_pid,frame,subframe,
+            ulsch_harq->dci_alloc,
+            ulsch_harq->rar_alloc,
+            ulsch_harq->round,
+            ulsch_harq->first_rb,
+            ulsch_harq->nb_rb,
+            ulsch_harq->Qm,
+            ulsch_harq->TBS,
+            ulsch_harq->rvidx,
+            ulsch->cyclicShift,
+            ulsch_harq->n_DMRS2,
+            fp->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift,
+            ulsch_harq->O_ACK,
+            ulsch->beta_offset_cqi_times8);
+      start_meas(&eNB->ulsch_demodulation_stats);
+      // make the split buffer reachable through the eNB context
+      eNB->FS6bufferZone=bufferZone;
+      rx_ulsch(eNB, proc, i);
+      stop_meas(&eNB->ulsch_demodulation_stats);
+      // TBD: add datablock for transmission
+      start_meas(&eNB->ulsch_decoding_stats);
+      ulsch_decoding(eNB,proc,
+                     i,
+                     0, // control_only_flag
+                     ulsch_harq->V_UL_DAI,
+                     ulsch_harq->nb_rb>20 ? 1 : 0);
+      stop_meas(&eNB->ulsch_decoding_stats);
+    }
+  }
+}
+
+/*
+ * UE specific RX processing for the current RX subframe.
+ *
+ * Returns immediately in TDD when the subframe is not an UL subframe.
+ * Otherwise: clears the UL RB mask and the rx/crc indication counters, runs
+ * srs_procedures() and uci_procedures(), runs pusch_procedures_tosplit() in
+ * monolithic or PNF mode, and finally updates the I0 measurements. At frame 0
+ * subframe 4 the minimum and maximum of n0_subband_power_tot_dB are logged.
+ */
+void phy_procedures_eNB_uespec_RX_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) {
+  LTE_DL_FRAME_PARMS *fp = &eNB->frame_parms;
+  const int subframe = proc->subframe_rx;
+  const int frame = proc->frame_rx;
+  /* TODO: use correct rxdata */
+
+  if (fp->frame_type == TDD) {
+    if (subframe_select(fp,subframe)!=SF_UL)
+      return;
+  }
+
+  LOG_D (PHY, "[eNB %d] Frame %d: Doing phy_procedures_eNB_uespec_RX(%d)\n", eNB->Mod_id, frame, subframe);
+
+  // start from an empty UL RB allocation mask (4 words)
+  for (int word=0; word<4; word++)
+    eNB->rb_mask_ul[word] = 0;
+
+  // Fix me here, these should be locked
+  eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus  = 0;
+  eNB->UL_INFO.crc_ind.crc_indication_body.number_of_crcs = 0;
+  // SRS goes first since all others depend on presence of SRS or lack thereof
+  srs_procedures (eNB, proc);
+  eNB->first_run_I0_measurements = 0;
+  uci_procedures (eNB, proc);
+
+  // PUSCH is processed here only in monolithic or PNF mode
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF)
+    pusch_procedures_tosplit(bufferZone, bufSize, eNB,proc);
+
+  lte_eNB_I0_measurements (eNB, subframe, 0, eNB->first_run_I0_measurements);
+
+  if ((frame==0) && (subframe==4)) {
+    int lowest_I0=1000;
+    int highest_I0=0;
+    const int skipFrom=(fp->N_RB_UL>>1) - 1;
+
+    for (int rb=0; rb<fp->N_RB_UL; rb++) {
+      // two RBs around the middle of the band are left out
+      if (rb==skipFrom)
+        rb+=2;
+
+      if (eNB->measurements.n0_subband_power_tot_dB[rb]<lowest_I0)
+        lowest_I0 = eNB->measurements.n0_subband_power_tot_dB[rb];
+
+      if (eNB->measurements.n0_subband_power_tot_dB[rb]>highest_I0)
+        highest_I0 = eNB->measurements.n0_subband_power_tot_dB[rb];
+    }
+
+    LOG_I (PHY, "max_I0 %d, min_I0 %d\n", highest_I0, lowest_I0);
+  }
+}
+
+
+/*
+ * Add one RX indication PDU for UE_id to eNB->UL_INFO.rx_ind.
+ *
+ * While holding eNB->UL_INFO_mutex the next free entry of rx_pdu_list is
+ * filled with the UE rnti, the decoded bytes and their length (TBS>>3), a
+ * timing advance command and an UL CQI, then number_of_pdus is incremented.
+ *
+ * Timing advance: ul_propa[UE_id].ta is divided by a factor that depends on
+ * N_RB_DL (6:1, 15:2, 25:4, 50:8, 75:12, 100:16; any other value aborts),
+ * offset by 31 and clamped to 0..63.
+ * UL CQI: computed from the antenna 0 ulsch_power and
+ * n0_subband_power_dB[0][0], mapped to 0..255.
+ *
+ * bufferZone is not used.
+ */
+void fill_rx_indication_from_split(uint8_t *bufferZone, PHY_VARS_eNB *eNB,int UE_id,int frame,int subframe, ul_propagation_t *ul_propa) {
+  // HARQ process 0 when ue_type is > 0, the subframe derived process otherwise
+  uint32_t harq_pid = 0;
+
+  if (!(eNB->ulsch[UE_id]->ue_type > 0))
+    harq_pid = subframe2harq_pid (&eNB->frame_parms,
+                                  frame, subframe);
+
+  pthread_mutex_lock(&eNB->UL_INFO_mutex);
+  LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[UE_id];
+  LTE_UL_eNB_HARQ_t *harq = ulsch->harq_processes[harq_pid];
+  eNB->UL_INFO.rx_ind.sfn_sf                    = frame<<4| subframe;
+  eNB->UL_INFO.rx_ind.rx_indication_body.tl.tag = NFAPI_RX_INDICATION_BODY_TAG;
+  // next free slot of the PDU list
+  nfapi_rx_indication_pdu_t *pdu = &eNB->UL_INFO.rx_ind.rx_indication_body.rx_pdu_list[eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus];
+  //  pdu->rx_ue_information.handle          = eNB->ulsch[UE_id]->handle;
+  pdu->rx_ue_information.tl.tag  = NFAPI_RX_UE_INFORMATION_TAG;
+  pdu->rx_ue_information.rnti    = ulsch->rnti;
+  pdu->rx_indication_rel8.tl.tag = NFAPI_RX_INDICATION_REL8_TAG;
+  pdu->rx_indication_rel8.length = harq->TBS>>3;
+  pdu->rx_indication_rel8.offset = 1;   // DJP - I dont understand - but broken unless 1 ????  0;  // filled in at the end of the UL_INFO formation
+  pdu->data                      = harq->decodedBytes;
+  // timing advance for MAC, scaled according to the bandwidth
+  int taCommand = ul_propa[UE_id].ta;
+  int taDivisor = 1;
+
+  //  if (timing_advance_update > 10) { dump_ulsch(eNB,frame,subframe,UE_id); exit(-1);}
+  //  if (timing_advance_update < -10) { dump_ulsch(eNB,frame,subframe,UE_id); exit(-1);}
+  switch (eNB->frame_parms.N_RB_DL) {
+    case 6:
+      taDivisor = 1;
+      break;
+
+    case 15:
+      taDivisor = 2;
+      break;
+
+    case 25:
+      taDivisor = 4;
+      break;
+
+    case 50:
+      taDivisor = 8;
+      break;
+
+    case 75:
+      taDivisor = 12;
+      break;
+
+    case 100:
+      taDivisor = 16;
+      break;
+
+    default:
+      abort ();
+  }
+
+  taCommand /= taDivisor;
+  // put timing advance command in 0..63 range
+  taCommand += 31;
+
+  if (taCommand < 0)
+    taCommand = 0;
+  else if (taCommand > 63)
+    taCommand = 63;
+
+  pdu->rx_indication_rel8.timing_advance = taCommand;
+  // UL_CQI for MAC (from antenna port 0 only)
+  const int snrTimes10 = dB_fixed_times10(eNB->pusch_vars[UE_id]->ulsch_power[0]) - 10 * eNB->measurements.n0_subband_power_dB[0][0];
+  pdu->rx_indication_rel8.ul_cqi = (snrTimes10 < -640) ? 0 :
+                                   (snrTimes10 > 635)  ? 255 :
+                                   (640 + snrTimes10) / 5;
+  LOG_D(PHY,"[PUSCH %d] Frame %d Subframe %d Filling RX_indication with SNR %d (%d), timing_advance %d (update %d)\n",
+        harq_pid,frame,subframe,snrTimes10,pdu->rx_indication_rel8.ul_cqi,pdu->rx_indication_rel8.timing_advance,
+        taCommand);
+  eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus++;
+  eNB->UL_INFO.rx_ind.sfn_sf = frame<<4 | subframe;
+  pthread_mutex_unlock(&eNB->UL_INFO_mutex);
+}
+
+/*
+ * PUSCH decoding for the current RX subframe.
+ *
+ * For every UE whose HARQ process is ACTIVE and scheduled for exactly this
+ * frame/subframe:
+ *  - handled==0: flag the RBs in eNB->rb_mask_ul, compute the segmentation
+ *    parameters with lte_segmentation() and start ulsch_decoding_data();
+ *  - handled==1: the process is considered stale and is released.
+ * Afterwards every pending decoding job (proc->nbDecode) is collected from
+ * the thread pool and passed to postDecode().
+ *
+ * bufferZone, bufSize and ul_propa are not used in this function.
+ *
+ * Entries of eNB->ulsch[] that are NULL are skipped before any of their
+ * fields is read (the previous code tested the pointer only after having
+ * dereferenced it).
+ */
+void pusch_procedures_fromsplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, ul_propagation_t *ul_propa) {
+  //LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms;
+  const int subframe = proc->subframe_rx;
+  const int frame    = proc->frame_rx;
+  uint32_t harq_pid;
+  uint32_t harq_pid0 = subframe2harq_pid(&eNB->frame_parms,frame,subframe);
+
+  for (int i = 0; i < NUMBER_OF_UE_MAX; i++) {
+    LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i];
+
+    // nothing to do for an unallocated ULSCH context
+    if (ulsch == NULL)
+      continue;
+
+    // UEs with ue_type above NOCE always use HARQ process 0
+    if (ulsch->ue_type > NOCE) harq_pid = 0;
+    else harq_pid=harq_pid0;
+
+    LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid];
+
+    if (ulsch->rnti>0)
+      LOG_D(PHY,"eNB->ulsch[%d]->harq_processes[harq_pid:%d] SFN/SF:%04d%d: PUSCH procedures, UE %d/%x ulsch_harq[status:%d SFN/SF:%04d%d handled:%d]\n",
+            i, harq_pid, frame,subframe,i,ulsch->rnti,
+            ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->handled);
+
+    // condition shared by the "decode" and the "stale" cases
+    const bool scheduledNow = (ulsch->rnti>0) &&
+                              (ulsch_harq->status == ACTIVE) &&
+                              (ulsch_harq->frame == frame) &&
+                              (ulsch_harq->subframe == subframe);
+
+    if (scheduledNow && (ulsch_harq->handled == 0)) {
+      // UE has ULSCH scheduling: flag its RBs, 32 RBs per mask word
+      // NOTE(review): the bound is inclusive, so nb_rb+1 RBs are flagged
+      for (int rb=0;
+           rb<=ulsch_harq->nb_rb;
+           rb++) {
+        int rb2 = rb+ulsch_harq->first_rb;
+        eNB->rb_mask_ul[rb2>>5] |= (1<<(rb2&31));
+      }
+
+      start_meas(&eNB->ulsch_decoding_stats);
+      // This is a new packet, so compute quantities regarding segmentation
+      ulsch_harq->B = ulsch_harq->TBS+24;
+      lte_segmentation(NULL,
+                       NULL,
+                       ulsch_harq->B,
+                       &ulsch_harq->C,
+                       &ulsch_harq->Cplus,
+                       &ulsch_harq->Cminus,
+                       &ulsch_harq->Kplus,
+                       &ulsch_harq->Kminus,
+                       &ulsch_harq->F);
+      ulsch_decoding_data(eNB, proc, i, harq_pid,
+                          ulsch_harq->nb_rb>20 ? 1 : 0);
+      stop_meas(&eNB->ulsch_decoding_stats);
+    } else if (scheduledNow && (ulsch_harq->handled == 1)) {
+      // this harq process is stale, kill it, this 1024 frames later (10s), consider reducing that
+      ulsch_harq->status = SCH_IDLE;
+      ulsch_harq->handled = 0;
+      ulsch->harq_mask &= ~(1 << harq_pid);
+      LOG_W (PHY, "Removing stale ULSCH config for UE %x harq_pid %d (harq_mask is now 0x%2.2x)\n", ulsch->rnti, harq_pid, ulsch->harq_mask);
+    }
+  }   //   for (i=0; i<NUMBER_OF_UE_MAX; i++)
+
+  // collect the result of every decoding job that is still pending
+  while (proc->nbDecode > 0) {
+    notifiedFIFO_elt_t *req=pullTpool(proc->respDecode, proc->threadPool);
+    postDecode(proc, req);
+    delNotifiedFIFO_elt(req);
+  }
+}
+
+/*
+ * Decode the UL blocks received from the other side of the split.
+ *
+ * bufferZone holds nbBlocks consecutive blocks. Blocks whose contentBytes
+ * does not exceed sizeof(fs6_ul_t) carry nothing more and are skipped.
+ *  - fs6ULsch: the soft bits are copied into the HARQ process (eUL at
+ *    r_offset), together with ulsch_power, G, cqi_crc_status, o_ACK and o;
+ *    the timing advance is stored in ul_propa[UE_id].ta.
+ *  - fs6ULcch: every element updates n0_subband_power_dB[0][0], optionally
+ *    restores the UCI context (ue_id != 0xFFFF) and generates a HARQ or SR
+ *    indication.
+ *
+ * The indices found in the packet are checked before they are used on the
+ * local arrays: UE_id must be below NUMBER_OF_UE_MAX and the UCI index below
+ * NUMBER_OF_UCI_VARS_MAX. An out of range value is reported with LOG_E and
+ * the block/element is ignored.
+ * NOTE(review): harq_id, r_offset and segLen are still used as received,
+ * their valid ranges are not visible from here.
+ */
+void recvFs6Ul(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, ul_propagation_t *ul_propa) {
+  void *bufPtr=bufferZone;
+
+  for (int i=0; i < nbBlocks; i++) { //nbBlocks is the actual received blocks
+    if ( ((commonUDP_t *)bufPtr)->contentBytes > sizeof(fs6_ul_t) ) {
+      int type=hULUE(bufPtr)->type;
+
+      if ( type == fs6ULsch)  {
+        const int ueId=hULUE(bufPtr)->UE_id;
+
+        if (ueId < 0 || ueId >= NUMBER_OF_UE_MAX) {
+          LOG_E(PHY, "FS6 ul: ULSCH block %d carries out of range UE id %d, block ignored\n", i, ueId);
+        } else {
+          LTE_eNB_ULSCH_t *ulsch =eNB->ulsch[ueId];
+          LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[hULUE(bufPtr)->harq_id];
+          // the payload follows the UE header
+          memcpy(ulsch_harq->eUL+hULUE(bufPtr)->r_offset,
+                 hULUE(bufPtr)+1,
+                 hULUE(bufPtr)->segLen);
+          memcpy(eNB->pusch_vars[ueId]->ulsch_power,
+                 hULUE(bufPtr)->ulsch_power,
+                 sizeof(int)*2);
+          ulsch_harq->G=hULUE(bufPtr)->G;
+          ulsch_harq->cqi_crc_status=hULUE(bufPtr)->cqi_crc_status;
+          //ulsch_harq->O_ACK= hULUE(bufPtr)->O_ACK;
+          memcpy(ulsch_harq->o_ACK, hULUE(bufPtr)->o_ACK,
+                 sizeof(ulsch_harq->o_ACK));
+          memcpy(ulsch_harq->o,hULUE(bufPtr)->o, sizeof(ulsch_harq->o));
+          ul_propa[ueId].ta=hULUE(bufPtr)->ta;
+          LOG_D(PHY,"Received ulsch data for: rnti:%x, cqi_crc_status %d O_ACK: %d, segment: %d, seglen: %d  \n",
+                ulsch->rnti, ulsch_harq->cqi_crc_status, ulsch_harq->O_ACK,hULUE(bufPtr)->segment, hULUE(bufPtr)->segLen);
+        }
+      } else if ( type == fs6ULcch ) {
+        int nb_uci=hULUEuci(bufPtr)->nb_active_ue;
+        // the elements follow the UCI header of the block
+        fs6_ul_uespec_uci_element_t *tmp=(fs6_ul_uespec_uci_element_t *)(hULUEuci(bufPtr)+1);
+
+        for (int j=0; j < nb_uci ; j++) {
+          LOG_D(PHY,"FS6 cu, block: %d/%d: received ul harq/sr: %d, rnti: %d, ueid: %d\n",
+                i, j, type, tmp->rnti, tmp->UEid);
+          eNB->measurements.n0_subband_power_dB[0][0]=tmp->n0_subband_power_dB;
+          // eNB->uci_vars is accessed when a UCI context is attached and for HARQ indications
+          const int uciIdx=tmp->UEid;
+          const bool uciIdxValid=(uciIdx >= 0) && (uciIdx < NUMBER_OF_UCI_VARS_MAX);
+          const bool needsUciVars=(tmp->uci.ue_id != 0xFFFF) || (tmp->type == fs6ULindicationHarq);
+
+          if (needsUciVars && !uciIdxValid) {
+            LOG_E(PHY, "FS6 ul: UCI element %d/%d carries out of range index %d, element ignored\n", i, j, uciIdx);
+            tmp++;
+            continue;
+          }
+
+          // 0xFFFF means that no UCI context was attached by the sender
+          if (tmp->uci.ue_id != 0xFFFF)
+            memcpy(&eNB->uci_vars[tmp->UEid],&tmp->uci, sizeof(tmp->uci));
+
+          if ( tmp->type == fs6ULindicationHarq )
+            fill_uci_harq_indication (tmp->UEid, eNB, &eNB->uci_vars[tmp->UEid],
+                                      tmp->frame, tmp->subframe, tmp->harq_ack,
+                                      tmp->tdd_mapping_mode, tmp->tdd_multiplexing_mask);
+          else if ( tmp->type == fs6ULindicationSr )
+            fill_sr_indication(tmp->UEid, eNB,tmp->rnti,tmp->frame,tmp->subframe,tmp->stat);
+          else
+            LOG_E(PHY, "Split FS6: impossible UL harq type\n");
+
+          tmp++;
+        }
+      } else
+        LOG_E(PHY, "FS6 ul packet type impossible\n" );
+    }
+
+    // next block
+    bufPtr+=alignedSize(bufPtr);
+  }
+}
+
+/*
+ * UE specific RX processing on the side that receives the UL split data:
+ * decode the nbBlocks received blocks with recvFs6Ul(), deactivate the UCI
+ * contexts that belong to the current RX frame/subframe, then run
+ * pusch_procedures_fromsplit().
+ *
+ * The per UE propagation array is zeroed first: recvFs6Ul() only writes the
+ * entries of the UEs present in the received blocks, so the other entries
+ * would otherwise hold indeterminate stack content.
+ */
+void phy_procedures_eNB_uespec_RX_fromsplit(uint8_t *bufferZone, int nbBlocks,PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) {
+  // The configuration arrived in Dl, so we can extract the UL data
+  ul_propagation_t ul_propa[NUMBER_OF_UE_MAX];
+  memset(ul_propa, 0, sizeof(ul_propa));
+  recvFs6Ul(bufferZone, nbBlocks, eNB, ul_propa);
+
+  // dirty memory allocation in OAI...
+  // release every UCI context scheduled for the subframe being processed
+  for (int i = 0; i < NUMBER_OF_UCI_VARS_MAX; i++)
+    if ( eNB->uci_vars[i].frame == proc->frame_rx &&
+         eNB->uci_vars[i].subframe == proc->subframe_rx )
+      eNB->uci_vars[i].active=0;
+
+  pusch_procedures_fromsplit(bufferZone, nbBlocks, eNB, proc, ul_propa);
+}
+
+/*
+ * Decode the DL blocks received from the other side of the split and load
+ * them into the eNB context for the given frame/subframe.
+ *
+ * bufferZone holds nbBlocks consecutive blocks. Blocks whose contentBytes
+ * does not exceed sizeof(fs6_dl_t) carry nothing more and are skipped.
+ *  - fs6DlConfig: activates dlsch[UE][0], loads the DLSCH/HARQ parameters and
+ *    unpacks the payload into the HARQ process eDL buffer with fs6Dlunpack().
+ *  - fs6UlConfig: loads the ULSCH and UL HARQ parameters and marks the HARQ
+ *    process as scheduled for this frame/subframe.
+ *  - fs6ULConfigCCH: copies every UL control channel element into
+ *    eNB->uci_vars[UE_id]. The element pointer is advanced on each iteration;
+ *    previously it was not, so only the first element was ever applied.
+ */
+void rcvFs6DL(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, int frame, int subframe) {
+  void *bufPtr=bufferZone;
+
+  for (int i=0; i < nbBlocks; i++) { //nbBlocks is the actual received blocks
+    if ( ((commonUDP_t *)bufPtr)->contentBytes > sizeof(fs6_dl_t) ) {
+      int type=hDLUE(bufPtr)->type;
+
+      if ( type == fs6DlConfig) {
+        int curUE=hDLUE(bufPtr)->UE_id;
+        LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[curUE][0];
+        LTE_DL_eNB_HARQ_t *dlsch_harq=dlsch0->harq_processes[hDLUE(bufPtr)->harq_pid];
+#ifdef PHY_TX_THREAD
+        dlsch0->active[subframe] = 1;
+#else
+        dlsch0->active = 1;
+#endif
+        dlsch0->harq_ids[frame%2][subframe]=hDLUE(bufPtr)->harq_pid;
+        dlsch0->rnti=hDLUE(bufPtr)->rnti;
+        dlsch0->sqrt_rho_a=hDLUE(bufPtr)->sqrt_rho_a;
+        dlsch0->sqrt_rho_b=hDLUE(bufPtr)->sqrt_rho_b;
+        dlsch_harq->nb_rb=hDLUE(bufPtr)->nb_rb;
+        memcpy(dlsch_harq->rb_alloc, hDLUE(bufPtr)->rb_alloc, sizeof(hDLUE(bufPtr)->rb_alloc));
+        dlsch_harq->Qm=hDLUE(bufPtr)->Qm;
+        dlsch_harq->Nl=hDLUE(bufPtr)->Nl;
+        dlsch_harq->pdsch_start=hDLUE(bufPtr)->pdsch_start;
+#ifdef PHY_TX_THREAD
+        dlsch_harq->CEmode = hDLUE(bufPtr)->CEmode;
+        dlsch_harq->i0=hDLUE(bufPtr)->i0;
+        dlsch_harq->sib1_br_flag=hDLUE(bufPtr)->sib1_br_flag;
+#else
+        dlsch0->i0=hDLUE(bufPtr)->i0;
+        dlsch0->sib1_br_flag=hDLUE(bufPtr)->sib1_br_flag;
+#endif
+        // the packed payload follows the UE header
+        fs6Dlunpack(dlsch_harq->eDL,
+                    hDLUE(bufPtr)+1, hDLUE(bufPtr)->dataLen);
+        LOG_D(PHY,"received %d bits, in harq id: %di fsf: %d.%d, sum %d\n",
+              hDLUE(bufPtr)->dataLen, hDLUE(bufPtr)->harq_pid, frame, subframe, sum(dlsch_harq->eDL, hDLUE(bufPtr)->dataLen));
+      } else if (type == fs6UlConfig) {
+        int nbUE=(((commonUDP_t *)bufPtr)->contentBytes - sizeof(fs6_dl_t)) / sizeof( fs6_dl_ulsched_t ) ;
+        // copies one same-named field from the received element to the HARQ process
+#define cpyVal(a) memcpy(&ulsch_harq->a,&hTxULUE(bufPtr)->a, sizeof(ulsch_harq->a))
+
+        // NOTE(review): every iteration reads the same hTxULUE(bufPtr) element;
+        // this is only correct if a block never carries more than one UE -- confirm
+        for ( int i=0; i < nbUE; i++ ) {
+          int curUE=hTxULUE(bufPtr)->UE_id;
+          LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[curUE];
+          LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[hTxULUE(bufPtr)->harq_pid];
+          ulsch->ue_type=hTxULUE(bufPtr)->ue_type;
+          ulsch->harq_mask=hTxULUE(bufPtr)->harq_mask;
+          ulsch->Mlimit=hTxULUE(bufPtr)->Mlimit;
+          ulsch->max_turbo_iterations=hTxULUE(bufPtr)->max_turbo_iterations;
+          ulsch->bundling=hTxULUE(bufPtr)->bundling;
+          ulsch->beta_offset_cqi_times8=hTxULUE(bufPtr)->beta_offset_cqi_times8;
+          ulsch->beta_offset_ri_times8=hTxULUE(bufPtr)->beta_offset_ri_times8;
+          ulsch->beta_offset_harqack_times8=hTxULUE(bufPtr)->beta_offset_harqack_times8;
+          ulsch->Msg3_active=hTxULUE(bufPtr)->Msg3_active;
+          ulsch->cyclicShift=hTxULUE(bufPtr)->cyclicShift;
+          ulsch->cooperation_flag=hTxULUE(bufPtr)->cooperation_flag;
+          ulsch->num_active_cba_groups=hTxULUE(bufPtr)->num_active_cba_groups;
+          memcpy(ulsch->cba_rnti,hTxULUE(bufPtr)->cba_rnti,sizeof(ulsch->cba_rnti));//NUM_MAX_CBA_GROUP];
+          ulsch->rnti=hTxULUE(bufPtr)->rnti;
+          ulsch_harq->nb_rb=hTxULUE(bufPtr)->nb_rb;
+          ulsch_harq->handled=0;
+          ulsch_harq->status = ACTIVE;
+          ulsch_harq->frame = frame;
+          ulsch_harq->subframe = subframe;
+          ulsch_harq->first_rb=hTxULUE(bufPtr)->first_rb;
+          ulsch_harq->O_RI=hTxULUE(bufPtr)->O_RI;
+          ulsch_harq->Or1=hTxULUE(bufPtr)->Or1;
+          ulsch_harq->Msc_initial=hTxULUE(bufPtr)->Msc_initial;
+          ulsch_harq->Nsymb_initial=hTxULUE(bufPtr)->Nsymb_initial;
+          ulsch_harq->V_UL_DAI=hTxULUE(bufPtr)->V_UL_DAI;
+          ulsch_harq->Qm=hTxULUE(bufPtr)->Qm;
+          ulsch_harq->srs_active=hTxULUE(bufPtr)->srs_active;
+          ulsch_harq->TBS=hTxULUE(bufPtr)->TBS;
+          ulsch_harq->Nsymb_pusch=hTxULUE(bufPtr)->Nsymb_pusch;
+          cpyVal(dci_alloc);
+          cpyVal(rar_alloc);
+          // this replaces the ACTIVE status set above with the received one
+          cpyVal(status);
+          cpyVal(Msg3_flag);
+          cpyVal(phich_active);
+          cpyVal(phich_ACK);
+          cpyVal(previous_first_rb);
+          cpyVal(B);
+          cpyVal(G);
+          //cpyVal(o);
+          cpyVal(uci_format);
+          cpyVal(Or2);
+          cpyVal(o_RI);
+          cpyVal(o_ACK);
+          cpyVal(O_ACK);
+          //cpyVal(q);
+          cpyVal(o_RCC);
+          cpyVal(q_ACK);
+          cpyVal(q_RI);
+          cpyVal(RTC);
+          cpyVal(ndi);
+          cpyVal(round);
+          cpyVal(rvidx);
+          cpyVal(Nl);
+          cpyVal(n_DMRS);
+          cpyVal(previous_n_DMRS);
+          cpyVal(n_DMRS2);
+          cpyVal(delta_TF);
+          cpyVal(repetition_number );
+          cpyVal(total_number_of_repetitions);
+          LOG_D(PHY,"Received request to perform ulsch for: rnti:%d, fsf: %d/%d, O_ACK: %d\n",
+                ulsch->rnti, frame, subframe, ulsch_harq->O_ACK);
+        }
+      } else if ( type == fs6ULConfigCCH ) {
+        // the elements follow the UL control channel header of the block
+        fs6_dl_uespec_ulcch_element_t *tmp=(fs6_dl_uespec_ulcch_element_t *)(hTxULcch(bufPtr)+1);
+
+        // one element per active UE: move to the next element after each copy
+        for (int i=0; i< hTxULcch(bufPtr)->nb_active_ue; i++, tmp++ )
+          memcpy(&eNB->uci_vars[tmp->UE_id], &tmp->cch_vars, sizeof(tmp->cch_vars));
+      }  else
+        LOG_E(PHY, "Impossible block in fs6 DL\n");
+    }
+
+    // next block
+    bufPtr+=alignedSize(bufPtr);
+  }
+}
+
+/*
+ * TX processing for the current TX subframe, driven by the DL data received
+ * from the other side of the split.
+ *
+ * Steps, in order:
+ *  1. load the PDCCH/MPDCCH counters and the PBCH PDU from the DL header;
+ *  2. deactivate every DLSCH, then let rcvFs6DL() re-activate the ones
+ *     present in the received blocks;
+ *  3. clear txdataF for this subframe on every antenna port;
+ *  4. in monolithic or PNF mode: generate PMCH or the common signals, then
+ *     the DCIs (and the MPDCCH DCIs when num_mdci > 0);
+ *  5. run pdsch_procedures() for every active DLSCH;
+ *  6. load the PHICH variables from the DL header and generate PHICH.
+ *
+ * do_meas==1 starts the phy_proc_tx and dlsch_common_and_dci measurements
+ * (no matching stop_meas() is present in this function).
+ */
+void phy_procedures_eNB_TX_fromsplit(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int do_meas ) {
+  LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms;
+  int subframe=proc->subframe_tx;
+  int frame=proc->frame_tx;
+  //LTE_UL_eNB_HARQ_t *ulsch_harq;
+  // the pdcch/mpdcch contexts are indexed by subframe parity
+  eNB->pdcch_vars[subframe&1].num_pdcch_symbols=hDL(bufferZone)->num_pdcch_symbols;
+  eNB->pdcch_vars[subframe&1].num_dci=hDL(bufferZone)->num_dci;
+  uint8_t num_mdci = eNB->mpdcch_vars[subframe&1].num_dci = hDL(bufferZone)->num_mdci;
+  eNB->pbch_configured=true;
+  memcpy(eNB->pbch_pdu,hDL(bufferZone)->pbch_pdu, 4);
+
+  // Remove all scheduled DL, we will populate from the CU sending
+  for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) {
+    LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0];
+
+    if ( dlsch0 && dlsch0->rnti>0 ) {
+#ifdef PHY_TX_THREAD
+      dlsch0->active[subframe] = 0;
+#else
+      dlsch0->active = 0;
+#endif
+    }
+  }
+
+  // load the received DL/UL configuration blocks into the eNB context
+  rcvFs6DL(bufferZone, nbBlocks, eNB, frame, subframe);
+
+  if (do_meas==1) {
+    start_meas(&eNB->phy_proc_tx);
+    start_meas(&eNB->dlsch_common_and_dci);
+  }
+
+  // clear the transmit data array for the current subframe
+  for (int aa = 0; aa < fp->nb_antenna_ports_eNB; aa++) {
+    memset (&eNB->common_vars.txdataF[aa][subframe * fp->ofdm_symbol_size * (fp->symbols_per_tti)],
+            0, fp->ofdm_symbol_size * (fp->symbols_per_tti) * sizeof (int32_t));
+  }
+
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) {
+    if (is_pmch_subframe(frame,subframe,fp)) {
+      pmch_procedures(eNB,proc);
+    } else {
+      // this is not a pmch subframe, so generate PSS/SSS/PBCH
+      common_signal_procedures(eNB,frame, subframe);
+    }
+  }
+
+  // clear previous allocation information for all UEs
+  // (the body is commented out: this loop currently does nothing)
+  for (int i = 0; i < NUMBER_OF_UE_MAX; i++) {
+    //if (eNB->dlsch[i][0])
+    //eNB->dlsch[i][0]->subframe_tx[subframe] = 0;
+  }
+
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) {
+    // copy the received DCI allocations into the local pdcch context
+    for (int i=0; i< hDL(bufferZone)->num_dci; i++)
+      eNB->pdcch_vars[subframe&1].dci_alloc[i]=hDL(bufferZone)->dci_alloc[i];
+
+    LOG_D (PHY, "Frame %d, subframe %d: Calling generate_dci_top (pdcch) (num_dci %" PRIu8 ")\n", frame, subframe, hDL(bufferZone)->num_dci);
+    // the amplitude used for PDCCH is the one received in the DL header
+    generate_dci_top(hDL(bufferZone)->num_pdcch_symbols,
+                     hDL(bufferZone)->num_dci,
+                     &eNB->pdcch_vars[subframe&1].dci_alloc[0],
+                     0,
+                     hDL(bufferZone)->amp,
+                     fp,
+                     eNB->common_vars.txdataF,
+                     subframe);
+
+    if (num_mdci > 0) {
+      LOG_D (PHY, "[eNB %" PRIu8 "] Frame %d, subframe %d: Calling generate_mdci_top (mpdcch) (num_dci %" PRIu8 ")\n", eNB->Mod_id, frame, subframe, num_mdci);
+      // NOTE(review): uses the constant AMP, not the amp received in the header
+      generate_mdci_top (eNB, frame, subframe, AMP, eNB->common_vars.txdataF);
+    }
+  }
+
+  for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) {
+    LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0];
+    LTE_eNB_DLSCH_t *dlsch1 = eNB->dlsch[UE_id][1];
+
+    if ((dlsch0)&&(dlsch0->rnti>0)&&
+#ifdef PHY_TX_THREAD
+        (dlsch0->active[subframe] == 1)
+#else
+        (dlsch0->active == 1)
+#endif
+       ) {
+      // debug only: sum of the antenna 0 txdataF samples of this subframe,
+      // computed on every call even when the LOG_D below is not printed
+      uint64_t sum=0;
+
+      for ( int i= subframe * fp->ofdm_symbol_size * (fp->symbols_per_tti);
+            i< (subframe+1) * fp->ofdm_symbol_size * (fp->symbols_per_tti);
+            i++)
+        sum+=((int32_t *)(eNB->common_vars.txdataF[0]))[i];
+
+      LOG_D(PHY,"frame: %d, subframe: %d, sum of dlsch mod v1: %lx\n", frame, subframe, sum);
+      // HARQ process recorded for this frame parity and subframe
+      int harq_pid=dlsch0->harq_ids[frame%2][subframe];
+      pdsch_procedures(eNB,
+                       proc,
+                       harq_pid,
+                       dlsch0,
+                       dlsch1);
+    }
+  }
+
+  // PHICH is generated from the variables received in the DL header
+  eNB->phich_vars[subframe&1]=hDL(bufferZone)->phich_vars;
+  generate_phich_top(eNB,
+                     proc,
+                     AMP);
+}
+
+#define cpyToDu(a) (hTxULUE(newUDPheader)->a=ulsch->a) // copy field a from the local `ulsch` into the block at the local `newUDPheader`
+#define cpyToDuHarq(a) (hTxULUE(newUDPheader)->a=ulsch_harq->a) // same, but the source is the local `ulsch_harq`
+#define memcpyToDuHarq(a) memcpy(&hTxULUE(newUDPheader)->a,&ulsch_harq->a, sizeof(ulsch_harq->a)) // bytewise copy sized by the source field; no trailing ';' so a call is exactly one statement
+
+void appendFs6TxULUE(uint8_t *bufferZone, LTE_DL_FRAME_PARMS *fp, int curUE, LTE_eNB_ULSCH_t *ulsch, int frame, int subframe) {
+  commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone;
+  // Append one fs6UlConfig block (UL scheduling of UE curUE) after the blocks already queued in bufferZone
+  uint8_t *firstFreeByte=bufferZone;
+  int curBlock=0;
+  // skip the queued blocks; their IDs must be consecutive, starting at 0
+  for (int i=0; i < FirstUDPheader->nbBlocks; i++) {
+    AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,"");
+    firstFreeByte+=alignedSize(firstFreeByte);
+    curBlock++;
+  }
+  AssertFatal(firstFreeByte+sizeof(commonUDP_t)+sizeof(fs6_dl_t)+sizeof(fs6_dl_ulsched_t) <= bufferZone+FS6_BUF_SIZE, "no room left in FS6 buffer for UL scheduling block\n");
+  commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte;
+  FirstUDPheader->nbBlocks++;
+  newUDPheader->blockID=curBlock;
+  newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_ulsched_t);
+  // We skip the fs6 DL header, that is populated by caller
+  // This header will be duplicated during sending
+  hTxULUE(newUDPheader)->type=fs6UlConfig;
+  hTxULUE(newUDPheader)->UE_id=curUE;
+  int harq_pid;
+  // the harq process is derived from frame/subframe, except when ue_type > NOCE
+  if (ulsch->ue_type > NOCE)
+    // LTE-M case
+    harq_pid = 0;
+  else
+    harq_pid = subframe2harq_pid(fp, frame, subframe);
+  // NOTE(review): harq_processes[harq_pid] is assumed non-NULL here - confirm against callers
+  LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[harq_pid];
+  hTxULUE(newUDPheader)->harq_pid=harq_pid;
+  cpyToDu(ue_type);
+  cpyToDu(harq_mask);
+  cpyToDu(Mlimit);
+  cpyToDu(max_turbo_iterations);
+  cpyToDu(bundling);
+  cpyToDu(beta_offset_cqi_times8);
+  cpyToDu(beta_offset_ri_times8);
+  cpyToDu(beta_offset_harqack_times8);
+  cpyToDu(Msg3_active);
+  cpyToDu(cyclicShift);
+  cpyToDu(cooperation_flag);
+  cpyToDu(num_active_cba_groups);
+  memcpy(hTxULUE(newUDPheader)->cba_rnti,ulsch->cba_rnti,sizeof(ulsch->cba_rnti));//NUM_MAX_CBA_GROUP];
+  cpyToDu(rnti);
+  cpyToDuHarq(nb_rb);
+  cpyToDuHarq(Msc_initial);
+  cpyToDuHarq(Nsymb_initial);
+  cpyToDuHarq(O_RI);
+  cpyToDuHarq(Or1);
+  cpyToDuHarq(first_rb);
+  cpyToDuHarq(V_UL_DAI);
+  cpyToDuHarq(Qm);
+  cpyToDuHarq(srs_active);
+  cpyToDuHarq(TBS);
+  cpyToDuHarq(Nsymb_pusch);
+  memcpyToDuHarq(dci_alloc);
+  memcpyToDuHarq(rar_alloc);
+  memcpyToDuHarq(status);
+  memcpyToDuHarq(Msg3_flag);
+  memcpyToDuHarq(phich_active);
+  memcpyToDuHarq(phich_ACK);
+  memcpyToDuHarq(previous_first_rb);
+  memcpyToDuHarq(B);
+  memcpyToDuHarq(G);
+  //memcpyToDuHarq(o);
+  memcpyToDuHarq(uci_format);
+  memcpyToDuHarq(Or2);
+  memcpyToDuHarq(o_RI);
+  memcpyToDuHarq(o_ACK);
+  memcpyToDuHarq(O_ACK);
+  //memcpyToDuHarq(q);
+  memcpyToDuHarq(o_RCC);
+  memcpyToDuHarq(q_ACK);
+  memcpyToDuHarq(q_RI);
+  memcpyToDuHarq(RTC);
+  memcpyToDuHarq(ndi);
+  memcpyToDuHarq(round);
+  memcpyToDuHarq(rvidx);
+  memcpyToDuHarq(Nl);
+  memcpyToDuHarq(n_DMRS);
+  memcpyToDuHarq(previous_n_DMRS);
+  memcpyToDuHarq(n_DMRS2);
+  memcpyToDuHarq(delta_TF);
+  memcpyToDuHarq(repetition_number );
+  memcpyToDuHarq(total_number_of_repetitions);
+  LOG_D(PHY,"Added request to perform ulsch for: rnti:%x, fsf: %d/%d\n", ulsch->rnti, frame, subframe);
+}
+
+void appendFs6DLUE(uint8_t *bufferZone, LTE_DL_FRAME_PARMS *fp, int UE_id, int8_t harq_pid, LTE_eNB_DLSCH_t *dlsch0, LTE_DL_eNB_HARQ_t *harqData, int frame, int subframe) {
+  commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone;
+  // Append one fs6DlConfig block (DLSCH parameters + packed harqData->eDL bits of one UE) after the queued blocks
+  uint8_t *firstFreeByte=bufferZone;
+  int curBlock=0;
+  // skip the queued blocks; their IDs must be consecutive, starting at 0
+  for (int i=0; i < FirstUDPheader->nbBlocks; i++) {
+    AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,"");
+    firstFreeByte+=alignedSize(firstFreeByte);
+    curBlock++;
+  }
+  // payload length as returned by get_G() for this allocation; logged below as a number of bits
+  int UEdataLen= get_G(fp,
+                       harqData->nb_rb,
+                       harqData->rb_alloc,
+                       harqData->Qm,
+                       harqData->Nl,
+                       harqData->pdsch_start,
+                       frame,subframe,
+                       0);
+  AssertFatal(firstFreeByte+sizeof(commonUDP_t)+sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_t)+ceil16_bytes(UEdataLen) <= bufferZone+FS6_BUF_SIZE, "no room left in FS6 buffer for DL UE block\n");
+  commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte;
+  FirstUDPheader->nbBlocks++;
+  newUDPheader->blockID=curBlock;
+  newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_t) + ceil16_bytes(UEdataLen);
+  // We skip the fs6 DL header, that is populated by caller
+  // This header will be duplicated during sending
+  hDLUE(newUDPheader)->type=fs6DlConfig;
+  hDLUE(newUDPheader)->UE_id=UE_id;
+  hDLUE(newUDPheader)->harq_pid=harq_pid;
+  hDLUE(newUDPheader)->rnti=dlsch0->rnti;
+  hDLUE(newUDPheader)->sqrt_rho_a=dlsch0->sqrt_rho_a;
+  hDLUE(newUDPheader)->sqrt_rho_b=dlsch0->sqrt_rho_b;
+  hDLUE(newUDPheader)->nb_rb=harqData->nb_rb;
+  memcpy(hDLUE(newUDPheader)->rb_alloc, harqData->rb_alloc, sizeof(harqData->rb_alloc));
+  hDLUE(newUDPheader)->Qm=harqData->Qm;
+  hDLUE(newUDPheader)->Nl=harqData->Nl;
+  hDLUE(newUDPheader)->pdsch_start=harqData->pdsch_start;
+#ifdef PHY_TX_THREAD
+  hDLUE(newUDPheader)->CEmode=harqData->CEmode;
+  hDLUE(newUDPheader)->i0=harqData->i0;
+  hDLUE(newUDPheader)->sib1_br_flag=harqData->sib1_br_flag;
+#else
+  hDLUE(newUDPheader)->i0=dlsch0->i0;
+  hDLUE(newUDPheader)->sib1_br_flag=dlsch0->sib1_br_flag;
+#endif
+  hDLUE(newUDPheader)->dataLen=UEdataLen;
+  fs6Dlpack(hDLUE(newUDPheader)+1, harqData->eDL, UEdataLen);
+  LOG_D(PHY,"sending %d bits, in harq id: %d, fsf: %d.%d, sum %d\n",
+        UEdataLen, harq_pid, frame, subframe, sum(harqData->eDL, UEdataLen));
+  //for (int i=0; i < UEdataLen; i++)
+  //LOG_D(PHY,"buffer ei[%d]:%hhx\n", i, ( (uint8_t *)(hDLUE(newUDPheader)+1) )[i]);
+}
+
+void appendFs6DLUEcch(uint8_t *bufferZone, PHY_VARS_eNB *eNB, int frame, int subframe) {
+  commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone;
+  // Append at most one fs6ULConfigCCH block listing every uci_vars entry active for this frame/subframe
+  uint8_t *firstFreeByte=bufferZone;
+  int curBlock=0;
+  // skip the queued blocks; their IDs must be consecutive, starting at 0
+  for (int i=0; i < FirstUDPheader->nbBlocks; i++) {
+    AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,"");
+    firstFreeByte+=alignedSize(firstFreeByte);
+    curBlock++;
+  }
+
+  commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte;
+  bool first_UE=true;
+  // the block header is only written when the first matching entry is found
+  for (int i = 0; i < NUMBER_OF_UCI_VARS_MAX; i++) {
+    LTE_eNB_UCI *uci = &(eNB->uci_vars[i]);
+
+    if ((uci->active == 1) && (uci->frame == frame) && (uci->subframe == subframe)) {
+      LOG_D(PHY,"Frame %d, subframe %d: adding uci procedures (type %d) for %d \n",
+            frame,
+            subframe,
+            uci->type,
+            i);
+      AssertFatal(firstFreeByte+sizeof(commonUDP_t)+(first_UE ? sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_ulcch_t) : newUDPheader->contentBytes)+sizeof(fs6_dl_uespec_ulcch_element_t) <= bufferZone+FS6_BUF_SIZE, "no room left in FS6 buffer for UL control channel element\n");
+      if ( first_UE ) {
+        FirstUDPheader->nbBlocks++;
+        newUDPheader->blockID=curBlock;
+        newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_ulcch_t);
+        hTxULcch(newUDPheader)->type=fs6ULConfigCCH;
+        hTxULcch(newUDPheader)->nb_active_ue=0;
+        first_UE=false;
+      }
+      // elements are stored back to back right after the fs6_dl_uespec_ulcch_t header
+      fs6_dl_uespec_ulcch_element_t *tmp=(fs6_dl_uespec_ulcch_element_t *)(hTxULcch(newUDPheader)+1);
+      tmp+=hTxULcch(newUDPheader)->nb_active_ue;
+      tmp->UE_id=i;
+      memcpy(&tmp->cch_vars,uci, sizeof(tmp->cch_vars));
+      hTxULcch(newUDPheader)->nb_active_ue++;
+      newUDPheader->contentBytes+=sizeof(fs6_dl_uespec_ulcch_element_t);
+    }
+  }
+}
+
+void phy_procedures_eNB_TX_tosplit(uint8_t *bufferZone, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int do_meas, uint8_t *buf, int bufSize) {
+  int frame=proc->frame_tx;
+  int subframe=proc->subframe_tx;
+  LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms;
+  // Fill bufferZone with everything needed for TX subframe proc->frame_tx/subframe_tx; buf and bufSize are not used
+  if ((fp->frame_type == TDD) && (subframe_select (fp, subframe) == SF_UL)) {
+    LOG_W(HW,"no sending in eNB_TX\n");
+    return;
+  }
+
+  // clear previous allocation information for all UEs (the loop body is commented out: currently a no-op)
+  for (int i = 0; i < NUMBER_OF_UE_MAX; i++) {
+    //if (eNB->dlsch[i][0])
+    //eNB->dlsch[i][0]->subframe_tx[subframe] = 0;
+  }
+
+  // Send to DU the UL scheduled for future UL subframe
+  for (int i=0; i<NUMBER_OF_UE_MAX; i++) {
+    int harq_pid;
+    LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i];
+    if (ulsch == NULL)
+      continue;
+
+    if (ulsch->ue_type > NOCE)
+      harq_pid = 0;
+    else
+      harq_pid= subframe2harq_pid(&eNB->frame_parms,frame,subframe);
+
+    LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid];
+    // a harq process may be unallocated (the loop below also checks for NULL): do not dereference it for the trace
+    if ((ulsch->rnti>0) && (ulsch_harq != NULL)) {
+      LOG_D(PHY,"check in UL scheduled harq %d: rnti %d, tx frame %d/%d, ulsch: %d, %d/%d (handled: %d)\n",
+            harq_pid, ulsch->rnti, frame, subframe, ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->handled);
+    }
+
+    for (int k=0; k<8; k++) {
+      ulsch_harq = ulsch->harq_processes[k];
+      if (ulsch_harq == NULL)
+        continue;
+      // forward the UL request when an active, not yet handled process targets this frame/subframe
+      if ((ulsch->rnti>0) &&
+          (ulsch_harq->status == ACTIVE) &&
+          (ulsch_harq->frame == frame) &&
+          (ulsch_harq->subframe == subframe) &&
+          (ulsch_harq->handled == 0)
+         )
+        appendFs6TxULUE(bufferZone,
+                        fp,
+                        i,
+                        ulsch,
+                        frame,
+                        subframe
+                       );
+    }
+  }
+
+  appendFs6DLUEcch(bufferZone,
+                   eNB,
+                   frame,
+                   subframe
+                  );
+  uint8_t num_pdcch_symbols = eNB->pdcch_vars[subframe&1].num_pdcch_symbols;
+  uint8_t num_dci           = eNB->pdcch_vars[subframe&1].num_dci;
+  uint8_t num_mdci          = eNB->mpdcch_vars[subframe&1].num_dci;
+  memcpy(hDL(bufferZone)->pbch_pdu,eNB->pbch_pdu,4);
+  // at most 8 DCI are forwarded; any excess is reported and discarded
+  if ( num_dci <= 8 )
+    LOG_D(PHY,"num_pdcch_symbols %"PRIu8",number dci %"PRIu8"\n",num_pdcch_symbols, num_dci);
+  else {
+    LOG_E(PHY, "Num dci too large for current FS6 implementation, reducing to 8 dci (was %d)\n",  num_dci);
+    num_dci=8;
+  }
+
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) {
+    hDL(bufferZone)->num_pdcch_symbols=num_pdcch_symbols;
+    hDL(bufferZone)->num_dci=num_dci;
+    hDL(bufferZone)->num_mdci=num_mdci;
+    hDL(bufferZone)->amp=AMP;
+
+    for (int i=0; i< hDL(bufferZone)->num_dci; i++)
+      hDL(bufferZone)->dci_alloc[i]=eNB->pdcch_vars[subframe&1].dci_alloc[i];
+
+    LOG_D(PHY, "pbch configured: %d\n", eNB->pbch_configured);
+  }
+
+  if (do_meas==1) stop_meas(&eNB->dlsch_common_and_dci);
+
+  if (do_meas==1) start_meas(&eNB->dlsch_ue_specific);
+
+  for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) {
+    LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0];
+
+    if ((dlsch0)&&(dlsch0->rnti>0)&&
+#ifdef PHY_TX_THREAD
+        (dlsch0->active[subframe] == 1)
+#else
+        (dlsch0->active == 1)
+#endif
+       ) {
+      // get harq_pid
+      int harq_pid = dlsch0->harq_ids[frame%2][subframe];
+      AssertFatal(harq_pid>=0,"harq_pid is negative\n");
+
+      if (harq_pid>=8) {
+        if (dlsch0->ue_type == NOCE)
+          LOG_E(PHY,"harq_pid:%d corrupt must be 0-7 UE_id:%d frame:%d subframe:%d rnti:%x [ %1d.%1d.%1d.%1d.%1d.%1d.%1d.%1d\n", harq_pid,UE_id,frame,subframe,dlsch0->rnti,
+                dlsch0->harq_ids[frame%2][0],
+                dlsch0->harq_ids[frame%2][1],
+                dlsch0->harq_ids[frame%2][2],
+                dlsch0->harq_ids[frame%2][3],
+                dlsch0->harq_ids[frame%2][4],
+                dlsch0->harq_ids[frame%2][5],
+                dlsch0->harq_ids[frame%2][6],
+                dlsch0->harq_ids[frame%2][7]);
+      } else {
+        if (dlsch_procedures(eNB,
+                             proc,
+                             harq_pid,
+                             dlsch0,
+                             &eNB->UE_stats[(uint32_t)UE_id])) {
+          // data in: dlsch0 harq_processes[harq_pid]->e
+          /* length
+             get_G(fp,
+             dlsch_harq->nb_rb,
+             dlsch_harq->rb_alloc,
+             dlsch_harq->Qm,
+             dlsch_harq->Nl,
+             dlsch_harq->pdsch_start,
+             frame,subframe,
+             0)
+             need harq_pid
+          */
+          LTE_DL_eNB_HARQ_t *dlsch_harq=dlsch0->harq_processes[harq_pid];
+          appendFs6DLUE(bufferZone,
+                        fp,
+                        UE_id,
+                        harq_pid,
+                        dlsch0,
+                        dlsch_harq,
+                        frame,
+                        subframe
+                       );
+        }
+      }
+    } else if ((dlsch0)&&(dlsch0->rnti>0)&&
+#ifdef PHY_TX_THREAD
+               (dlsch0->active[subframe] == 0)
+#else
+               (dlsch0->active == 0)
+#endif
+              ) {
+      // clear subframe TX flag since UE is not scheduled for PDSCH in this subframe (so that we don't look for PUCCH later)
+      //dlsch0->subframe_tx[subframe]=0;
+    }
+  }
+
+  hDL(bufferZone)->phich_vars=eNB->phich_vars[subframe&1];
+
+  if (do_meas==1) stop_meas(&eNB->dlsch_ue_specific);
+
+  if (do_meas==1) stop_meas(&eNB->phy_proc_tx);
+
+  // a non-zero num_mdci (taken from mpdcch_vars above) is not supported by the split: abort (original note: "MBMS is not working in OAI")
+  if (hDL(bufferZone)->num_mdci) abort();
+
+  return;
+}
+
+void *DL_du_fs6(void *arg) { // DU downlink loop; arg is the RU_t to serve
+  RU_t *ru=(RU_t *)arg;
+  static uint64_t lastTS;
+  L1_rxtx_proc_t L1proc= {0};
+  // We pick the global thread pool from the legacy code global vars
+  L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool;
+  L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode;
+  L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode;
+  initStaticTime(begingWait);
+  initStaticTime(begingProcessing);
+  initRefTimes(fullLoop);
+  initRefTimes(DuHigh);
+  initRefTimes(DuLow);
+  initRefTimes(transportTime);
+  // each iteration: receive one DL subframe from the CU per eNB, rebuild the TX signal, then run the RU TX chain
+  while (1) {
+    for (int i=0; i<ru->num_eNB; i++) {
+      initBufferZone(bufferZone);
+      pickStaticTime(begingWait);
+      int nb_blocks=receiveSubFrame(&sockFS6, bufferZone, sizeof(bufferZone), CTsentCUv0 );
+      updateTimesReset(begingWait, &fullLoop, 1000, false, "DU wait CU");
+      // NOTE(review): timestamp gaps are only logged; the "dropping it" case below is still processed
+      if (nb_blocks > 0) {
+        if ( lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti < hUDP(bufferZone)->timestamp) {
+          LOG_E(HW,"Missed a subframe: expecting: %lu, received %lu\n",
+                lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti,
+                hUDP(bufferZone)->timestamp);
+        } else if ( lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti > hUDP(bufferZone)->timestamp) {
+          LOG_E(HW,"Received a subframe in past time from CU (dropping it): expecting: %lu, received %lu\n",
+                lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti,
+                hUDP(bufferZone)->timestamp);
+        }
+        // setAllfromTS adds sf_ahead subframes to get the TX time, so subtract them from the received TX timestamp
+        pickStaticTime(begingProcessing);
+        lastTS=hUDP(bufferZone)->timestamp;
+        setAllfromTS(hUDP(bufferZone)->timestamp - sf_ahead*ru->eNB_list[i]->frame_parms.samples_per_tti, &L1proc);
+        measTransportTime(hDL(bufferZone)->DuClock, hDL(bufferZone)->CuSpentMicroSec,
+                          &transportTime, 1000, false, "Transport time, to CU + from CU for one subframe");
+        phy_procedures_eNB_TX_fromsplit( bufferZone, nb_blocks, ru->eNB_list[i], &L1proc, 1);
+        updateTimesReset(begingProcessing, &DuHigh, 1000, false, "DU high layer1 processing for DL");
+      } else
+        LOG_E(PHY,"DL not received for subframe\n");
+    }
+    // RU side of the TX chain: precoding, OFDM modulation, then hand over to the RF device
+    pickStaticTime(begingProcessing);
+    feptx_prec(ru, L1proc.frame_tx,L1proc.subframe_tx );
+    feptx_ofdm(ru, L1proc.frame_tx,L1proc.subframe_tx );
+    ocp_tx_rf(ru, &L1proc);
+    updateTimesReset(begingProcessing, &DuLow, 1000, false, "DU low layer1 processing for DL");
+    // with the RF simulator du_fs6 calls this function inline once per subframe, so return after one pass
+    if ( IS_SOFTMODEM_RFSIM )
+      return NULL;
+  }
+
+  return NULL;
+}
+
+void UL_du_fs6(RU_t *ru, L1_rxtx_proc_t *proc) { // DU uplink: read one subframe from RF, process it, send the result to the CU
+  initStaticTime(begingWait);
+  initRefTimes(fullLoop);
+  pickStaticTime(begingWait);
+  rx_rf(ru, proc);
+  updateTimesReset(begingWait, &fullLoop, 1000, false, "DU wait USRP");
+  // front end processing: convert from time domain to frequency domain
+  // fills rxdataF buffer
+  fep_full(ru, proc->subframe_rx);
+  // Fixme: datamodel issue
+  PHY_VARS_eNB *eNB = RC.eNB[0][0];
+
+  if (NFAPI_MODE==NFAPI_MODE_PNF) {
+    // I am a PNF and I need to let nFAPI know that we have a (sub)frame tick
+    //add_subframe(&frame, &subframe, 4);
+    //oai_subframe_ind(proc->frame_tx, proc->subframe_tx);
+    oai_subframe_ind(proc->frame_rx, proc->subframe_rx);
+  }
+  // build the message in the shared bufferZone: RX timestamp, then PRACH, then UE specific RX data
+  initBufferZone(bufferZone);
+  hUDP(bufferZone)->timestamp=proc->timestamp_rx;
+  prach_eNB_tosplit(bufferZone, FS6_BUF_SIZE, eNB, proc );
+
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) {
+    phy_procedures_eNB_uespec_RX_tosplit(bufferZone, FS6_BUF_SIZE, eNB, proc );
+  }
+  // nothing was appended: still emit one block that carries only the fs6_ul_t header
+  if (hUDP(bufferZone)->nbBlocks==0) {
+    hUDP(bufferZone)->nbBlocks=1; // We have to send the signaling, even is there is no user plan data (no UE)
+    hUDP(bufferZone)->blockID=0;
+    hUDP(bufferZone)->contentBytes=sizeof(fs6_ul_t);
+  }
+  // NOTE(review): the same buffer is sent once per eNB of this RU - confirm this is intended
+  for (int i=0; i<ru->num_eNB; i++) {
+    sendSubFrame(&sockFS6, bufferZone, sizeof(fs6_ul_t), CTsentDUv0);
+  }
+}
+
+void DL_cu_fs6(RU_t *ru, L1_rxtx_proc_t *proc, uint64_t  DuClock, uint64_t startCycle) { // CU downlink: run the upper layers, build the DL message, send it to the DU
+  initRefTimes(CUprocessing);
+  // Fixme: datamodel issue
+  PHY_VARS_eNB *eNB = RC.eNB[0][0];
+  pthread_mutex_lock(&eNB->UL_INFO_mutex);
+  eNB->UL_INFO.frame     = proc->frame_rx;
+  eNB->UL_INFO.subframe  = proc->subframe_rx;
+  eNB->UL_INFO.module_id = eNB->Mod_id;
+  eNB->UL_INFO.CC_id     = eNB->CC_id;
+  eNB->if_inst->UL_indication(&eNB->UL_INFO, proc);
+  pthread_mutex_unlock(&eNB->UL_INFO_mutex);
+  initBufferZone(bufferZone);
+  phy_procedures_eNB_TX_tosplit(bufferZone, eNB, proc, 1, bufferZone, FS6_BUF_SIZE);
+  hUDP(bufferZone)->timestamp=proc->timestamp_tx;
+  // nothing was appended: still emit one block that carries only the fs6_dl_t header
+  if (hUDP(bufferZone)->nbBlocks==0) {
+    hUDP(bufferZone)->nbBlocks=1; // We have to send the signaling, even is there is no user plan data (no UE)
+    hUDP(bufferZone)->blockID=0;
+    hUDP(bufferZone)->contentBytes=sizeof(fs6_dl_t);
+  }
+  // echo the DU clock and report the time spent in the CU; the DU passes both to measTransportTime
+  hDL(bufferZone)->DuClock=DuClock;
+  hDL(bufferZone)->CuSpentMicroSec=(rdtsc()-startCycle)/(cpuf*1000);
+  updateTimesReset(startCycle, &CUprocessing, 1000,  true,"CU entire processing from recv to send");
+  sendSubFrame(&sockFS6, bufferZone, sizeof(fs6_dl_t), CTsentCUv0 );
+  return;
+}
+
+void UL_cu_fs6(RU_t *ru, L1_rxtx_proc_t *proc, uint64_t *TS, uint64_t *DuClock, uint64_t *startProcessing) { // CU uplink: receive one subframe from the DU and process it
+  initBufferZone(bufferZone);
+  initStaticTime(begingWait);
+  initRefTimes(fullLoop);
+  pickStaticTime(begingWait);
+  int nb_blocks=receiveSubFrame(&sockFS6, bufferZone, sizeof(bufferZone), CTsentDUv0 );
+  * DuClock=hUDP(bufferZone)->senderClock;
+  * startProcessing=rdtsc();
+  updateTimesReset(begingWait, &fullLoop, 1000, false, "CU wait DU");
+  // the outputs *DuClock and *startProcessing are written even when nothing was received
+  if (nb_blocks ==0) {
+    LOG_W(PHY, "CU lost a subframe\n");
+    return;
+  }
+  // a block count mismatch is only reported; processing goes on with what was received
+  if (nb_blocks != hUDP(bufferZone)->nbBlocks )
+    LOG_W(PHY, "received %d blocks for %d expected\n", nb_blocks, hUDP(bufferZone)->nbBlocks);
+  // resynchronise the caller's expected timestamp *TS on the one received from the DU
+  if ( *TS != hUDP(bufferZone)->timestamp ) {
+    LOG_W(HW, "CU received time: %lu instead of %lu expected\n", hUDP(bufferZone)->timestamp, *TS);
+    *TS=hUDP(bufferZone)->timestamp;
+  }
+  // derive all frame/subframe counters of proc from the received timestamp
+  setAllfromTS(hUDP(bufferZone)->timestamp, proc);
+  PHY_VARS_eNB *eNB = RC.eNB[0][0];
+
+  if (is_prach_subframe(&eNB->frame_parms, proc->frame_prach,proc->subframe_prach)>0)
+    prach_eNB_fromsplit(bufferZone, sizeof(bufferZone), eNB, proc);
+
+  release_UE_in_freeList(eNB->Mod_id);
+
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) {
+    phy_procedures_eNB_uespec_RX_fromsplit(bufferZone, nb_blocks, eNB, proc);
+  }
+}
+
+void *cu_fs6(void *arg) {
+  setbuf(stdout, NULL);
+  setbuf(stderr, NULL);
+  RU_t               *ru      = (RU_t *)arg;
+  // CU main thread: initialise the RU, open the UDP socket towards the DU, then loop on UL receive / DL send
+  fill_rf_config(ru,ru->rf_config_file);
+  init_frame_parms(ru->frame_parms,1);
+  phy_init_RU(ru);
+  wait_sync("ru_thread");
+  char remoteIP[1024]= {0}; // zero filled: strncpy() below copies at most 1023 bytes, so the string is always terminated
+  strncpy(remoteIP,get_softmodem_params()->split73+3, 1023); //three first char should be cu: or du:
+  char port_def[256]=DU_PORT;
+  // split "host:port" at the first ':'; stop at the end of the string
+  for (int i=0; remoteIP[i] != 0; i++)
+    if (remoteIP[i]==':') {
+      strncpy(port_def,remoteIP+i+1,255);
+      remoteIP[i]=0;
+      break;
+    }
+  // remoteIP now holds the host part; port_def keeps its default when no ':' was given
+  AssertFatal(createUDPsock(NULL, CU_PORT, remoteIP, port_def, &sockFS6), "");
+  L1_rxtx_proc_t L1proc= {0};
+  // We pick the global thread pool from the legacy code global vars
+  L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool;
+  L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode;
+  L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode;
+  uint64_t timeStamp=0;
+  initStaticTime(begingWait);
+  initStaticTime(begingWait2);
+  initRefTimes(waitDUAndProcessingUL);
+  initRefTimes(makeSendDL);
+  initRefTimes(fullLoop);
+  uint64_t DuClock=0, startProcessing=0;
+  // one iteration per subframe: timeStamp is the expected RX timestamp, corrected by UL_cu_fs6 when it differs
+  while(1) {
+    timeStamp+=ru->frame_parms->samples_per_tti;
+    updateTimesReset(begingWait, &fullLoop, 1000, true, "CU for full SubFrame (must be less 1ms)");
+    pickStaticTime(begingWait);
+    UL_cu_fs6(ru, &L1proc, &timeStamp, &DuClock, &startProcessing);
+    updateTimesReset(begingWait, &waitDUAndProcessingUL, 1000,  true,"CU Time in wait Rx + Ul processing");
+    pickStaticTime(begingWait2);
+    DL_cu_fs6(ru, &L1proc, DuClock, startProcessing);
+    updateTimesReset(begingWait2, &makeSendDL, 1000,  true,"CU Time in DL build+send");
+  }
+
+  return NULL;
+}
+
+void *du_fs6(void *arg) {
+  setbuf(stdout, NULL);
+  setbuf(stderr, NULL);
+  RU_t               *ru      = (RU_t *)arg;
+  // DU main thread: initialise the RU and RF, open the UDP socket towards the CU, then loop on UL processing
+  fill_rf_config(ru,ru->rf_config_file);
+  init_frame_parms(ru->frame_parms,1);
+  phy_init_RU(ru);
+  init_rf(ru);
+  wait_sync("ru_thread");
+  char remoteIP[1024]= {0}; // zero filled: strncpy() below copies at most 1023 bytes, so the string is always terminated
+  strncpy(remoteIP,get_softmodem_params()->split73+3,1023); //three first char should be cu: or du:
+  char port_def[256]=CU_PORT;
+  // split "host:port" at the first ':'; stop at the end of the string
+  for (int i=0; remoteIP[i] != 0; i++)
+    if (remoteIP[i]==':') {
+      strncpy(port_def,remoteIP+i+1,255);
+      remoteIP[i]=0;
+      break;
+    }
+
+  AssertFatal(createUDPsock(NULL, DU_PORT, remoteIP, port_def, &sockFS6), "");
+  // NOTE(review): a failing RF start is only logged; the thread carries on
+  if (ru->rfdevice.trx_start_func(&ru->rfdevice) != 0)
+    LOG_E(HW,"Could not start the RF device\n");
+  else
+    LOG_I(PHY,"RU %d rf device ready\n",ru->idx);
+
+  initStaticTime(begingWait);
+  initRefTimes(waitRxAndProcessingUL);
+  initRefTimes(fullLoop);
+  pthread_t t;
+  // real RF: the downlink runs in its own thread; with the RF simulator it is called inline from the loop below
+  if ( !IS_SOFTMODEM_RFSIM )
+    threadCreate(&t, DL_du_fs6, (void *)ru, "MainDuTx", -1, OAI_PRIORITY_RT_MAX);
+
+  L1_rxtx_proc_t L1proc= {0};
+  // We pick the global thread pool from the legacy code global vars
+  L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool;
+  L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode;
+  L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode;
+
+  while(!oai_exit) {
+    updateTimesReset(begingWait, &fullLoop, 1000,  true,"DU for full SubFrame (must be less 1ms)");
+    pickStaticTime(begingWait);
+    UL_du_fs6(ru, &L1proc);
+
+    if ( IS_SOFTMODEM_RFSIM )
+      DL_du_fs6((void *)ru);
+
+    updateTimesReset(begingWait, &waitRxAndProcessingUL, 1000,  true,"DU Time in wait Rx + Ul processing");
+  }
+
+  ru->rfdevice.trx_end_func(&ru->rfdevice);
+  LOG_I(PHY,"RU %d rf device stopped\n",ru->idx);
+  return NULL;
+}
diff --git a/executables/main-ocp.c b/executables/main-ocp.c
new file mode 100644
index 0000000000000000000000000000000000000000..d2ee9418dcaa2efb4059324a1e177d25f27967fe
--- /dev/null
+++ b/executables/main-ocp.c
@@ -0,0 +1,1401 @@
+/*
+* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The OpenAirInterface Software Alliance licenses this file to You under
+* the OAI Public License, Version 1.1  (the "License"); you may not use this file
+* except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.openairinterface.org/?page_id=698
+*
+* Author and copyright: Laurent Thomas, open-cells.com
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*-------------------------------------------------------------------------------
+* For more information about the OpenAirInterface (OAI) Software Alliance:
+*      contact@openairinterface.org
+*/
+
+
+/*
+ * This file replaces
+ * targets/RT/USER/lte-softmodem.c
+ * targets/RT/USER/rt_wrapper.c
+ * targets/RT/USER/lte-ru.c
+ * targets/RT/USER/lte-enb.c
+ * targets/RT/USER/ru_control.c
+ * openair1/SCHED/prach_procedures.c
+ * The merger of OpenAir central code to this branch
+ * should check if these files are modified and analyze if code has to be copied in here
+ */
+#define  _GNU_SOURCE
+#include <pthread.h>
+
+#include <common/utils/LOG/log.h>
+#include <common/utils/system.h>
+#include <common/utils/assertions.h>
+static int DEFBANDS[] = {7};
+static int DEFENBS[] = {0};
+#include <common/config/config_userapi.h>
+#include <targets/RT/USER/lte-softmodem.h>
+#include <openair1/PHY/defs_eNB.h>
+#include <openair1/PHY/phy_extern.h>
+#include <nfapi/oai_integration/vendor_ext.h>
+#include <openair1/SCHED/fapi_l1.h>
+#include <openair1/PHY/INIT/phy_init.h>
+#include <openair2/LAYER2/MAC/mac_extern.h>
+#include <openair1/PHY/LTE_REFSIG/lte_refsig.h>
+#include <nfapi/oai_integration/nfapi_pnf.h>
+#include <executables/split_headers.h>
+#include <common/utils/threadPool/thread-pool.h>
+#include <openair2/ENB_APP/NB_IoT_interface.h>
+#include <common/utils/load_module_shlib.h>
+#include <targets/COMMON/create_tasks.h>
+#include <openair1/PHY/TOOLS/phy_scope_interface.h>
+#include <openair2/UTIL/OPT/opt.h>
+#include <openair1/SIMULATION/TOOLS/sim.h>
+#include <openair1/PHY/phy_vars.h>
+#include <openair1/SCHED/sched_common_vars.h>
+#include <openair2/LAYER2/MAC/mac_vars.h>
+#include <openair2/RRC/LTE/rrc_vars.h>
+
+pthread_cond_t nfapi_sync_cond;
+pthread_mutex_t nfapi_sync_mutex;
+int nfapi_sync_var=-1; //!< protected by mutex \ref nfapi_sync_mutex
+pthread_cond_t sync_cond;
+pthread_mutex_t sync_mutex;
+int sync_var=-1; //!< protected by mutex \ref sync_mutex.
+int config_sync_var=-1;
+volatile int oai_exit = 0; // set to 1 by exit_function()
+double cpuf; // divides rdtsc() cycle counts when converting to time; presumably the CPU frequency - confirm the unit
+uint16_t sf_ahead=4; // number of subframes the TX timestamp runs ahead of the RX one (see setAllfromTS)
+int otg_enabled;
+uint64_t  downlink_frequency[MAX_NUM_CCs][4];
+int32_t   uplink_frequency_offset[MAX_NUM_CCs][4];
+// functional split role, compared with SPLIT73_CU / SPLIT73_DU in init_RU_proc
+int split73;
+char * split73_config;
+
+static void *ru_thread( void *param ); // forward declaration: used by init_RU_proc as the default thread body
+void kill_RU_proc(RU_t *ru) { // empty stub: does nothing in this executable
+}
+void kill_eNB_proc(int inst) { // empty stub: does nothing in this executable
+}
+void free_transport(PHY_VARS_eNB *eNB) { // empty stub: nothing is released here
+}
+void reset_opp_meas(void) { // empty stub; presumably kept so that shared code still links - confirm
+}
+extern void  phy_free_RU(RU_t *); // defined outside this file
+
+void exit_function(const char *file, const char *function, const int line, const char *s) {
+  // Print the reason when one is given, raise oai_exit, stop every RU device, then terminate the process
+  if (s != NULL)
+    printf("%s:%d %s() Exiting OAI softmodem: %s\n",file,line, function, s);
+
+  close_log_mem();
+  oai_exit = 1;
+
+  if (RC.ru == NULL)
+    exit(-1); // no RU table yet: initialisation most likely did not finish, leave at once
+
+  for (int idx=0; idx<RC.nb_RU; idx++) {
+    RU_t *unit=RC.ru[idx];
+    if (unit == NULL) continue;
+    if (unit->rfdevice.trx_end_func) {
+      unit->rfdevice.trx_end_func(&unit->rfdevice);
+      unit->rfdevice.trx_end_func = NULL;
+    }
+    if (unit->ifdevice.trx_end_func) {
+      unit->ifdevice.trx_end_func(&unit->ifdevice);
+      unit->ifdevice.trx_end_func = NULL;
+    }
+  }
+  sleep(1); // give the other threads one second to see oai_exit and stop
+  exit(1);
+}
+
+// Fixme: the data model has many mistakes and redundant variables
+// TDD is also more complex
+void setAllfromTS(uint64_t TS, L1_rxtx_proc_t *proc) {
+  // Derive every rx / tx / prach frame and subframe counter of proc from the sample timestamp TS
+  for (int inst=0; inst < RC.nb_inst; inst++) {
+    for (int cc=0; cc<RC.nb_CC[inst]; cc++) {
+      LTE_DL_FRAME_PARMS *parms=&RC.eNB[inst][cc]->frame_parms;
+      uint64_t txTS=TS+(sf_ahead)*parms->samples_per_tti;
+      uint64_t prachTS=TS;
+      // frame numbers wrap at 1024, subframe numbers at 10
+      proc->timestamp_rx=TS;
+      proc->frame_rx=(TS / (parms->samples_per_tti*10))&1023;
+      proc->subframe_rx=(TS / parms->samples_per_tti)%10;
+      proc->timestamp_tx=txTS;
+      proc->frame_tx=(txTS / (parms->samples_per_tti*10))&1023;
+      proc->subframe_tx=(txTS / parms->samples_per_tti)%10;
+      proc->frame_prach=(prachTS / (parms->samples_per_tti*10))&1023;
+      proc->subframe_prach=(prachTS / parms->samples_per_tti)%10;
+      proc->frame_prach_br=(prachTS / (parms->samples_per_tti*10))&1023;
+      proc->subframe_prach_br=(prachTS / parms->samples_per_tti)%10;
+    }
+  }
+}
+
+void init_RU_proc(RU_t *ru) {
+  // Choose the thread body and its name from the split role, then start one real-time thread for ru
+  void *(*entry)(void *)=ru_thread;
+  char *label="MainRu";
+  pthread_t t;
+  if (split73 == SPLIT73_CU) {
+    entry=cu_fs6;
+    label="MainCu";
+  } else if (split73 == SPLIT73_DU) {
+    entry=du_fs6;
+    label="MainDuRx";
+  }
+  threadCreate(&t, entry, (void *)ru, label, -1, OAI_PRIORITY_RT_MAX);
+}
+
+// Create per UE structures: DLSCH/ULSCH buffers for every UE, plus the common SI, RA and MCH DLSCH of one eNB
+void init_transport(PHY_VARS_eNB *eNB) {
+  LTE_DL_FRAME_PARMS *fp = &eNB->frame_parms;
+  LOG_I(PHY, "Initialise transport\n");
+  // allocation failures in this loop are fatal (AssertFatal)
+  for (int i=0; i<NUMBER_OF_UE_MAX; i++) {
+    LOG_D(PHY,"Allocating Transport Channel Buffers for DLSCH, UE %d\n",i);
+    // two DLSCH structures per UE, both starting with rnti 0
+    for (int j=0; j<2; j++) {
+      AssertFatal( (eNB->dlsch[i][j] = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL,0,fp)) != NULL,
+                   "Can't get eNB dlsch structures for UE %d \n", i);
+      eNB->dlsch[i][j]->rnti=0;
+      LOG_D(PHY,"dlsch[%d][%d] => %p rnti:%d\n",i,j,eNB->dlsch[i][j], eNB->dlsch[i][j]->rnti);
+    }
+    // ULSCH of UE i is stored at index 1+i; index 0 is allocated after the loop for RA
+    LOG_D(PHY,"Allocating Transport Channel Buffer for ULSCH, UE %d\n",i);
+    AssertFatal((eNB->ulsch[1+i] = new_eNB_ulsch(MAX_TURBO_ITERATIONS,fp->N_RB_UL, 0)) != NULL,
+                "Can't get eNB ulsch structures\n");
+    // this is the transmission mode for the signalling channels
+    // this will be overwritten with the real transmission mode by the RRC once the UE is connected
+    eNB->transmission_mode[i] = fp->nb_antenna_ports_eNB==1 ? 1 : 2;
+  }
+
+  // ULSCH for RA
+  AssertFatal( (eNB->ulsch[0] = new_eNB_ulsch(MAX_TURBO_ITERATIONS, fp->N_RB_UL, 0)) !=NULL,
+               "Can't get eNB ulsch structures\n");
+  eNB->dlsch_SI  = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp);
+  LOG_D(PHY,"eNB %d.%d : SI %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_SI);
+  eNB->dlsch_ra  = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp);
+  LOG_D(PHY,"eNB %d.%d : RA %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_ra);
+  eNB->dlsch_MCH = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp);
+  LOG_D(PHY,"eNB %d.%d : MCH %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_MCH);
+  eNB->rx_total_gain_dB=130;
+  // NOTE(review): unlike the per UE buffers, the SI/RA/MCH allocations above are not checked for NULL
+  for(int i=0; i<NUMBER_OF_UE_MAX; i++)
+    eNB->mu_mimo_mode[i].dl_pow_off = 2;
+  // reset the transmission counters
+  eNB->check_for_total_transmissions = 0;
+  eNB->check_for_MUMIMO_transmissions = 0;
+  eNB->FULL_MUMIMO_transmissions = 0;
+  eNB->check_for_SUMIMO_transmissions = 0;
+  fp->pucch_config_common.deltaPUCCH_Shift = 1;
+}
+
+/*
+ * Complete the eNB initialisation that depends on the attached RUs.
+ * For every instance/carrier: run phy_init_lte_eNB(), allocate the PRACH
+ * pointer tables, attach each RU receive antenna to the next free eNB
+ * antenna slot, accumulate the total RX/TX antenna counts and finally
+ * create the transport channels with init_transport().
+ * Aborts (AssertFatal) when an RU buffer is missing or when an antenna
+ * count is outside 1..3.
+ */
+void init_eNB_afterRU(void) {
+  for (int inst=0; inst<RC.nb_inst; inst++) {
+    for (int CC_id=0; CC_id<RC.nb_CC[inst]; CC_id++) {
+      PHY_VARS_eNB *eNB = RC.eNB[inst][CC_id];
+      phy_init_lte_eNB(eNB,0,0);
+      // the antenna totals are rebuilt below by summing over the RUs
+      eNB->frame_parms.nb_antennas_rx       = 0;
+      eNB->frame_parms.nb_antennas_tx       = 0;
+      // pointer tables with room for 64 antenna entries
+      eNB->prach_vars.rxsigF[0] = (int16_t **)malloc16(64*sizeof(int16_t *));
+
+      for (int ce_level=0; ce_level<4; ce_level++) {
+        eNB->prach_vars_br.rxsigF[ce_level] = (int16_t **)malloc16(64*sizeof(int16_t *));
+      }
+
+      // aa is the running eNB antenna index; it keeps growing across RUs
+      for (int ru_id=0,aa=0; ru_id<eNB->num_RU; ru_id++) {
+        eNB->frame_parms.nb_antennas_rx    += eNB->RU_list[ru_id]->nb_rx;
+        eNB->frame_parms.nb_antennas_tx    += eNB->RU_list[ru_id]->nb_tx;
+        AssertFatal(eNB->RU_list[ru_id]->common.rxdataF!=NULL,
+                    "RU %d : common.rxdataF is NULL\n",
+                    eNB->RU_list[ru_id]->idx);
+        AssertFatal(eNB->RU_list[ru_id]->prach_rxsigF!=NULL,
+                    "RU %d : prach_rxsigF is NULL\n",
+                    eNB->RU_list[ru_id]->idx);
+
+        for (int i=0; i<eNB->RU_list[ru_id]->nb_rx; aa++,i++) {
+          LOG_I(PHY,"Attaching RU %d antenna %d to eNB antenna %d\n",eNB->RU_list[ru_id]->idx,i,aa);
+          eNB->prach_vars.rxsigF[0][aa]    =  eNB->RU_list[ru_id]->prach_rxsigF[i];
+
+          for (int ce_level=0; ce_level<4; ce_level++)
+            eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[ru_id]->prach_rxsigF_br[ce_level][i];
+
+          eNB->common_vars.rxdataF[aa]     =  eNB->RU_list[ru_id]->common.rxdataF[i];
+        }
+      }
+
+      AssertFatal( eNB->frame_parms.nb_antennas_rx > 0 && eNB->frame_parms.nb_antennas_rx < 4, "");
+      // fixed: the upper bound must be checked on the TX count, not on RX again
+      AssertFatal( eNB->frame_parms.nb_antennas_tx > 0 && eNB->frame_parms.nb_antennas_tx < 4, "");
+      LOG_I(PHY,"inst %d, CC_id %d : nb_antennas_rx %d\n",inst,CC_id,eNB->frame_parms.nb_antennas_rx);
+      init_transport(eNB);
+      //init_precoding_weights(RC.eNB[inst][CC_id]);
+    }
+  }
+}
+
+/*
+ * First-stage initialisation of every L1 eNB instance/carrier: register
+ * the IF module and its callbacks, clear the UL_INFO / Sched_INFO
+ * structures, create the UL_INFO mutex and point the indication bodies
+ * at the PDU lists embedded in the eNB structure.
+ * single_thread_flag is stored in each eNB; wait_for_sync is not used here.
+ */
+void init_eNB(int single_thread_flag,int wait_for_sync) {
+  AssertFatal(RC.eNB != NULL,"RC.eNB must have been allocated\n");
+
+  for (int inst=0; inst<RC.nb_L1_inst; inst++) {
+    AssertFatal(RC.eNB[inst] != NULL,"RC.eNB[%d] must have been allocated\n", inst);
+
+    for (int cc=0; cc<RC.nb_L1_CC[inst]; cc++) {
+      PHY_VARS_eNB *eNB = RC.eNB[inst][cc];
+      AssertFatal(eNB != NULL,"RC.eNB[%d][%d] must have been allocated\n", inst, cc);
+      eNB->abstraction_flag   = 0;
+      eNB->single_thread_flag = single_thread_flag;
+      // hook this PHY instance to the interface module and install callbacks
+      eNB->if_inst = IF_Module_init(inst);
+      AssertFatal(eNB->if_inst!=NULL,"Cannot register interface");
+      eNB->if_inst->schedule_response   = schedule_response;
+      eNB->if_inst->PHY_config_req      = phy_config_request;
+      // start from zeroed indication/scheduling containers
+      memset(&eNB->UL_INFO,0,sizeof(eNB->UL_INFO));
+      memset(&eNB->Sched_INFO,0,sizeof(eNB->Sched_INFO));
+      pthread_mutex_init( &eNB->UL_INFO_mutex, NULL);
+      LOG_I(PHY,"Setting indication lists\n");
+      // each indication body uses the statically embedded PDU array
+      eNB->UL_INFO.rx_ind.rx_indication_body.rx_pdu_list        = eNB->rx_pdu_list;
+      eNB->UL_INFO.crc_ind.crc_indication_body.crc_pdu_list     = eNB->crc_pdu_list;
+      eNB->UL_INFO.sr_ind.sr_indication_body.sr_pdu_list        = eNB->sr_pdu_list;
+      eNB->UL_INFO.harq_ind.harq_indication_body.harq_pdu_list  = eNB->harq_pdu_list;
+      eNB->UL_INFO.cqi_ind.cqi_indication_body.cqi_pdu_list     = eNB->cqi_pdu_list;
+      eNB->UL_INFO.cqi_ind.cqi_indication_body.cqi_raw_pdu_list = eNB->cqi_raw_pdu_list;
+      eNB->prach_energy_counter = 0;
+    }
+  }
+
+  SET_LOG_DEBUG(PRACH);
+}
+
+/* Kill the processing threads of eNB instances 0 .. nb_inst-1, in order. */
+void stop_eNB(int nb_inst) {
+  int inst = 0;
+
+  while (inst < nb_inst) {
+    LOG_I(PHY,"Killing eNB %d processing threads\n",inst);
+    kill_eNB_proc(inst);
+    inst++;
+  }
+}
+
+/*
+ * Fill the openair0 RF configuration of an RU that drives a local RF unit.
+ * The sample rate, samples per frame and TX/RX bandwidth are chosen from
+ * N_RB_DL (and, for 100 RBs, from the numerology and threequarter_fs);
+ * duplex mode, channel counts, clock source and the per-channel
+ * frequencies/gains are then copied from the RU and frame parameters.
+ * Aborts (AssertFatal) on an unsupported N_RB_DL.
+ */
+void fill_rf_config(RU_t *ru, char *rf_config_file) {
+  LTE_DL_FRAME_PARMS *fp   = ru->frame_parms;
+  openair0_config_t *cfg   = &ru->openair0_cfg;
+  int numerology = get_softmodem_params()->numerology;
+
+  switch (fp->N_RB_DL) {
+    case 100:
+      switch (numerology) {
+        case 0:
+          // three-quarter sampling only changes the rate, not the bandwidth
+          cfg->sample_rate       = fp->threequarter_fs ? 23.04e6 : 30.72e6;
+          cfg->samples_per_frame = fp->threequarter_fs ? 230400 : 307200;
+          cfg->tx_bw = 10e6;
+          cfg->rx_bw = 10e6;
+          break;
+
+        case 1:
+          cfg->sample_rate=61.44e6;
+          cfg->samples_per_frame = 307200;
+          cfg->tx_bw = 20e6;
+          cfg->rx_bw = 20e6;
+          break;
+
+        case 2:
+          cfg->sample_rate=122.88e6;
+          cfg->samples_per_frame = 307200;
+          cfg->tx_bw = 40e6;
+          cfg->rx_bw = 40e6;
+          break;
+
+        default:
+          // unknown numerology: report it and fall back to the numerology 0 values
+          LOG_E(PHY,"Wrong input for numerology %d\n setting to 20MHz normal CP configuration",numerology);
+          cfg->sample_rate=30.72e6;
+          cfg->samples_per_frame = 307200;
+          cfg->tx_bw = 10e6;
+          cfg->rx_bw = 10e6;
+          break;
+      }
+
+      break;
+
+    case 50:
+      cfg->sample_rate=15.36e6;
+      cfg->samples_per_frame = 153600;
+      cfg->tx_bw = 5e6;
+      cfg->rx_bw = 5e6;
+      break;
+
+    case 25:
+      cfg->sample_rate=7.68e6;
+      cfg->samples_per_frame = 76800;
+      cfg->tx_bw = 2.5e6;
+      cfg->rx_bw = 2.5e6;
+      break;
+
+    case 6:
+      cfg->sample_rate=1.92e6;
+      cfg->samples_per_frame = 19200;
+      cfg->tx_bw = 1.5e6;
+      cfg->rx_bw = 1.5e6;
+      break;
+
+    default:
+      AssertFatal(1==0,"Unknown N_RB_DL %d\n",fp->N_RB_DL);
+  }
+
+  cfg->duplex_mode = (fp->frame_type==TDD) ? duplex_mode_TDD : duplex_mode_FDD;
+  cfg->Mod_id = 0;
+  cfg->num_rb_dl=fp->N_RB_DL;
+  cfg->tx_num_channels=ru->nb_tx;
+  cfg->rx_num_channels=ru->nb_rx;
+  cfg->clock_source=get_softmodem_params()->clock_source;
+
+  // note: the RX frequency/gain entries are also indexed by the TX channel count
+  for (int ch=0; ch<ru->nb_tx; ch++) {
+    cfg->tx_freq[ch] = (double)fp->dl_CarrierFreq;
+    cfg->rx_freq[ch] = (double)fp->ul_CarrierFreq;
+    cfg->tx_gain[ch] = (double)ru->att_tx;
+    cfg->rx_gain[ch] = ru->max_rxgain-(double)ru->att_rx;
+    cfg->configFilename = rf_config_file;
+    LOG_I(PHY,"channel %d, Setting tx_gain offset %f, rx_gain offset %f, tx_freq %f, rx_freq %f\n",
+          ch, cfg->tx_gain[ch],
+          cfg->rx_gain[ch],
+          cfg->tx_freq[ch],
+          cfg->rx_freq[ch]);
+  }
+}
+
+/*
+ * Set the timing parameters of an RU from its frame parameters.
+ * In TDD, N_TA_offset is 624 samples for 100 RBs and is scaled down,
+ * together with sf_extension and end_of_burst_delay, by 2 for 50 RBs and
+ * by 4 for 25 RBs; any other bandwidth is not handled and terminates the
+ * process. In FDD the three values are set to 0.
+ * Returns 0.
+ */
+int setup_RU_buffers(RU_t *ru) {
+  AssertFatal(ru, "ru is NULL");
+  LTE_DL_FRAME_PARMS *fp = ru->frame_parms;
+  LOG_I(PHY,"setup_RU_buffers: frame_parms = %p\n",fp);
+
+  if (fp->frame_type != TDD) {
+    ru->N_TA_offset = 0;
+    ru->sf_extension = 0;
+    ru->end_of_burst_delay = 0;
+    return(0);
+  }
+
+  // scaling factor relative to the 100 RB configuration
+  int divisor;
+
+  switch (fp->N_RB_DL) {
+    case 100:
+      divisor = 1;
+      break;
+
+    case 50:
+      divisor = 2;
+      break;
+
+    case 25:
+      divisor = 4;
+      break;
+
+    default:
+      LOG_E(PHY,"not handled, todo\n");
+      exit(1);
+  }
+
+  ru->N_TA_offset = 624/divisor;
+  ru->sf_extension       /= divisor;
+  ru->end_of_burst_delay /= divisor;
+  return(0);
+}
+
+/*
+ * Allocate the UE-specific beamforming weights of every DLSCH of an eNB.
+ * For each UE, transport block and layer (4 layers) one weight vector of
+ * ofdm_symbol_size entries is allocated per TX antenna, counting the TX
+ * antennas of all RUs in RC, and every entry is set to 0x00007fff.
+ */
+void init_precoding_weights(PHY_VARS_eNB *eNB) {
+  LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms;
+  // total number of TX antennas over all RUs
+  int total_tx=0;
+
+  for (int r=0; r<RC.nb_RU; r++)
+    total_tx += RC.ru[r]->nb_tx;
+
+  // init precoding weights
+  for (int ue=0; ue<NUMBER_OF_UE_MAX; ue++) {
+    for (int tb=0; tb<2; tb++) {
+      LTE_eNB_DLSCH_t *dlsch = eNB->dlsch[ue][tb];
+
+      for (int layer=0; layer<4; layer++) {
+        int32_t **weights = (int32_t **)malloc16(total_tx*sizeof(int32_t *));
+        dlsch->ue_spec_bf_weights[layer] = weights;
+
+        for (int ant=0; ant<total_tx; ant++) {
+          int32_t *w = (int32_t *)malloc16(fp->ofdm_symbol_size*sizeof(int32_t));
+          weights[ant] = w;
+
+          for (int k=0; k<fp->ofdm_symbol_size; k++)
+            w[k] = 0x00007fff;
+        }
+      }
+    }
+  }
+}
+
+/*
+ * Run PRACH reception for one eNB.
+ * With br_flag == 0 a single rx_prach0() call is made for the legacy
+ * PRACH frame/subframe. Otherwise (eMTC) every enabled CE level whose bit
+ * is set in the PRACH mask of the BR frame/subframe is processed: the
+ * first frame of a repetition group is recorded, the repetition counter is
+ * incremented, rx_prach0() is run for that CE level and the counter is
+ * cleared once the configured number of repetitions is reached.
+ * The max_* and avg_* pointers, Nf and tdd_mapindex are passed through
+ * to rx_prach0() unchanged.
+ */
+void ocp_rx_prach(PHY_VARS_eNB *eNB,
+                  L1_rxtx_proc_t *proc,
+                  RU_t *ru,
+                  uint16_t *max_preamble,
+                  uint16_t *max_preamble_energy,
+                  uint16_t *max_preamble_delay,
+                  uint16_t *avg_preamble_energy,
+                  uint16_t Nf,
+                  uint8_t tdd_mapindex,
+                  uint8_t br_flag) {
+  int i;
+  int prach_mask=0;
+
+  if (br_flag == 0) {
+    rx_prach0(eNB,ru,proc->frame_prach, proc->subframe_prach,
+              max_preamble,max_preamble_energy,max_preamble_delay,avg_preamble_energy,Nf,tdd_mapindex,0,0);
+  } else { // This is procedure for eMTC, basically handling the repetitions
+    prach_mask = is_prach_subframe(&eNB->frame_parms,proc->frame_prach_br,proc->subframe_prach_br);
+
+    for (i=0; i<4; i++) {
+      if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) &&
+          ((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now
+
+        // if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2)
+        if (eNB->prach_vars_br.repetition_number[i]==0) eNB->prach_vars_br.first_frame[i]=proc->frame_prach_br;
+
+        // increment repetition number
+        eNB->prach_vars_br.repetition_number[i]++;
+        // do basic PRACH reception; fixed: use the BR frame together with the
+        // BR subframe (the legacy frame_prach was mixed with subframe_prach_br)
+        rx_prach0(eNB,ru,proc->frame_prach_br, proc->subframe_prach_br,
+                  max_preamble,max_preamble_energy,max_preamble_delay,avg_preamble_energy,Nf,tdd_mapindex,1,i);
+
+        // if last repetition, clear counter
+        if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) {
+          eNB->prach_vars_br.repetition_number[i]=0;
+        }
+      }
+    } /* for i ... */
+  } /* else br_flag == 0 */
+}
+
+/*
+ * PRACH detection and indication for one eNB and one subframe.
+ * The RU PRACH buffers are first re-attached to the eNB antenna slots,
+ * then ocp_rx_prach() is run. If the energy counter has reached 100 and
+ * the strongest preamble exceeds the noise estimate (prach_I0) plus the
+ * DTX threshold, a RACH indication is filled (eMTC list when br_flag == 1,
+ * legacy list otherwise, the latter being sent at once in PNF mode).
+ * In the legacy path, a non-detection updates the prach_I0 average and
+ * increments the energy counter up to 100.
+ */
+void prach_procedures_ocp(PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int br_flag) {
+  uint16_t max_preamble[4],max_preamble_energy[4],max_preamble_delay[4],avg_preamble_energy[4];
+  RU_t *ru;
+  int aa=0;
+  int ru_aa;
+
+  for (int i=0; i<eNB->num_RU; i++) {
+    ru=eNB->RU_list[i];
+
+    // fixed: aa is the running eNB antenna index and must not restart at 0 for
+    // each RU, otherwise the antennas of the previous RUs are overwritten
+    // (same accumulation as in init_eNB_afterRU)
+    for (ru_aa=0; ru_aa<ru->nb_rx; ru_aa++,aa++) {
+      eNB->prach_vars.rxsigF[0][aa] = eNB->RU_list[i]->prach_rxsigF[ru_aa];
+
+      if (br_flag==1)
+        for (int ce_level=0; ce_level<4; ce_level++)
+          eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[i]->prach_rxsigF_br[ce_level][ru_aa];
+    }
+  }
+
+  // run PRACH detection for CE-level 0 only for now when br_flag is set
+  ocp_rx_prach(eNB,
+               proc,
+               eNB->RU_list[0],
+               &max_preamble[0],
+               &max_preamble_energy[0],
+               &max_preamble_delay[0],
+               &avg_preamble_energy[0],
+               proc->frame_prach,
+               0
+               ,br_flag
+              );
+  LOG_D(PHY,"RACH detection index 0: max preamble: %u, energy: %u, delay: %u, avg energy: %u\n",
+        max_preamble[0],
+        max_preamble_energy[0],
+        max_preamble_delay[0],
+        avg_preamble_energy[0]
+       );
+
+  if (br_flag==1) {
+    int             prach_mask;
+    prach_mask = is_prach_subframe (&eNB->frame_parms, proc->frame_prach_br, proc->subframe_prach_br);
+    eNB->UL_INFO.rach_ind_br.rach_indication_body.preamble_list = eNB->preamble_list_br;
+    // only list entry 0 / CE level 0 is handled for now (see the disabled loop)
+    int             ind = 0;
+    int             ce_level = 0;
+    /* Save for later, it doesn't work
+       for (int ind=0,ce_level=0;ce_level<4;ce_level++) {
+
+       if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1)&&
+       (prach_mask&(1<<(1+ce_level)) > 0) && // prach is active and CE level has finished its repetitions
+       (eNB->prach_vars_br.repetition_number[ce_level]==
+       eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) {
+
+    */
+
+    if (eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[0] == 1) {
+      if ((eNB->prach_energy_counter == 100) && (max_preamble_energy[0] > eNB->measurements.prach_I0 + eNB->prach_DTX_threshold_emtc[0])) {
+        eNB->UL_INFO.rach_ind_br.rach_indication_body.number_of_preambles++;
+        eNB->preamble_list_br[ind].preamble_rel8.timing_advance = max_preamble_delay[ind];      //
+        eNB->preamble_list_br[ind].preamble_rel8.preamble = max_preamble[ind];
+        // note: fid is implicitly 0 here, this is the rule for eMTC RA-RNTI from 36.321, Section 5.1.4
+        eNB->preamble_list_br[ind].preamble_rel8.rnti = 1 + proc->subframe_prach + (60*(eNB->prach_vars_br.first_frame[ce_level] % 40));
+        eNB->preamble_list_br[ind].instance_length = 0; //don't know exactly what this is
+        eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type = 1 + ce_level;    // CE Level
+        LOG_I (PHY, "Filling NFAPI indication for RACH %d CELevel %d (mask %x) : TA %d, Preamble %d, rnti %x, rach_resource_type %d\n",
+               ind,
+               ce_level,
+               prach_mask,
+               eNB->preamble_list_br[ind].preamble_rel8.timing_advance,
+               eNB->preamble_list_br[ind].preamble_rel8.preamble, eNB->preamble_list_br[ind].preamble_rel8.rnti, eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type);
+      }
+    }
+
+    /*
+      ind++;
+      }
+      } */// ce_level
+  } else if ((eNB->prach_energy_counter == 100) &&
+             (max_preamble_energy[0] > eNB->measurements.prach_I0+eNB->prach_DTX_threshold)) {
+    LOG_I(PHY,"[eNB %d/%d][RAPROC] Frame %d, subframe %d Initiating RA procedure with preamble %d, energy %d.%d dB, delay %d\n",
+          eNB->Mod_id,
+          eNB->CC_id,
+          proc->frame_prach,
+          proc->subframe_prach,
+          max_preamble[0],
+          max_preamble_energy[0]/10,
+          max_preamble_energy[0]%10,
+          max_preamble_delay[0]);
+    // UL_INFO is shared with rxtx(), which takes the same mutex
+    pthread_mutex_lock(&eNB->UL_INFO_mutex);
+    eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles  = 1;
+    eNB->UL_INFO.rach_ind.rach_indication_body.preamble_list        = &eNB->preamble_list[0];
+    eNB->UL_INFO.rach_ind.rach_indication_body.tl.tag               = NFAPI_RACH_INDICATION_BODY_TAG;
+    eNB->UL_INFO.rach_ind.header.message_id                         = NFAPI_RACH_INDICATION;
+    eNB->UL_INFO.rach_ind.sfn_sf                                    = proc->frame_prach<<4 | proc->subframe_prach;
+    eNB->preamble_list[0].preamble_rel8.tl.tag                = NFAPI_PREAMBLE_REL8_TAG;
+    eNB->preamble_list[0].preamble_rel8.timing_advance        = max_preamble_delay[0];
+    eNB->preamble_list[0].preamble_rel8.preamble              = max_preamble[0];
+    eNB->preamble_list[0].preamble_rel8.rnti                  = 1+proc->subframe_prach;  // note: fid is implicitly 0 here
+    eNB->preamble_list[0].preamble_rel13.rach_resource_type   = 0;
+    eNB->preamble_list[0].instance_length                     = 0; //don't know exactly what this is
+
+    if (NFAPI_MODE==NFAPI_MODE_PNF) {  // If NFAPI PNF then we need to send the message to the VNF
+      LOG_D(PHY,"Filling NFAPI indication for RACH : SFN_SF:%d TA %d, Preamble %d, rnti %x, rach_resource_type %d\n",
+            NFAPI_SFNSF2DEC(eNB->UL_INFO.rach_ind.sfn_sf),
+            eNB->preamble_list[0].preamble_rel8.timing_advance,
+            eNB->preamble_list[0].preamble_rel8.preamble,
+            eNB->preamble_list[0].preamble_rel8.rnti,
+            eNB->preamble_list[0].preamble_rel13.rach_resource_type);
+      oai_nfapi_rach_ind(&eNB->UL_INFO.rach_ind);
+      // the indication has been sent, so clear it
+      eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 0;
+    }
+
+    pthread_mutex_unlock(&eNB->UL_INFO_mutex);
+  } // max_preamble_energy > prach_I0 + DTX threshold
+  else {
+    // no detection: update the running PRACH noise estimate (weights 900/1024
+    // old value, 124/1024 new average) and let the counter grow up to 100
+    eNB->measurements.prach_I0 = ((eNB->measurements.prach_I0*900)>>10) + ((avg_preamble_energy[0]*124)>>10);
+
+    if (eNB->prach_energy_counter < 100)
+      eNB->prach_energy_counter++;
+  }
+}
+
+/*
+ * Run the PRACH procedures when (frame, subframe) is a PRACH occasion:
+ * first with br_flag 0, then with br_flag 1. Does nothing otherwise.
+ */
+void prach_eNB(PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int frame,int subframe) {
+  // nothing to detect outside PRACH subframes
+  if (!(is_prach_subframe(&eNB->frame_parms, frame,subframe)>0))
+    return;
+
+  for (int br_flag=0; br_flag<2; br_flag++)
+    prach_procedures_ocp(eNB, proc, br_flag);
+}
+
+/*
+ * Process one subframe for one eNB: optional nFAPI subframe tick (PNF),
+ * PRACH detection, release of UEs in the free list, UE-specific RX,
+ * UL indication through the IF module (under UL_INFO_mutex) and finally
+ * the TX procedures. thread_name is not used. Always returns 0.
+ */
+static inline int rxtx(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc, char *thread_name) {
+  AssertFatal( eNB !=NULL, "");
+
+  // a PNF must tell nFAPI about every (sub)frame tick
+  if (NFAPI_MODE==NFAPI_MODE_PNF)
+    oai_subframe_ind(proc->frame_rx, proc->subframe_rx);
+
+  // in PNF mode the PDCCH symbol count of the TX subframe must be non-zero
+  AssertFatal( NFAPI_MODE!=NFAPI_MODE_PNF ||
+               eNB->pdcch_vars[proc->subframe_tx&1].num_pdcch_symbols != 0, "");
+  prach_eNB(eNB,proc,proc->frame_rx,proc->subframe_rx);
+  release_UE_in_freeList(eNB->Mod_id);
+
+  // UE-specific RX processing for subframe n
+  if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF)
+    phy_procedures_eNB_uespec_RX(eNB, proc);
+
+  // hand the collected uplink information to the upper layer
+  pthread_mutex_lock(&eNB->UL_INFO_mutex);
+  eNB->UL_INFO.module_id = eNB->Mod_id;
+  eNB->UL_INFO.CC_id     = eNB->CC_id;
+  eNB->UL_INFO.frame     = proc->frame_rx;
+  eNB->UL_INFO.subframe  = proc->subframe_rx;
+  eNB->if_inst->UL_indication(&eNB->UL_INFO, proc);
+  pthread_mutex_unlock(&eNB->UL_INFO_mutex);
+
+  phy_procedures_eNB_TX(eNB, proc, 1);
+  return(0);
+}
+
+/*
+ * Read one subframe of samples from the RF device of an RU.
+ * The samples of every RX antenna are written into the rxdata slot of the
+ * subframe following proc->subframe_rx. A short read and a timestamp jump
+ * that differs from one subframe are reported in the log; the timing
+ * fields of proc are then updated from the (offset-corrected) timestamp
+ * through setAllfromTS().
+ */
+void rx_rf(RU_t *ru, L1_rxtx_proc_t *proc) {
+  LTE_DL_FRAME_PARMS *fp = ru->frame_parms;
+  void *rxp[ru->nb_rx];
+  unsigned int rxs;
+  int i;
+  openair0_timestamp ts=0, timestamp_rx;
+  // timestamp of the previous read, kept across calls (0 = no read yet)
+  static openair0_timestamp old_ts=0;
+
+  for (i=0; i<ru->nb_rx; i++)
+    //receive in the next slot
+    rxp[i] = (void *)&ru->common.rxdata[i][((proc->subframe_rx+1)%10)*fp->samples_per_tti];
+
+  rxs = ru->rfdevice.trx_read_func(&ru->rfdevice,
+                                   &ts,
+                                   rxp,
+                                   fp->samples_per_tti,
+                                   ru->nb_rx);
+  timestamp_rx = ts-ru->ts_offset;
+
+  //  AssertFatal(rxs == fp->samples_per_tti,
+  //        "rx_rf: Asked for %d samples, got %d from SDR\n",fp->samples_per_tti,rxs);
+  if(rxs != fp->samples_per_tti) {
+    LOG_E(PHY,"rx_rf: Asked for %d samples, got %d from SDR\n",fp->samples_per_tti,rxs);
+#if defined(USRP_REC_PLAY)
+    exit_fun("Exiting IQ record/playback");
+#else
+    //exit_fun( "problem receiving samples" );
+    LOG_E(PHY, "problem receiving samples");
+#endif
+  }
+
+  if (old_ts != 0 && timestamp_rx - old_ts != fp->samples_per_tti) {
+    // fixed: log the distance that was actually tested (new timestamp minus
+    // previous one); proc->timestamp_rx has not been refreshed yet at this point
+    LOG_E(HW,"impossible shift in rx stream, rx: %ld, previous rx distance: %ld, should be %d\n", timestamp_rx, timestamp_rx - old_ts, fp->samples_per_tti);
+    //ru->ts_offset += (proc->timestamp_rx - old_ts - fp->samples_per_tti);
+    //proc->timestamp_rx = ts-ru->ts_offset;
+  }
+
+  old_ts=timestamp_rx;
+  setAllfromTS(timestamp_rx, proc);
+}
+
+/*
+ * Write the TX samples of proc->subframe_tx to the RF device of an RU.
+ * Nothing is sent unless the subframe is a downlink (SF_DL) or special
+ * (SF_S) subframe. For a special subframe only the DL part plus
+ * end_of_burst_delay samples is written and the burst is flagged as ended;
+ * for the first TDD DL subframe after an UL subframe the burst is flagged
+ * as started and the write begins sf_extension samples early.
+ */
+void ocp_tx_rf(RU_t *ru, L1_rxtx_proc_t *proc) {
+  LTE_DL_FRAME_PARMS *fp = ru->frame_parms;
+  void *txp[ru->nb_tx];
+  lte_subframe_t SF_type     = subframe_select(fp,proc->subframe_tx%10);
+  lte_subframe_t prevSF_type = subframe_select(fp,(proc->subframe_tx+9)%10);
+
+  // only downlink and special subframes carry TX samples
+  if (SF_type != SF_DL && SF_type != SF_S)
+    return;
+
+  int early = 0;                    // samples sent ahead of the subframe start
+  int nsamps = fp->samples_per_tti; // number of samples to write
+  int flags = 1;
+
+  if (SF_type == SF_S) {
+    /* end_of_burst_delay is used to stop TX only "after a while".
+     * If we stop right after effective signal, with USRP B210 and
+     * B200mini, we observe a high EVM on the S subframe (on the
+     * PSS).
+     * A value of 400 (for 30.72MHz) solves this issue. This is
+     * the default.
+     */
+    nsamps = (fp->ofdm_symbol_size + fp->nb_prefix_samples0)
+             + (fp->dl_symbols_in_S_subframe - 1) * (fp->ofdm_symbol_size + fp->nb_prefix_samples)
+             + ru->end_of_burst_delay;
+    flags=3; // end of burst
+  }
+
+  if (fp->frame_type == TDD && SF_type == SF_DL && prevSF_type == SF_UL) {
+    flags = 2; // start of burst
+    early = ru->sf_extension;
+  }
+
+  // round the extension down to a multiple of 8 (AVX2) or 4 samples
+#if defined(__x86_64) || defined(__i386__)
+#ifdef __AVX2__
+  early = (early)&0xfffffff8;
+#else
+  early = (early)&0xfffffffc;
+#endif
+#elif defined(__arm__)
+  early = (early)&0xfffffffc;
+#endif
+
+  // prepare tx buffer pointers
+  for (int ant=0; ant<ru->nb_tx; ant++)
+    txp[ant] = (void *)&ru->common.txdata[ant][(proc->subframe_tx*fp->samples_per_tti)-early];
+
+  ru->rfdevice.trx_write_func(&ru->rfdevice,
+                              proc->timestamp_tx+ru->ts_offset-ru->openair0_cfg.tx_sample_advance-early,
+                              txp,
+                              nsamps+early,
+                              ru->nb_tx,
+                              flags);
+  LOG_D(PHY,"[TXPATH] RU %d tx_rf, writing to TS %llu, frame %d, subframe %d\n",ru->idx,
+        (long long unsigned int)proc->timestamp_tx,proc->frame_tx,proc->subframe_tx);
+}
+
+/*
+ * Main thread of one RU (param is the RU_t pointer).
+ * After configuring the local RF (if any) and the RU timing parameters it
+ * waits for the global synchronisation, starts the RF device and then
+ * loops until oai_exit: read one subframe, run the RX front-end, run
+ * rxtx() for every eNB attached to the RU, run the TX front-end and write
+ * the samples to the RF device. Returns NULL.
+ */
+static void *ru_thread( void *param ) {
+  setbuf(stdout, NULL);
+  setbuf(stderr, NULL);
+  RU_t *ru = (RU_t *)param;
+  L1_rxtx_proc_t L1proc= {0};
+  // We pick the global thread pool from the legacy code global vars
+  L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool;
+  L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode;
+  L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode;
+
+  if (ru->if_south == LOCAL_RF) { // configure RF parameters only
+    fill_rf_config(ru,ru->rf_config_file);
+    init_frame_parms(ru->frame_parms,1);
+    phy_init_RU(ru);
+    init_rf(ru);
+  }
+
+  AssertFatal(setup_RU_buffers(ru)==0, "Exiting, cannot initialize RU Buffers\n");
+  LOG_I(PHY, "Signaling main thread that RU %d is ready\n",ru->idx);
+  wait_sync("ru_thread");
+
+  // Start RF device if any
+  if (ru->rfdevice.trx_start_func(&ru->rfdevice) != 0)
+    LOG_E(HW,"Could not start the RF device\n");
+  else LOG_I(PHY,"RU %d rf device ready\n",ru->idx);
+
+  // This is a forever while loop, it loops over subframes which are scheduled by incoming samples from HW devices
+  while (!oai_exit) {
+    // synchronization on input FH interface, acquire signals/data and block
+    rx_rf(ru, &L1proc);
+    // do RX front-end processing (frequency-shift, dft) if needed
+    fep_full(ru, L1proc.subframe_rx);
+
+    // At this point, all information for subframe has been received on FH interface
+    // If this proc is to provide synchronization, do so
+    // Fixme: not used
+    // wakeup_slaves(proc);
+    for (int i=0; i<ru->num_eNB; i++) {
+      char string[20];
+      // bounded formatting: a large index can no longer overflow the buffer
+      snprintf(string,sizeof(string),"Incoming RU %d",ru->idx);
+
+      if (rxtx(ru->eNB_list[i],&L1proc,string) < 0)
+        LOG_E(PHY,"eNB %d CC_id %d failed during execution\n",
+              ru->eNB_list[i]->Mod_id,ru->eNB_list[i]->CC_id);
+    }
+
+    // do TX front-end processing if needed (precoding and/or IDFTs)
+    feptx_prec(ru, L1proc.frame_tx, L1proc.subframe_tx);
+    // do OFDM if needed
+    feptx_ofdm(ru, L1proc.frame_tx, L1proc.subframe_tx);
+    // do outgoing fronthaul (south) if needed
+    ocp_tx_rf(ru, &L1proc);
+  }
+
+  LOG_W(PHY,"Exiting ru_thread \n");
+  ru->rfdevice.trx_end_func(&ru->rfdevice);
+  LOG_I(PHY,"RU %d rf device stopped\n",ru->idx);
+  return NULL;
+}
+
+/*
+ * Load the RF device of an RU with its openair0 configuration.
+ * The calling thread is renamed to "UHD for OAI" while the device is
+ * loaded (presumably so that threads created by the device library carry
+ * that name - TODO confirm) and gets its previous name back afterwards.
+ * Returns the result of openair0_device_load().
+ */
+int init_rf(RU_t *ru) {
+  char name[256];
+  // the previous name is only restored if it could actually be read;
+  // otherwise the buffer would be passed on uninitialised
+  int have_name = (pthread_getname_np(pthread_self(),name, sizeof(name)) == 0);
+  pthread_setname_np(pthread_self(),"UHD for OAI");
+  int ret=openair0_device_load(&ru->rfdevice,&ru->openair0_cfg);
+
+  if (have_name)
+    pthread_setname_np(pthread_self(),name);
+
+  return ret;
+}
+
+/*
+ * Initialise the run-time state of every RU in RC and link RUs and eNBs.
+ * rf_config_file is stored in each RU; send_dmrssync enables DMRS sync
+ * generation on the RUs that are not slaves. Each RU borrows the frame
+ * parameters of its first eNB and is appended to the RU_list of every
+ * eNB it serves.
+ */
+void init_RU(char *rf_config_file, int send_dmrssync) {
+  RU_t *ru;
+  PHY_VARS_eNB *eNB0= (PHY_VARS_eNB *)NULL;
+  int i;
+  int CC_id;
+  // read in configuration file)
+  LOG_I(PHY,"configuring RU from file\n");
+  LOG_I(PHY,"number of L1 instances %d, number of RU %d, number of CPU cores %d\n",
+        RC.nb_L1_inst,RC.nb_RU,get_nprocs());
+
+  // the RU lists of the eNBs are rebuilt below
+  if (RC.nb_CC != 0)
+    for (i=0; i<RC.nb_L1_inst; i++)
+      for (CC_id=0; CC_id<RC.nb_CC[i]; CC_id++)
+        RC.eNB[i][CC_id]->num_RU=0;
+
+  LOG_D(PHY,"Process RUs RC.nb_RU:%d\n",RC.nb_RU);
+
+  for (int ru_id=0; ru_id<RC.nb_RU; ru_id++) {
+    LOG_D(PHY,"Process RC.ru[%d]\n",ru_id);
+    ru               = RC.ru[ru_id];
+    ru->rf_config_file = rf_config_file;
+    ru->idx          = ru_id;
+    ru->ts_offset    = 0;
+
+    if (ru->is_slave == 1) {
+      ru->in_synch    = 0;
+      ru->generate_dmrs_sync = 0;
+    } else {
+      ru->in_synch    = 1;
+      ru->generate_dmrs_sync=send_dmrssync;
+    }
+
+    ru->cmd      = EMPTY;
+    ru->south_out_cnt= 0;
+    ru->wakeup_L1_sleeptime = 2000;
+    ru->wakeup_L1_sleep_cnt_max  = 3;
+
+    if (ru->num_eNB > 0) {
+      LOG_D(PHY, "%s() RC.ru[%d].num_eNB:%d ru->eNB_list[0]:%p RC.eNB[0][0]:%p rf_config_file:%s\n",
+            __FUNCTION__, ru_id, ru->num_eNB, ru->eNB_list[0], RC.eNB[0][0], ru->rf_config_file);
+      AssertFatal(ru->eNB_list[0], "ru->eNB_list is not initialized\n");
+    } else {
+      LOG_E(PHY,"Wrong data model, assigning eNB 0, carrier 0 to RU 0\n");
+      ru->eNB_list[0] = RC.eNB[0][0];
+      ru->num_eNB=1;
+    }
+
+    eNB0 = ru->eNB_list[0];
+    // datamodel error in regular OAI: a RU uses one single eNB carrier parameters!
+    ru->frame_parms = &eNB0->frame_parms;
+
+    // fixed: this allocation reads ru->frame_parms, so it must come after the
+    // assignment above (it used to run before frame_parms was set)
+    //    ru->generate_dmrs_sync = (ru->is_slave == 0) ? 1 : 0;
+    if (ru->generate_dmrs_sync == 1) {
+      generate_ul_ref_sigs();
+      ru->dmrssync = (int16_t *)malloc16_clear(ru->frame_parms->ofdm_symbol_size*2*sizeof(int16_t));
+    }
+
+    // register this RU in every eNB it serves
+    for (i=0; i<ru->num_eNB; i++) {
+      eNB0 = ru->eNB_list[i];
+      int ruIndex=eNB0->num_RU++;
+      eNB0->RU_list[ruIndex] = ru;
+    }
+  } // for ru_id
+}
+
+/* Stop the processing threads of RUs 0 .. nb_ru-1, in order. */
+void stop_RU(int nb_ru) {
+  int ru_id = 0;
+
+  while (ru_id < nb_ru) {
+    LOG_I(PHY, "Stopping RU %d processing threads\n", ru_id);
+    kill_RU_proc(RC.ru[ru_id]);
+    ru_id++;
+  }
+}
+
+/* --------------------------------------------------------*/
+/* from here function to use configuration module          */
+static int DEFBFW[] = {0x00007fff};
+
+/*
+ * Create the RC.ru array from the RU section of the configuration.
+ * Does nothing but set RC.nb_RU to 0 when no RU is configured, and ignores
+ * a second call once RC.ru exists. For each RU the attached eNB list, the
+ * SDR address and clock source, the local RF or transport (ethernet)
+ * parameters and the antenna counts/attenuations are read from the
+ * configuration. Aborts (AssertFatal) when memory cannot be allocated.
+ */
+void RCconfig_RU(void) {
+  paramdef_t RUParams[] = RUPARAMS_DESC;
+  paramlist_def_t RUParamList = {CONFIG_STRING_RU_LIST,NULL,0};
+  config_getlist( &RUParamList,RUParams,sizeof(RUParams)/sizeof(paramdef_t), NULL);
+
+  if ( RUParamList.numelt == 0  ) {
+    LOG_W(PHY, "Calling RCconfig_RU while no ru\n");
+    RC.nb_RU = 0;
+    return;
+  } // setting != NULL
+
+  if ( RC.ru != NULL ) {
+    LOG_W(PHY, "Calling RCconfig_RU twice (nb ru=%d), ignoring the second call data structure is %p\n",
+          RUParamList.numelt,RC.ru);
+    return;
+  }
+
+  RC.ru = (RU_t **)malloc(RC.nb_RU*sizeof(RU_t *));
+  // a zero-sized request may legitimately return NULL
+  AssertFatal(RC.nb_RU == 0 || RC.ru != NULL, "Cannot allocate the RU pointer array\n");
+
+  for (int j = 0; j < RC.nb_RU; j++) {
+    RC.ru[j] = (RU_t *)calloc(1, sizeof(RU_t));
+    AssertFatal(RC.ru[j] != NULL, "Cannot allocate RU %d\n", j);
+    RC.ru[j]->idx = j;
+    LOG_I(PHY,"Creating RC.ru[%d]:%p\n", j, RC.ru[j]);
+    RC.ru[j]->if_timing = synch_to_ext_device;
+    paramdef_t *vals=RUParamList.paramarray[j];
+
+    // eNBs are only attached when at least one L1 instance exists
+    if (RC.nb_L1_inst >0)
+      RC.ru[j]->num_eNB = vals[RU_ENB_LIST_IDX].numelt;
+    else
+      RC.ru[j]->num_eNB = 0;
+
+    // each listed eNB index is mapped to carrier 0 of that eNB
+    for (int i=0; i<RC.ru[j]->num_eNB; i++)
+      RC.ru[j]->eNB_list[i] = RC.eNB[vals[RU_ENB_LIST_IDX].iptr[i]][0];
+
+    if (config_isparamset(vals, RU_SDR_ADDRS)) {
+      RC.ru[j]->openair0_cfg.sdr_addrs = strdup(*(vals[RU_SDR_ADDRS].strptr));
+    }
+
+    if (config_isparamset(vals, RU_SDR_CLK_SRC)) {
+      char *paramVal=*(vals[RU_SDR_CLK_SRC].strptr);
+      LOG_D(PHY, "RU clock source set as %s\n", paramVal);
+
+      if (strcmp(paramVal, "internal") == 0) {
+        RC.ru[j]->openair0_cfg.clock_source = internal;
+      } else if (strcmp(paramVal, "external") == 0) {
+        RC.ru[j]->openair0_cfg.clock_source = external;
+      } else if (strcmp(paramVal, "gpsdo") == 0) {
+        RC.ru[j]->openair0_cfg.clock_source = gpsdo;
+      } else {
+        LOG_E(PHY, "Erroneous RU clock source in the provided configuration file: '%s'\n", paramVal);
+      }
+    }
+
+    if (strcmp(*(vals[RU_LOCAL_RF_IDX].strptr), "yes") == 0) {
+      if ( !(config_isparamset(vals,RU_LOCAL_IF_NAME_IDX)) ) {
+        // local RF and no interface name: the RU is a plain 3GPP eNodeB front-end
+        RC.ru[j]->if_south  = LOCAL_RF;
+        RC.ru[j]->function  = eNodeB_3GPP;
+        LOG_I(PHY, "Setting function for RU %d to eNodeB_3GPP\n",j);
+      } else {
+        RC.ru[j]->eth_params.local_if_name = strdup(*(vals[RU_LOCAL_IF_NAME_IDX].strptr));
+        RC.ru[j]->eth_params.my_addr       = strdup(*(vals[RU_LOCAL_ADDRESS_IDX].strptr));
+        RC.ru[j]->eth_params.remote_addr   = strdup(*(vals[RU_REMOTE_ADDRESS_IDX].strptr));
+        RC.ru[j]->eth_params.my_portc      = *(vals[RU_LOCAL_PORTC_IDX].uptr);
+        RC.ru[j]->eth_params.remote_portc  = *(vals[RU_REMOTE_PORTC_IDX].uptr);
+        RC.ru[j]->eth_params.my_portd      = *(vals[RU_LOCAL_PORTD_IDX].uptr);
+        RC.ru[j]->eth_params.remote_portd  = *(vals[RU_REMOTE_PORTD_IDX].uptr);
+      }
+
+      RC.ru[j]->max_pdschReferenceSignalPower = *(vals[RU_MAX_RS_EPRE_IDX].uptr);
+      RC.ru[j]->max_rxgain                    = *(vals[RU_MAX_RXGAIN_IDX].uptr);
+      RC.ru[j]->num_bands                     = vals[RU_BAND_LIST_IDX].numelt;
+      /* sf_extension is in unit of samples for 30.72MHz here, has to be scaled later */
+      RC.ru[j]->sf_extension                  = *(vals[RU_SF_EXTENSION_IDX].uptr);
+      RC.ru[j]->end_of_burst_delay            = *(vals[RU_END_OF_BURST_DELAY_IDX].uptr);
+
+      for (int i=0; i<RC.ru[j]->num_bands; i++) RC.ru[j]->band[i] = vals[RU_BAND_LIST_IDX].iptr[i];
+    } else {
+      LOG_I(PHY,"RU %d: Transport %s\n",j,*(vals[RU_TRANSPORT_PREFERENCE_IDX].strptr));
+      RC.ru[j]->eth_params.local_if_name    = strdup(*(vals[RU_LOCAL_IF_NAME_IDX].strptr));
+      RC.ru[j]->eth_params.my_addr          = strdup(*(vals[RU_LOCAL_ADDRESS_IDX].strptr));
+      RC.ru[j]->eth_params.remote_addr      = strdup(*(vals[RU_REMOTE_ADDRESS_IDX].strptr));
+      RC.ru[j]->eth_params.my_portc         = *(vals[RU_LOCAL_PORTC_IDX].uptr);
+      RC.ru[j]->eth_params.remote_portc     = *(vals[RU_REMOTE_PORTC_IDX].uptr);
+      RC.ru[j]->eth_params.my_portd         = *(vals[RU_LOCAL_PORTD_IDX].uptr);
+      RC.ru[j]->eth_params.remote_portd     = *(vals[RU_REMOTE_PORTD_IDX].uptr);
+    }  /* strcmp(local_rf, "yes") != 0 */
+
+    RC.ru[j]->nb_tx                             = *(vals[RU_NB_TX_IDX].uptr);
+    RC.ru[j]->nb_rx                             = *(vals[RU_NB_RX_IDX].uptr);
+    RC.ru[j]->att_tx                            = *(vals[RU_ATT_TX_IDX].uptr);
+    RC.ru[j]->att_rx                            = *(vals[RU_ATT_RX_IDX].uptr);
+  }// j=0..num_rus
+
+  return;
+}
+
+
+/*
+ * Read the common softmodem options and, unless the configuration module
+ * requested an abort, reset RC, load the RC configuration and either load
+ * the NB-IoT support or zero its instance counters.
+ */
+static void get_options(void) {
+  // CONFIG_NOEXITONHELP is only set while the common options are read
+  CONFIG_SETRTFLAG(CONFIG_NOEXITONHELP);
+  get_common_options(SOFTMODEM_ENB_BIT);
+  CONFIG_CLEARRTFLAG(CONFIG_NOEXITONHELP);
+
+  if (CONFIG_ISFLAGSET(CONFIG_ABORT))
+    return;
+
+  memset(&RC,0,sizeof(RC));
+  /* Read RC configuration file */
+  RCConfig();
+  NB_eNB_INST = RC.nb_inst;
+  printf("Configuration: nb_rrc_inst %d, nb_L1_inst %d, nb_ru %d\n",NB_eNB_INST,RC.nb_L1_inst,RC.nb_RU);
+
+  if (IS_SOFTMODEM_NONBIOT) {
+    printf("All Nb-IoT instances disabled\n");
+    RC.nb_nb_iot_macrlc_inst=0;
+    RC.nb_nb_iot_L1_inst=0;
+    RC.nb_nb_iot_rrc_inst=0;
+    return;
+  }
+
+  load_NB_IoT();
+  printf("               nb_nbiot_rrc_inst %d, nb_nbiot_L1_inst %d, nb_nbiot_macrlc_inst %d\n",
+         RC.nb_nb_iot_rrc_inst, RC.nb_nb_iot_L1_inst, RC.nb_nb_iot_macrlc_inst);
+}
+
+void set_default_frame_parms(LTE_DL_FRAME_PARMS *frame_parms[MAX_NUM_CCs]) {
+  /* Allocate one zero-initialized LTE_DL_FRAME_PARMS per component carrier and fill in default values */
+  for (int CC_id=0; CC_id<MAX_NUM_CCs; CC_id++) {
+    frame_parms[CC_id] = (LTE_DL_FRAME_PARMS *) calloc(1, sizeof(LTE_DL_FRAME_PARMS));
+    AssertFatal(frame_parms[CC_id] != NULL, "cannot allocate frame_parms for CC_id %d\n", CC_id);
+    /* Set some default values that may be overwritten while reading options */
+    frame_parms[CC_id]->frame_type          = FDD;
+    frame_parms[CC_id]->tdd_config          = 3;
+    frame_parms[CC_id]->tdd_config_S        = 0;
+    frame_parms[CC_id]->N_RB_DL             = 100;
+    frame_parms[CC_id]->N_RB_UL             = 100;
+    frame_parms[CC_id]->Ncp                 = NORMAL;
+    frame_parms[CC_id]->Ncp_UL              = NORMAL;
+    frame_parms[CC_id]->Nid_cell            = 0;
+    frame_parms[CC_id]->num_MBSFN_config    = 0;
+    frame_parms[CC_id]->nb_antenna_ports_eNB  = 1;
+    frame_parms[CC_id]->nb_antennas_tx      = 1;
+    frame_parms[CC_id]->nb_antennas_rx      = 1;
+    frame_parms[CC_id]->nushift             = 0;
+    frame_parms[CC_id]->phich_config_common.phich_resource = oneSixth;
+    frame_parms[CC_id]->phich_config_common.phich_duration = normal;
+    // UL RS Config
+    frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift = 0;//n_DMRS1 set to 0
+    frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.groupHoppingEnabled = 0;
+    frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.sequenceHoppingEnabled = 0;
+    frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.groupAssignmentPUSCH = 0;
+    frame_parms[CC_id]->prach_config_common.rootSequenceIndex=22;
+    frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig=1;
+    frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.prach_ConfigIndex=0;
+    frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.highSpeedFlag=0;
+    frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.prach_FreqOffset=0;
+    //    downlink_frequency[CC_id][0] = 2680000000; // Use float to avoid issue with frequency over 2^31.
+    //    downlink_frequency[CC_id][1] = downlink_frequency[CC_id][0];
+    //    downlink_frequency[CC_id][2] = downlink_frequency[CC_id][0];
+    //    downlink_frequency[CC_id][3] = downlink_frequency[CC_id][0];
+    //printf("Downlink for CC_id %d frequency set to %u\n", CC_id, downlink_frequency[CC_id][0]);
+    frame_parms[CC_id]->dl_CarrierFreq=downlink_frequency[CC_id][0];
+  }
+}
+
+void init_pdcp(void) {
+  /* On a DU node only the PDCP data indication callback is installed */
+  if (NODE_IS_DU(RC.rrc[0]->node_type)) {
+    pdcp_set_pdcp_data_ind_func((pdcp_data_ind_func_t) proto_agent_send_pdcp_data_ind);
+    return;
+  }
+  pdcp_layer_init();
+  uint32_t pdcp_initmask = ENB_NAS_USE_TUN_W_MBMS_BIT; /* set in every mode */
+
+  if (IS_SOFTMODEM_NOS1)
+    pdcp_initmask |= PDCP_USE_NETLINK_BIT | LINK_ENB_PDCP_TO_IP_DRIVER_BIT | ENB_NAS_USE_TUN_BIT | SOFTMODEM_NOKRNMOD_BIT;
+  else
+    pdcp_initmask |= LINK_ENB_PDCP_TO_GTPV1U_BIT;
+
+  if (split73 != SPLIT73_DU)
+    pdcp_module_init(pdcp_initmask);
+
+  if (NODE_IS_CU(RC.rrc[0]->node_type)) {
+    pdcp_set_rlc_data_req_func((send_rlc_data_req_func_t)proto_agent_send_rlc_data_req);
+    return;
+  }
+  pdcp_set_rlc_data_req_func((send_rlc_data_req_func_t) rlc_data_req);
+  pdcp_set_pdcp_data_ind_func((pdcp_data_ind_func_t) pdcp_data_ind);
+}
+
+static  void wait_nfapi_init(char *thread_name) { // blocks the caller until nfapi_sync_var is >= 0
+  printf( "waiting for NFAPI PNF connection and population of global structure (%s)\n",thread_name);
+  pthread_mutex_lock( &nfapi_sync_mutex );
+  // nfapi_sync_var is read with nfapi_sync_mutex held and re-checked after every wake-up of the condition wait
+  while (nfapi_sync_var<0)
+    pthread_cond_wait( &nfapi_sync_cond, &nfapi_sync_mutex );
+
+  pthread_mutex_unlock(&nfapi_sync_mutex);
+  printf( "NFAPI: got sync (%s)\n", thread_name); // thread_name is only used in the two progress messages
+}
+
+void terminate_task(module_id_t mod_id, task_id_t from, task_id_t to) {
+  /* Log the request, then post an ITTI TERMINATE_MESSAGE from task `from` to task `to` for instance mod_id */
+  LOG_I(ENB_APP, "sending TERMINATE_MESSAGE from task %s (%d) to task %s (%d)\n",
+        itti_get_task_name(from), from, itti_get_task_name(to), to);
+  MessageDef *terminate_msg = itti_alloc_new_message (from, TERMINATE_MESSAGE);
+  itti_send_msg_to_task (to, ENB_MODULE_ID_TO_INSTANCE(mod_id), terminate_msg);
+}
+
+int stop_L1L2(module_id_t enb_id) {
+  LOG_W(ENB_APP, "stopping lte-softmodem\n");
+  // Soft-stop: terminate the RRC task, kill the RU and eNB procs, free PHY memory. Returns 0, or -1 when RC.ru is NULL.
+  if (!RC.ru) {
+    LOG_UI(ENB_APP, "no RU configured\n");
+    return -1;
+  }
+
+  /* these tasks need to pick up new configuration */
+  terminate_task(enb_id, TASK_ENB_APP, TASK_RRC_ENB);
+  oai_exit = 1; // raised only around the two kill_*_proc() calls, cleared again below
+  LOG_I(ENB_APP, "calling kill_RU_proc() for instance %d\n", enb_id);
+  kill_RU_proc(RC.ru[enb_id]); // NOTE(review): RC.ru is indexed with the eNB id - assumes RU index == eNB index, confirm
+  LOG_I(ENB_APP, "calling kill_eNB_proc() for instance %d\n", enb_id);
+  kill_eNB_proc(enb_id);
+  oai_exit = 0;
+  // release the per-CC eNB PHY structures of this instance
+  for (int cc_id = 0; cc_id < RC.nb_CC[enb_id]; cc_id++) {
+    free_transport(RC.eNB[enb_id][cc_id]);
+    phy_free_lte_eNB(RC.eNB[enb_id][cc_id]);
+  }
+
+  phy_free_RU(RC.ru[enb_id]);
+  free_lte_top();
+  return 0;
+}
+
+/*
+ * Restart the lte-softmodem after it has been soft-stopped with stop_L1L2()
+ */
+int restart_L1L2(module_id_t enb_id) {
+  RU_t *ru = RC.ru[enb_id]; // NOTE(review): no NULL check on RC.ru here, unlike stop_L1L2() - confirm callers
+  MessageDef *msg_p = NULL;
+  LOG_W(ENB_APP, "restarting lte-softmodem\n");
+  /* block threads */
+  pthread_mutex_lock(&sync_mutex);
+  sync_var = -1;
+  pthread_mutex_unlock(&sync_mutex);
+  RC.ru_mask |= (1 << ru->idx); // set this RU's bit in RC.ru_mask
+  /* copy the changed frame parameters to the RU */
+  /* TODO this should be done for all RUs associated to this eNB */
+  memcpy(&ru->frame_parms, &RC.eNB[enb_id][0]->frame_parms, sizeof(LTE_DL_FRAME_PARMS));
+  /* reset the list of connected UEs in the MAC, since in this process we
+   * lose all UEs (they have to reconnect) */
+  init_UE_info(&RC.mac[enb_id]->UE_info);
+  LOG_I(ENB_APP, "attempting to create ITTI tasks\n");
+  // No more rrc thread, as many race conditions are hidden behind
+  rrc_enb_init();
+  itti_mark_task_ready(TASK_RRC_ENB);
+  /* pass a reconfiguration request which will configure everything down to
+   * RC.eNB[i][j]->frame_parms, too */
+  msg_p = itti_alloc_new_message(TASK_ENB_APP, RRC_CONFIGURATION_REQ);
+  RRC_CONFIGURATION_REQ(msg_p) = RC.rrc[enb_id]->configuration;
+  itti_send_msg_to_task(TASK_RRC_ENB, ENB_MODULE_ID_TO_INSTANCE(enb_id), msg_p);
+  /* TODO XForms might need to be restarted, but it is currently (09/02/18)
+   * broken, so we cannot test it */
+  init_RU_proc(ru);
+  ru->rf_map.card = 0;
+  ru->rf_map.chain = 0; /* CC_id + chain_offset;*/
+  init_eNB_afterRU();
+  printf("Sending sync to all threads\n");
+  pthread_mutex_lock(&sync_mutex); // release the threads blocked above: sync_var back to 0, then broadcast
+  sync_var=0;
+  pthread_cond_broadcast(&sync_cond);
+  pthread_mutex_unlock(&sync_mutex);
+  return 0; // always 0: no failure is reported from this function
+}
+
+int main ( int argc, char **argv ) {
+  /* Load the configuration, start the ITTI tasks and the L1/RU threads, wait for the tasks to end, then tear down */
+  int CC_id = 0;
+  int node_type = ngran_eNB;
+  AssertFatal(load_configmodule(argc,argv,0), "[SOFTMODEM] Error, configuration module init failed\n");
+  logInit();
+  printf("Reading in command-line options\n");
+  get_options ();
+  AssertFatal(!CONFIG_ISFLAGSET(CONFIG_ABORT),"Getting configuration failed\n");
+  EPC_MODE_ENABLED = !IS_SOFTMODEM_NOS1;
+#if T_TRACER
+  T_Config_Init();
+#endif
+  configure_linux();
+  cpuf=get_cpu_freq_GHz();
+  set_taus_seed (0);
+
+  if (opp_enabled ==1)
+    reset_opp_meas();
+
+  itti_init(TASK_MAX, THREAD_MAX, MESSAGES_ID_MAX, tasks_info, messages_info);
+  init_opt();
+#ifndef PACKAGE_VERSION
+#  define PACKAGE_VERSION "UNKNOWN-EXPERIMENTAL"
+#endif
+  LOG_I(HW, "Version: %s\n", PACKAGE_VERSION);
+
+  /* Read configuration */
+  if (RC.nb_inst > 0) {
+    // Allocate memory from RC variable
+    read_config_and_init();
+  } else {
+    printf("RC.nb_inst = 0, Initializing L1\n");
+    RCconfig_L1();
+  }
+
+  /* We need to read RU configuration before FlexRAN starts so it knows what
+   * splits to report. Actual RU start comes later. */
+  if (RC.nb_RU > 0 && NFAPI_MODE != NFAPI_MODE_VNF) {
+    RCconfig_RU();
+    LOG_I(PHY,
+          "number of L1 instances %d, number of RU %d, number of CPU cores %d\n",
+          RC.nb_L1_inst, RC.nb_RU, get_nprocs());
+  }
+
+  /* split73 syntax is <cu|du>:<remote ip addr>[:<ip port>]; only the two-letter role prefix is decoded here */
+  const char *split73_opt = get_softmodem_params()->split73;
+
+  if ( strlen(split73_opt) > 0 ) {
+    if ( strncasecmp(split73_opt,"cu", 2)==0 )
+      split73=SPLIT73_CU;
+    else if ( strncasecmp(split73_opt,"du", 2)==0 )
+      split73=SPLIT73_DU;
+    else
+      AssertFatal(false,"split73 syntax: <cu|du>:<remote ip addr>[:<ip port>] (string found: %s) \n",split73_opt);
+  }
+
+  if (RC.nb_inst > 0) {
+    /* Start the agent. If it is turned off in the configuration, it won't start */
+    for (int i = 0; i < RC.nb_inst; i++) {
+      flexran_agent_start(i);
+    }
+
+    /* initializes PDCP and sets correct RLC Request/PDCP Indication callbacks
+     * for monolithic/F1 modes */
+    init_pdcp();
+    AssertFatal(create_tasks(1)==0,"cannot create ITTI tasks\n");
+
+    for (int enb_id = 0; enb_id < RC.nb_inst; enb_id++) {
+      MessageDef *msg_p = itti_alloc_new_message (TASK_ENB_APP, RRC_CONFIGURATION_REQ);
+      RRC_CONFIGURATION_REQ(msg_p) = RC.rrc[enb_id]->configuration;
+      itti_send_msg_to_task (TASK_RRC_ENB, ENB_MODULE_ID_TO_INSTANCE(enb_id), msg_p);
+    }
+
+    node_type = RC.rrc[0]->node_type;
+  }
+
+  if (RC.nb_inst > 0 && NODE_IS_CU(node_type)) {
+    protocol_ctxt_t ctxt;
+    ctxt.module_id = 0 ;
+    ctxt.instance = 0;
+    ctxt.rnti = 0;
+    ctxt.enb_flag = 1;
+    ctxt.frame = 0;
+    ctxt.subframe = 0;
+    pdcp_run(&ctxt);
+  }
+
+  /* start threads if only L1 or not a CU */
+  if (RC.nb_inst == 0 || !NODE_IS_CU(node_type) || NFAPI_MODE == NFAPI_MODE_PNF || NFAPI_MODE == NFAPI_MODE_VNF) {
+    // init UE_PF_PO and mutex lock
+    pthread_mutex_init(&ue_pf_po_mutex, NULL);
+    memset (&UE_PF_PO[0][0], 0, sizeof(UE_PF_PO_t)*MAX_MOBILES_PER_ENB*MAX_NUM_CCs);
+    pthread_cond_init(&sync_cond,NULL);
+    pthread_mutex_init(&sync_mutex, NULL);
+
+    if (NFAPI_MODE!=NFAPI_MONOLITHIC) {
+      /* sync_cond and sync_mutex are already initialized just above; initializing them a second
+       * time is undefined behaviour per POSIX, so this branch only logs */
+      LOG_I(ENB_APP,"NFAPI*** - mutex and cond created - will block shortly for completion of PNF connection\n");
+    }
+
+    if (NFAPI_MODE==NFAPI_MODE_VNF) {// VNF
+#if defined(PRE_SCD_THREAD)
+      init_ru_vnf();  // ru pointer is necessary for pre_scd.
+#endif
+      wait_nfapi_init("main?");
+    }
+
+    LOG_I(ENB_APP,"START MAIN THREADS\n");
+    // start the main threads
+    number_of_cards = 1;
+    printf("RC.nb_L1_inst:%d\n", RC.nb_L1_inst);
+
+    if (RC.nb_L1_inst > 0) {
+      printf("Initializing eNB threads single_thread_flag:%d wait_for_sync:%d\n",
+             get_softmodem_params()->single_thread_flag,
+             get_softmodem_params()->wait_for_sync);
+      init_eNB(get_softmodem_params()->single_thread_flag,
+               get_softmodem_params()->wait_for_sync);
+    }
+
+    for (int x=0; x < RC.nb_L1_inst; x++)
+      for (int cc_id=0; cc_id<RC.nb_L1_CC[x]; cc_id++) { // own name: must not shadow the function-level CC_id
+        L1_rxtx_proc_t *L1proc= &RC.eNB[x][cc_id]->proc.L1_proc;
+        L1proc->threadPool=(tpool_t *)malloc(sizeof(tpool_t));
+        L1proc->respEncode=(notifiedFIFO_t *) malloc(sizeof(notifiedFIFO_t));
+        L1proc->respDecode=(notifiedFIFO_t *) malloc(sizeof(notifiedFIFO_t));
+
+        if ( strlen(get_softmodem_params()->threadPoolConfig) > 0 )
+          initTpool(get_softmodem_params()->threadPoolConfig, L1proc->threadPool, true);
+        else
+          initTpool("n", L1proc->threadPool, true);
+
+        initNotifiedFIFO(L1proc->respEncode);
+        initNotifiedFIFO(L1proc->respDecode);
+      }
+  }
+
+  printf("About to Init RU threads RC.nb_RU:%d\n", RC.nb_RU);
+
+  // RU thread and some L1 procedure aren't necessary in VNF or L2 FAPI simulator.
+  // but RU thread deals with pre_scd and this is necessary in VNF and simulator.
+  // some initialization is necessary and init_ru_vnf do this.
+  if (RC.nb_RU >0 && NFAPI_MODE!=NFAPI_MODE_VNF) {
+    printf("Initializing RU threads\n");
+    init_RU(get_softmodem_params()->rf_config_file,
+            get_softmodem_params()->send_dmrs_sync);
+
+    for (int ru_id=0; ru_id<RC.nb_RU; ru_id++) {
+      RC.ru[ru_id]->rf_map.card=0;
+      RC.ru[ru_id]->rf_map.chain=CC_id+(get_softmodem_params()->chain_offset); // CC_id is still 0 here
+      LOG_I(PHY,"Starting ru_thread %d\n",ru_id);
+      init_RU_proc(RC.ru[ru_id]);
+    }
+
+    config_sync_var=0;
+
+    if (NFAPI_MODE==NFAPI_MODE_PNF) { // PNF
+      wait_nfapi_init("main?");
+    }
+
+    LOG_I(ENB_APP,"RC.nb_RU:%d\n", RC.nb_RU);
+    // once all RUs are ready initialize the rest of the eNBs (dependence on final RU parameters after configuration)
+    printf("ALL RUs ready - init eNBs\n");
+
+    if (NFAPI_MODE!=NFAPI_MODE_PNF && NFAPI_MODE!=NFAPI_MODE_VNF) {
+      LOG_I(ENB_APP,"Not NFAPI mode - call init_eNB_afterRU()\n");
+      init_eNB_afterRU();
+    } else {
+      LOG_I(ENB_APP,"NFAPI mode - DO NOT call init_eNB_afterRU()\n");
+    }
+
+    LOG_UI(ENB_APP,"ALL RUs ready - ALL eNBs ready\n");
+    // connect the TX/RX buffers
+    sleep(1); /* wait for thread activation */
+    LOG_I(ENB_APP,"Sending sync to all threads\n");
+    pthread_mutex_lock(&sync_mutex);
+    sync_var=0;
+    pthread_cond_broadcast(&sync_cond);
+    pthread_mutex_unlock(&sync_mutex);
+    config_check_unknown_cmdlineopt(CONFIG_CHECKALLSECTIONS);
+  }
+
+  create_tasks_mbms(1);
+  // wait for end of program
+  LOG_UI(ENB_APP,"TYPE <CTRL-C> TO TERMINATE\n");
+  // CI -- Flushing the std outputs for the previous marker to show on the eNB / DU / CU log file
+  fflush(stdout);
+  fflush(stderr);
+
+  // end of CI modifications
+  //getchar();
+  if(IS_SOFTMODEM_DOFORMS)
+    load_softscope("enb");
+
+  itti_wait_tasks_end();
+  oai_exit=1;
+  LOG_I(ENB_APP,"oai_exit=%d\n",oai_exit);
+  // stop threads
+
+  if (RC.nb_inst == 0 || !NODE_IS_CU(node_type)) {
+    if(IS_SOFTMODEM_DOFORMS)
+      end_forms();
+
+    LOG_I(ENB_APP,"stopping MODEM threads\n");
+    stop_eNB(NB_eNB_INST);
+    stop_RU(RC.nb_RU);
+
+    /* release memory used by the RU/eNB threads (incomplete), after all
+     * threads have been stopped (they partially use the same memory) */
+    for (int inst = 0; inst < NB_eNB_INST; inst++) {
+      for (int cc_id = 0; cc_id < RC.nb_CC[inst]; cc_id++) {
+        free_transport(RC.eNB[inst][cc_id]);
+        phy_free_lte_eNB(RC.eNB[inst][cc_id]);
+      }
+    }
+
+    for (int inst = 0; inst < RC.nb_RU; inst++) {
+      phy_free_RU(RC.ru[inst]);
+    }
+
+    free_lte_top();
+    end_configmodule();
+    pthread_cond_destroy(&sync_cond);
+    pthread_mutex_destroy(&sync_mutex);
+    pthread_cond_destroy(&nfapi_sync_cond); // NOTE(review): nfapi_sync_cond/mutex are not initialized in this function - confirm where
+    pthread_mutex_destroy(&nfapi_sync_mutex);
+    pthread_mutex_destroy(&ue_pf_po_mutex);
+
+    for(int ru_id=0; ru_id<RC.nb_RU; ru_id++) {
+      if (RC.ru[ru_id]->rfdevice.trx_end_func) {
+        RC.ru[ru_id]->rfdevice.trx_end_func(&RC.ru[ru_id]->rfdevice);
+        RC.ru[ru_id]->rfdevice.trx_end_func = NULL;
+      }
+
+      if (RC.ru[ru_id]->ifdevice.trx_end_func) {
+        RC.ru[ru_id]->ifdevice.trx_end_func(&RC.ru[ru_id]->ifdevice);
+        RC.ru[ru_id]->ifdevice.trx_end_func = NULL;
+      }
+    }
+  }
+
+  terminate_opt();
+  logClean();
+  printf("Bye.\n");
+  return 0;
+}
diff --git a/executables/split_headers.h b/executables/split_headers.h
new file mode 100644
index 0000000000000000000000000000000000000000..4e328f74c418c7f902014b99630dd4fbe2130e7a
--- /dev/null
+++ b/executables/split_headers.h
@@ -0,0 +1,329 @@
+/*
+* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The OpenAirInterface Software Alliance licenses this file to You under
+* the OAI Public License, Version 1.1  (the "License"); you may not use this file
+* except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.openairinterface.org/?page_id=698
+*
+* Author and copyright: Laurent Thomas, open-cells.com
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*-------------------------------------------------------------------------------
+* For more information about the OpenAirInterface (OAI) Software Alliance:
+*      contact@openairinterface.org
+*/
+
+
+#ifndef __SPLIT_HEADERS_H
+#define __SPLIT_HEADERS_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <openair1/PHY/defs_eNB.h>
+
+#define CU_PORT "7878"
+#define DU_PORT "8787"
+#define SPLIT73_CU 1
+#define SPLIT73_DU 2
+extern int split73;
+
+#define MTU 65536
+#define UDP_TIMEOUT 900000L // in micro  second (struct timeval, NOT struct timespec)
+// linux may timeout for a much longer time (up to 10ms)
+#define blockAlign 32 //bytes to align memory for SIMD copy (256 bits vectors)
+
+// FS6 transport configuration and handler
+typedef struct {
+  char *sourceIP;   // NOTE(review): the four address strings are not assigned by createUDPsock() - confirm who fills them
+  char *sourcePort;
+  char *destIP;
+  char *destPort;
+  struct addrinfo *destAddr; // getaddrinfo() result for the destination, stored by createUDPsock(), used by sendSubFrame() in sendto()
+  int sockHandler;           // socket descriptor created and bound by createUDPsock()
+} UDPsock_t;
+
+#define CTsentCUv0 0xA500
+#define CTsentDUv0 0x5A00
+
+// Main FS6 transport layer header
+// All packets start with this header
+typedef struct commonUDP_s {
+  uint64_t timestamp; // id of the group (subframe for LTE)
+  uint16_t nbBlocks;       // total number of blocks for this timestamp
+  uint16_t blockID;        // id: 0..nbBlocks-1
+  uint16_t contentType;    // defines the content format; sendSubFrame() stores its contentType argument here
+  uint16_t contentBytes;   // bytes following this header (see alignedSize()); sent in a UDP packet, so must be < 2^16 bytes
+  uint64_t senderClock;    // rdtsc() value written by sendSubFrame() into the first block's header
+} commonUDP_t;
+
+// FS6 UL common header (DU to CU)
+// gives the RACH detection data and is always sent to inform the CU that a subframe arrived
+typedef struct {
+  uint16_t max_preamble[4];        // these four arrays carry the names of the uint16_t* output
+  uint16_t max_preamble_energy[4]; // parameters of ocp_rx_prach()/rx_prach0() declared in this header;
+  uint16_t max_preamble_delay[4];  // presumably filled by them - TODO confirm, and confirm what the index 0..3 means
+  uint16_t avg_preamble_energy[4];
+} fs6_ul_t;
+
+// FS6 DL common header (CU to DU)
+// gives the DCI configuration from each subframe
+typedef struct {
+  uint8_t pbch_pdu[4];
+  int num_pdcch_symbols;
+  int num_dci;
+  DCI_ALLOC_t dci_alloc[8];
+  int num_mdci;
+  int amp;
+  LTE_eNB_PHICH phich_vars;
+  uint64_t DuClock;
+  uint64_t CuSpentMicroSec;
+} fs6_dl_t;
+
+// a value to type all sub packets,
+// to detect errors, and to be able to extend to other versions
+// the first byte of each sub structure should match one of these values
+enum pckType {
+  fs6UlConfig=25,
+  fs6DlConfig=26,
+  fs6ULConfigCCH=27,
+  fs6ULsch=28,
+  fs6ULcch=29,
+  fs6ULindicationHarq=40,
+  fs6ULindicationSr=41,
+};
+
+// CU to DU definition of a future UL subframe decode
+// defines a UE future data plane
+typedef struct {
+  enum pckType type:8;
+  uint16_t UE_id;
+  int8_t harq_pid;
+  UE_type_t ue_type;
+
+  uint8_t dci_alloc;
+  uint8_t rar_alloc;
+  SCH_status_t status;
+  uint8_t Msg3_flag;
+  uint8_t subframe;
+  uint32_t frame;
+  uint8_t handled;
+  uint8_t phich_active;
+  uint8_t phich_ACK;
+  uint16_t previous_first_rb;
+  uint32_t B;
+  uint32_t G;
+  UCI_format_t uci_format;
+  uint8_t Or2;
+  uint8_t o_RI[2];
+  uint8_t o_ACK[4];
+  uint8_t O_ACK;
+  uint8_t o_RCC;
+  int16_t q_ACK[MAX_ACK_PAYLOAD];
+  int16_t q_RI[MAX_RI_PAYLOAD];
+  uint32_t RTC[MAX_NUM_ULSCH_SEGMENTS];
+  uint8_t ndi;
+  uint8_t round;
+  uint8_t rvidx;
+  uint8_t Nl;
+  uint8_t n_DMRS;
+  uint8_t previous_n_DMRS;
+  uint8_t n_DMRS2;
+  int32_t delta_TF;
+  uint32_t repetition_number ;
+  uint32_t total_number_of_repetitions;
+
+  uint16_t harq_mask;
+  uint16_t nb_rb;
+  uint8_t Qm;
+  uint16_t first_rb;
+  uint8_t O_RI;
+  uint8_t Or1;
+  uint16_t Msc_initial;
+  uint8_t Nsymb_initial;
+  uint8_t V_UL_DAI;
+  uint8_t srs_active;
+  uint32_t TBS;
+  uint8_t Nsymb_pusch;
+  uint8_t Mlimit;
+  uint8_t max_turbo_iterations;
+  uint8_t bundling;
+  uint16_t beta_offset_cqi_times8;
+  uint16_t beta_offset_ri_times8;
+  uint16_t beta_offset_harqack_times8;
+  uint8_t Msg3_active;
+  uint16_t rnti;
+  uint8_t cyclicShift;
+  uint8_t cooperation_flag;
+  uint8_t num_active_cba_groups;
+  uint16_t cba_rnti[4];//NUM_MAX_CBA_GROUP];
+} fs6_dl_ulsched_t;
+
+// CU to DU definition of a DL packet for a given UE
+// The data itself is padded at the end of this structure
+typedef struct {
+  enum pckType type:8;
+  int UE_id;
+  int8_t harq_pid;
+  uint16_t rnti;
+  int16_t sqrt_rho_a;
+  int16_t sqrt_rho_b;
+  CEmode_t CEmode:8;
+  uint16_t nb_rb;
+  uint8_t Qm;
+  int8_t Nl;
+  uint8_t pdsch_start;
+  uint8_t sib1_br_flag;
+  uint16_t i0;
+  uint32_t rb_alloc[4];
+  int dataLen;
+} fs6_dl_uespec_t;
+
+// CU to DU definition of CCH channel
+typedef struct {
+  int16_t UE_id;
+  LTE_eNB_UCI cch_vars;
+} fs6_dl_uespec_ulcch_element_t;
+
+// header to group all UE CCH channels definitions in one UDP packet
+typedef struct {
+  enum pckType type:8;
+  int16_t nb_active_ue;
+} fs6_dl_uespec_ulcch_t;
+
+// internal to the code, not transmitted as is
+typedef struct {
+  int ta;
+}  ul_propagation_t;
+
+// One UE UL data, data plane, UE data appended after the header
+typedef struct {
+  enum pckType type:8;
+  short UE_id;
+  uint8_t harq_id;
+  uint8_t segment;
+  int segLen;
+  int r_offset;
+  int G;
+  int ulsch_power[2];
+  uint8_t o_ACK[4];
+  uint8_t O_ACK;
+  int ta;
+  uint8_t o[MAX_CQI_BYTES];
+  uint8_t cqi_crc_status;
+} fs6_ul_uespec_t;
+
+// UL UCI (control plane), per UE
+typedef struct {
+  enum pckType type:8;
+  int UEid;
+  int frame;
+  int subframe;
+  LTE_eNB_UCI uci;
+  uint8_t harq_ack[4];
+  uint8_t tdd_mapping_mode;
+  uint16_t tdd_multiplexing_mask;
+  unsigned short n0_subband_power_dB;
+  uint16_t rnti;
+  int32_t stat;
+} fs6_ul_uespec_uci_element_t;
+
+// all segments UCI grouped in one UDP packet
+typedef struct {
+  enum pckType type:8;
+  int16_t nb_active_ue;
+}  fs6_ul_uespec_uci_t;
+
+
+bool createUDPsock (char *sourceIP, char *sourcePort, char *destIP, char *destPort, UDPsock_t *result);
+int receiveSubFrame(UDPsock_t *sock, void *bufferZone,  int bufferSize, uint16_t contentType);
+int sendSubFrame(UDPsock_t *sock, void *bufferZone, ssize_t secondHeaderSize, uint16_t contentType);
+
+#define initBufferZone(xBuf) \
+  uint8_t xBuf[FS6_BUF_SIZE];   \
+  ((commonUDP_t *)xBuf)->nbBlocks=0;
+
+#define hUDP(xBuf) ((commonUDP_t *)xBuf)
+#define hDL(xBuf)  ((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))
+#define hUL(xBuf)  ((fs6_ul_t*)(((commonUDP_t *)xBuf)+1))
+#define hDLUE(xBuf) ((fs6_dl_uespec_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1))
+#define hTxULUE(xBuf) ((fs6_dl_ulsched_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1))
+#define hTxULcch(xBuf) ((fs6_dl_uespec_ulcch_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1))
+#define hULUE(xBuf) ((fs6_ul_uespec_t*) (((fs6_ul_t*)(((commonUDP_t *)xBuf)+1))+1))
+#define hULUEuci(xBuf) ((fs6_ul_uespec_uci_t*) (((fs6_ul_t*)(((commonUDP_t *)xBuf)+1))+1))
+
+static inline size_t alignedSize(uint8_t *ptr) {
+  const size_t rawBytes=sizeof(commonUDP_t)+((commonUDP_t *) ptr)->contentBytes; // header + contentBytes
+  return ((rawBytes+blockAlign-1)/blockAlign)*blockAlign; // rounded up to a multiple of blockAlign
+}
+
+static inline void *commonUDPdata(uint8_t *ptr) {
+  return (void *) (ptr+sizeof(commonUDP_t)); // first byte after the commonUDP_t header located at ptr
+}
+
+void setAllfromTS(uint64_t TS, L1_rxtx_proc_t *proc);
+void sendFs6Ulharq(enum pckType type, int UEid, PHY_VARS_eNB *eNB,LTE_eNB_UCI *uci, int frame, int subframe, uint8_t *harq_ack, uint8_t tdd_mapping_mode, uint16_t tdd_multiplexing_mask,
+                   uint16_t rnti,  int32_t stat);
+void sendFs6Ul(PHY_VARS_eNB *eNB, int UE_id, int harq_pid, int segmentID, int16_t *data, int dataLen, int r_offset);
+void *cu_fs6(void *arg);
+void *du_fs6(void *arg);
+void fill_rf_config(RU_t *ru, char *rf_config_file);
+int init_rf(RU_t *ru);
+void rx_rf(RU_t *ru, L1_rxtx_proc_t *proc);
+void tx_rf(RU_t *ru, L1_rxtx_proc_t *proc);
+void common_signal_procedures (PHY_VARS_eNB *eNB,int frame, int subframe);
+void pmch_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc);
+bool dlsch_procedures(PHY_VARS_eNB *eNB,
+                      L1_rxtx_proc_t *proc,
+                      int harq_pid,
+                      LTE_eNB_DLSCH_t *dlsch,
+                      LTE_eNB_UE_stats *ue_stats) ;
+void postDecode(L1_rxtx_proc_t *proc, notifiedFIFO_elt_t *req);
+void pdsch_procedures(PHY_VARS_eNB *eNB,
+                      L1_rxtx_proc_t *proc,
+                      int harq_pid,
+                      LTE_eNB_DLSCH_t *dlsch,
+                      LTE_eNB_DLSCH_t *dlsch1);
+void srs_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc);
+void uci_procedures(PHY_VARS_eNB *eNB,
+                    L1_rxtx_proc_t *proc);
+void ocp_rx_prach(PHY_VARS_eNB *eNB,
+		  L1_rxtx_proc_t *proc,
+		  RU_t *ru,
+		  uint16_t *max_preamble,
+		  uint16_t *max_preamble_energy,
+		  uint16_t *max_preamble_delay,
+		  uint16_t *avg_preamble_energy,
+		  uint16_t Nf,
+		  uint8_t tdd_mapindex,
+		  uint8_t br_flag);
+void rx_prach0(PHY_VARS_eNB *eNB,
+               RU_t *ru,
+	       int frame_prach,
+	       int subframe,
+               uint16_t *max_preamble,
+               uint16_t *max_preamble_energy,
+               uint16_t *max_preamble_delay,
+               uint16_t *avg_preamble_energy,
+               uint16_t Nf,
+               uint8_t tdd_mapindex,
+               uint8_t br_flag,
+               uint8_t ce_level
+	       );
+void ocp_tx_rf(RU_t *ru, L1_rxtx_proc_t *proc);
+
+// mistakes in main OAI
+void  phy_init_RU(RU_t *);
+void fep_full(RU_t *ru, int subframe);
+void feptx_prec(RU_t *ru,int frame,int subframe);
+void feptx_ofdm(RU_t *ru, int frame, int subframe);
+void oai_subframe_ind(uint16_t sfn, uint16_t sf);
+extern uint16_t sf_ahead;
+#endif
diff --git a/executables/transport_split.c b/executables/transport_split.c
new file mode 100644
index 0000000000000000000000000000000000000000..f380eb90e36977727e7b493b200999fa3edf6316
--- /dev/null
+++ b/executables/transport_split.c
@@ -0,0 +1,195 @@
+/*
+* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The OpenAirInterface Software Alliance licenses this file to You under
+* the OAI Public License, Version 1.1  (the "License"); you may not use this file
+* except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.openairinterface.org/?page_id=698
+*
+* Author and copyright: Laurent Thomas, open-cells.com
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*-------------------------------------------------------------------------------
+* For more information about the OpenAirInterface (OAI) Software Alliance:
+*      contact@openairinterface.org
+*/
+
+
+
+#include <executables/split_headers.h>
+#include <sys/types.h>          /* See NOTES */
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <netdb.h>
+#include <targets/RT/USER/lte-softmodem.h>
+
+bool createUDPsock (char *sourceIP, char *sourcePort, char *destIP, char *destPort, UDPsock_t *result) {
+  // Creates a UDP socket bound to sourceIP:sourcePort, resolves destIP:destPort into result->destAddr
+  // and sets the socket options. Returns true on success; on false no socket is left open.
+  struct addrinfo hints= {0}, *servinfo, *p;
+  hints.ai_family = AF_UNSPEC;
+  hints.ai_socktype = SOCK_DGRAM;
+  hints.ai_flags = AI_PASSIVE;
+  int status;
+
+  if ((status = getaddrinfo(sourceIP, sourcePort, &hints, &servinfo)) != 0) {
+    LOG_E(GTPU,"getaddrinfo error: %s\n", gai_strerror(status));
+    return false;
+  }
+
+  // loop through all the results and bind to the first we can
+  for(p = servinfo; p != NULL; p = p->ai_next) {
+    if ((result->sockHandler = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
+      LOG_W(GTPU,"socket: %s\n", strerror(errno));
+      continue;
+    }
+    if (bind(result->sockHandler, p->ai_addr, p->ai_addrlen) == -1) {
+      LOG_W(GTPU,"bind: %s\n", strerror(errno)); // log first: close() may overwrite errno
+      close(result->sockHandler);
+      continue;
+    }
+    break; // if we get here, the socket is bound
+  }
+
+  bool bound = (p != NULL);
+  freeaddrinfo(servinfo); // the source list is released whether or not a bind succeeded
+  if (!bound) {
+    // looped off the end of the list with no successful bind
+    LOG_E(GTPU,"failed to bind socket: %s %s \n",sourceIP,sourcePort);
+    return false;
+  }
+
+  if ((status = getaddrinfo(destIP, destPort, &hints, &servinfo)) != 0) {
+    LOG_E(GTPU,"getaddrinfo error: %s\n", gai_strerror(status));
+    close(result->sockHandler); // do not leak the bound socket
+    return false;
+  }
+
+  if (servinfo) {
+    result->destAddr=servinfo; // kept, not freed: sendSubFrame() sends to the first returned address
+  } else {
+    LOG_E(PHY,"No valid UDP addr: %s:%s\n",destIP, destPort);
+    close(result->sockHandler);
+    return false;
+  }
+  int enable=1;
+  // NOTE(review): SO_REUSEADDR is set after bind(), so it cannot influence the bind above - confirm intent
+  AssertFatal(setsockopt(result->sockHandler, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))==0,"");
+  struct timeval tv= {0,UDP_TIMEOUT};
+
+  if (IS_SOFTMODEM_RFSIM)
+    tv.tv_sec=2; //debug: wait 2 seconds for human understanding
+  AssertFatal(setsockopt(result->sockHandler, SOL_SOCKET, SO_RCVTIMEO,&tv,sizeof(tv)) ==0,"");
+  // Make the send/recv buffers larger than a couple of subframes
+  // so the kernel will store in and out packets for us
+  int buff=1000*1000*10;
+  AssertFatal ( setsockopt(result->sockHandler, SOL_SOCKET, SO_SNDBUF, &buff, sizeof(buff)) == 0, "");
+  AssertFatal ( setsockopt(result->sockHandler, SOL_SOCKET, SO_RCVBUF, &buff, sizeof(buff)) == 0, "");
+  return true;
+}
+
+// sock: udp socket
+// bufferZone: a reception area of bufferSize
+int receiveSubFrame(UDPsock_t *sock, void *bufferZone,  int bufferSize, uint16_t contentType) {
+  // Receives the blocks of one timestamp (subframe) back to back into bufferZone, never writing past bufferSize.
+  // Returns the number of blocks stored (may be incomplete on timeout or full buffer), or -1 on socket error.
+  int rcved=0;
+  commonUDP_t *bufOrigin=(commonUDP_t *)bufferZone;
+  uint8_t *writePos=(uint8_t *)bufferZone; // where the next block is stored
+  int remaining=bufferSize;                // free bytes left at writePos
+  static uint8_t crossData[65536];         // block of a newer timestamp, kept for the next call (static: not reentrant)
+  static int crossDataSize=0;
+  if (crossDataSize > 0 && crossDataSize <= remaining) {
+    LOG_D(HW,"copy a block received in previous subframe\n");
+    memcpy(writePos, crossData, crossDataSize);
+    rcved=1;
+    writePos+=crossDataSize;
+    remaining-=crossDataSize;
+  } else if (crossDataSize) {
+    LOG_E(HW,"Dropping block received in previous subframe: %d bytes do not fit in the buffer\n", crossDataSize);
+  }
+  crossDataSize=0;
+  do {
+    if (remaining < (int)sizeof(commonUDP_t)) { // not even a header fits: stop instead of overrunning bufferZone
+      LOG_E(HW,"Reception buffer full, subframe incomplete\n");
+      return rcved;
+    }
+    //read all subframe data from the control unit
+    int ret=recv(sock->sockHandler, writePos, remaining, 0);
+    if ( ret==-1) {
+      if ( errno == EWOULDBLOCK || errno== EINTR ) {
+        LOG_I(HW,"Received: Timeout, subframe incomplete\n");
+        return  rcved;
+      }
+      LOG_E(HW,"Critical issue in socket: %s\n", strerror(errno));
+      return -1;
+    }
+    if (hUDP(writePos)->contentType != contentType)
+      abort();
+    if (rcved && bufOrigin->timestamp != hUDP(writePos)->timestamp ) {
+      if ( hUDP(writePos)->timestamp < bufOrigin->timestamp ) {
+        LOG_W(HW,"Dropping late packet\n");
+        continue;
+      }
+      LOG_W(HW,"Received data for TS: %lu before end of TS : %lu completion\n",
+            hUDP(writePos)->timestamp, bufOrigin->timestamp);
+      memcpy(crossData, writePos, ret ); // keep the early block for the next call
+      crossDataSize=ret;
+      return rcved;
+    }
+    rcved++;
+    writePos+=ret;
+    remaining-=ret;
+    LOG_D(HW,"Received: blocks: %d/%d, size %d, TS: %lu\n", rcved, bufOrigin->nbBlocks, ret, bufOrigin->timestamp);
+  } while ( rcved == 0 || rcved < bufOrigin->nbBlocks );
+  return rcved;
+}
+
+int sendSubFrame(UDPsock_t *sock, void *bufferZone, ssize_t secondHeaderSize, uint16_t contentType) {
+  // Sends the nbBlocks blocks stored back to back in bufferZone, one UDP datagram per block; every block after the
+  // first gets the common header fields and the first block's secondHeaderSize bytes of second header. Always returns 0.
+  commonUDP_t *UDPheader=(commonUDP_t *)bufferZone ;
+  UDPheader->contentType=contentType;
+  UDPheader->senderClock=rdtsc();
+  int nbBlocks=UDPheader->nbBlocks;
+  int blockId=0;
+  uint8_t *block=(uint8_t *)bufferZone; // start of the block being sent
+
+  if (nbBlocks <= 0 ) {
+    LOG_E(PHY,"FS6: can't send blocks: %d\n", nbBlocks);
+    return 0;
+  }
+
+  do {
+    if (blockId > 0 ) {
+      commonUDP_t *currentHeader=(commonUDP_t *)block;
+      currentHeader->timestamp=UDPheader->timestamp;
+      currentHeader->nbBlocks=UDPheader->nbBlocks;
+      currentHeader->blockID=blockId;
+      currentHeader->contentType=UDPheader->contentType;
+      // the source must be the FIRST block: copying from the current block would be a self-copy
+      memcpy(commonUDPdata(block), commonUDPdata((uint8_t *)UDPheader), secondHeaderSize);
+    }
+    blockId++;
+    int sz=alignedSize(block);
+    // Let's use the first address returned by getaddrinfo()
+    int ret=sendto(sock->sockHandler, block, sz, 0,
+                   sock->destAddr->ai_addr, sock->destAddr->ai_addrlen);
+    if ( ret != sz )
+      LOG_W(HW,"Wrote socket doesn't return size %d (val: %d, errno:%d, %s)\n",
+            sz, ret, errno, strerror(errno));
+    LOG_D(HW,"Sent: TS: %lu, blocks %d/%d, block size : %d \n",
+          UDPheader->timestamp, UDPheader->nbBlocks-nbBlocks, UDPheader->nbBlocks, sz);
+    block+=sz;
+    nbBlocks--;
+  } while (nbBlocks);
+  return 0;
+}
diff --git a/openair1/PHY/NR_REFSIG/scrambling_luts.c b/openair1/PHY/NR_REFSIG/scrambling_luts.c
new file mode 100644
index 0000000000000000000000000000000000000000..9d957de0f716f2f6929110c63015b3d6e4a9c970
--- /dev/null
+++ b/openair1/PHY/NR_REFSIG/scrambling_luts.c
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/* Lookup tables for 3GPP scrambling/unscrambling */
+
+/* Author R. Knopp / EURECOM / OpenAirInterface.org */
+#ifndef __SCRAMBLING_LUTS__C__
+#define __SCRAMBLING_LUTS__C__
+
+#include "PHY/impl_defs_nr.h"
+#include "PHY/sse_intrin.h"
+
+__m64 byte2m64_re[256];   /* per byte value s: bits 0,2,4,6 of s mapped to +1/-1, one int16 lane each */
+__m64 byte2m64_im[256];   /* per byte value s: bits 1,3,5,7 of s mapped to +1/-1, one int16 lane each */
+
+/* Fill both tables for every byte value: a bit equal to 0 gives +1, a bit equal to 1 gives -1 (1-2*bit). */
+void init_byte2m64(void) {
+  for (int s=0;s<256;s++) {
+    byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*(s&1)),0);
+    byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>1)&1)),0);
+    byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>2)&1)),1);
+    byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>3)&1)),1);
+    byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>4)&1)),2);
+    byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>5)&1)),2);
+    byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>6)&1)),3);
+    byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>7)&1)),3);
+    printf("init_scrambling_luts: s %x (%d) ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n",
+           ((uint16_t*)&s)[0],
+           (1-2*(s&1)),
+           ((int16_t*)&byte2m64_re[s])[0],((int16_t*)&byte2m64_im[s])[0],
+           ((int16_t*)&byte2m64_re[s])[1],((int16_t*)&byte2m64_im[s])[1],
+           ((int16_t*)&byte2m64_re[s])[2],((int16_t*)&byte2m64_im[s])[2],
+           ((int16_t*)&byte2m64_re[s])[3],((int16_t*)&byte2m64_im[s])[3]);
+  }
+  _mm_empty(); /* MMX intrinsics were used above: release the MMX state before any x87 code runs */
+}
+
+/* Build the scrambling lookup tables; the only step is init_byte2m64(). */
+void init_scrambling_luts(void)
+{
+  init_byte2m64();
+}
+
+#endif
diff --git a/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c b/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c
new file mode 100644
index 0000000000000000000000000000000000000000..2b0cbe72b3a91c8757c12d3ccbf194d5d08f740c
--- /dev/null
+++ b/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file PHY/NR_TRANSPORT/nr_uci_tools_common.c
+ * \brief PUCCH group/sequence hopping and cyclic shift hopping (TS 38.211 subclause 6.3.2.2)
+ * \author
+ * \date 2020
+ * \version 0.1
+ * \company Eurecom
+ * \email:
+ * \note
+ * \warning
+ */
+
+#include "nr_dci.h"
+
+/*
+ * Group and sequence hopping, TS 38.211 subclause 6.3.2.2.1.
+ *
+ * Inputs:
+ *    - PUCCH_GroupHopping: hopping mode (neither / enable / disable)
+ *    - n_id: higher-layer parameter hoppingId from PUCCH-ConfigCommon, the cell-specific
+ *            scrambling ID for group hopping and sequence hopping; corresponds to L1
+ *            parameter 'HoppingID' (see 38.211, section 6.3.2.2), BIT STRING (SIZE (10))
+ *    - n_hop: frequency hopping index
+ *             if intra-slot frequency hopping is disabled by the higher-layer parameter PUCCH-frequency-hopping
+ *                n_hop=0
+ *             if frequency hopping is enabled by the higher-layer parameter PUCCH-frequency-hopping
+ *                n_hop=0 for the first hop
+ *                n_hop=1 for the second hop
+ *    - nr_tti_tx: enters the pseudo-random sequence offset as 2*nr_tti_tx+n_hop
+ *                 (FIXME kept from the original code: not sure nr_tti_tx has to be used here)
+ *
+ * Outputs:
+ *    - u = (f_gh+f_ss) mod 30
+ *    - v = 0, except in mode 'disable' where it is bit 2*nr_tti_tx+n_hop of the sequence
+ *
+ * Per mode:
+ *    - neither: f_gh = 0, f_ss = n_id mod 30
+ *    - enable:  f_gh = (8 sequence bits starting at 8*(2*nr_tti_tx+n_hop)) mod 30, f_ss = n_id mod 30
+ *    - disable: f_gh = 0, f_ss = n_id mod 30
+ * Any other mode value leaves u = v = 0.
+ */
+void nr_group_sequence_hopping (pucch_GroupHopping_t PUCCH_GroupHopping,
+                                uint32_t n_id,
+                                uint8_t n_hop,
+                                int nr_tti_tx,
+                                uint8_t *u,
+                                uint8_t *v) {
+#ifdef DEBUG_NR_PUCCH_TX
+  printf("\t\t [nr_group_sequence_hopping] PUCCH_GroupHopping=%u, n_id=%u \n",PUCCH_GroupHopping,n_id);
+#endif
+  uint32_t x1, word;      // generator state and current 32-bit output word, TS 38.211 Subclause 5.2.1
+  uint32_t c_init = 0;
+  uint8_t f_gh = 0;
+  uint8_t f_ss = 0;
+
+  *u = 0;
+  *v = 0;
+#ifdef DEBUG_NR_PUCCH_TX
+  printf("\t\t [nr_group_sequence_hopping] calculating u,v -> ");
+#endif
+
+  if (PUCCH_GroupHopping == neither) { // PUCCH_GroupHopping 'neither'
+    f_ss = n_id%30;
+  }
+
+  if (PUCCH_GroupHopping == enable) { // PUCCH_GroupHopping 'enabled'
+    c_init = floor(n_id/30); // c_init according to 6.3.2.2.1 of 38.211
+    word = lte_gold_generic(&x1, &c_init, 1);
+    int generated = 32;                     // sequence bits produced so far
+    int pos = ((2*nr_tti_tx+n_hop)<<3);     // index of the sequence bit that becomes bit 0 of f_gh
+
+    for (int bit=0; bit<8; bit++, pos++) {
+      // fetch further 32-bit words until the one holding bit 'pos' is reached
+      for (; pos >= generated; generated += 32)
+        word = lte_gold_generic(&x1, &c_init, 0);
+
+      if ((word>>(pos&((1<<5)-1)))&1) // pos%32
+        f_gh |= (uint8_t)(1<<bit);
+    }
+
+    f_gh = f_gh%30;
+    f_ss = n_id%30;
+  }
+
+  if (PUCCH_GroupHopping == disable) { // PUCCH_GroupHopping 'disabled'
+    c_init = (1<<5)*floor(n_id/30)+(n_id%30);
+    word = lte_gold_generic(&x1, &c_init, 1);
+    f_ss = n_id%30;
+    const int pos = (2*nr_tti_tx+n_hop);
+
+    for (int generated=32; pos >= generated; generated += 32)
+      word = lte_gold_generic(&x1, &c_init, 0);
+
+    *v = (uint8_t)((word>>(pos&((1<<5)-1)))&1);
+  }
+
+  *u = (f_gh+f_ss)%30;
+#ifdef DEBUG_NR_PUCCH_TX
+  printf("%d,%d\n",*u,*v);
+#endif
+}
+
+/*
+ * Cyclic shift hopping, TS 38.211 subclause 6.3.2.2.2. Returns
+ *     alpha = 2*PI/12 * ((m0 + mcs + n_cs) mod 12)
+ * where n_cs is assembled (LSB first) from 8 consecutive bits of the pseudo-random sequence
+ * initialised with c_init = n_id, starting at bit 14*8*nr_tti_tx + 8*(lnormal+lprime).
+ *     - n_id: higher-layer parameter hoppingId
+ *     - m0: provided by higher layer parameter PUCCH-F0-F1-initial-cyclic-shift of PUCCH-F0-resource-config
+ *     - mcs: mcs=0 except for PUCCH format 0 when it depends on information to be transmitted according to TS 38.213 subclause 9.2
+ *     - lnormal: OFDM symbol number in the PUCCH transmission where l=0 corresponds to the first OFDM symbol of the PUCCH transmission
+ *     - lprime: index of the OFDM symbol in the slot that corresponds to the first OFDM symbol of the PUCCH transmission in the slot given by [5, TS 38.213]
+ *     - nr_tti_tx: FIXME kept from the original code: not sure nr_tti_tx has to be used here
+ */
+double nr_cyclic_shift_hopping(uint32_t n_id,
+                               uint8_t m0,
+                               uint8_t mcs,
+                               uint8_t lnormal,
+                               uint8_t lprime,
+                               int nr_tti_tx) {
+  const double two_pi_over_12 = 0.5235987756; // 2*PI/12
+  uint32_t c_init = n_id;
+  uint32_t x1;
+  uint32_t word = lte_gold_generic(&x1, &c_init, 1); // TS 38.211 Subclause 5.2.1
+  uint8_t n_cs = 0;
+  int generated = 32;                                  // sequence bits produced so far
+  int pos = (14*8*nr_tti_tx )+ 8*(lnormal+lprime);     // index of the sequence bit that becomes bit 0 of n_cs
+#ifdef DEBUG_NR_PUCCH_TX
+  printf("\t\t [nr_cyclic_shift_hopping] calculating alpha (cyclic shift) using c_init=%u -> \n",c_init);
+#endif
+
+  for (int bit=0; bit<8; bit++, pos++) {
+    // fetch further 32-bit words until the one holding bit 'pos' is reached
+    for (; pos >= generated; generated += 32)
+      word = lte_gold_generic(&x1, &c_init, 0);
+
+    if ((word>>(pos&((1<<5)-1)))&1) // pos%32
+      n_cs |= (uint8_t)(1<<bit);
+  }
+
+  const double alpha = (two_pi_over_12 * (double)((m0+mcs+n_cs)%12));
+#ifdef DEBUG_NR_PUCCH_TX
+  printf("n_cs=%d -> %lf\n",n_cs,alpha);
+#endif
+
+  return(alpha);
+}
diff --git a/openair1/PHY/TOOLS/dfts_load.c b/openair1/PHY/TOOLS/dfts_load.c
new file mode 100644
index 0000000000000000000000000000000000000000..d758f46a4330977dd7cd3a667ede8f8998ef6614
--- /dev/null
+++ b/openair1/PHY/TOOLS/dfts_load.c
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/*! \file openair1/PHY/TOOLS/dfts_load.c
+ * \brief: load library implementing dft/idft algorithms
+ * \author Francois TABURET
+ * \date 2020
+ * \version 0.1
+ * \company NOKIA BellLabs France
+ * \email: francois.taburet@nokia-bell-labs.com
+ * \note
+ * \warning
+ */
+#define _GNU_SOURCE 
+#include <sys/types.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include "assertions.h"
+#include "common/utils/LOG/log.h"
+#define OAIDFTS_LOADER
+#include "tools_defs.h"
+#include "common/config/config_userapi.h" 
+#include "common/utils/load_module_shlib.h" 
+
+
+/* function description array, to be used when loading the dfts/idfts lib */
+static loader_shlibfunc_t shlib_fdesc[2];                     /* [0] is resolved to "dft", [1] to "idft" */
+static char *arg[64]={"phytest","-O","cmdlineonly::dbgl0"};   /* arguments handed to load_configmodule() */
+
+/* Load the "dfts" shared library and bind the global dft / idft function pointers.
+ * Returns 0; AssertFatal fires when load_module_shlib() reports a negative value. */
+int load_dftslib(void) {
+  if ( config_get_if()==NULL ) { // phy simulators, config module possibly not loaded
+    load_configmodule(3,(char **)arg,CONFIG_ENABLECMDLINEONLY) ;
+    logInit();
+  }
+
+  shlib_fdesc[0].fname = "dft";
+  shlib_fdesc[1].fname = "idft";
+  int ret=load_module_shlib("dfts",shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL);
+  AssertFatal( (ret >= 0),"Error loading dfts library");
+  dft = (dftfunc_t)shlib_fdesc[0].fptr;
+  idft = (idftfunc_t)shlib_fdesc[1].fptr;
+  return 0;
+}
+
+
diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd516cd1e05c8aa973826feb34c760745996f7f4
--- /dev/null
+++ b/openair1/PHY/TOOLS/oai_dfts.c
@@ -0,0 +1,9707 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <math.h>
+#include <pthread.h>
+#include <execinfo.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#define OAIDFTS_MAIN
+#ifndef MR_MAIN
+#include "PHY/defs_common.h"
+#include "PHY/impl_defs_top.h"
+#else
+#include "time_meas.h"
+#include "LOG/log.h"
+#define debug_msg
+#define ONE_OVER_SQRT2_Q15 23170
+
+int oai_exit=0;
+#endif
+
+#define ONE_OVER_SQRT3_Q15 18919
+
+#include "../sse_intrin.h"
+
+#include "assertions.h"
+
+#define print_shorts(s,x) printf("%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7])
+#define print_shorts256(s,x) printf("%s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7],(x)[8],(x)[9],(x)[10],(x)[11],(x)[12],(x)[13],(x)[14],(x)[15])
+/* Debug helpers: print the label s followed by the first 8 (print_shorts), 16 (print_shorts256) or 4 (print_ints) elements of x using %d. */
+#define print_ints(s,x) printf("%s %d %d %d %d\n",s,(x)[0],(x)[1],(x)[2],(x)[3])
+
+
+/* Alternating -1,+1 sign pattern; only the first 18 of the 32 entries are initialised, the remaining ones are 0. */
+const static int16_t conjugatedft[32] __attribute__((aligned(32))) = {-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1};
+/* Alternating +1,-1 sign pattern (first 16 of 32 entries set); used below with sign/multiply intrinsics to negate the odd int16 lanes. */
+const static int16_t reflip[32]  __attribute__((aligned(32))) = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1};
+
+
+
+
+
+
+#if defined(__x86_64__) || defined(__i386__)
+/* Complex multiply-accumulate on packed int16 samples (even lane = real, odd lane = imaginary):
+ *   *re32 += Re(a*b)   and   *im32 += Im(a*b), one int32 per complex sample. */
+static inline __attribute__((always_inline)) void cmac(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
+{
+  // byte shuffle exchanging the two int16 halves of every 32-bit sample
+  const __m128i swap_halves = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // b with its odd lanes negated: madd then gives a.re*b.re - a.im*b.im
+  __m128i b_negodd = _mm_sign_epi16(b,*(__m128i*)reflip);
+  __m128i prod_re  = _mm_madd_epi16(a,b_negodd);
+
+  // b with its halves exchanged: madd then gives a.re*b.im + a.im*b.re
+  __m128i b_swapped = _mm_shuffle_epi8(b,swap_halves);
+  __m128i prod_im   = _mm_madd_epi16(b_swapped,a);
+
+  *re32 = _mm_add_epi32(*re32,prod_re);
+  *im32 = _mm_add_epi32(*im32,prod_im);
+}
+
+/* Complex multiply-accumulate with conjugated second operand (even lane = real, odd lane = imaginary):
+ *   *re32 += Re(a*conj(b))   and   *im32 += Im(a*conj(b)), one int32 per complex sample. */
+static inline __attribute__((always_inline)) void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
+{
+  // byte shuffle exchanging the two int16 halves of every 32-bit sample
+  const __m128i swap_halves = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // plain madd gives a.re*b.re + a.im*b.im
+  __m128i prod_re = _mm_madd_epi16(a,b);
+
+  // negate the odd lanes of b, then exchange the halves: (-b.im, b.re);
+  // madd then gives a.im*b.re - a.re*b.im
+  __m128i b_negodd  = _mm_sign_epi16(b,*(__m128i*)reflip);
+  __m128i b_rotated = _mm_shuffle_epi8(b_negodd,swap_halves);
+  __m128i prod_im   = _mm_madd_epi16(b_rotated,a);
+
+  *re32 = _mm_add_epi32(*re32,prod_re);
+  *im32 = _mm_add_epi32(*im32,prod_im);
+}
+
+#ifdef __AVX2__
+/* 256-bit cmac: *re32 += Re(a*b), *im32 += Im(a*b) for packed complex int16 samples (even lane = real, odd = imaginary). */
+static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
+static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
+{
+  __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32;
+  // Exchanges the int16 halves of every 32-bit sample (indices act within each 128-bit lane).
+  // Bytes 21,20 must take 23,22: the table used to repeat 19,18 there, unlike the one in cmult_256.
+  __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  cmac_tmp       = _mm256_sign_epi16(b,*(__m256i*)reflip);   // odd lanes negated -> a.re*b.re - a.im*b.im
+  cmac_tmp_re32  = _mm256_madd_epi16(a,cmac_tmp);
+  cmac_tmp       = _mm256_shuffle_epi8(b,imshuffle);         // halves exchanged -> a.re*b.im + a.im*b.re
+  cmac_tmp_im32  = _mm256_madd_epi16(cmac_tmp,a);
+  *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32);
+  *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32);
+}
+
+/* 256-bit cmacc: *re32 += Re(a*conj(b)), *im32 += Im(a*conj(b)) for packed complex int16 samples
+ * (even lane = real, odd lane = imaginary), one int32 result per sample. */
+static inline void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline));
+static inline void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
+{
+  __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32;
+  // Exchanges the int16 halves of every sample; bytes 21,20 must take 23,22 (the table used to repeat 19,18 there).
+  __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  cmac_tmp_re32   = _mm256_madd_epi16(a,b);                    // a.re*b.re + a.im*b.im
+  cmac_tmp        = _mm256_sign_epi16(b,*(__m256i*)reflip);    // (b.re, -b.im)
+  // the shuffle has to act on the sign-flipped vector (as in cmacc); shuffling b discarded the conjugation
+  cmac_tmp        = _mm256_shuffle_epi8(cmac_tmp,imshuffle);   // (-b.im, b.re)
+  cmac_tmp_im32   = _mm256_madd_epi16(cmac_tmp,a);             // a.im*b.re - a.re*b.im
+  *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32);
+  *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32);
+}
+
+#endif
+
+/* Complex product of packed int16 samples (even lane = real, odd lane = imaginary):
+ *   *re32 = Re(a*b)
+ *   *im32 = Im(a*b)
+ * one int32 per complex sample, no scaling applied. */
+static inline __attribute__((always_inline)) void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
+{
+  // odd lanes of b negated: madd yields a.re*b.re - a.im*b.im
+  const __m128i b_negodd  = _mm_sign_epi16(b,*(__m128i*)reflip);
+
+  // int16 halves of every sample exchanged: madd yields a.re*b.im + a.im*b.re
+  const __m128i b_swapped = _mm_shuffle_epi8(b,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2));
+
+  *re32 = _mm_madd_epi16(a,b_negodd);
+  *im32 = _mm_madd_epi16(a,b_swapped);
+}
+
+#ifdef __AVX2__
+/* 256-bit cmult: *re32 = Re(a*b), *im32 = Im(a*b) for packed complex int16 samples
+ * (even lane = real, odd lane = imaginary), one int32 per sample, no scaling. */
+static inline __attribute__((always_inline)) void cmult_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
+{
+  // exchanges the two int16 halves of every 32-bit sample (indices act within each 128-bit lane)
+  const __m256i swap_halves = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  // odd lanes of b negated: madd yields a.re*b.re - a.im*b.im
+  const __m256i b_negodd  = _mm256_sign_epi16(b,*(__m256i*)reflip);
+  // halves exchanged: madd yields a.re*b.im + a.im*b.re
+  const __m256i b_swapped = _mm256_shuffle_epi8(b,swap_halves);
+
+  *re32 = _mm256_madd_epi16(a,b_negodd);
+  *im32 = _mm256_madd_epi16(a,b_swapped);
+}
+
+#endif
+
+/* Complex product with conjugated second operand (even lane = real, odd lane = imaginary):
+ *   *re32 = Re(a*conj(b)),  *im32 = Im(a*conj(b)), one int32 per complex sample, no scaling. */
+static inline __attribute__((always_inline)) void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
+{
+  // negate the odd lanes of b, then exchange the halves of every sample: (-b.im, b.re)
+  const __m128i b_negodd  = _mm_sign_epi16(b,*(__m128i*)reflip);
+  const __m128i b_rotated = _mm_shuffle_epi8(b_negodd,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2));
+
+  // a.re*b.re + a.im*b.im
+  *re32 = _mm_madd_epi16(a,b);
+  // a.im*b.re - a.re*b.im
+  *im32 = _mm_madd_epi16(a,b_rotated);
+}
+
+#ifdef __AVX2__
+/* 256-bit cmultc: *re32 = Re(a*conj(b)), *im32 = Im(a*conj(b)) for packed complex int16 samples
+ * (even lane = real, odd lane = imaginary), one int32 per sample, no scaling. */
+static inline __attribute__((always_inline)) void cmultc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32)
+{
+  const __m256i swap_halves = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+  // negate the odd lanes of b, then exchange the halves of every sample: (-b.im, b.re)
+  const __m256i b_negodd  = _mm256_sign_epi16(b,*(__m256i*)reflip);
+  const __m256i b_rotated = _mm256_shuffle_epi8(b_negodd,swap_halves);
+
+  // a.re*b.re + a.im*b.im
+  *re32 = _mm256_madd_epi16(a,b);
+  // a.im*b.re - a.re*b.im
+  *im32 = _mm256_madd_epi16(a,b_rotated);
+}
+
+#endif
+
+/* Pack int32 real/imaginary vectors back into interleaved complex int16:
+ * re/im are interleaved, every int32 is arithmetically shifted right by 15, then saturated to int16. */
+static inline __attribute__((always_inline)) __m128i cpack(__m128i xre,__m128i xim)
+{
+  // (re0,im0,re1,im1) and (re2,im2,re3,im3)
+  const __m128i pairs_lo  = _mm_unpacklo_epi32(xre,xim);
+  const __m128i pairs_hi  = _mm_unpackhi_epi32(xre,xim);
+  const __m128i scaled_lo = _mm_srai_epi32(pairs_lo,15);
+  const __m128i scaled_hi = _mm_srai_epi32(pairs_hi,15);
+
+  return(_mm_packs_epi32(scaled_lo,scaled_hi));
+}
+
+#ifdef __AVX2__
+/* 256-bit cpack: interleave re/im int32 (independently in each 128-bit lane), shift every int32
+ * arithmetically right by 15 and saturate to int16. */
+static inline __attribute__((always_inline)) __m256i cpack_256(__m256i xre,__m256i xim)
+{
+  // (re,im) pairs taken from the low / high half of each 128-bit lane
+  const __m256i pairs_lo  = _mm256_unpacklo_epi32(xre,xim);
+  const __m256i pairs_hi  = _mm256_unpackhi_epi32(xre,xim);
+  const __m256i scaled_lo = _mm256_srai_epi32(pairs_lo,15);
+  const __m256i scaled_hi = _mm256_srai_epi32(pairs_hi,15);
+
+  return(_mm256_packs_epi32(scaled_lo,scaled_hi));
+}
+
+#endif
+
+/* *c = a*b for packed complex int16 samples: cmult() produces the int32 products,
+ * cpack() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmult(__m128i a,__m128i b, __m128i *c)
+{
+  __m128i prod_re,prod_im;
+
+  cmult(a,b,&prod_re,&prod_im);
+
+  *c = cpack(prod_re,prod_im);
+}
+
+#ifdef __AVX2__
+/* 256-bit variant: *c = a*b for packed complex int16 samples; cmult_256() produces the int32
+ * products, cpack_256() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmult_256(__m256i a,__m256i b, __m256i *c)
+{
+  __m256i prod_re,prod_im;
+
+  cmult_256(a,b,&prod_re,&prod_im);
+
+  *c = cpack_256(prod_re,prod_im);
+}
+#endif
+
+/* *c = a*conj(b) for packed complex int16 samples: cmultc() produces the int32 products,
+ * cpack() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmultc(__m128i a,__m128i b, __m128i *c)
+{
+  __m128i prod_re,prod_im;
+
+  // int32 real and imaginary parts of a*conj(b)
+  cmultc(a,b,&prod_re,&prod_im);
+
+  *c = cpack(prod_re,prod_im);
+}
+
+#ifdef __AVX2__
+/* 256-bit variant: *c = a*conj(b) for packed complex int16 samples; cmultc_256() produces the
+ * int32 products, cpack_256() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmultc_256(__m256i a,__m256i b, __m256i *c)
+{
+  __m256i prod_re,prod_im;
+
+  // int32 real and imaginary parts of a*conj(b)
+  cmultc_256(a,b,&prod_re,&prod_im);
+
+  *c = cpack_256(prod_re,prod_im);
+}
+#endif
+
+/* Complex product where the caller supplies the second operand in two pre-arranged forms:
+ *   real part      = madd(a,b)   i.e. a[2k]*b[2k]  + a[2k+1]*b[2k+1]
+ *   imaginary part = madd(a,b2)  i.e. a[2k]*b2[2k] + a[2k+1]*b2[2k+1]
+ * The int32 results are returned packed by cpack(). */
+static inline __attribute__((always_inline)) __m128i
+packed_cmult2(__m128i a,__m128i b,__m128i b2)
+{
+  // int32 real parts
+  const __m128i prod_re = _mm_madd_epi16(a,b);
+  // int32 imaginary parts
+  const __m128i prod_im = _mm_madd_epi16(a,b2);
+
+  return(cpack(prod_re,prod_im));
+}
+
+#ifdef __AVX2__
+/* 256-bit variant of packed_cmult2; the caller supplies the second operand in two pre-arranged forms:
+ *   real part      = madd(a,b)   i.e. a[2k]*b[2k]  + a[2k+1]*b[2k+1]
+ *   imaginary part = madd(a,b2)  i.e. a[2k]*b2[2k] + a[2k+1]*b2[2k+1]
+ * The int32 results are returned packed by cpack_256(). */
+static inline __attribute__((always_inline)) __m256i
+packed_cmult2_256(__m256i a,__m256i b,__m256i b2)
+{
+  // int32 real parts
+  const __m256i prod_re = _mm256_madd_epi16(a,b);
+  // int32 imaginary parts
+  const __m256i prod_im = _mm256_madd_epi16(a,b2);
+
+  return(cpack_256(prod_re,prod_im));
+}
+#endif
+
+#elif defined (__arm__)
+static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline));
+static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32)
+{
+  // NEON complex multiply-accumulate: *re32 += Re(a*b), *im32 += Im(a*b), using saturating int32 adds.
+  // bflip: the two int16 of every 32-bit sample exchanged; bconj: odd int16 lanes negated through reflip.
+  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1;
+  int16x8_t bflip = vrev32q_s16(b);
+  int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip);
+  // widening multiplies on the low ([0]) and high ([1]) halves, then pairwise adds give one int32 per sample
+  ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bconj)[0]);
+  ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bconj)[1]);
+  ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bflip)[0]);
+  ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bflip)[1]);
+  *re32 = vqaddq_s32(*re32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]),
+                                        vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1])));
+  *im32 = vqaddq_s32(*im32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]),
+                                        vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1])));
+}
+
+static inline void cmacc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline));
+static inline void cmacc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32)
+{
+  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1;
+  int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip);
+  int16x8_t bflip = vrev32q_s16(bconj);
+  // bconj: odd int16 lanes of b negated; bflip: bconj with the two int16 of every sample exchanged, i.e. (-b.im, b.re)
+  ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b)[0]);
+  ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b)[1]);
+  ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bflip)[0]);
+  ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bflip)[1]);
+  *re32 = vqaddq_s32(*re32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]),
+                                        vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1])));
+  *im32 = vqaddq_s32(*im32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]),
+                                        vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1])));
+  // net effect: *re32 += Re(a*conj(b)), *im32 += Im(a*conj(b)), with saturating int32 adds
+}
+
+static inline void cmult(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline));
+static inline void cmult(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32)
+{
+  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1;
+  int16x8_t bflip = vrev32q_s16(b);
+  int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip);
+  int16x4_t al,ah,bcl,bch,bfl,bfh;
+  int32x2_t abr0l,abr0h,abr1l,abr1h,abi0l,abi0h,abi1l,abi1h;
+  // NEON complex product: *re32 = Re(a*b), *im32 = Im(a*b); bflip = halves exchanged, bconj = odd lanes negated
+  al  = vget_low_s16(a);      ah = vget_high_s16(a);
+  bcl = vget_low_s16(bconj);  bch = vget_high_s16(bconj);
+  bfl = vget_low_s16(bflip);  bfh = vget_high_s16(bflip);
+  // widening 16x16 -> 32 bit multiplies on the low and high halves
+  ab_re0 = vmull_s16(al,bcl);
+  ab_re1 = vmull_s16(ah,bch);
+  ab_im0 = vmull_s16(al,bfl);
+  ab_im1 = vmull_s16(ah,bfh);
+  abr0l = vget_low_s32(ab_re0); abr0h = vget_high_s32(ab_re0);
+  abr1l = vget_low_s32(ab_re1); abr1h = vget_high_s32(ab_re1);
+  abi0l = vget_low_s32(ab_im0); abi0h = vget_high_s32(ab_im0);
+  abi1l = vget_low_s32(ab_im1); abi1h = vget_high_s32(ab_im1);
+  // pairwise adds collapse the two partial products of each sample into one int32
+  *re32 = vcombine_s32(vpadd_s32(abr0l,abr0h),
+                       vpadd_s32(abr1l,abr1h));
+  *im32 = vcombine_s32(vpadd_s32(abi0l,abi0h),
+                       vpadd_s32(abi1l,abi1h));
+}
+
+static inline void cmultc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline));
+/* NEON complex product with conjugated second operand: *re32 = Re(a*conj(b)), *im32 = Im(a*conj(b)). */
+static inline void cmultc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32)
+{
+  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1;
+  int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip);
+  int16x8_t bflip = vrev32q_s16(bconj);
+  int16x4_t al,ah,bl,bh,bfl,bfh; 
+  int32x2_t abr0l,abr0h,abr1l,abr1h,abi0l,abi0h,abi1l,abi1h;
+  al  = vget_low_s16(a);     ah = vget_high_s16(a);
+  bl  = vget_low_s16(b);     bh = vget_high_s16(b);
+  bfl = vget_low_s16(bflip); bfh = vget_high_s16(bflip);
+  // bflip is (-b.im, b.re) per sample: bconj negates the odd lanes, vrev32q_s16 exchanges the halves
+  ab_re0 = vmull_s16(al,bl);
+  ab_re1 = vmull_s16(ah,bh);
+  ab_im0 = vmull_s16(al,bfl);
+  ab_im1 = vmull_s16(ah,bfh);
+  // split every int32x4 product vector into its low and high pairs for the pairwise adds
+  abr0l = vget_low_s32(ab_re0); abr0h = vget_high_s32(ab_re0);
+  abr1l = vget_low_s32(ab_re1); abr1h = vget_high_s32(ab_re1);
+  abi0l = vget_low_s32(ab_im0); abi0h = vget_high_s32(ab_im0);
+  abi1l = vget_low_s32(ab_im1); abi1h = vget_high_s32(ab_im1);
+  // pairwise adds collapse the two partial products of each sample into one int32
+  *re32 = vcombine_s32(vpadd_s32(abr0l,abr0h),
+                       vpadd_s32(abr1l,abr1h));
+  *im32 = vcombine_s32(vpadd_s32(abi0l,abi0h),
+                       vpadd_s32(abi1l,abi1h));
+
+}
+
+
+/* NEON cpack: zip the re/im int32 vectors into (re,im) pairs, then saturating shift right by 15
+ * with narrowing to int16. */
+static inline __attribute__((always_inline)) int16x8_t cpack(int32x4_t xre,int32x4_t xim)
+{
+  const int32x4x2_t zipped = vzipq_s32(xre,xim);
+  const int16x4_t pairs_lo = vqshrn_n_s32(zipped.val[0],15);
+  const int16x4_t pairs_hi = vqshrn_n_s32(zipped.val[1],15);
+
+  return(vcombine_s16(pairs_lo,pairs_hi));
+}
+
+
+/* NEON: *c = a*b for packed complex int16 samples; cmult() produces the int32 products,
+ * cpack() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmult(int16x8_t a,int16x8_t b, int16x8_t *c)
+{
+  int32x4_t prod_re,prod_im;
+
+  cmult(a,b,&prod_re,&prod_im);
+
+  *c = cpack(prod_re,prod_im);
+}
+
+
+/* NEON: *c = a*conj(b) for packed complex int16 samples; cmultc() produces the int32 products,
+ * cpack() turns them back into interleaved int16. */
+static inline __attribute__((always_inline)) void packed_cmultc(int16x8_t a,int16x8_t b, int16x8_t *c)
+{
+  int32x4_t prod_re,prod_im;
+
+  // int32 real and imaginary parts of a*conj(b)
+  cmultc(a,b,&prod_re,&prod_im);
+
+  *c = cpack(prod_re,prod_im);
+}
+
+static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b,  int16x8_t b2) __attribute__((always_inline));
+/* NEON: real parts from the pairwise products of a and b, imaginary parts from a and b2, packed by cpack(). */
+static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b,  int16x8_t b2)
+{
+
+  // the caller supplies b and b2 already arranged; no sign flip or lane exchange is applied here
+
+  int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,cre,cim;
+  // widening multiplies on the low ([0]) and high ([1]) int16x4 halves of the operands
+  ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b)[0]);
+  ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b)[1]);
+  ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b2)[0]);
+  ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b2)[1]);
+  cre = vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]),
+                     vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1]));
+  cim = vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]),
+                     vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1]));
+  return(cpack(cre,cim));
+
+}
+
+#endif
+
+const static int16_t W0s[16]__attribute__((aligned(32))) = {32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0};
+// Q15 twiddles, one (re,im) pair repeated 8 times. W0s = 1 (clipped to 32767); W13s/W23s = exp(-j*2*pi*k/3) for k=1,2
+const static int16_t W13s[16]__attribute__((aligned(32))) = {-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378};
+const static int16_t W23s[16]__attribute__((aligned(32))) = {-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378};
+// W15s..W45s = exp(-j*2*pi*k/5) for k=1..4. NOTE(review): the real parts of W25s (-26509) and W35s (-26510) differ by one LSB
+const static int16_t W15s[16]__attribute__((aligned(32))) = {10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163};
+const static int16_t W25s[16]__attribute__((aligned(32))) = {-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260};
+const static int16_t W35s[16]__attribute__((aligned(32))) = {-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260};
+const static int16_t W45s[16]__attribute__((aligned(32))) = {10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163};
+
+#if defined(__x86_64__) || defined(__i386__)
+const __m128i *W0 = (__m128i *)W0s;    // 128-bit views (first 8 int16) of the int16 twiddle tables
+const __m128i *W13 = (__m128i *)W13s;
+const __m128i *W23 = (__m128i *)W23s;
+const __m128i *W15 = (__m128i *)W15s;
+const __m128i *W25 = (__m128i *)W25s;
+const __m128i *W35 = (__m128i *)W35s;
+const __m128i *W45 = (__m128i *)W45s;
+// 256-bit views of the same tables: each table holds 16 int16, i.e. exactly one __m256i
+#ifdef __AVX2__
+const __m256i *W0_256 =  (__m256i *)W0s;
+const __m256i *W13_256 = (__m256i *)W13s;
+const __m256i *W23_256 = (__m256i *)W23s;
+const __m256i *W15_256 = (__m256i *)W15s;
+const __m256i *W25_256 = (__m256i *)W25s;
+const __m256i *W35_256 = (__m256i *)W35s;
+const __m256i *W45_256 = (__m256i *)W45s;
+#endif
+
+#elif defined(__arm__)
+int16x8_t *W0  = (int16x8_t *)W0s;   // NEON views (first 8 int16); NOTE(review): these pointers drop the const of the tables
+int16x8_t *W13 = (int16x8_t *)W13s;
+int16x8_t *W23 = (int16x8_t *)W23s;
+int16x8_t *W15 = (int16x8_t *)W15s;
+int16x8_t *W25 = (int16x8_t *)W25s;
+int16x8_t *W35 = (int16x8_t *)W35s;
+int16x8_t *W45 = (int16x8_t *)W45s;
+#endif
+const static int16_t dft_norm_table[16] = {9459,  //12   Q15 scale factors; the label on the right is the DFT size
+                                           6689,//24
+                                           5461,//36
+                                           4729,//48
+                                           4230,//60
+                                           23170,//72  NOTE(review): equals 1/sqrt(2) in Q15, whereas the neighbouring entries equal 1/sqrt(size) - confirm
+                                           3344,//96
+                                           3153,//108
+                                           2991,//120
+                                           18918,//1/sqrt(3),//144
+                                           18918,//1/sqrt(3),//180
+                                           16384,//1/2, //192
+                                           18918,//1/sqrt(3), // 216
+                                           16384,//1/2, //240
+                                           18918,//1/sqrt(3), // 288
+                                           14654
+}; //1/sqrt(5) //300
+
+
+#if defined(__x86_64__) || defined(__i386__)
+/* Radix-2 butterfly on packed complex int16 samples (even lane = real, odd lane = imaginary):
+ *   y0 = (x0*W0 + x1*tw) >> 15
+ *   y1 = (x0*W0 - x1*tw) >> 15
+ * computed in int32 and saturated to int16 on packing. x0, x1 and tw are fully read before
+ * y0/y1 are stored. */
+static inline __attribute__((always_inline)) void bfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)
+{
+  __m128i x0_re,x0_im,x1_re,x1_im;
+
+  cmult(*x0,*W0,&x0_re,&x0_im);
+  cmult(*x1,*tw,&x1_re,&x1_im);
+
+  // sums and differences of the int32 products, scaled back by 2^-15
+  const __m128i sum_re  = _mm_srai_epi32(_mm_add_epi32(x0_re,x1_re),15);
+  const __m128i diff_re = _mm_srai_epi32(_mm_sub_epi32(x0_re,x1_re),15);
+  const __m128i sum_im  = _mm_srai_epi32(_mm_add_epi32(x0_im,x1_im),15);
+  const __m128i diff_im = _mm_srai_epi32(_mm_sub_epi32(x0_im,x1_im),15);
+
+  // re-interleave (re,im) and saturate to int16
+  *y0 = _mm_packs_epi32(_mm_unpacklo_epi32(sum_re,sum_im),
+                        _mm_unpackhi_epi32(sum_re,sum_im));
+
+  *y1 = _mm_packs_epi32(_mm_unpacklo_epi32(diff_re,diff_im),
+                        _mm_unpackhi_epi32(diff_re,diff_im));
+}
+
+#ifdef __AVX2__
+
+static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline));
+
+// AVX2 radix-2 butterfly with twiddle:
+//   *y0 = (x0*W0_256 + x1*tw) >> 15,   *y1 = (x0*W0_256 - x1*tw) >> 15
+// cmult_256() (defined elsewhere) delivers the products as separate 32-bit
+// "real" and "imaginary" vectors.  The sums/differences are shifted right
+// arithmetically by 15, re/im are re-interleaved and packed to int16 with
+// signed saturation.  Both inputs are read before any output is written.
+static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)
+{
+  __m256i a_re, a_im, b_re, b_im;
+  __m256i sum_re, sum_im, diff_re, diff_im;
+
+  cmult_256(*x0, *W0_256, &a_re, &a_im);
+  cmult_256(*x1, *tw, &b_re, &b_im);
+
+  sum_re  = _mm256_srai_epi32(_mm256_add_epi32(a_re, b_re), 15);
+  diff_re = _mm256_srai_epi32(_mm256_sub_epi32(a_re, b_re), 15);
+  sum_im  = _mm256_srai_epi32(_mm256_add_epi32(a_im, b_im), 15);
+  diff_im = _mm256_srai_epi32(_mm256_sub_epi32(a_im, b_im), 15);
+
+  // interleave (re,im) pairs, then narrow 32 -> 16 bit with saturation
+  *y0 = _mm256_packs_epi32(_mm256_unpacklo_epi32(sum_re, sum_im),
+                           _mm256_unpackhi_epi32(sum_re, sum_im));
+  *y1 = _mm256_packs_epi32(_mm256_unpacklo_epi32(diff_re, diff_im),
+                           _mm256_unpackhi_epi32(diff_re, diff_im));
+}
+
+#endif
+
+#elif defined(__arm__)
+
+static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)__attribute__((always_inline));
+
+// NEON radix-2 butterfly with twiddle:
+//   *y0 = cpack(x0*W0 + x1*tw),   *y1 = cpack(x0*W0 - x1*tw)
+// The 32-bit products from cmult() are combined with saturating adds and
+// subtracts; cpack() (defined elsewhere) presumably rescales and narrows
+// them back to interleaved int16 -- confirm against its definition.
+static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)
+{
+  int32x4_t a_re, a_im, b_re, b_im;
+  int32x4_t sum_re, sum_im, diff_re, diff_im;
+
+  cmult(*x0, *W0, &a_re, &a_im);
+  cmult(*x1, *tw, &b_re, &b_im);
+
+  sum_re  = vqaddq_s32(a_re, b_re);
+  sum_im  = vqaddq_s32(a_im, b_im);
+  diff_re = vqsubq_s32(a_re, b_re);
+  diff_im = vqsubq_s32(a_im, b_im);
+
+  *y0 = cpack(sum_re, sum_im);
+  *y1 = cpack(diff_re, diff_im);
+}
+
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1)__attribute__((always_inline));
+
+// Radix-2 butterfly with unit twiddle (SSE): *y0 = x0 + x1, *y1 = x0 - x1,
+// element-wise on int16 with signed saturation.
+// *y0 is stored before the inputs are read again for *y1, so y0 must not
+// alias x0 or x1.
+static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1)
+{
+  y0[0] = _mm_adds_epi16(x0[0], x1[0]);
+  y1[0] = _mm_subs_epi16(x0[0], x1[0]);
+}
+
+#elif defined(__arm__)
+
+static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1)__attribute__((always_inline));
+
+// Radix-2 butterfly with unit twiddle (NEON): *y0 = x0 + x1, *y1 = x0 - x1,
+// element-wise on int16 with signed saturation.
+// *y0 is stored before the inputs are read again for *y1, so y0 must not
+// alias x0 or x1.
+static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1)
+{
+  y0[0] = vqaddq_s16(x0[0], x1[0]);
+  y1[0] = vqsubq_s16(x0[0], x1[0]);
+}
+#endif
+ 
+#if defined(__x86_64__) || defined(__i386__)
+
+
+
+static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb)__attribute__((always_inline));
+
+// Radix-2 butterfly operating entirely on packed int16 (SSE):
+//   x1t = packed_cmult2(x1, tw, twb);  *y0 = x0 + x1t;  *y1 = x0 - x1t
+// with saturating 16-bit add/subtract.  packed_cmult2() is defined elsewhere;
+// presumably tw/twb are two arrangements of the same twiddle -- confirm.
+// *y0 is stored before *x0 is read again, so y0 must not alias x0.
+static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb)
+{
+  __m128i rotated;
+
+  rotated = packed_cmult2(*x1, *tw, *twb);
+
+  *y0 = _mm_adds_epi16(*x0, rotated);
+  *y1 = _mm_subs_epi16(*x0, rotated);
+}
+
+#ifdef __AVX2__
+
+static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb)__attribute__((always_inline));
+
+// AVX2 radix-2 butterfly operating entirely on packed int16:
+//   x1t = packed_cmult2_256(x1, tw, twb);  *y0 = x0 + x1t;  *y1 = x0 - x1t
+// with saturating 16-bit add/subtract.  packed_cmult2_256() is defined
+// elsewhere; presumably tw/twb are two arrangements of the same twiddle --
+// confirm.  *y0 is stored before *x0 is read again, so y0 must not alias x0.
+static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb)
+{
+  __m256i rotated;
+
+  rotated = packed_cmult2_256(*x1, *tw, *twb);
+
+  *y0 = _mm256_adds_epi16(*x0, rotated);
+  *y1 = _mm256_subs_epi16(*x0, rotated);
+}
+#endif
+
+
+#elif defined(__arm__)
+
+static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb)__attribute__((always_inline));
+
+// NEON counterpart of the x86 bfly2_16: *y0 = x0 + x1, *y1 = x0 - x1 with
+// saturating int16 arithmetic.
+// NOTE(review): tw and twb are accepted but never used here, whereas the x86
+// version multiplies x1 by the twiddle (packed_cmult2) before the add/sub.
+// This looks like a missing twiddle multiplication on ARM -- confirm.
+static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb)
+{
+
+  *y0  = vqaddq_s16(*x0,*x1);
+  *y1  = vqsubq_s16(*x0,*x1);
+
+}
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)__attribute__((always_inline));
+
+// Inverse-transform radix-2 butterfly (SSE).  Same data flow as bfly2 but
+// using cmultc() (defined elsewhere; presumably the conjugate-twiddle
+// multiply -- confirm):
+//   *y0 = (cmultc(x0,W0) + cmultc(x1,tw)) >> 15
+//   *y1 = (cmultc(x0,W0) - cmultc(x1,tw)) >> 15
+// Results are re-interleaved as (re,im) and packed to int16 with saturation.
+static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)
+{
+  __m128i a_re, a_im, b_re, b_im;
+  __m128i sum_re, sum_im, diff_re, diff_im;
+
+  cmultc(*x0, *W0, &a_re, &a_im);
+  cmultc(*x1, *tw, &b_re, &b_im);
+
+  sum_re  = _mm_srai_epi32(_mm_add_epi32(a_re, b_re), 15);
+  diff_re = _mm_srai_epi32(_mm_sub_epi32(a_re, b_re), 15);
+  sum_im  = _mm_srai_epi32(_mm_add_epi32(a_im, b_im), 15);
+  diff_im = _mm_srai_epi32(_mm_sub_epi32(a_im, b_im), 15);
+
+  // interleave (re,im) pairs, then narrow 32 -> 16 bit with saturation
+  *y0 = _mm_packs_epi32(_mm_unpacklo_epi32(sum_re, sum_im),
+                        _mm_unpackhi_epi32(sum_re, sum_im));
+  *y1 = _mm_packs_epi32(_mm_unpacklo_epi32(diff_re, diff_im),
+                        _mm_unpackhi_epi32(diff_re, diff_im));
+}
+
+#ifdef __AVX2__
+static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline));
+
+// AVX2 inverse-transform radix-2 butterfly.  Same data flow as bfly2_256 but
+// using cmultc_256() (defined elsewhere; presumably the conjugate-twiddle
+// multiply -- confirm):
+//   *y0 = (cmultc(x0,W0_256) + cmultc(x1,tw)) >> 15
+//   *y1 = (cmultc(x0,W0_256) - cmultc(x1,tw)) >> 15
+// Results are re-interleaved as (re,im) and packed to int16 with saturation.
+static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)
+{
+  __m256i a_re, a_im, b_re, b_im;
+  __m256i sum_re, sum_im, diff_re, diff_im;
+
+  cmultc_256(*x0, *W0_256, &a_re, &a_im);
+  cmultc_256(*x1, *tw, &b_re, &b_im);
+
+  sum_re  = _mm256_srai_epi32(_mm256_add_epi32(a_re, b_re), 15);
+  diff_re = _mm256_srai_epi32(_mm256_sub_epi32(a_re, b_re), 15);
+  sum_im  = _mm256_srai_epi32(_mm256_add_epi32(a_im, b_im), 15);
+  diff_im = _mm256_srai_epi32(_mm256_sub_epi32(a_im, b_im), 15);
+
+  // interleave (re,im) pairs, then narrow 32 -> 16 bit with saturation
+  *y0 = _mm256_packs_epi32(_mm256_unpacklo_epi32(sum_re, sum_im),
+                           _mm256_unpackhi_epi32(sum_re, sum_im));
+  *y1 = _mm256_packs_epi32(_mm256_unpacklo_epi32(diff_re, diff_im),
+                           _mm256_unpackhi_epi32(diff_re, diff_im));
+}
+#endif
+
+#elif defined(__arm__)
+static inline void ibfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)__attribute__((always_inline));
+
+// NEON inverse-transform radix-2 butterfly:
+//   *y0 = cpack(cmultc(x0,W0) + cmultc(x1,tw))
+//   *y1 = cpack(cmultc(x0,W0) - cmultc(x1,tw))
+// The 32-bit products are combined with saturating add/subtract; cmultc()
+// and cpack() are defined elsewhere (presumably conjugate multiply and
+// rescale-and-narrow to int16 -- confirm).
+// The always_inline prototype above was missing for this variant only; it is
+// declared so this function is force-inlined like every other butterfly in
+// this file.
+static inline void ibfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)
+{
+
+  int32x4_t x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i;
+
+  cmultc(*(x0),*(W0),&x0r_2,&x0i_2);
+  cmultc(*(x1),*(tw),&x1r_2,&x1i_2);
+
+  dy0r = vqaddq_s32(x0r_2,x1r_2);
+  dy1r = vqsubq_s32(x0r_2,x1r_2);
+  dy0i = vqaddq_s32(x0i_2,x1i_2);
+  dy1i = vqsubq_s32(x0i_2,x1i_2);
+
+  *y0 = cpack(dy0r,dy0i);
+  *y1 = cpack(dy1r,dy1i);
+
+}
+
+#endif
+
+
+
+
+// This is the radix-3 butterfly (fft)
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static inline void bfly3(__m128i *x0,__m128i *x1,__m128i *x2,
+                         __m128i *y0,__m128i *y1,__m128i *y2,
+                         __m128i *tw1,__m128i *tw2) __attribute__((always_inline));
+
+// Radix-3 butterfly with input twiddles (SSE).
+//   x1' = packed_cmult(x1,tw1),  x2' = packed_cmult(x2,tw2)
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack(x1'*W13 + x2'*W23)
+//   *y2 = x0 + cpack(x1'*W23 + x2'*W13)
+// All 16-bit additions saturate.  cmult/cmac/cpack/packed_cmult are defined
+// elsewhere (presumably complex multiply, multiply-accumulate and narrowing).
+// *y0 (and then *y1) is stored before *x0 is read again, so the outputs must
+// not alias x0.
+static inline void bfly3(__m128i *x0,__m128i *x1,__m128i *x2,
+                         __m128i *y0,__m128i *y1,__m128i *y2,
+                         __m128i *tw1,__m128i *tw2)
+{
+
+  __m128i tmpre,tmpim,x1_2,x2_2;
+
+  packed_cmult(*(x1),*(tw1),&x1_2);
+  packed_cmult(*(x2),*(tw2),&x2_2);
+  *(y0)  = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1_2,x2_2));
+  cmult(x1_2,*(W13),&tmpre,&tmpim);
+  cmac(x2_2,*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = _mm_adds_epi16(*(x0),*(y1));
+  cmult(x1_2,*(W23),&tmpre,&tmpim);
+  cmac(x2_2,*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = _mm_adds_epi16(*(x0),*(y2));
+}
+
+#ifdef __AVX2__
+
+static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
+			     __m256i *y0,__m256i *y1,__m256i *y2,
+			     __m256i *tw1,__m256i *tw2) __attribute__((always_inline));
+
+// AVX2 radix-3 butterfly with input twiddles.
+//   x1' = packed_cmult_256(x1,tw1),  x2' = packed_cmult_256(x2,tw2)
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack_256(x1'*W13_256 + x2'*W23_256)
+//   *y2 = x0 + cpack_256(x1'*W23_256 + x2'*W13_256)
+// All 16-bit additions saturate.  The *_256 helpers are defined elsewhere.
+// *y0 (and then *y1) is stored before *x0 is read again, so the outputs must
+// not alias x0.
+static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
+			     __m256i *y0,__m256i *y1,__m256i *y2,
+			     __m256i *tw1,__m256i *tw2)
+{ 
+
+  __m256i tmpre,tmpim,x1_2,x2_2;
+
+  packed_cmult_256(*(x1),*(tw1),&x1_2);
+  packed_cmult_256(*(x2),*(tw2),&x2_2);
+  *(y0)  = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2));
+  cmult_256(x1_2,*(W13_256),&tmpre,&tmpim);
+  cmac_256(x2_2,*(W23_256),&tmpre,&tmpim);
+  *(y1) = cpack_256(tmpre,tmpim);
+  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  cmult_256(x1_2,*(W23_256),&tmpre,&tmpim);
+  cmac_256(x2_2,*(W13_256),&tmpre,&tmpim);
+  *(y2) = cpack_256(tmpre,tmpim);
+  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+}
+#endif
+
+#elif defined(__arm__)
+static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+                         int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
+                         int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline));
+
+// NEON radix-3 butterfly with input twiddles.
+//   x1' = packed_cmult(x1,tw1),  x2' = packed_cmult(x2,tw2)
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack(x1'*W13 + x2'*W23)
+//   *y2 = x0 + cpack(x1'*W23 + x2'*W13)
+// All 16-bit additions saturate (vqaddq_s16).  The helpers are defined
+// elsewhere.  *y0 (and then *y1) is stored before *x0 is read again, so the
+// outputs must not alias x0.
+static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+                         int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
+                         int16x8_t *tw1,int16x8_t *tw2)
+{
+
+  int32x4_t tmpre,tmpim;
+  int16x8_t x1_2,x2_2;
+
+  packed_cmult(*(x1),*(tw1),&x1_2);
+  packed_cmult(*(x2),*(tw2),&x2_2);
+  *(y0)  = vqaddq_s16(*(x0),vqaddq_s16(x1_2,x2_2));
+  cmult(x1_2,*(W13),&tmpre,&tmpim);
+  cmac(x2_2,*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = vqaddq_s16(*(x0),*(y1));
+  cmult(x1_2,*(W23),&tmpre,&tmpim);
+  cmac(x2_2,*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = vqaddq_s16(*(x0),*(y2));
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2,
+			  __m128i *y0,__m128i *y1,__m128i *y2,
+			  __m128i *tw1,__m128i *tw2) __attribute__((always_inline));
+
+// Inverse-transform radix-3 butterfly (SSE).  Same structure as bfly3 but
+// built on the "c" helper variants (packed_cmultc / cmultc / cmacc, defined
+// elsewhere; presumably conjugate-twiddle multiplies -- confirm):
+//   x1' = packed_cmultc(x1,tw1),  x2' = packed_cmultc(x2,tw2)
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack(cmultc(x1',W13) + cmultc(x2',W23))
+//   *y2 = x0 + cpack(cmultc(x1',W23) + cmultc(x2',W13))
+// 16-bit additions saturate.  Outputs must not alias x0 (x0 is re-read after
+// *y0 and *y1 are stored).
+static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2,
+			  __m128i *y0,__m128i *y1,__m128i *y2,
+			  __m128i *tw1,__m128i *tw2)
+{
+
+  __m128i tmpre,tmpim,x1_2,x2_2;
+
+  packed_cmultc(*(x1),*(tw1),&x1_2);
+  packed_cmultc(*(x2),*(tw2),&x2_2);
+  *(y0)  = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1_2,x2_2));
+  cmultc(x1_2,*(W13),&tmpre,&tmpim);
+  cmacc(x2_2,*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = _mm_adds_epi16(*(x0),*(y1));
+  cmultc(x1_2,*(W23),&tmpre,&tmpim);
+  cmacc(x2_2,*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = _mm_adds_epi16(*(x0),*(y2));
+}
+
+#ifdef __AVX2__
+
+static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
+			      __m256i *y0,__m256i *y1,__m256i *y2,
+			      __m256i *tw1,__m256i *tw2) __attribute__((always_inline));
+
+// AVX2 inverse-transform radix-3 butterfly.  Same structure as bfly3_256 but
+// built on packed_cmultc_256 / cmultc_256 / cmacc_256 (defined elsewhere;
+// presumably conjugate-twiddle multiplies -- confirm):
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack_256(cmultc(x1',W13_256) + cmultc(x2',W23_256))
+//   *y2 = x0 + cpack_256(cmultc(x1',W23_256) + cmultc(x2',W13_256))
+// 16-bit additions saturate.  Outputs must not alias x0 (x0 is re-read after
+// *y0 and *y1 are stored).
+static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2,
+			      __m256i *y0,__m256i *y1,__m256i *y2,
+			      __m256i *tw1,__m256i *tw2)
+{ 
+
+  __m256i tmpre,tmpim,x1_2,x2_2;
+
+  packed_cmultc_256(*(x1),*(tw1),&x1_2);
+  packed_cmultc_256(*(x2),*(tw2),&x2_2);
+  *(y0)  = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2));
+  cmultc_256(x1_2,*(W13_256),&tmpre,&tmpim);
+  cmacc_256(x2_2,*(W23_256),&tmpre,&tmpim);
+  *(y1) = cpack_256(tmpre,tmpim);
+  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  cmultc_256(x1_2,*(W23_256),&tmpre,&tmpim);
+  cmacc_256(x2_2,*(W13_256),&tmpre,&tmpim);
+  *(y2) = cpack_256(tmpre,tmpim);
+  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+}
+#endif
+
+#elif defined(__arm__)
+static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+			  int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
+			  int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline));
+
+// NEON inverse-transform radix-3 butterfly.  Same structure as the NEON bfly3
+// but built on packed_cmultc / cmultc / cmacc (defined elsewhere; presumably
+// conjugate-twiddle multiplies -- confirm):
+//   *y0 = x0 + x1' + x2'
+//   *y1 = x0 + cpack(cmultc(x1',W13) + cmultc(x2',W23))
+//   *y2 = x0 + cpack(cmultc(x1',W23) + cmultc(x2',W13))
+// 16-bit additions saturate.  Outputs must not alias x0 (x0 is re-read after
+// *y0 and *y1 are stored).
+static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+			  int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,
+			  int16x8_t *tw1,int16x8_t *tw2)
+{
+
+  int32x4_t tmpre,tmpim;
+  int16x8_t x1_2,x2_2;
+
+  packed_cmultc(*(x1),*(tw1),&x1_2);
+  packed_cmultc(*(x2),*(tw2),&x2_2);
+  *(y0)  = vqaddq_s16(*(x0),vqaddq_s16(x1_2,x2_2));
+  cmultc(x1_2,*(W13),&tmpre,&tmpim);
+  cmacc(x2_2,*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = vqaddq_s16(*(x0),*(y1));
+  cmultc(x1_2,*(W23),&tmpre,&tmpim);
+  cmacc(x2_2,*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = vqaddq_s16(*(x0),*(y2));
+}
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2,
+                             __m128i *y0,__m128i *y1,__m128i *y2) __attribute__((always_inline));
+
+// Radix-3 butterfly without input twiddles (SSE):
+//   *y0 = x0 + x1 + x2
+//   *y1 = x0 + cpack(x1*W13 + x2*W23)
+//   *y2 = x0 + cpack(x1*W23 + x2*W13)
+// 16-bit additions saturate.  *y0 is stored before x0, x1 and x2 are read
+// again, so the outputs must not alias the inputs.
+static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2,
+                             __m128i *y0,__m128i *y1,__m128i *y2)
+{
+
+  __m128i tmpre,tmpim;
+
+  *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(*(x1),*(x2)));
+  cmult(*(x1),*(W13),&tmpre,&tmpim);
+  cmac(*(x2),*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = _mm_adds_epi16(*(x0),*(y1));
+  cmult(*(x1),*(W23),&tmpre,&tmpim);
+  cmac(*(x2),*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = _mm_adds_epi16(*(x0),*(y2));
+}
+
+#ifdef __AVX2__
+
+static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,
+				 __m256i *y0,__m256i *y1,__m256i *y2) __attribute__((always_inline));
+
+// AVX2 radix-3 butterfly without input twiddles:
+//   *y0 = x0 + x1 + x2
+//   *y1 = x0 + cpack_256(x1*W13_256 + x2*W23_256)
+//   *y2 = x0 + cpack_256(x1*W23_256 + x2*W13_256)
+// 16-bit additions saturate.  *y0 is stored before x0, x1 and x2 are read
+// again, so the outputs must not alias the inputs.
+static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,
+				 __m256i *y0,__m256i *y1,__m256i *y2)
+{
+
+  __m256i tmpre,tmpim;
+
+  *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(*(x1),*(x2)));
+  cmult_256(*(x1),*(W13_256),&tmpre,&tmpim);
+  cmac_256(*(x2),*(W23_256),&tmpre,&tmpim);
+  *(y1) = cpack_256(tmpre,tmpim);
+  *(y1) = _mm256_adds_epi16(*(x0),*(y1));
+  cmult_256(*(x1),*(W23_256),&tmpre,&tmpim);
+  cmac_256(*(x2),*(W13_256),&tmpre,&tmpim);
+  *(y2) = cpack_256(tmpre,tmpim);
+  *(y2) = _mm256_adds_epi16(*(x0),*(y2));
+}
+#endif
+
+#elif defined(__arm__)
+static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+                             int16x8_t *y0,int16x8_t *y1,int16x8_t *y2) __attribute__((always_inline));
+
+// NEON radix-3 butterfly without input twiddles:
+//   *y0 = x0 + x1 + x2
+//   *y1 = x0 + cpack(x1*W13 + x2*W23)
+//   *y2 = x0 + cpack(x1*W23 + x2*W13)
+// 16-bit additions saturate.  *y0 is stored before x0, x1 and x2 are read
+// again, so the outputs must not alias the inputs.
+static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,
+                             int16x8_t *y0,int16x8_t *y1,int16x8_t *y2)
+{
+
+  int32x4_t tmpre,tmpim;
+
+  *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),*(x2)));
+  cmult(*(x1),*(W13),&tmpre,&tmpim);
+  cmac(*(x2),*(W23),&tmpre,&tmpim);
+  *(y1) = cpack(tmpre,tmpim);
+  *(y1) = vqaddq_s16(*(x0),*(y1));
+  cmult(*(x1),*(W23),&tmpre,&tmpim);
+  cmac(*(x2),*(W13),&tmpre,&tmpim);
+  *(y2) = cpack(tmpre,tmpim);
+  *(y2) = vqaddq_s16(*(x0),*(y2));
+
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                         __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                         __m128i *tw1,__m128i *tw2,__m128i *tw3)__attribute__((always_inline));
+
+// Radix-4 butterfly with input twiddles (SSE).
+// With xk' = cmult(xk,twk) for k=1..3 (32-bit real/imaginary parts) the
+// outputs are, treating the cmult results as (re,im):
+//   *y0 = x0 + cpack( x1' + x2' + x3')
+//   *y1 = x0 + cpack(-j*x1' - x2' + j*x3')
+//   *y2 = x0 + cpack(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack( j*x1' - x2' - j*x3')
+// x0 is not multiplied by a twiddle; it is added in the 16-bit domain.
+// That final 16-bit addition is saturating (_mm_adds_epi16), like the NEON
+// implementation (vqaddq_s16) and the other butterflies in this file; the
+// previous wrapping _mm_add_epi16 turned an overflow into a sign flip.
+// The 32-bit accumulations remain plain (wrapping) adds.
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                         __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                         __m128i *tw1,__m128i *tw2,__m128i *tw3)
+{
+
+  __m128i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+  cmult(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmult(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmult(*(x3),*(tw3),&x3r_2,&x3i_2);
+
+  // y0: x1' + x2' + x3'
+  dy0r = _mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2));
+  dy0i = _mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2));
+  *(y0)  = _mm_adds_epi16(*(x0),cpack(dy0r,dy0i));
+
+  // y1: -j*x1' - x2' + j*x3'
+  dy1r = _mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2));
+  dy1i = _mm_sub_epi32(_mm_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y1)  = _mm_adds_epi16(*(x0),cpack(dy1r,dy1i));
+
+  // y2: -x1' + x2' - x3'
+  dy2r = _mm_sub_epi32(_mm_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = _mm_sub_epi32(_mm_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = _mm_adds_epi16(*(x0),cpack(dy2r,dy2i));
+
+  // y3: j*x1' - x2' - j*x3'
+  dy3r = _mm_sub_epi32(_mm_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy3i = _mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2));
+  *(y3) = _mm_adds_epi16(*(x0),cpack(dy3r,dy3i));
+}
+
+#ifdef __AVX2__
+static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                             __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                             __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline));
+
+// AVX2 radix-4 butterfly with input twiddles.
+// With xk' = cmult_256(xk,twk) for k=1..3 (32-bit real/imaginary parts) the
+// outputs are, treating the cmult results as (re,im):
+//   *y0 = x0 + cpack_256( x1' + x2' + x3')
+//   *y1 = x0 + cpack_256(-j*x1' - x2' + j*x3')
+//   *y2 = x0 + cpack_256(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack_256( j*x1' - x2' - j*x3')
+// x0 is not multiplied by a twiddle; it is added in the 16-bit domain.
+// That final 16-bit addition is saturating (_mm256_adds_epi16), like the
+// NEON bfly4 (vqaddq_s16) and the other butterflies in this file; the
+// previous wrapping _mm256_add_epi16 turned an overflow into a sign flip.
+// The 32-bit accumulations remain plain (wrapping) adds.
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                             __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                             __m256i *tw1,__m256i *tw2,__m256i *tw3)
+{
+
+  __m256i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+  cmult_256(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmult_256(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmult_256(*(x3),*(tw3),&x3r_2,&x3i_2);
+
+  // y0: x1' + x2' + x3'
+  dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2));
+  dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2));
+  *(y0)  = _mm256_adds_epi16(*(x0),cpack_256(dy0r,dy0i));
+
+  // y1: -j*x1' - x2' + j*x3'
+  dy1r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2));
+  dy1i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y1)  = _mm256_adds_epi16(*(x0),cpack_256(dy1r,dy1i));
+
+  // y2: -x1' + x2' - x3'
+  dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = _mm256_adds_epi16(*(x0),cpack_256(dy2r,dy2i));
+
+  // y3: j*x1' - x2' - j*x3'
+  dy3r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy3i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2));
+  *(y3) = _mm256_adds_epi16(*(x0),cpack_256(dy3r,dy3i));
+}
+#endif
+#elif defined(__arm__)
+static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                         int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+                         int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)__attribute__((always_inline));
+
+// NEON radix-4 butterfly with input twiddles.
+// With xk' = cmult(xk,twk) for k=1..3 (32-bit real/imaginary parts) the
+// outputs are, treating the cmult results as (re,im):
+//   *y0 = x0 + cpack( x1' + x2' + x3')
+//   *y1 = x0 + cpack(-j*x1' - x2' + j*x3')
+//   *y2 = x0 + cpack(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack( j*x1' - x2' - j*x3')
+// x0 is not multiplied by a twiddle; it is added in the 16-bit domain.
+// All additions/subtractions here saturate (vq* intrinsics).
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                         int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+                         int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)
+{
+
+  int32x4_t x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+  // x0 needs no twiddle multiplication
+  cmult(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmult(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmult(*(x3),*(tw3),&x3r_2,&x3i_2);
+  // y0: x1' + x2' + x3'
+  dy0r = vqaddq_s32(x1r_2,vqaddq_s32(x2r_2,x3r_2));
+  dy0i = vqaddq_s32(x1i_2,vqaddq_s32(x2i_2,x3i_2));
+  *(y0)  = vqaddq_s16(*(x0),cpack(dy0r,dy0i));
+  // y1: -j*x1' - x2' + j*x3'
+  dy1r = vqsubq_s32(x1i_2,vqaddq_s32(x2r_2,x3i_2));
+  dy1i = vqsubq_s32(vqsubq_s32(x3r_2,x2i_2),x1r_2);
+  *(y1)  = vqaddq_s16(*(x0),cpack(dy1r,dy1i));
+  // y2: -x1' + x2' - x3'
+  dy2r = vqsubq_s32(vqsubq_s32(x2r_2,x3r_2),x1r_2);
+  dy2i = vqsubq_s32(vqsubq_s32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = vqaddq_s16(*(x0),cpack(dy2r,dy2i));
+  // y3: j*x1' - x2' - j*x3'
+  dy3r = vqsubq_s32(vqsubq_s32(x3i_2,x2r_2),x1i_2);
+  dy3i = vqsubq_s32(x1r_2,vqaddq_s32(x2i_2,x3r_2));
+  *(y3) = vqaddq_s16(*(x0),cpack(dy3r,dy3i));
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                          __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                          __m128i *tw1,__m128i *tw2,__m128i *tw3)__attribute__((always_inline));
+
+// Inverse-transform radix-4 butterfly with input twiddles (SSE).
+// With xk' = cmultc(xk,twk) for k=1..3 (cmultc is defined elsewhere;
+// presumably the conjugate-twiddle multiply -- confirm) the outputs are,
+// treating the results as (re,im):
+//   *y0 = x0 + cpack( x1' + x2' + x3')
+//   *y1 = x0 + cpack( j*x1' - x2' - j*x3')
+//   *y2 = x0 + cpack(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack(-j*x1' - x2' + j*x3')
+// i.e. the y1/y3 expressions of bfly4 are exchanged.
+// The final 16-bit addition of x0 is saturating (_mm_adds_epi16), like the
+// NEON implementation (vqaddq_s16); the previous wrapping _mm_add_epi16
+// turned an overflow into a sign flip.  The 32-bit accumulations remain
+// plain (wrapping) adds.
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                          __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                          __m128i *tw1,__m128i *tw2,__m128i *tw3)
+{
+
+  __m128i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+  cmultc(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmultc(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmultc(*(x3),*(tw3),&x3r_2,&x3i_2);
+
+  // y0: x1' + x2' + x3'
+  dy0r = _mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2));
+  dy0i = _mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2));
+  *(y0)  = _mm_adds_epi16(*(x0),cpack(dy0r,dy0i));
+
+  // y3: -j*x1' - x2' + j*x3'
+  dy3r = _mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2));
+  dy3i = _mm_sub_epi32(_mm_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y3)  = _mm_adds_epi16(*(x0),cpack(dy3r,dy3i));
+
+  // y2: -x1' + x2' - x3'
+  dy2r = _mm_sub_epi32(_mm_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = _mm_sub_epi32(_mm_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = _mm_adds_epi16(*(x0),cpack(dy2r,dy2i));
+
+  // y1: j*x1' - x2' - j*x3'
+  dy1r = _mm_sub_epi32(_mm_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy1i = _mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2));
+  *(y1) = _mm_adds_epi16(*(x0),cpack(dy1r,dy1i));
+}
+
+#ifdef __AVX2__
+
+static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                              __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                              __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline));
+
+// AVX2 inverse-transform radix-4 butterfly with input twiddles.
+// With xk' = cmultc_256(xk,twk) for k=1..3 (defined elsewhere; presumably
+// the conjugate-twiddle multiply -- confirm) the outputs are, treating the
+// results as (re,im):
+//   *y0 = x0 + cpack_256( x1' + x2' + x3')
+//   *y1 = x0 + cpack_256( j*x1' - x2' - j*x3')
+//   *y2 = x0 + cpack_256(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack_256(-j*x1' - x2' + j*x3')
+// i.e. the y1/y3 expressions of bfly4_256 are exchanged.
+// The final 16-bit addition of x0 is saturating (_mm256_adds_epi16), like
+// the NEON ibfly4 (vqaddq_s16); the previous wrapping _mm256_add_epi16
+// turned an overflow into a sign flip.  The 32-bit accumulations remain
+// plain (wrapping) adds.
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                              __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                              __m256i *tw1,__m256i *tw2,__m256i *tw3)
+{
+
+  __m256i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+  cmultc_256(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmultc_256(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmultc_256(*(x3),*(tw3),&x3r_2,&x3i_2);
+
+  // y0: x1' + x2' + x3'
+  dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2));
+  dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2));
+  *(y0)  = _mm256_adds_epi16(*(x0),cpack_256(dy0r,dy0i));
+
+  // y3: -j*x1' - x2' + j*x3'
+  dy3r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2));
+  dy3i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2);
+  *(y3)  = _mm256_adds_epi16(*(x0),cpack_256(dy3r,dy3i));
+
+  // y2: -x1' + x2' - x3'
+  dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2);
+  dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2);
+  *(y2)  = _mm256_adds_epi16(*(x0),cpack_256(dy2r,dy2i));
+
+  // y1: j*x1' - x2' - j*x3'
+  dy1r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2);
+  dy1i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2));
+  *(y1) = _mm256_adds_epi16(*(x0),cpack_256(dy1r,dy1i));
+}
+
+
+#endif
+#elif defined(__arm__)
+
+static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                          int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+                          int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)__attribute__((always_inline));
+
+// NEON inverse-transform radix-4 butterfly with input twiddles.
+// With xk' = cmultc(xk,twk) for k=1..3 (defined elsewhere; presumably the
+// conjugate-twiddle multiply -- confirm) the outputs are, treating the
+// results as (re,im):
+//   *y0 = x0 + cpack( x1' + x2' + x3')
+//   *y1 = x0 + cpack( j*x1' - x2' - j*x3')
+//   *y2 = x0 + cpack(  -x1' + x2' -   x3')
+//   *y3 = x0 + cpack(-j*x1' - x2' + j*x3')
+// All additions/subtractions saturate (vq* intrinsics).
+// *y0 is stored before *x0 is read again, so the outputs must not alias x0.
+static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                          int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+                          int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)
+{
+
+  int32x4_t x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i;
+
+
+  cmultc(*(x1),*(tw1),&x1r_2,&x1i_2);
+  cmultc(*(x2),*(tw2),&x2r_2,&x2i_2);
+  cmultc(*(x3),*(tw3),&x3r_2,&x3i_2);
+
+  // y0: x1' + x2' + x3'
+  dy0r  = vqaddq_s32(x1r_2,vqaddq_s32(x2r_2,x3r_2));
+  dy0i  = vqaddq_s32(x1i_2,vqaddq_s32(x2i_2,x3i_2));
+  *(y0) = vqaddq_s16(*(x0),cpack(dy0r,dy0i));
+  // y3: -j*x1' - x2' + j*x3'
+  dy3r  = vqsubq_s32(x1i_2,vqaddq_s32(x2r_2,x3i_2));
+  dy3i  = vqsubq_s32(vqsubq_s32(x3r_2,x2i_2),x1r_2);
+  *(y3) = vqaddq_s16(*(x0),cpack(dy3r,dy3i));
+  // y2: -x1' + x2' - x3'
+  dy2r  = vqsubq_s32(vqsubq_s32(x2r_2,x3r_2),x1r_2);
+  dy2i  = vqsubq_s32(vqsubq_s32(x2i_2,x3i_2),x1i_2);
+  *(y2) = vqaddq_s16(*(x0),cpack(dy2r,dy2i));
+  // y1: j*x1' - x2' - j*x3'
+  dy1r  = vqsubq_s32(vqsubq_s32(x3i_2,x2r_2),x1i_2);
+  dy1i  = vqsubq_s32(x1r_2,vqaddq_s32(x2i_2,x3r_2));
+  *(y1) = vqaddq_s16(*(x0),cpack(dy1r,dy1i));
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static inline void bfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                             __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)__attribute__((always_inline));
+
+// Radix-4 butterfly without input twiddles (SSE), saturating int16 only.
+//   *y0 = (x0 + x2) + (x1 + x3)
+//   *y2 = (x0 + x2) - (x1 + x3)
+//   *y1 = (x0 - x2) + (x1f - x3f)
+//   *y3 = (x0 - x2) - (x1f - x3f)
+// where xkf is xk sign-adjusted with the conjugatedft table (defined
+// elsewhere) and with the two int16 halves of every 32-bit element swapped;
+// presumably this is the multiplication by -j -- confirm against conjugatedft.
+// *y0 and *y2 are stored before the inputs are read again, so the outputs
+// must not alias the inputs.
+static inline void bfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                             __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)
+{
+  __m128i even_sum, odd_sum, even_diff, odd_diff, x1_rot, x3_rot;
+  // byte shuffle that swaps the low and high int16 of each 32-bit element
+  __m128i swap_halves = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  even_sum = _mm_adds_epi16(*x0, *x2);
+  odd_sum  = _mm_adds_epi16(*x1, *x3);
+  *y0 = _mm_adds_epi16(even_sum, odd_sum);
+  *y2 = _mm_subs_epi16(even_sum, odd_sum);
+
+  x1_rot = _mm_shuffle_epi8(_mm_sign_epi16(*x1, *(__m128i*)conjugatedft), swap_halves);
+  x3_rot = _mm_shuffle_epi8(_mm_sign_epi16(*x3, *(__m128i*)conjugatedft), swap_halves);
+
+  even_diff = _mm_subs_epi16(*x0, *x2);
+  odd_diff  = _mm_subs_epi16(x1_rot, x3_rot);
+  *y1 = _mm_adds_epi16(even_diff, odd_diff);  // x0 + x1f - x2 - x3f
+  *y3 = _mm_subs_epi16(even_diff, odd_diff);  // x0 - x1f - x2 + x3f
+}
+
+#ifdef __AVX2__
+
+static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                                 __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3)__attribute__((always_inline));
+
+// AVX2 radix-4 butterfly without input twiddles, saturating int16 only.
+//   *y0 = (x0 + x2) + (x1 + x3)
+//   *y2 = (x0 + x2) - (x1 + x3)
+//   *y1 = (x0 - x2) + (x1f - x3f)
+//   *y3 = (x0 - x2) - (x1f - x3f)
+// where xkf is xk sign-adjusted with the conjugatedft table (defined
+// elsewhere, read here as 32 bytes) and with the two int16 halves of every
+// 32-bit element swapped; presumably the multiplication by -j -- confirm.
+// *y0 and *y2 are stored before the inputs are read again, so the outputs
+// must not alias the inputs.
+static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                                 __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3)
+{
+  __m256i even_sum, odd_sum, even_diff, odd_diff, x1_rot, x3_rot;
+  // byte shuffle that swaps the low and high int16 of each 32-bit element
+  __m256i swap_halves = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  even_sum = _mm256_adds_epi16(*x0, *x2);
+  odd_sum  = _mm256_adds_epi16(*x1, *x3);
+  *y0 = _mm256_adds_epi16(even_sum, odd_sum);
+  *y2 = _mm256_subs_epi16(even_sum, odd_sum);
+
+  x1_rot = _mm256_shuffle_epi8(_mm256_sign_epi16(*x1, *(__m256i*)conjugatedft), swap_halves);
+  x3_rot = _mm256_shuffle_epi8(_mm256_sign_epi16(*x3, *(__m256i*)conjugatedft), swap_halves);
+
+  even_diff = _mm256_subs_epi16(*x0, *x2);
+  odd_diff  = _mm256_subs_epi16(x1_rot, x3_rot);
+  *y1 = _mm256_adds_epi16(even_diff, odd_diff);  // x0 + x1f - x2 - x3f
+  *y3 = _mm256_subs_epi16(even_diff, odd_diff);  // x0 - x1f - x2 + x3f
+}
+#endif
+
+#elif defined(__arm__)
+
+static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                             int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)__attribute__((always_inline));
+
+// NEON radix-4 butterfly without input twiddles, saturating int16 only.
+//   *y0 = x0 + (x1 + (x2 + x3))
+//   *y1 = x0 + (x1f - (x2 + x3f))
+//   *y2 = x0 - (x1 - (x2 - x3))
+//   *y3 = x0 - (x1f + (x2 - x3f))
+// where xkf is xk multiplied element-wise by the conjugatedft table (defined
+// elsewhere) and with the int16 pairs of every 32-bit element reversed
+// (vrev32q_s16); presumably the multiplication by -j -- confirm.
+// *y0 is stored before the inputs are read again, so the outputs must not
+// alias the inputs.
+static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+                             int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)
+{
+
+  register int16x8_t x1_flip,x3_flip;
+
+  *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),vqaddq_s16(*(x2),*(x3))));
+  x1_flip = vrev32q_s16(vmulq_s16(*(x1),*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(*(x3),*(int16x8_t*)conjugatedft));
+  *(y1)   = vqaddq_s16(*(x0),vqsubq_s16(x1_flip,vqaddq_s16(*(x2),x3_flip)));
+  *(y2)   = vqsubq_s16(*(x0),vqsubq_s16(*(x1),vqsubq_s16(*(x2),*(x3))));
+  *(y3)   = vqsubq_s16(*(x0),vqaddq_s16(x1_flip,vqsubq_s16(*(x2),x3_flip)));
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static inline void ibfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                              __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)__attribute__((always_inline));
+
+// Inverse-transform radix-4 butterfly without input twiddles (SSE),
+// saturating int16 only.
+//   *y0 = x0 + (x1 + (x2 + x3))
+//   *y1 = x0 - (x1f + (x2 - x3f))
+//   *y2 = x0 - (x1 - (x2 - x3))
+//   *y3 = x0 + (x1f - (x2 + x3f))
+// where xkf is xk sign-adjusted with the conjugatedft table (defined
+// elsewhere) and with the two int16 halves of every 32-bit element swapped.
+// Compared with bfly4_tw1 the y1/y3 expressions are exchanged.
+// *y0 is stored before the inputs are read again, so the outputs must not
+// alias the inputs.
+static inline void ibfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                              __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)
+{
+  __m128i x1_rot, x3_rot;
+  // byte shuffle that swaps the low and high int16 of each 32-bit element
+  __m128i swap_halves = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  *y0 = _mm_adds_epi16(*x0, _mm_adds_epi16(*x1, _mm_adds_epi16(*x2, *x3)));
+
+  x1_rot = _mm_shuffle_epi8(_mm_sign_epi16(*x1, *(__m128i*)conjugatedft), swap_halves);
+  x3_rot = _mm_shuffle_epi8(_mm_sign_epi16(*x3, *(__m128i*)conjugatedft), swap_halves);
+
+  *y1 = _mm_subs_epi16(*x0, _mm_adds_epi16(x1_rot, _mm_subs_epi16(*x2, x3_rot)));
+  *y2 = _mm_subs_epi16(*x0, _mm_subs_epi16(*x1, _mm_subs_epi16(*x2, *x3)));
+  *y3 = _mm_adds_epi16(*x0, _mm_subs_epi16(x1_rot, _mm_adds_epi16(*x2, x3_rot)));
+}
+
+
+
+#elif defined(__arm__)
+static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+			      int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)__attribute__((always_inline));
+
+// NEON inverse-transform radix-4 butterfly without input twiddles,
+// saturating int16 only.
+//   *y0 = x0 + (x1 + (x2 + x3))
+//   *y1 = x0 - (x1f + (x2 - x3f))
+//   *y2 = x0 - (x1 - (x2 - x3))
+//   *y3 = x0 + (x1f - (x2 + x3f))
+// where xkf is xk multiplied element-wise by the conjugatedft table (defined
+// elsewhere) and with the int16 pairs of every 32-bit element reversed.
+// Compared with the NEON bfly4_tw1 the y1/y3 expressions are exchanged.
+// *y0 is stored before the inputs are read again, so the outputs must not
+// alias the inputs.
+static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+			      int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)
+{
+
+  register int16x8_t x1_flip,x3_flip;
+
+  *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),vqaddq_s16(*(x2),*(x3))));
+  x1_flip = vrev32q_s16(vmulq_s16(*(x1),*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(*(x3),*(int16x8_t*)conjugatedft));
+  *(y1)   = vqsubq_s16(*(x0),vqaddq_s16(x1_flip,vqsubq_s16(*(x2),x3_flip)));
+  *(y2)   = vqsubq_s16(*(x0),vqsubq_s16(*(x1),vqsubq_s16(*(x2),*(x3))));
+  *(y3)   = vqaddq_s16(*(x0),vqsubq_s16(x1_flip,vqaddq_s16(*(x2),x3_flip)));
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                            __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                            __m128i *tw1,__m128i *tw2,__m128i *tw3,
+                            __m128i *tw1b,__m128i *tw2b,__m128i *tw3b)__attribute__((always_inline));
+
+// Radix-4 butterfly operating entirely on packed int16 (SSE).
+// Inputs x1..x3 are first multiplied by their twiddles with packed_cmult2()
+// (defined elsewhere; twk/twkb are presumably two arrangements of the same
+// twiddle -- confirm), then the twiddle-free radix-4 combination is applied:
+//   *y0 = (x0 + x2t) + (x1t + x3t)
+//   *y2 = (x0 + x2t) - (x1t + x3t)
+//   *y1 = (x0 - x2t) + (x1f - x3f)
+//   *y3 = (x0 - x2t) - (x1f - x3f)
+// where xkf is xkt sign-adjusted with the conjugatedft table and with the two
+// int16 halves of every 32-bit element swapped.  All adds/subs saturate.
+// *y0 and *y2 are stored before *x0 is read again, so they must not alias x0.
+static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+                            __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+                            __m128i *tw1,__m128i *tw2,__m128i *tw3,
+                            __m128i *tw1b,__m128i *tw2b,__m128i *tw3b)
+{
+  __m128i x1_tw, x2_tw, x3_tw;
+  __m128i even_part, odd_part;
+  __m128i x1_rot, x3_rot;
+  // byte shuffle that swaps the low and high int16 of each 32-bit element
+  __m128i swap_halves = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  x1_tw = packed_cmult2(*x1, *tw1, *tw1b);
+  x2_tw = packed_cmult2(*x2, *tw2, *tw2b);
+  x3_tw = packed_cmult2(*x3, *tw3, *tw3b);
+
+  even_part = _mm_adds_epi16(*x0, x2_tw);
+  odd_part  = _mm_adds_epi16(x1_tw, x3_tw);
+  *y0 = _mm_adds_epi16(even_part, odd_part);
+  *y2 = _mm_subs_epi16(even_part, odd_part);
+
+  x1_rot = _mm_shuffle_epi8(_mm_sign_epi16(x1_tw, *(__m128i*)conjugatedft), swap_halves);
+  x3_rot = _mm_shuffle_epi8(_mm_sign_epi16(x3_tw, *(__m128i*)conjugatedft), swap_halves);
+
+  even_part = _mm_subs_epi16(*x0, x2_tw);
+  odd_part  = _mm_subs_epi16(x1_rot, x3_rot);
+  *y1 = _mm_adds_epi16(even_part, odd_part);  // x0 + x1f - x2 - x3f
+  *y3 = _mm_subs_epi16(even_part, odd_part);  // x0 - x1f - x2 + x3f
+}
+
+
+#ifdef __AVX2__
+// 256-bit (AVX2) form of bfly4_16: radix-4 butterfly with input twiddles.
+// Each argument points to one __m256i, i.e. twice as many int16 lanes as the
+// 128-bit version; all lanes are processed independently with saturation.
+//   twN, twNb : the two twiddle operands passed to packed_cmult2_256() for xN
+// NOTE(review): conjugatedft is read as a full __m256i here, so it must provide
+// at least 32 bytes - confirm at its definition.
+static inline __attribute__((always_inline))
+void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                  __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                  __m256i *tw1,__m256i *tw2,__m256i *tw3,
+                  __m256i *tw1b,__m256i *tw2b,__m256i *tw3b)
+{
+  // swaps the two int16 halves of every 32-bit element (shuffle acts per 128-bit lane)
+  const __m256i swap16 = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  __m256i a1,a2,a3;     // x1, x2, x3 after twiddle multiplication
+  __m256i a1f,a3f;      // a1, a3 after sign mask + half swap
+  __m256i s02,s13;      // partial sums / differences
+
+  a1 = packed_cmult2_256(*x1,*tw1,*tw1b);
+  a2 = packed_cmult2_256(*x2,*tw2,*tw2b);
+  a3 = packed_cmult2_256(*x3,*tw3,*tw3b);
+
+  s02 = _mm256_adds_epi16(*x0,a2);
+  s13 = _mm256_adds_epi16(a1,a3);
+  *y0 = _mm256_adds_epi16(s02,s13);  // x0 + x1 + x2 + x3
+  *y2 = _mm256_subs_epi16(s02,s13);  // x0 - x1 + x2 - x3
+
+  a1f = _mm256_sign_epi16(a1,*(__m256i*)conjugatedft);
+  a1f = _mm256_shuffle_epi8(a1f,swap16);
+  a3f = _mm256_sign_epi16(a3,*(__m256i*)conjugatedft);
+  a3f = _mm256_shuffle_epi8(a3f,swap16);
+
+  s02 = _mm256_subs_epi16(*x0,a2);
+  s13 = _mm256_subs_epi16(a1f,a3f);
+  *y1 = _mm256_adds_epi16(s02,s13);  // x0 + x1f - x2 - x3f
+  *y3 = _mm256_subs_epi16(s02,s13);  // x0 - x1f - x2 + x3f
+}
+
+#endif
+
+#elif defined(__arm__)
+
+// Radix-4 DFT butterfly with input twiddles (ARM NEON, int16x8_t vectors).
+//   x0..x3     : input vectors; x1..x3 are first multiplied by their twiddles
+//   y0..y3     : output vectors
+//   twN, twNb  : the two twiddle operands passed to packed_cmult2() for xN
+// Adds/subtracts saturate (vqaddq/vqsubq). In the formulas on the right, x1..x3
+// are the twiddled inputs and xNf is xN multiplied lane-wise by conjugatedft
+// with the two int16 of each 32-bit pair swapped (vrev32q_s16).
+static inline __attribute__((always_inline))
+void bfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+              int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+              int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,
+              int16x8_t *tw1b,int16x8_t *tw2b,int16x8_t *tw3b)
+{
+  int16x8_t a1,a2,a3;            // x1, x2, x3 after twiddle multiplication
+  int16x8_t a1f,a3f,s02,s13;     // flipped copies; partial sums / differences
+
+  a1 = packed_cmult2(*x1,*tw1,*tw1b);
+  a2 = packed_cmult2(*x2,*tw2,*tw2b);
+  a3 = packed_cmult2(*x3,*tw3,*tw3b);
+
+  s02 = vqaddq_s16(*x0,a2);
+  s13 = vqaddq_s16(a1,a3);
+  *y0 = vqaddq_s16(s02,s13);  // x0 + x1 + x2 + x3
+  *y2 = vqsubq_s16(s02,s13);  // x0 - x1 + x2 - x3
+  a1f = vrev32q_s16(vmulq_s16(a1,*(int16x8_t*)conjugatedft));
+  a3f = vrev32q_s16(vmulq_s16(a3,*(int16x8_t*)conjugatedft));
+  s02 = vqsubq_s16(*x0,a2);
+  s13 = vqsubq_s16(a1f,a3f);
+  *y1 = vqaddq_s16(s02,s13);  // x0 + x1f - x2 - x3f
+  *y3 = vqsubq_s16(s02,s13);  // x0 - x1f - x2 + x3f
+}
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+// Radix-4 butterfly with input twiddles (SSE, 128-bit vectors of int16); the
+// "i" prefix presumably marks the inverse-DFT variant. It performs the same
+// arithmetic as bfly4_16 but stores the two odd results to y3 and y1 instead
+// of y1 and y3.
+//   x0..x3     : input vectors; x1..x3 are first multiplied by their twiddles
+//   y0..y3     : output vectors
+//   twN, twNb  : the two twiddle operands passed to packed_cmult2() for xN
+// Every add/subtract saturates at 16 bits (ignored in the formulas below).
+// NOTE: *x0 is read again after *y0 and *y2 have been stored.
+static inline __attribute__((always_inline))
+void ibfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3,
+               __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3,
+               __m128i *tw1,__m128i *tw2,__m128i *tw3,
+               __m128i *tw1b,__m128i *tw2b,__m128i *tw3b)
+{
+  // byte shuffle swapping the two int16 halves of every 32-bit element
+  const __m128i swap16 = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  __m128i a1,a2,a3;     // x1, x2, x3 after twiddle multiplication
+  __m128i a1f,a3f;      // a1, a3 after sign mask + half swap
+  __m128i s02,s13;      // partial sums / differences
+
+  // twiddle the three non-trivial inputs
+  a1 = packed_cmult2(*x1,*tw1,*tw1b);
+  a2 = packed_cmult2(*x2,*tw2,*tw2b);
+  a3 = packed_cmult2(*x3,*tw3,*tw3b);
+
+  // even-indexed outputs
+  s02 = _mm_adds_epi16(*x0,a2);
+  s13 = _mm_adds_epi16(a1,a3);
+  *y0 = _mm_adds_epi16(s02,s13);  // x0 + x1 + x2 + x3
+  *y2 = _mm_subs_epi16(s02,s13);  // x0 - x1 + x2 - x3
+
+  // x1f / x3f: apply the conjugatedft sign mask, then swap the two halves
+  // of each 32-bit element
+  a1f = _mm_sign_epi16(a1,*(__m128i*)conjugatedft);
+  a1f = _mm_shuffle_epi8(a1f,swap16);
+  a3f = _mm_sign_epi16(a3,*(__m128i*)conjugatedft);
+  a3f = _mm_shuffle_epi8(a3f,swap16);
+
+  // odd-indexed outputs (y3 is stored before y1)
+  s02 = _mm_subs_epi16(*x0,a2);
+  s13 = _mm_subs_epi16(a1f,a3f);
+  *y3 = _mm_adds_epi16(s02,s13);  // x0 + x1f - x2 - x3f
+  *y1 = _mm_subs_epi16(s02,s13);  // x0 - x1f - x2 + x3f
+
+}
+
+#ifdef __AVX2__
+// 256-bit (AVX2) form of ibfly4_16: radix-4 butterfly with input twiddles whose
+// odd results go to y3 (sum) and y1 (difference). Each argument points to one
+// __m256i; all int16 lanes are processed independently with saturation.
+//   twN, twNb : the two twiddle operands passed to packed_cmult2_256() for xN
+// NOTE(review): conjugatedft is read as a full __m256i here, so it must provide
+// at least 32 bytes - confirm at its definition.
+static inline __attribute__((always_inline))
+void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3,
+                   __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3,
+                   __m256i *tw1,__m256i *tw2,__m256i *tw3,
+                   __m256i *tw1b,__m256i *tw2b,__m256i *tw3b)
+{
+  // swaps the two int16 halves of every 32-bit element (shuffle acts per 128-bit lane)
+  const __m256i swap16 = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  __m256i a1,a2,a3;     // x1, x2, x3 after twiddle multiplication
+  __m256i a1f,a3f;      // a1, a3 after sign mask + half swap
+  __m256i s02,s13;      // partial sums / differences
+
+  a1 = packed_cmult2_256(*x1,*tw1,*tw1b);
+  a2 = packed_cmult2_256(*x2,*tw2,*tw2b);
+  a3 = packed_cmult2_256(*x3,*tw3,*tw3b);
+
+  s02 = _mm256_adds_epi16(*x0,a2);
+  s13 = _mm256_adds_epi16(a1,a3);
+  *y0 = _mm256_adds_epi16(s02,s13);  // x0 + x1 + x2 + x3
+  *y2 = _mm256_subs_epi16(s02,s13);  // x0 - x1 + x2 - x3
+
+  a1f = _mm256_sign_epi16(a1,*(__m256i*)conjugatedft);
+  a1f = _mm256_shuffle_epi8(a1f,swap16);
+  a3f = _mm256_sign_epi16(a3,*(__m256i*)conjugatedft);
+  a3f = _mm256_shuffle_epi8(a3f,swap16);
+
+  s02 = _mm256_subs_epi16(*x0,a2);
+  s13 = _mm256_subs_epi16(a1f,a3f);
+  *y3 = _mm256_adds_epi16(s02,s13);  // x0 + x1f - x2 - x3f
+  *y1 = _mm256_subs_epi16(s02,s13);  // x0 - x1f - x2 + x3f
+}
+#endif
+
+#elif defined(__arm__)
+// Radix-4 butterfly with input twiddles (ARM NEON); the "i" prefix presumably
+// marks the inverse-DFT variant: odd results go to y3 (sum) and y1 (difference).
+// xNf below is the twiddled xN multiplied lane-wise by conjugatedft with the two
+// int16 of each 32-bit pair swapped; saturation is ignored in the formulas.
+static inline __attribute__((always_inline))
+void ibfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3,
+               int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3,
+               int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,
+               int16x8_t *tw1b,int16x8_t *tw2b,int16x8_t *tw3b)
+{
+  int16x8_t a1,a2,a3;            // x1, x2, x3 after twiddle multiplication
+  int16x8_t a1f,a3f,s02,s13;     // flipped copies; partial sums / differences
+
+  a1 = packed_cmult2(*x1,*tw1,*tw1b);
+  a2 = packed_cmult2(*x2,*tw2,*tw2b);
+  a3 = packed_cmult2(*x3,*tw3,*tw3b);
+
+  s02 = vqaddq_s16(*x0,a2);
+  s13 = vqaddq_s16(a1,a3);
+  *y0 = vqaddq_s16(s02,s13);  // x0 + x1 + x2 + x3
+  *y2 = vqsubq_s16(s02,s13);  // x0 - x1 + x2 - x3
+
+  a1f = vrev32q_s16(vmulq_s16(a1,*(int16x8_t*)conjugatedft));
+  a3f = vrev32q_s16(vmulq_s16(a3,*(int16x8_t*)conjugatedft));
+  s02 = vqsubq_s16(*x0,a2);
+  s13 = vqsubq_s16(a1f,a3f);
+  *y3 = vqaddq_s16(s02,s13);  // x0 + x1f - x2 - x3f
+  *y1 = vqsubq_s16(s02,s13);  // x0 - x1f - x2 + x3f
+}
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+// Radix-5 DFT butterfly with input twiddles (SSE, 128-bit int16 vectors): x1..x4
+// are multiplied by tw1..tw4, y0 is the saturating sum of x0 and the products,
+// and y1..y4 add x0 to a cmult()/cmac() accumulation of the products against the
+// file-level constants W15..W45 (presumably radix-5 roots of unity - TODO confirm).
+static inline __attribute__((always_inline))
+void bfly5(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4,
+           __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4,
+           __m128i *tw1,__m128i *tw2,__m128i *tw3,__m128i *tw4)
+{
+  __m128i a1,a2,a3,a4,acc_re,acc_im;  // twiddled inputs; cmult/cmac accumulators
+
+  packed_cmult(*x1,*tw1,&a1);
+  packed_cmult(*x2,*tw2,&a2);
+  packed_cmult(*x3,*tw3,&a3);
+  packed_cmult(*x4,*tw4,&a4);
+  *y0 = _mm_adds_epi16(*x0,_mm_adds_epi16(a1,_mm_adds_epi16(a2,_mm_adds_epi16(a3,a4))));
+
+  // y1: constants applied in the order W15 W25 W35 W45
+  cmult(a1,*(W15),&acc_re,&acc_im);
+  cmac(a2,*(W25),&acc_re,&acc_im);
+  cmac(a3,*(W35),&acc_re,&acc_im);
+  cmac(a4,*(W45),&acc_re,&acc_im);
+  *y1 = cpack(acc_re,acc_im);
+  *y1 = _mm_adds_epi16(*x0,*y1);
+
+  // y2: W25 W45 W15 W35
+  cmult(a1,*(W25),&acc_re,&acc_im);
+  cmac(a2,*(W45),&acc_re,&acc_im);
+  cmac(a3,*(W15),&acc_re,&acc_im);
+  cmac(a4,*(W35),&acc_re,&acc_im);
+  *y2 = cpack(acc_re,acc_im);
+  *y2 = _mm_adds_epi16(*x0,*y2);
+
+  // y3: W35 W15 W45 W25
+  cmult(a1,*(W35),&acc_re,&acc_im);
+  cmac(a2,*(W15),&acc_re,&acc_im);
+  cmac(a3,*(W45),&acc_re,&acc_im);
+  cmac(a4,*(W25),&acc_re,&acc_im);
+  *y3 = cpack(acc_re,acc_im);
+  *y3 = _mm_adds_epi16(*x0,*y3);
+
+  // y4: W45 W35 W25 W15
+  cmult(a1,*(W45),&acc_re,&acc_im);
+  cmac(a2,*(W35),&acc_re,&acc_im);
+  cmac(a3,*(W25),&acc_re,&acc_im);
+  cmac(a4,*(W15),&acc_re,&acc_im);
+  *y4 = cpack(acc_re,acc_im);
+  *y4 = _mm_adds_epi16(*x0,*y4);
+}
+
+#ifdef __AVX2__
+
+// Radix-5 DFT butterfly with input twiddles (AVX2, 256-bit int16 vectors): x1..x4
+// are multiplied by tw1..tw4, y0 is the saturating sum of x0 and the products, and
+// y1..y4 add x0 to a cmult_256()/cmac_256() accumulation of the products against
+// W15_256..W45_256 (file-level; presumably radix-5 roots of unity - TODO confirm).
+static inline __attribute__((always_inline))
+void bfly5_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4,
+               __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4,
+               __m256i *tw1,__m256i *tw2,__m256i *tw3,__m256i *tw4)
+{
+  __m256i a1,a2,a3,a4,acc_re,acc_im;  // twiddled inputs; cmult/cmac accumulators
+
+  packed_cmult_256(*x1,*tw1,&a1);
+  packed_cmult_256(*x2,*tw2,&a2);
+  packed_cmult_256(*x3,*tw3,&a3);
+  packed_cmult_256(*x4,*tw4,&a4);
+  *y0 = _mm256_adds_epi16(*x0,_mm256_adds_epi16(a1,_mm256_adds_epi16(a2,_mm256_adds_epi16(a3,a4))));
+
+  // y1: constants applied in the order W15 W25 W35 W45
+  cmult_256(a1,*(W15_256),&acc_re,&acc_im);
+  cmac_256(a2,*(W25_256),&acc_re,&acc_im);
+  cmac_256(a3,*(W35_256),&acc_re,&acc_im);
+  cmac_256(a4,*(W45_256),&acc_re,&acc_im);
+  *y1 = cpack_256(acc_re,acc_im);
+  *y1 = _mm256_adds_epi16(*x0,*y1);
+
+  // y2: W25 W45 W15 W35
+  cmult_256(a1,*(W25_256),&acc_re,&acc_im);
+  cmac_256(a2,*(W45_256),&acc_re,&acc_im);
+  cmac_256(a3,*(W15_256),&acc_re,&acc_im);
+  cmac_256(a4,*(W35_256),&acc_re,&acc_im);
+  *y2 = cpack_256(acc_re,acc_im);
+  *y2 = _mm256_adds_epi16(*x0,*y2);
+
+  // y3: W35 W15 W45 W25
+  cmult_256(a1,*(W35_256),&acc_re,&acc_im);
+  cmac_256(a2,*(W15_256),&acc_re,&acc_im);
+  cmac_256(a3,*(W45_256),&acc_re,&acc_im);
+  cmac_256(a4,*(W25_256),&acc_re,&acc_im);
+  *y3 = cpack_256(acc_re,acc_im);
+  *y3 = _mm256_adds_epi16(*x0,*y3);
+
+  // y4: W45 W35 W25 W15
+  cmult_256(a1,*(W45_256),&acc_re,&acc_im);
+  cmac_256(a2,*(W35_256),&acc_re,&acc_im);
+  cmac_256(a3,*(W25_256),&acc_re,&acc_im);
+  cmac_256(a4,*(W15_256),&acc_re,&acc_im);
+  *y4 = cpack_256(acc_re,acc_im);
+  *y4 = _mm256_adds_epi16(*x0,*y4);
+}
+#endif
+
+#elif defined(__arm__)
+// Radix-5 DFT butterfly with input twiddles (ARM NEON, int16x8_t vectors): x1..x4
+// are multiplied by tw1..tw4, y0 is the saturating sum of x0 and the products,
+// and y1..y4 add x0 to a cmult()/cmac() accumulation of the products against the
+// file-level constants W15..W45 (presumably radix-5 roots of unity - TODO confirm).
+static inline __attribute__((always_inline))
+void bfly5(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4,
+           int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4,
+           int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,int16x8_t *tw4)
+{
+  int16x8_t a1,a2,a3,a4;    // x1..x4 after twiddle multiplication
+  int32x4_t acc_re,acc_im;  // 32-bit accumulators filled by cmult()/cmac()
+
+  packed_cmult(*x1,*tw1,&a1);
+  packed_cmult(*x2,*tw2,&a2);
+  packed_cmult(*x3,*tw3,&a3);
+  packed_cmult(*x4,*tw4,&a4);
+  *y0 = vqaddq_s16(*x0,vqaddq_s16(a1,vqaddq_s16(a2,vqaddq_s16(a3,a4))));
+
+  // y1: constants applied in the order W15 W25 W35 W45
+  cmult(a1,*(W15),&acc_re,&acc_im);
+  cmac(a2,*(W25),&acc_re,&acc_im);
+  cmac(a3,*(W35),&acc_re,&acc_im);
+  cmac(a4,*(W45),&acc_re,&acc_im);
+  *y1 = cpack(acc_re,acc_im);
+  *y1 = vqaddq_s16(*x0,*y1);
+
+  // y2: W25 W45 W15 W35
+  cmult(a1,*(W25),&acc_re,&acc_im);
+  cmac(a2,*(W45),&acc_re,&acc_im);
+  cmac(a3,*(W15),&acc_re,&acc_im);
+  cmac(a4,*(W35),&acc_re,&acc_im);
+  *y2 = cpack(acc_re,acc_im);
+  *y2 = vqaddq_s16(*x0,*y2);
+
+  // y3: W35 W15 W45 W25
+  cmult(a1,*(W35),&acc_re,&acc_im);
+  cmac(a2,*(W15),&acc_re,&acc_im);
+  cmac(a3,*(W45),&acc_re,&acc_im);
+  cmac(a4,*(W25),&acc_re,&acc_im);
+  *y3 = cpack(acc_re,acc_im);
+  *y3 = vqaddq_s16(*x0,*y3);
+
+  // y4: W45 W35 W25 W15
+  cmult(a1,*(W45),&acc_re,&acc_im);
+  cmac(a2,*(W35),&acc_re,&acc_im);
+  cmac(a3,*(W25),&acc_re,&acc_im);
+  cmac(a4,*(W15),&acc_re,&acc_im);
+  *y4 = cpack(acc_re,acc_im);
+  *y4 = vqaddq_s16(*x0,*y4);
+}
+
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+// Radix-5 DFT butterfly without input twiddles (SSE, 128-bit int16 vectors).
+// y0 is the saturating sum of x0..x4; y1..y4 add x0 to a cmult()/cmac() accumulation
+// of x1..x4 against W15..W45 (file-level; presumably radix-5 roots of unity - TODO confirm).
+static inline __attribute__((always_inline))
+void bfly5_tw1(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4,
+               __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4)
+{
+  __m128i acc_re,acc_im;   // accumulators filled by cmult()/cmac(), packed by cpack()
+
+  *y0 = _mm_adds_epi16(*x0,_mm_adds_epi16(*x1,_mm_adds_epi16(*x2,_mm_adds_epi16(*x3,*x4))));
+  cmult(*x1,*(W15),&acc_re,&acc_im);      // y1: W15 W25 W35 W45
+  cmac(*x2,*(W25),&acc_re,&acc_im);
+  cmac(*x3,*(W35),&acc_re,&acc_im);
+  cmac(*x4,*(W45),&acc_re,&acc_im);
+  *y1 = cpack(acc_re,acc_im);
+  *y1 = _mm_adds_epi16(*x0,*y1);
+  cmult(*x1,*(W25),&acc_re,&acc_im);      // y2: W25 W45 W15 W35
+  cmac(*x2,*(W45),&acc_re,&acc_im);
+  cmac(*x3,*(W15),&acc_re,&acc_im);
+  cmac(*x4,*(W35),&acc_re,&acc_im);
+  *y2 = cpack(acc_re,acc_im);
+  *y2 = _mm_adds_epi16(*x0,*y2);
+  cmult(*x1,*(W35),&acc_re,&acc_im);      // y3: W35 W15 W45 W25
+  cmac(*x2,*(W15),&acc_re,&acc_im);
+  cmac(*x3,*(W45),&acc_re,&acc_im);
+  cmac(*x4,*(W25),&acc_re,&acc_im);
+  *y3 = cpack(acc_re,acc_im);
+  *y3 = _mm_adds_epi16(*x0,*y3);
+  cmult(*x1,*(W45),&acc_re,&acc_im);      // y4: W45 W35 W25 W15
+  cmac(*x2,*(W35),&acc_re,&acc_im);
+  cmac(*x3,*(W25),&acc_re,&acc_im);
+  cmac(*x4,*(W15),&acc_re,&acc_im);
+  *y4 = cpack(acc_re,acc_im);
+  *y4 = _mm_adds_epi16(*x0,*y4);
+}
+
+#ifdef __AVX2__
+// Radix-5 DFT butterfly without input twiddles (AVX2, 256-bit int16 vectors).
+// y0 is the saturating sum of x0..x4; y1..y4 add x0 to a cmult_256()/cmac_256() accumulation
+// of x1..x4 against W15_256..W45_256 (file-level; presumably radix-5 roots of unity).
+static inline __attribute__((always_inline))
+void bfly5_tw1_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4,
+                   __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4)
+{
+  __m256i acc_re,acc_im;   // accumulators filled by cmult_256()/cmac_256()
+
+  *y0 = _mm256_adds_epi16(*x0,_mm256_adds_epi16(*x1,_mm256_adds_epi16(*x2,_mm256_adds_epi16(*x3,*x4))));
+  cmult_256(*x1,*(W15_256),&acc_re,&acc_im);      // y1: W15 W25 W35 W45
+  cmac_256(*x2,*(W25_256),&acc_re,&acc_im);
+  cmac_256(*x3,*(W35_256),&acc_re,&acc_im);
+  cmac_256(*x4,*(W45_256),&acc_re,&acc_im);
+  *y1 = cpack_256(acc_re,acc_im);
+  *y1 = _mm256_adds_epi16(*x0,*y1);
+  cmult_256(*x1,*(W25_256),&acc_re,&acc_im);      // y2: W25 W45 W15 W35
+  cmac_256(*x2,*(W45_256),&acc_re,&acc_im);
+  cmac_256(*x3,*(W15_256),&acc_re,&acc_im);
+  cmac_256(*x4,*(W35_256),&acc_re,&acc_im);
+  *y2 = cpack_256(acc_re,acc_im);
+  *y2 = _mm256_adds_epi16(*x0,*y2);
+  cmult_256(*x1,*(W35_256),&acc_re,&acc_im);      // y3: W35 W15 W45 W25
+  cmac_256(*x2,*(W15_256),&acc_re,&acc_im);
+  cmac_256(*x3,*(W45_256),&acc_re,&acc_im);
+  cmac_256(*x4,*(W25_256),&acc_re,&acc_im);
+  *y3 = cpack_256(acc_re,acc_im);
+  *y3 = _mm256_adds_epi16(*x0,*y3);
+  cmult_256(*x1,*(W45_256),&acc_re,&acc_im);      // y4: W45 W35 W25 W15
+  cmac_256(*x2,*(W35_256),&acc_re,&acc_im);
+  cmac_256(*x3,*(W25_256),&acc_re,&acc_im);
+  cmac_256(*x4,*(W15_256),&acc_re,&acc_im);
+  *y4 = cpack_256(acc_re,acc_im);
+  *y4 = _mm256_adds_epi16(*x0,*y4);
+}
+#endif
+#elif defined(__arm__)
+// Radix-5 DFT butterfly without input twiddles (ARM NEON, int16x8_t vectors).
+// y0 is the saturating sum of x0..x4; y1..y4 add x0 to a cmult()/cmac() accumulation
+// of x1..x4 against W15..W45 (file-level; presumably radix-5 roots of unity - TODO confirm).
+static inline __attribute__((always_inline))
+void bfly5_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4,
+               int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4)
+{
+  int32x4_t acc_re,acc_im;   // 32-bit accumulators filled by cmult()/cmac()
+
+  *y0 = vqaddq_s16(*x0,vqaddq_s16(*x1,vqaddq_s16(*x2,vqaddq_s16(*x3,*x4))));
+  cmult(*x1,*(W15),&acc_re,&acc_im);      // y1: W15 W25 W35 W45
+  cmac(*x2,*(W25),&acc_re,&acc_im);
+  cmac(*x3,*(W35),&acc_re,&acc_im);
+  cmac(*x4,*(W45),&acc_re,&acc_im);
+  *y1 = cpack(acc_re,acc_im);
+  *y1 = vqaddq_s16(*x0,*y1);
+  cmult(*x1,*(W25),&acc_re,&acc_im);      // y2: W25 W45 W15 W35
+  cmac(*x2,*(W45),&acc_re,&acc_im);
+  cmac(*x3,*(W15),&acc_re,&acc_im);
+  cmac(*x4,*(W35),&acc_re,&acc_im);
+  *y2 = cpack(acc_re,acc_im);
+  *y2 = vqaddq_s16(*x0,*y2);
+  cmult(*x1,*(W35),&acc_re,&acc_im);      // y3: W35 W15 W45 W25
+  cmac(*x2,*(W15),&acc_re,&acc_im);
+  cmac(*x3,*(W45),&acc_re,&acc_im);
+  cmac(*x4,*(W25),&acc_re,&acc_im);
+  *y3 = cpack(acc_re,acc_im);
+  *y3 = vqaddq_s16(*x0,*y3);
+  cmult(*x1,*(W45),&acc_re,&acc_im);      // y4: W45 W35 W25 W15
+  cmac(*x2,*(W35),&acc_re,&acc_im);
+  cmac(*x3,*(W25),&acc_re,&acc_im);
+  cmac(*x4,*(W15),&acc_re,&acc_im);
+  *y4 = cpack(acc_re,acc_im);
+  *y4 = vqaddq_s16(*x0,*y4);
+}
+
+#endif
+// performs 4x4 transpose of input x (complex interleaved) using 128bit SIMD intrinsics
+// i.e. x = [x0r x0i x1r x1i ... x15r x15i], y = [x0r x0i x4r x4i x8r x8i x12r x12i x1r x1i x5r x5i x9r x9i x13r x13i x2r x2i ... x15r x15i]
+
+#if defined(__x86_64__) || defined(__i386__)
+// 4x4 transpose of 32-bit elements (SSE2): x[0..3] are the rows, y[k] receives
+// element k of x[0], x[1], x[2], x[3]. All rows are loaded before any store.
+static inline __attribute__((always_inline))
+void transpose16(__m128i *x,__m128i *y)
+{
+  const __m128i r01_lo = _mm_unpacklo_epi32(x[0],x[1]);  // x00 x10 x01 x11
+  const __m128i r01_hi = _mm_unpackhi_epi32(x[0],x[1]);  // x02 x12 x03 x13
+  const __m128i r23_lo = _mm_unpacklo_epi32(x[2],x[3]);  // x20 x30 x21 x31
+  const __m128i r23_hi = _mm_unpackhi_epi32(x[2],x[3]);  // x22 x32 x23 x33
+  y[0] = _mm_unpacklo_epi64(r01_lo,r23_lo);              // x00 x10 x20 x30
+  y[1] = _mm_unpackhi_epi64(r01_lo,r23_lo);              // x01 x11 x21 x31
+  y[2] = _mm_unpacklo_epi64(r01_hi,r23_hi);              // x02 x12 x22 x32
+  y[3] = _mm_unpackhi_epi64(r01_hi,r23_hi);              // x03 x13 x23 x33
+}
+
+#elif defined(__arm__)
+// 4x4 transpose of 32-bit elements (NEON): y[k] = element k of x[0], x[1], x[2], x[3].
+// vtrnq_u32(a,b) yields val[0] = {a0,b0,a2,b2} and val[1] = {a1,b1,a3,b3}, so column 1
+// is the LOW half of val[1] and column 2 the HIGH half of val[0] (these two were swapped).
+static inline __attribute__((always_inline))
+void transpose16(int16x8_t *x,int16x8_t *y)
+{
+  uint32x4x2_t t01 = vtrnq_u32((uint32x4_t)(x[0]),(uint32x4_t)(x[1]));
+  uint32x4x2_t t23 = vtrnq_u32((uint32x4_t)(x[2]),(uint32x4_t)(x[3]));
+  y[0]  = vcombine_s16(vget_low_s16((int16x8_t)t01.val[0]),vget_low_s16((int16x8_t)t23.val[0]));    // column 0
+  y[1]  = vcombine_s16(vget_low_s16((int16x8_t)t01.val[1]),vget_low_s16((int16x8_t)t23.val[1]));    // column 1
+  y[2]  = vcombine_s16(vget_high_s16((int16x8_t)t01.val[0]),vget_high_s16((int16x8_t)t23.val[0]));  // column 2
+  y[3]  = vcombine_s16(vget_high_s16((int16x8_t)t01.val[1]),vget_high_s16((int16x8_t)t23.val[1]));  // column 3
+}
+
+# endif
+// same as above, but consecutive output vectors are stored off vectors apart (y[0], y[off], y[2*off], y[3*off])
+#if defined(__x86_64__) || defined(__i386__)
+// 4x4 transpose of 32-bit elements (SSE2) with strided output: element k of the
+// rows x[0..3] is stored at y[k*off]. All rows are loaded before any store.
+static inline __attribute__((always_inline))
+void transpose16_ooff(__m128i *x,__m128i *y,int off)
+{
+  __m128i *const c0 = y;
+  __m128i *const c1 = c0 + off;
+  __m128i *const c2 = c1 + off;
+  __m128i *const c3 = c2 + off;
+  const __m128i r01_lo = _mm_unpacklo_epi32(x[0],x[1]); // x00 x10 x01 x11
+  const __m128i r01_hi = _mm_unpackhi_epi32(x[0],x[1]); // x02 x12 x03 x13
+  const __m128i r23_lo = _mm_unpacklo_epi32(x[2],x[3]); // x20 x30 x21 x31
+  const __m128i r23_hi = _mm_unpackhi_epi32(x[2],x[3]); // x22 x32 x23 x33
+
+  *c0 = _mm_unpacklo_epi64(r01_lo,r23_lo); // x00 x10 x20 x30
+  *c1 = _mm_unpackhi_epi64(r01_lo,r23_lo); // x01 x11 x21 x31
+  *c2 = _mm_unpacklo_epi64(r01_hi,r23_hi); // x02 x12 x22 x32
+  *c3 = _mm_unpackhi_epi64(r01_hi,r23_hi); // x03 x13 x23 x33
+}
+
+#ifdef __AVX2__
+
+// AVX2 version of transpose16_ooff. Assuming each x[i] packs two 4-element rows
+// (x[0] = x00 x01 x02 x03 x10 x11 x12 x13, ...), the four 256-bit outputs, stored
+// off vectors apart, hold element k of all eight rows (k = 0..3).
+static inline __attribute__((always_inline))
+void transpose16_ooff_simd256(__m256i *x,__m256i *y,int off)
+{
+  __m256i p0,p1,p2,p3,lo01,hi01,lo23,hi23;
+  __m256i *dst=y;
+  const __m256i perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+
+  p0   = _mm256_permutevar8x32_epi32(x[0],perm_mask);  // x00 x10 x02 x12 x01 x11 x03 x13
+  p1   = _mm256_permutevar8x32_epi32(x[1],perm_mask);  // x20 x30 x22 x32 x21 x31 x23 x33
+  p2   = _mm256_permutevar8x32_epi32(x[2],perm_mask);  // x40 x50 x42 x52 x41 x51 x43 x53
+  p3   = _mm256_permutevar8x32_epi32(x[3],perm_mask);  // x60 x70 x62 x72 x61 x71 x63 x73
+  lo01 = _mm256_unpacklo_epi64(p0,p1);                 // x00 x10 x20 x30 x01 x11 x21 x31
+  hi01 = _mm256_unpackhi_epi64(p0,p1);                 // x02 x12 x22 x32 x03 x13 x23 x33
+  lo23 = _mm256_unpacklo_epi64(p2,p3);                 // x40 x50 x60 x70 x41 x51 x61 x71
+  hi23 = _mm256_unpackhi_epi64(p2,p3);                 // x42 x52 x62 x72 x43 x53 x63 x73
+
+  *dst = _mm256_insertf128_si256(lo01,_mm256_extracti128_si256(lo23,0),1); dst+=off;  // x00 x10 x20 x30 x40 x50 x60 x70
+  *dst = _mm256_insertf128_si256(lo23,_mm256_extracti128_si256(lo01,1),0); dst+=off;  // x01 x11 x21 x31 x41 x51 x61 x71
+  *dst = _mm256_insertf128_si256(hi01,_mm256_extracti128_si256(hi23,0),1); dst+=off;  // x02 x12 x22 x32 x42 x52 x62 x72
+  *dst = _mm256_insertf128_si256(hi23,_mm256_extracti128_si256(hi01,1),0);            // x03 x13 x23 x33 x43 x53 x63 x73
+}
+#endif
+
+#elif defined(__arm__)
+// NEON 4x4 transpose of 32-bit elements with strided output: element k of the
+// rows x[0..3] is stored at y[k*off].
+// vtrnq_u32(a,b) yields val[0] = {a0,b0,a2,b2} and val[1] = {a1,b1,a3,b3}.
+static inline __attribute__((always_inline))
+void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off)
+{
+  int16x8_t *dst=y;
+  uint32x4x2_t t01,t23;
+
+  t01 = vtrnq_u32((uint32x4_t)(x[0]),(uint32x4_t)(x[1]));
+  t23 = vtrnq_u32((uint32x4_t)(x[2]),(uint32x4_t)(x[3]));
+
+  *dst = (int16x8_t)vcombine_s16(vget_low_s16((int16x8_t)t01.val[0]),vget_low_s16((int16x8_t)t23.val[0])); dst+=off;    // column 0
+  *dst = (int16x8_t)vcombine_s16(vget_low_s16((int16x8_t)t01.val[1]),vget_low_s16((int16x8_t)t23.val[1])); dst+=off;    // column 1
+  *dst = (int16x8_t)vcombine_s16(vget_high_s16((int16x8_t)t01.val[0]),vget_high_s16((int16x8_t)t23.val[0])); dst+=off;  // column 2
+  *dst = (int16x8_t)vcombine_s16(vget_high_s16((int16x8_t)t01.val[1]),vget_high_s16((int16x8_t)t23.val[1]));            // column 3
+}
+
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+
+// 2x2 transpose of 32-bit elements held in two 64-bit (MMX) vectors:
+//   x[0] = [x0 x1], x[1] = [x2 x3]  ->  y[0] = [x0 x2], y[off] = [x1 x3]
+static inline __attribute__((always_inline))
+void transpose4_ooff(__m64 *x,__m64 *y,int off)
+{
+  __m64 *const first  = y;
+  __m64 *const second = y + off;
+
+  *first  = _mm_unpacklo_pi32(x[0],x[1]);
+  *second = _mm_unpackhi_pi32(x[0],x[1]);   // x is re-read after the first store
+}
+#ifdef __AVX2__
+// AVX2 de-interleave of sixteen 32-bit elements held in x[0], x[1]:
+//   x[0]   = [x0 x1 x2  x3  x4  x5  x6  x7 ]
+//   x[1]   = [x8 x9 x10 x11 x12 x13 x14 x15]
+//   y[0]   = [x0 x2 x4 x6 x8 x10 x12 x14]   (even-indexed elements)
+//   y[off] = [x1 x3 x5 x7 x9 x11 x13 x15]   (odd-indexed elements)
+static inline __attribute__((always_inline))
+void transpose4_ooff_simd256(__m256i *x,__m256i *y,int off)
+{
+  // gathers even-indexed elements into the low 128 bits, odd-indexed into the high
+  const __m256i even_odd = _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0);
+  const __m256i v0 = _mm256_permutevar8x32_epi32(x[0],even_odd);
+  const __m256i v1 = _mm256_permutevar8x32_epi32(x[1],even_odd);
+  y[0]   = _mm256_insertf128_si256(v0,_mm256_extracti128_si256(v1,0),1);
+  y[off] = _mm256_insertf128_si256(v1,_mm256_extracti128_si256(v0,1),0);
+}
+#endif
+#elif (__arm__)
+
+// NEON 2x2 transpose of 32-bit elements: y[0] = {x[0] lo, x[1] lo}, y[off] = {x[0] hi, x[1] hi}
+static inline __attribute__((always_inline))
+void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)
+{
+  const uint32x2x2_t pair = vtrn_u32((uint32x2_t)x[0],(uint32x2_t)x[1]);
+  y[0]   = (int16x4_t)pair.val[0];
+  y[off] = (int16x4_t)pair.val[1];
+}
+
+#endif
+
+// 16-point optimized DFT kernel
+// tw16: 3 rows of 4 interleaved (re,im) int16 pairs; the values match W^(r*k), k=0..3, r=1..3, W=exp(-2*pi*j/16), scaled to ~32767
+const static int16_t tw16[24] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,
+                                                  32767,0,23169,-23170,0     ,-32767,-23170,-23170,
+                                                  32767,0,12539,-30273,-23170,-23170,-30273,12539
+                                                };
+// tw16a: tw16 with every odd-index (imaginary) entry negated; first twiddle operand given to packed_cmult2() in dft16
+const static int16_t tw16a[24] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273,
+                                                  32767,0,23169,23170,0     ,32767,-23170,23170,
+                                                  32767,0,12539,30273,-23170,23170,-30273,-12539
+                                                 };
+// tw16b: tw16 with the two entries of each pair swapped (im,re); second twiddle operand given to packed_cmult2() in dft16
+const static int16_t tw16b[24] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,
+                                                   0,32767,-23170,23169,-32767,0     ,-23170,-23170,
+                                                   0,32767,-30273,12539,-23170,-23170,12539 ,-30273
+                                                 };
+// tw16c: tw16a with the two entries of each pair swapped; presumably the inverse-transform counterpart of tw16b - TODO confirm
+const static int16_t tw16c[24] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,
+                                                   0,32767,23170,23169,32767,0     ,23170 ,-23170,
+                                                   0,32767,30273,12539,23170,-23170,-12539,-30273
+                                                 };
+
+#ifdef __AVX2__
+// tw16rep: tw16 with each 8-entry row written twice back to back, so one row spans 16 int16 (256 bits)
+const static int16_t tw16rep[48] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,
+						     32767,0,23169,-23170,0     ,-32767,-23170,-23170,32767,0,23169,-23170,0     ,-32767,-23170,-23170,
+						     32767,0,12539,-30273,-23170,-23170,-30273,12539,32767,0,12539,-30273,-23170,-23170,-30273,12539
+                                                   };
+// tw16arep: tw16a with each 8-entry row written twice back to back
+const static int16_t tw16arep[48] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273,32767,0,30272,12540,23169 ,23170,12539 ,30273,
+						     32767,0,23169,23170,0     ,32767,-23170,23170,32767,0,23169,23170,0     ,32767,-23170,23170,
+						     32767,0,12539,30273,-23170,23170,-30273,-12539,32767,0,12539,30273,-23170,23170,-30273,-12539
+                                                    }; 
+// tw16brep: tw16b with each 8-entry row written twice back to back
+const static int16_t tw16brep[48] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,0,32767,-12540,30272,-23170,23169 ,-30273,12539,
+                                                      0,32767,-23170,23169,-32767,0     ,-23170,-23170,0,32767,-23170,23169,-32767,0     ,-23170,-23170,
+                                                      0,32767,-30273,12539,-23170,-23170,12539 ,-30273,0,32767,-30273,12539,-23170,-23170,12539 ,-30273
+                                                    };
+// tw16crep: tw16c with each 8-entry row written twice back to back
+const static int16_t tw16crep[48] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,0,32767,12540,30272,23170,23169 ,30273 ,12539,
+						      0,32767,23170,23169,32767,0     ,23170 ,-23170,0,32767,23170,23169,32767,0     ,23170 ,-23170,
+						      0,32767,30273,12539,23170,-23170,-12539,-30273,0,32767,30273,12539,23170,-23170,-12539,-30273
+                                                    };
+
+#endif /* __AVX2__ */
+
+
+
+/*
+ * dft16 - 16-point complex DFT, fully unrolled radix-4 x radix-4.
+ *
+ * x : input,  16 complex samples stored as interleaved int16 (re,im) pairs
+ * y : output, 16 complex samples in the same format
+ *
+ * Both buffers are accessed through 128-bit vector pointers, so they are
+ * assumed to be 16-byte aligned (TODO confirm against callers).
+ * All additions/subtractions are saturating.  No output scaling is applied
+ * here beyond whatever packed_cmult2 does internally.
+ *
+ * The body is the unrolled equivalent of
+ *   bfly4_tw1 (radix-4 butterflies, unit twiddles) -> 4x4 transpose ->
+ *   bfly4_16  (radix-4 butterflies with the tw16a/tw16b twiddles).
+ */
+static inline void dft16(int16_t *x,int16_t *y) __attribute__((always_inline));
+
+static inline void dft16(int16_t *x,int16_t *y)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+
+  __m128i *tw16a_128=(__m128i *)tw16a,*tw16b_128=(__m128i *)tw16b,*x128=(__m128i *)x,*y128=(__m128i *)y;
+
+  register __m128i x1_flip,x3_flip,x02t,x13t;
+  register __m128i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
+  // Byte shuffle that swaps the two int16 halves (re <-> im) of every 32-bit complex sample.
+  // Built once and reused by both stages.
+  register __m128i complex_shuffle = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = _mm_adds_epi16(x128[0],x128[2]);
+  x13t    = _mm_adds_epi16(x128[1],x128[3]);
+  xtmp0   = _mm_adds_epi16(x02t,x13t);
+  xtmp2   = _mm_subs_epi16(x02t,x13t);
+  // "flip" = sign pattern from conjugatedft applied, then re/im swapped
+  x1_flip = _mm_sign_epi16(x128[1],*(__m128i*)conjugatedft);
+  x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm_sign_epi16(x128[3],*(__m128i*)conjugatedft);
+  x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm_subs_epi16(x128[0],x128[2]);
+  x13t    = _mm_subs_epi16(x1_flip,x3_flip);
+  xtmp1   = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp3   = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples (xtmp rows -> xtmp columns)
+  ytmp0   = _mm_unpacklo_epi32(xtmp0,xtmp1);
+  ytmp1   = _mm_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = _mm_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = _mm_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = _mm_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = _mm_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = _mm_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = _mm_unpackhi_epi64(ytmp1,ytmp3);
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (row 0 is not multiplied, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2(xtmp1,tw16a_128[0],tw16b_128[0]);
+  xtmp2 = packed_cmult2(xtmp2,tw16a_128[1],tw16b_128[1]);
+  xtmp3 = packed_cmult2(xtmp3,tw16a_128[2],tw16b_128[2]);
+
+  x02t    = _mm_adds_epi16(xtmp0,xtmp2);
+  x13t    = _mm_adds_epi16(xtmp1,xtmp3);
+  y128[0] = _mm_adds_epi16(x02t,x13t);
+  y128[2] = _mm_subs_epi16(x02t,x13t);
+  x1_flip = _mm_sign_epi16(xtmp1,*(__m128i*)conjugatedft);
+  x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm_sign_epi16(xtmp3,*(__m128i*)conjugatedft);
+  x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm_subs_epi16(xtmp0,xtmp2);
+  x13t    = _mm_subs_epi16(x1_flip,x3_flip);
+  y128[1] = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  y128[3] = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+#elif defined(__arm__)
+
+  int16x8_t *tw16a_128=(int16x8_t *)tw16a,*tw16b_128=(int16x8_t *)tw16b,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y;
+
+  register int16x8_t x1_flip,x3_flip,x02t,x13t;
+  register int16x8_t xtmp0,xtmp1,xtmp2,xtmp3;
+  register uint32x4x2_t ytmp0,ytmp1;
+  register int16x8_t ytmp0b,ytmp1b,ytmp2b,ytmp3b;
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = vqaddq_s16(x128[0],x128[2]);
+  x13t    = vqaddq_s16(x128[1],x128[3]);
+  xtmp0   = vqaddq_s16(x02t,x13t);
+  xtmp2   = vqsubq_s16(x02t,x13t);
+  // multiply by the conjugatedft pattern, then swap re/im inside each 32-bit sample
+  x1_flip = vrev32q_s16(vmulq_s16(x128[1],*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(x128[3],*(int16x8_t*)conjugatedft));
+  x02t    = vqsubq_s16(x128[0],x128[2]);
+  x13t    = vqsubq_s16(x1_flip,x3_flip);
+  xtmp1   = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp3   = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples (xIJ = element J of xtmpI)
+  ytmp0  = vtrnq_u32((uint32x4_t)(xtmp0),(uint32x4_t)(xtmp1));
+// ytmp0.val[0] = [x00 x10 x02 x12], ytmp0.val[1] = [x01 x11 x03 x13]
+  ytmp1  = vtrnq_u32((uint32x4_t)(xtmp2),(uint32x4_t)(xtmp3));
+// ytmp1.val[0] = [x20 x30 x22 x32], ytmp1.val[1] = [x21 x31 x23 x33]
+
+
+  ytmp0b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0]));
+// ytmp0b = [x00 x10 x20 x30]
+  ytmp1b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1]));
+// ytmp1b = [x01 x11 x21 x31]
+  ytmp2b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0]));
+// ytmp2b = [x02 x12 x22 x32]
+  ytmp3b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1]));
+// ytmp3b = [x03 x13 x23 x33]
+
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (ytmp0b is used as-is, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2(ytmp1b,tw16a_128[0],tw16b_128[0]);
+  xtmp2 = packed_cmult2(ytmp2b,tw16a_128[1],tw16b_128[1]);
+  xtmp3 = packed_cmult2(ytmp3b,tw16a_128[2],tw16b_128[2]);
+
+  x02t    = vqaddq_s16(ytmp0b,xtmp2);
+  x13t    = vqaddq_s16(xtmp1,xtmp3);
+  y128[0] = vqaddq_s16(x02t,x13t);
+  y128[2] = vqsubq_s16(x02t,x13t);
+  x1_flip = vrev32q_s16(vmulq_s16(xtmp1,*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(xtmp3,*(int16x8_t*)conjugatedft));
+  x02t    = vqsubq_s16(ytmp0b,xtmp2);
+  x13t    = vqsubq_s16(x1_flip,x3_flip);
+  y128[1] = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  y128[3] = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+
+#endif
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+#ifdef __AVX2__
+// Does two 16-point DFTs in parallel, one per 128-bit lane:
+//   - DFT #0 reads the low  128 bits of x256[0..3] (the "x[0..15]" complex samples)
+//   - DFT #1 reads the high 128 bits of x256[0..3] (the "x[16..31]" complex samples)
+// On output the results are regrouped so that y256[0..1] hold the 16 outputs of
+// DFT #0 and y256[2..3] hold the 16 outputs of DFT #1.
+// x and y are accessed through __m256i pointers and are therefore assumed to be
+// 32-byte aligned (TODO confirm against callers).  All adds/subs saturate.
+static inline void dft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline));
+static inline void dft16_simd256(int16_t *x,int16_t *y)
+{
+
+  __m256i *tw16a_256=(__m256i *)tw16arep,*tw16b_256=(__m256i *)tw16brep,*x256=(__m256i *)x,*y256=(__m256i *)y;
+
+  __m256i x1_flip,x3_flip,x02t,x13t;
+  __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
+  // Swaps the two int16 halves (re <-> im) of every 32-bit complex sample.
+  // _mm256_shuffle_epi8 works per 128-bit lane, hence the pattern repeats in both halves.
+  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = _mm256_adds_epi16(x256[0],x256[2]);
+  x13t    = _mm256_adds_epi16(x256[1],x256[3]);
+  xtmp0   = _mm256_adds_epi16(x02t,x13t);
+  xtmp2   = _mm256_subs_epi16(x02t,x13t);
+  x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
+  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
+  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm256_subs_epi16(x256[0],x256[2]);
+  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
+  xtmp1   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp3   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples, done independently in each 128-bit lane
+  ytmp0   = _mm256_unpacklo_epi32(xtmp0,xtmp1);
+  ytmp1   = _mm256_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = _mm256_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = _mm256_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = _mm256_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = _mm256_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = _mm256_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = _mm256_unpackhi_epi64(ytmp1,ytmp3);
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (xtmp0 is not multiplied, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]);
+  xtmp2 = packed_cmult2_256(xtmp2,tw16a_256[1],tw16b_256[1]);
+  xtmp3 = packed_cmult2_256(xtmp3,tw16a_256[2],tw16b_256[2]);
+
+  x02t    = _mm256_adds_epi16(xtmp0,xtmp2);
+  x13t    = _mm256_adds_epi16(xtmp1,xtmp3);
+  ytmp0   = _mm256_adds_epi16(x02t,x13t);
+  ytmp2   = _mm256_subs_epi16(x02t,x13t);
+  x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
+  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
+  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm256_subs_epi16(xtmp0,xtmp2);
+  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
+  ytmp1   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  ytmp3   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+
+  // At this point each vector still carries one DFT per lane:
+  // ytmp0 = [y0  y1  y2  y3  y16 y17 y18 y19]
+  // ytmp1 = [y4  y5  y6  y7  y20 y21 y22 y23]
+  // ytmp2 = [y8  y9  y10 y11 y24 y25 y26 y27]
+  // ytmp3 = [y12 y13 y14 y15 y28 y29 y30 y31]
+
+  // Regroup the lanes with the integer-domain cross-lane permute:
+  // imm 0x20 -> [a.low , b.low ], imm 0x31 -> [a.high, b.high]
+  y256[0] = _mm256_permute2x128_si256(ytmp0,ytmp1,0x20);
+  y256[1] = _mm256_permute2x128_si256(ytmp2,ytmp3,0x20);
+  y256[2] = _mm256_permute2x128_si256(ytmp0,ytmp1,0x31);
+  y256[3] = _mm256_permute2x128_si256(ytmp2,ytmp3,0x31);
+
+  // y256[0] = [y0  y1  y2  y3  y4  y5  y6  y7]
+  // y256[1] = [y8  y9  y10 y11 y12 y13 y14 y15]
+  // y256[2] = [y16 y17 y18 y19 y20 y21 y22 y23]
+  // y256[3] = [y24 y25 y26 y27 y28 y29 y30 y31]
+}
+
+#endif  
+#endif
+/*
+ * idft16 - 16-point complex inverse DFT, fully unrolled radix-4 x radix-4.
+ *
+ * x : input,  16 complex samples stored as interleaved int16 (re,im) pairs
+ * y : output, 16 complex samples in the same format
+ *
+ * Same structure as dft16, with two differences visible in the code:
+ *   - the twiddles come from the tw16/tw16c tables instead of tw16a/tw16b,
+ *   - in both butterfly stages the "+" and "-" combinations are written to
+ *     outputs 3 and 1 respectively (swapped with respect to dft16).
+ *
+ * Both buffers are accessed through 128-bit vector pointers, so they are
+ * assumed to be 16-byte aligned (TODO confirm against callers).
+ * All additions/subtractions are saturating.
+ */
+static inline void idft16(int16_t *x,int16_t *y) __attribute__((always_inline));
+
+static inline void idft16(int16_t *x,int16_t *y)
+{
+
+#if defined(__x86_64__) || defined(__i386__)
+  __m128i *tw16a_128=(__m128i *)tw16,*tw16b_128=(__m128i *)tw16c,*x128=(__m128i *)x,*y128=(__m128i *)y;
+
+  register __m128i x1_flip,x3_flip,x02t,x13t;
+  register __m128i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
+  // Byte shuffle that swaps the two int16 halves (re <-> im) of every 32-bit complex sample.
+  // Built once and reused by both stages.
+  register __m128i complex_shuffle = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = _mm_adds_epi16(x128[0],x128[2]);
+  x13t    = _mm_adds_epi16(x128[1],x128[3]);
+  xtmp0   = _mm_adds_epi16(x02t,x13t);
+  xtmp2   = _mm_subs_epi16(x02t,x13t);
+  x1_flip = _mm_sign_epi16(x128[1],*(__m128i*)conjugatedft);
+  x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm_sign_epi16(x128[3],*(__m128i*)conjugatedft);
+  x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm_subs_epi16(x128[0],x128[2]);
+  x13t    = _mm_subs_epi16(x1_flip,x3_flip);
+  xtmp3   = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp1   = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples (xtmp rows -> xtmp columns)
+  ytmp0   = _mm_unpacklo_epi32(xtmp0,xtmp1);
+  ytmp1   = _mm_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = _mm_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = _mm_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = _mm_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = _mm_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = _mm_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = _mm_unpackhi_epi64(ytmp1,ytmp3);
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (row 0 is not multiplied, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2(xtmp1,tw16a_128[0],tw16b_128[0]);
+  xtmp2 = packed_cmult2(xtmp2,tw16a_128[1],tw16b_128[1]);
+  xtmp3 = packed_cmult2(xtmp3,tw16a_128[2],tw16b_128[2]);
+
+  x02t    = _mm_adds_epi16(xtmp0,xtmp2);
+  x13t    = _mm_adds_epi16(xtmp1,xtmp3);
+  y128[0] = _mm_adds_epi16(x02t,x13t);
+  y128[2] = _mm_subs_epi16(x02t,x13t);
+  x1_flip = _mm_sign_epi16(xtmp1,*(__m128i*)conjugatedft);
+  x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm_sign_epi16(xtmp3,*(__m128i*)conjugatedft);
+  x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm_subs_epi16(xtmp0,xtmp2);
+  x13t    = _mm_subs_epi16(x1_flip,x3_flip);
+  y128[3] = _mm_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  y128[1] = _mm_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+#elif defined(__arm__)
+  int16x8_t *tw16a_128=(int16x8_t *)tw16,*tw16b_128=(int16x8_t *)tw16c,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y;
+
+  register int16x8_t x1_flip,x3_flip,x02t,x13t;
+  register int16x8_t xtmp0,xtmp1,xtmp2,xtmp3;
+  register uint32x4x2_t ytmp0,ytmp1;
+  register int16x8_t ytmp0b,ytmp1b,ytmp2b,ytmp3b;
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = vqaddq_s16(x128[0],x128[2]);
+  x13t    = vqaddq_s16(x128[1],x128[3]);
+  xtmp0   = vqaddq_s16(x02t,x13t);
+  xtmp2   = vqsubq_s16(x02t,x13t);
+  // multiply by the conjugatedft pattern, then swap re/im inside each 32-bit sample
+  x1_flip = vrev32q_s16(vmulq_s16(x128[1],*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(x128[3],*(int16x8_t*)conjugatedft));
+  x02t    = vqsubq_s16(x128[0],x128[2]);
+  x13t    = vqsubq_s16(x1_flip,x3_flip);
+  xtmp3   = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp1   = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples (xIJ = element J of xtmpI)
+  ytmp0  = vtrnq_u32((uint32x4_t)(xtmp0),(uint32x4_t)(xtmp1));
+// ytmp0.val[0] = [x00 x10 x02 x12], ytmp0.val[1] = [x01 x11 x03 x13]
+  ytmp1  = vtrnq_u32((uint32x4_t)(xtmp2),(uint32x4_t)(xtmp3));
+// ytmp1.val[0] = [x20 x30 x22 x32], ytmp1.val[1] = [x21 x31 x23 x33]
+
+
+  ytmp0b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0]));
+// ytmp0b = [x00 x10 x20 x30]
+  ytmp1b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1]));
+// ytmp1b = [x01 x11 x21 x31]
+  ytmp2b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0]));
+// ytmp2b = [x02 x12 x22 x32]
+  ytmp3b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1]));
+// ytmp3b = [x03 x13 x23 x33]
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (ytmp0b is used as-is, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2(ytmp1b,tw16a_128[0],tw16b_128[0]);
+  xtmp2 = packed_cmult2(ytmp2b,tw16a_128[1],tw16b_128[1]);
+  xtmp3 = packed_cmult2(ytmp3b,tw16a_128[2],tw16b_128[2]);
+
+  x02t    = vqaddq_s16(ytmp0b,xtmp2);
+  x13t    = vqaddq_s16(xtmp1,xtmp3);
+  y128[0] = vqaddq_s16(x02t,x13t);
+  y128[2] = vqsubq_s16(x02t,x13t);
+  x1_flip = vrev32q_s16(vmulq_s16(xtmp1,*(int16x8_t*)conjugatedft));
+  x3_flip = vrev32q_s16(vmulq_s16(xtmp3,*(int16x8_t*)conjugatedft));
+  x02t    = vqsubq_s16(ytmp0b,xtmp2);
+  x13t    = vqsubq_s16(x1_flip,x3_flip);
+  y128[3] = vqaddq_s16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  y128[1] = vqsubq_s16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+#endif
+}
+
+// Non-static entry point for the 16-point inverse DFT: forwards x (input) and
+// y (output) unchanged to the always-inline idft16 kernel, so the transform can
+// be called from outside this translation unit.
+void idft16f(int16_t *x,int16_t *y) {
+  idft16(x,y);
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+#ifdef __AVX2__
+// Does two 16-point IDFTs in parallel, one per 128-bit lane:
+//   - IDFT #0 reads the low  128 bits of x256[0..3] (the "x[0..15]" complex samples)
+//   - IDFT #1 reads the high 128 bits of x256[0..3] (the "x[16..31]" complex samples)
+// On output the results are regrouped so that y256[0..1] hold the 16 outputs of
+// IDFT #0 and y256[2..3] hold the 16 outputs of IDFT #1.
+// Compared with dft16_simd256 it uses the tw16rep/tw16crep twiddles and writes the
+// "+"/"-" butterfly combinations to outputs 3/1 instead of 1/3.
+// x and y are accessed through __m256i pointers and are therefore assumed to be
+// 32-byte aligned (TODO confirm against callers).  All adds/subs saturate.
+static inline void idft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline));
+static inline void idft16_simd256(int16_t *x,int16_t *y)
+{
+
+  __m256i *tw16a_256=(__m256i *)tw16rep,*tw16b_256=(__m256i *)tw16crep,*x256=(__m256i *)x,*y256=(__m256i *)y;
+  register __m256i x1_flip,x3_flip,x02t,x13t;
+  register __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3;
+  // Swaps the two int16 halves (re <-> im) of every 32-bit complex sample.
+  // _mm256_shuffle_epi8 works per 128-bit lane, hence the pattern repeats in both halves.
+  register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
+
+  // First stage : 4 Radix-4 butterflies without input twiddles
+
+  x02t    = _mm256_adds_epi16(x256[0],x256[2]);
+  x13t    = _mm256_adds_epi16(x256[1],x256[3]);
+  xtmp0   = _mm256_adds_epi16(x02t,x13t);
+  xtmp2   = _mm256_subs_epi16(x02t,x13t);
+  x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft);
+  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft);
+  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm256_subs_epi16(x256[0],x256[2]);
+  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
+  xtmp3   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  xtmp1   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // 4x4 transpose of 32-bit complex samples, done independently in each 128-bit lane
+  ytmp0   = _mm256_unpacklo_epi32(xtmp0,xtmp1);
+  ytmp1   = _mm256_unpackhi_epi32(xtmp0,xtmp1);
+  ytmp2   = _mm256_unpacklo_epi32(xtmp2,xtmp3);
+  ytmp3   = _mm256_unpackhi_epi32(xtmp2,xtmp3);
+  xtmp0   = _mm256_unpacklo_epi64(ytmp0,ytmp2);
+  xtmp1   = _mm256_unpackhi_epi64(ytmp0,ytmp2);
+  xtmp2   = _mm256_unpacklo_epi64(ytmp1,ytmp3);
+  xtmp3   = _mm256_unpackhi_epi64(ytmp1,ytmp3);
+
+  // Second stage : 4 Radix-4 butterflies with input twiddles
+  // (xtmp0 is not multiplied, i.e. it has a unit twiddle)
+  xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]);
+  xtmp2 = packed_cmult2_256(xtmp2,tw16a_256[1],tw16b_256[1]);
+  xtmp3 = packed_cmult2_256(xtmp3,tw16a_256[2],tw16b_256[2]);
+
+  x02t    = _mm256_adds_epi16(xtmp0,xtmp2);
+  x13t    = _mm256_adds_epi16(xtmp1,xtmp3);
+  ytmp0   = _mm256_adds_epi16(x02t,x13t);
+  ytmp2   = _mm256_subs_epi16(x02t,x13t);
+  x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft);
+  x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle);
+  x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft);
+  x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle);
+  x02t    = _mm256_subs_epi16(xtmp0,xtmp2);
+  x13t    = _mm256_subs_epi16(x1_flip,x3_flip);
+  ytmp3   = _mm256_adds_epi16(x02t,x13t);  // x0 + x1f - x2 - x3f
+  ytmp1   = _mm256_subs_epi16(x02t,x13t);  // x0 - x1f - x2 + x3f
+
+  // At this point each vector still carries one IDFT per lane:
+  // ytmp0 = [y0  y1  y2  y3  y16 y17 y18 y19]
+  // ytmp1 = [y4  y5  y6  y7  y20 y21 y22 y23]
+  // ytmp2 = [y8  y9  y10 y11 y24 y25 y26 y27]
+  // ytmp3 = [y12 y13 y14 y15 y28 y29 y30 y31]
+
+  // Regroup the lanes with the integer-domain cross-lane permute:
+  // imm 0x20 -> [a.low , b.low ], imm 0x31 -> [a.high, b.high]
+  y256[0] = _mm256_permute2x128_si256(ytmp0,ytmp1,0x20);
+  y256[1] = _mm256_permute2x128_si256(ytmp2,ytmp3,0x20);
+  y256[2] = _mm256_permute2x128_si256(ytmp0,ytmp1,0x31);
+  y256[3] = _mm256_permute2x128_si256(ytmp2,ytmp3,0x31);
+
+  // y256[0] = [y0  y1  y2  y3  y4  y5  y6  y7]
+  // y256[1] = [y8  y9  y10 y11 y12 y13 y14 y15]
+  // y256[2] = [y16 y17 y18 y19 y20 y21 y22 y23]
+  // y256[3] = [y24 y25 y26 y27 y28 y29 y30 y31]
+
+}
+#endif  
+#endif
+
+// 64-point optimized DFT
+
+// Twiddle tables for the 64-point transforms.
+// Each table holds 48 complex values as interleaved int16 pairs in Q15
+// (32767 ~ 1.0), organised as three sections of 16 values:
+// section m (m = 1,2,3) contains the twiddles for angle 2*pi*m*k/64, k = 0..15.
+// The tables are used in pairs by the butterflies: (tw64a, tw64b) by dft64 and
+// (tw64, tw64c) by idft64.
+
+// tw64: (cos, -sin) pairs, i.e. exp(-j*2*pi*m*k/64).
+const static int16_t tw64[96] __attribute__((aligned(32))) = { 
+32767,0,32609,-3212,32137,-6393,31356,-9512,
+30272,-12540,28897,-15447,27244,-18205,25329,-20788,
+23169,-23170,20787,-25330,18204,-27245,15446,-28898,
+12539,-30273,9511,-31357,6392,-32138,3211,-32610,
+32767,0,32137,-6393,30272,-12540,27244,-18205,
+23169,-23170,18204,-27245,12539,-30273,6392,-32138,
+0,-32767,-6393,-32138,-12540,-30273,-18205,-27245,
+-23170,-23170,-27245,-18205,-30273,-12540,-32138,-6393,
+32767,0,31356,-9512,27244,-18205,20787,-25330,
+12539,-30273,3211,-32610,-6393,-32138,-15447,-28898,
+-23170,-23170,-28898,-15447,-32138,-6393,-32610,3211,
+-30273,12539,-25330,20787,-18205,27244,-9512,31356
+                                                };
+// tw64a: (cos, +sin) pairs, i.e. the complex conjugate of tw64.
+const static int16_t tw64a[96] __attribute__((aligned(32))) = { 
+32767,0,32609,3212,32137,6393,31356,9512,
+30272,12540,28897,15447,27244,18205,25329,20788,
+23169,23170,20787,25330,18204,27245,15446,28898,
+12539,30273,9511,31357,6392,32138,3211,32610,
+32767,0,32137,6393,30272,12540,27244,18205,
+23169,23170,18204,27245,12539,30273,6392,32138,
+0,32767,-6393,32138,-12540,30273,-18205,27245,
+-23170,23170,-27245,18205,-30273,12540,-32138,6393,
+32767,0,31356,9512,27244,18205,20787,25330,
+12539,30273,3211,32610,-6393,32138,-15447,28898,
+-23170,23170,-28898,15447,-32138,6393,-32610,-3211,
+-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356
+                                                 };
+// tw64b: (-sin, cos) pairs, i.e. every tw64a entry (re,im) stored as (-im,re).
+const static int16_t tw64b[96] __attribute__((aligned(32))) = { 
+0,32767,-3212,32609,-6393,32137,-9512,31356,
+-12540,30272,-15447,28897,-18205,27244,-20788,25329,
+-23170,23169,-25330,20787,-27245,18204,-28898,15446,
+-30273,12539,-31357,9511,-32138,6392,-32610,3211,
+0,32767,-6393,32137,-12540,30272,-18205,27244,
+-23170,23169,-27245,18204,-30273,12539,-32138,6392,
+-32767,0,-32138,-6393,-30273,-12540,-27245,-18205,
+-23170,-23170,-18205,-27245,-12540,-30273,-6393,-32138,
+0,32767,-9512,31356,-18205,27244,-25330,20787,
+-30273,12539,-32610,3211,-32138,-6393,-28898,-15447,
+-23170,-23170,-15447,-28898,-6393,-32138,3211,-32610,
+12539,-30273,20787,-25330,27244,-18205,31356,-9512
+                                                 };
+// tw64c: (sin, cos) pairs, i.e. every tw64 entry (re,im) stored as (-im,re).
+const static int16_t tw64c[96] __attribute__((aligned(32))) = { 
+0,32767,3212,32609,6393,32137,9512,31356,
+12540,30272,15447,28897,18205,27244,20788,25329,
+23170,23169,25330,20787,27245,18204,28898,15446,
+30273,12539,31357,9511,32138,6392,32610,3211,
+0,32767,6393,32137,12540,30272,18205,27244,
+23170,23169,27245,18204,30273,12539,32138,6392,
+32767,0,32138,-6393,30273,-12540,27245,-18205,
+23170,-23170,18205,-27245,12540,-30273,6393,-32138,
+0,32767,9512,31356,18205,27244,25330,20787,
+30273,12539,32610,3211,32138,-6393,28898,-15447,
+23170,-23170,15447,-28898,6393,-32138,-3211,-32610,
+-12539,-30273,-20787,-25330,-27244,-18205,-31356,-9512
+                                                 };
+// Thin portability layer over the x86 (SSE/AVX2) and ARM NEON intrinsics used by
+// the DFT kernels below.  Every function-like macro expands to a single
+// expression WITHOUT a trailing semicolon, so it can be used inside larger
+// expressions and behaves the same on both architectures; callers terminate the
+// statement themselves.
+#if defined(__x86_64__) || defined(__i386__)
+#define simd_q15_t __m128i
+#define simdshort_q15_t __m64
+// arithmetic right shift of each int16 lane
+#define shiftright_int16(a,shift) _mm_srai_epi16(a,shift)
+// broadcast one int16 value to all lanes
+#define set1_int16(a) _mm_set1_epi16(a)
+// Q15 multiply with rounding, per int16 lane
+#define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b)
+#ifdef __AVX2__
+#define simd256_q15_t __m256i
+#define shiftright_int16_simd256(a,shift) _mm256_srai_epi16(a,shift)
+#define set1_int16_simd256(a) _mm256_set1_epi16(a)
+// previous formulation: _mm256_slli_epi16(_mm256_mulhi_epi16(a,b),1)
+#define mulhi_int16_simd256(a,b) _mm256_mulhrs_epi16(a,b)
+#endif
+
+#elif defined(__arm__)
+#define simd_q15_t int16x8_t
+#define simdshort_q15_t int16x4_t
+#define shiftright_int16(a,shift) vshrq_n_s16(a,shift)
+#define set1_int16(a) vdupq_n_s16(a)
+// NOTE(review): vqdmulhq_s16 truncates whereas the x86 _mm_mulhrs_epi16 rounds;
+// vqrdmulhq_s16 would be the rounding counterpart - confirm which is intended.
+#define mulhi_int16(a,b) vqdmulhq_s16(a,b)
+// MMX state clearing has no ARM equivalent: expand to nothing
+#define _mm_empty() 
+#define _m_empty()
+
+#endif
+
+#ifndef __AVX2__
+/*
+ * dft64 - 64-point complex DFT (128-bit SIMD version).
+ *
+ * x     : input,  64 complex samples as interleaved int16 (re,im) pairs
+ * y     : output, 64 complex samples in the same format
+ * scale : if non-zero, every output int16 is arithmetically shifted right by 3
+ *
+ * Structure: four 4x4 transposes regroup the input, four 16-point DFTs are
+ * computed with dft16, and four radix-4 butterflies (bfly4_16) with the
+ * tw64a/tw64b twiddles combine them.
+ * x and y are accessed as simd_q15_t vectors, so they are assumed to be
+ * 16-byte aligned (TODO confirm against callers).
+ * Defining D64STATS prints cycle counts for the three phases.
+ */
+void dft64(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd_q15_t xtmp[16],ytmp[16],*tw64a_128=(simd_q15_t *)tw64a,*tw64b_128=(simd_q15_t *)tw64b,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y;
+  int i;
+
+
+#ifdef D64STATS
+  time_stats_t ts_t,ts_d,ts_b;
+
+  reset_meas(&ts_t);
+  reset_meas(&ts_d);
+  reset_meas(&ts_b);
+  start_meas(&ts_t);
+#endif
+
+
+  // In the layout comments below, xIJ denotes element J of input vector x128[I]
+  transpose16_ooff(x128,xtmp,4);
+  // xtmp0  = x00 x10 x20 x30
+  // xtmp4  = x01 x11 x21 x31
+  // xtmp8  = x02 x12 x22 x32
+  // xtmp12 = x03 x13 x23 x33
+  transpose16_ooff(x128+4,xtmp+1,4);
+  // xtmp1  = x40 x50 x60 x70
+  // xtmp5  = x41 x51 x61 x71
+  // xtmp9  = x42 x52 x62 x72
+  // xtmp13 = x43 x53 x63 x73
+  transpose16_ooff(x128+8,xtmp+2,4);
+  // xtmp2  = x80 x90 xa0 xb0
+  // xtmp6  = x81 x91 xa1 xb1
+  // xtmp10 = x82 x92 xa2 xb2
+  // xtmp14 = x83 x93 xa3 xb3
+  transpose16_ooff(x128+12,xtmp+3,4);
+  // xtmp3  = xc0 xd0 xe0 xf0
+  // xtmp7  = xc1 xd1 xe1 xf1
+  // xtmp11 = xc2 xd2 xe2 xf2
+  // xtmp15 = xc3 xd3 xe3 xf3
+
+#ifdef D64STATS
+  stop_meas(&ts_t);
+  start_meas(&ts_d);
+#endif
+
+  // xtmp0  = x00 x10 x20 x30
+  // xtmp1  = x40 x50 x60 x70
+  // xtmp2  = x80 x90 xa0 xb0
+  // xtmp3  = xc0 xd0 xe0 xf0
+  dft16((int16_t*)(xtmp),(int16_t*)ytmp);
+
+  // xtmp4  = x01 x11 x21 x31
+  // xtmp5  = x41 x51 x61 x71
+  // xtmp6  = x81 x91 xa1 xb1
+  // xtmp7  = xc1 xd1 xe1 xf1
+  dft16((int16_t*)(xtmp+4),(int16_t*)(ytmp+4));
+  dft16((int16_t*)(xtmp+8),(int16_t*)(ytmp+8));
+  dft16((int16_t*)(xtmp+12),(int16_t*)(ytmp+12));
+
+
+#ifdef D64STATS
+  stop_meas(&ts_d);
+  start_meas(&ts_b);
+#endif
+
+
+  // Combine vector i of each of the four 16-point DFTs; outputs land 4 vectors apart
+  bfly4_16(ytmp,ytmp+4,ytmp+8,ytmp+12,
+           y128,y128+4,y128+8,y128+12,
+           tw64a_128,tw64a_128+4,tw64a_128+8,
+           tw64b_128,tw64b_128+4,tw64b_128+8);
+
+  bfly4_16(ytmp+1,ytmp+5,ytmp+9,ytmp+13,
+           y128+1,y128+5,y128+9,y128+13,
+           tw64a_128+1,tw64a_128+5,tw64a_128+9,
+           tw64b_128+1,tw64b_128+5,tw64b_128+9);
+
+  bfly4_16(ytmp+2,ytmp+6,ytmp+10,ytmp+14,
+           y128+2,y128+6,y128+10,y128+14,
+           tw64a_128+2,tw64a_128+6,tw64a_128+10,
+           tw64b_128+2,tw64b_128+6,tw64b_128+10);
+
+  bfly4_16(ytmp+3,ytmp+7,ytmp+11,ytmp+15,
+           y128+3,y128+7,y128+11,y128+15,
+           tw64a_128+3,tw64a_128+7,tw64a_128+11,
+           tw64b_128+3,tw64b_128+7,tw64b_128+11);
+
+#ifdef D64STATS
+  stop_meas(&ts_b);
+  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
+#endif
+
+
+  // Optional output scaling: divide every int16 of the 16 output vectors by 8
+  if (scale>0) {
+    for (i=0; i<16; i++)
+      y128[i] = shiftright_int16(y128[i],3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else // __AVX2__
+/*
+ * dft64 - 64-point complex DFT (AVX2 version).
+ *
+ * x     : input,  64 complex samples as interleaved int16 (re,im) pairs
+ * y     : output, 64 complex samples in the same format
+ * scale : if non-zero, every output int16 is arithmetically shifted right by 3
+ *
+ * Structure: the input is de-interleaved by 4 with a permute + unpack, two
+ * calls to dft16_simd256 compute the four 16-point DFTs (two per call), and
+ * two 256-bit radix-4 butterflies with the tw64a/tw64b twiddles combine them.
+ * x and y are accessed as __m256i vectors, so they are assumed to be
+ * 32-byte aligned (TODO confirm against callers).
+ * Only entries 0..7 of xtmp and ytmp are used by this function.
+ * Defining D64STATS prints cycle counts; there is no separate transpose phase
+ * in this version, so the "t" counter is started and stopped back to back.
+ */
+void dft64(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd256_q15_t xtmp[16],ytmp[16],*tw64a_256=(simd256_q15_t *)tw64a,*tw64b_256=(simd256_q15_t *)tw64b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y;
+  simd256_q15_t xintl0,xintl1,xintl2,xintl3,xintl4,xintl5,xintl6,xintl7;
+  // Result element i takes source element perm_mask[i]; _mm256_set_epi32 lists the
+  // highest element first, so the selection order is 0,4,2,6,1,5,3,7.
+  simd256_q15_t const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+  int i;
+
+
+#ifdef D64STATS
+  time_stats_t ts_t,ts_d,ts_b;
+
+  reset_meas(&ts_t);
+  reset_meas(&ts_d);
+  reset_meas(&ts_b);
+  start_meas(&ts_t);
+  stop_meas(&ts_t);
+  start_meas(&ts_d);
+#endif
+
+  // xN below denotes complex input sample N (one 32-bit element)
+  xintl0 = _mm256_permutevar8x32_epi32(x256[0],perm_mask);  // x0  x4  x2  x6  x1  x5  x3  x7
+  xintl1 = _mm256_permutevar8x32_epi32(x256[1],perm_mask);  // x8  x12 x10 x14 x9  x13 x11 x15
+  xintl2 = _mm256_permutevar8x32_epi32(x256[2],perm_mask);  // x16 x20 x18 x22 x17 x21 x19 x23
+  xintl3 = _mm256_permutevar8x32_epi32(x256[3],perm_mask);  // x24 x28 x26 x30 x25 x29 x27 x31
+  xintl4 = _mm256_permutevar8x32_epi32(x256[4],perm_mask);  // x32 x36 x34 x38 x33 x37 x35 x39
+  xintl5 = _mm256_permutevar8x32_epi32(x256[5],perm_mask);  // x40 x44 x42 x46 x41 x45 x43 x47
+  xintl6 = _mm256_permutevar8x32_epi32(x256[6],perm_mask);  // x48 x52 x50 x54 x49 x53 x51 x55
+  xintl7 = _mm256_permutevar8x32_epi32(x256[7],perm_mask);  // x56 x60 x58 x62 x57 x61 x59 x63
+
+  // 64-bit unpacks work per 128-bit lane: low lane gets samples 4n (resp. 4n+2),
+  // high lane gets samples 4n+1 (resp. 4n+3)
+  xtmp[0] = _mm256_unpacklo_epi64(xintl0,xintl1);        // x0  x4  x8  x12 x1  x5  x9  x13
+  xtmp[4] = _mm256_unpackhi_epi64(xintl0,xintl1);        // x2  x6  x10 x14 x3  x7  x11 x15
+  xtmp[1] = _mm256_unpacklo_epi64(xintl2,xintl3);        // x16 x20 x24 x28 x17 x21 x25 x29
+  xtmp[5] = _mm256_unpackhi_epi64(xintl2,xintl3);        // x18 x22 x26 x30 x19 x23 x27 x31
+  xtmp[2] = _mm256_unpacklo_epi64(xintl4,xintl5);        // x32 x36 x40 x44 x33 x37 x41 x45
+  xtmp[6] = _mm256_unpackhi_epi64(xintl4,xintl5);        // x34 x38 x42 x46 x35 x39 x43 x47
+  xtmp[3] = _mm256_unpacklo_epi64(xintl6,xintl7);        // x48 x52 x56 x60 x49 x53 x57 x61
+  xtmp[7] = _mm256_unpackhi_epi64(xintl6,xintl7);        // x50 x54 x58 x62 x51 x55 x59 x63
+
+  // yN in the comments below indexes the 64 intermediate values stored in ytmp[0..7]
+  dft16_simd256((int16_t*)(xtmp),(int16_t*)ytmp);
+  // [y0  y1  y2  y3  y4  y5  y6  y7]
+  // [y8  y9  y10 y11 y12 y13 y14 y15]
+  // [y16 y17 y18 y19 y20 y21 y22 y23]
+  // [y24 y25 y26 y27 y28 y29 y30 y31]
+
+  dft16_simd256((int16_t*)(xtmp+4),(int16_t*)(ytmp+4));
+  // [y32 y33 y34 y35 y36 y37 y38 y39]
+  // [y40 y41 y42 y43 y44 y45 y46 y47]
+  // [y48 y49 y50 y51 y52 y53 y54 y55]
+  // [y56 y57 y58 y59 y60 y61 y62 y63]
+
+#ifdef D64STATS
+  stop_meas(&ts_d);
+  start_meas(&ts_b);
+#endif
+
+
+  bfly4_16_256(ytmp,ytmp+2,ytmp+4,ytmp+6,
+	       y256,y256+2,y256+4,y256+6,
+	       tw64a_256,tw64a_256+2,tw64a_256+4,
+	       tw64b_256,tw64b_256+2,tw64b_256+4);
+  // [y0  y1  y2  y3  y4  y5  y6  y7]
+  // [y16 y17 y18 y19 y20 y21 y22 y23]
+  // [y32 y33 y34 y35 y36 y37 y38 y39]
+  // [y48 y49 y50 y51 y52 y53 y54 y55]
+
+  bfly4_16_256(ytmp+1,ytmp+3,ytmp+5,ytmp+7,
+	       y256+1,y256+3,y256+5,y256+7,
+	       tw64a_256+1,tw64a_256+3,tw64a_256+5,
+	       tw64b_256+1,tw64b_256+3,tw64b_256+5);
+  // [y8  y9  y10 y11 y12 y13 y14 y15]
+  // [y24 y25 y26 y27 y28 y29 y30 y31]
+  // [y40 y41 y42 y43 y44 y45 y46 y47]
+  // [y56 y57 y58 y59 y60 y61 y62 y63]
+
+#ifdef D64STATS
+  stop_meas(&ts_b);
+  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
+#endif
+
+
+  // Optional output scaling: divide every int16 of the 8 output vectors by 8
+  if (scale>0) {
+    for (i=0; i<8; i++)
+      y256[i] = shiftright_int16_simd256(y256[i],3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+
+}
+#endif
+
+#ifndef __AVX2__
+/*
+ * idft64 - 64-point complex inverse DFT (128-bit SIMD version).
+ *
+ * x     : input,  64 complex samples as interleaved int16 (re,im) pairs
+ * y     : output, 64 complex samples in the same format
+ * scale : if non-zero, every output int16 is arithmetically shifted right by 3
+ *
+ * Three phases: 4x4 transposes of the input, four 16-point inverse DFTs
+ * (idft16), then four radix-4 inverse butterflies (ibfly4_16) using the
+ * tw64/tw64c twiddle tables.  Defining D64STATS prints per-phase cycle counts.
+ */
+void idft64(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd_q15_t *in128  = (simd_q15_t *)x;
+  simd_q15_t *out128 = (simd_q15_t *)y;
+  simd_q15_t *twa    = (simd_q15_t *)tw64;
+  simd_q15_t *twb    = (simd_q15_t *)tw64c;
+  simd_q15_t cols[16];   // transposed input, consumed four vectors at a time by idft16
+  simd_q15_t stage[16];  // outputs of the four 16-point inverse DFTs
+  int k;
+
+#ifdef D64STATS
+  time_stats_t ts_t,ts_d,ts_b;
+
+  reset_meas(&ts_t);
+  reset_meas(&ts_d);
+  reset_meas(&ts_b);
+  start_meas(&ts_t);
+#endif
+
+  // Phase 1: transpose each group of four input vectors, writing with a stride of 4
+  for (k=0; k<4; k++)
+    transpose16_ooff(in128+4*k,cols+k,4);
+
+#ifdef D64STATS
+  stop_meas(&ts_t);
+  start_meas(&ts_d);
+#endif
+
+  // Phase 2: one 16-point inverse DFT per block of four consecutive vectors
+  for (k=0; k<4; k++)
+    idft16((int16_t*)(cols+4*k),(int16_t*)(stage+4*k));
+
+#ifdef D64STATS
+  stop_meas(&ts_d);
+  start_meas(&ts_b);
+#endif
+
+  // Phase 3: combine vector k of every 16-point result; twiddle sections are 4 vectors apart
+  for (k=0; k<4; k++)
+    ibfly4_16(stage+k,stage+4+k,stage+8+k,stage+12+k,
+              out128+k,out128+4+k,out128+8+k,out128+12+k,
+              twa+k,twa+4+k,twa+8+k,
+              twb+k,twb+4+k,twb+8+k);
+
+#ifdef D64STATS
+  stop_meas(&ts_b);
+  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
+#endif
+
+  // Optional output scaling: divide every int16 of the 16 output vectors by 8
+  if (scale>0) {
+    for (k=0; k<16; k++)
+      out128[k] = shiftright_int16(out128[k],3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else // __AVX2__
+// 64-point inverse DFT, AVX2 build.
+//
+// x     : input, read as 8 simd256_q15_t vectors (x is cast and dereferenced
+//         directly, so it presumably has to be 32-byte aligned - TODO confirm)
+// y     : output, written as 8 simd256_q15_t vectors (same alignment caveat)
+// scale : when > 0, every output vector is passed through
+//         shiftright_int16_simd256(...,3)
+//
+// Flow: reorder the input with a 32-bit permute followed by 64-bit unpacks,
+// run two idft16_simd256() passes, then recombine the partial results with two
+// ibfly4_16_256() calls that read the tw64 / tw64c tables.
+void idft64(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd256_q15_t xtmp[16],ytmp[16];
+  simd256_q15_t *tw64a_256=(simd256_q15_t *)tw64;
+  simd256_q15_t *tw64b_256=(simd256_q15_t *)tw64c;
+  simd256_q15_t *x256=(simd256_q15_t *)x;
+  simd256_q15_t *y256=(simd256_q15_t *)y;
+  simd256_q15_t xintl[8];
+  simd256_q15_t const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
+  int k;
+
+#ifdef D64STATS
+  time_stats_t ts_t,ts_d,ts_b;
+
+  reset_meas(&ts_t);
+  reset_meas(&ts_d);
+  reset_meas(&ts_b);
+  // The "t" timer brackets no work in this build; it is started and stopped
+  // back to back so the report below keeps its three fields.
+  start_meas(&ts_t);
+  stop_meas(&ts_t);
+  start_meas(&ts_d);
+#endif
+
+  // Shuffle the eight 32-bit elements of each input vector. With this mask the
+  // result (low to high) holds source elements 0,4,2,6,1,5,3,7.
+  for (k=0; k<8; k++) {
+    xintl[k] = _mm256_permutevar8x32_epi32(x256[k],perm_mask);
+  }
+
+  // Merge neighbouring permuted vectors 64 bits at a time. The low halves go to
+  // xtmp[0..3], the high halves to xtmp[4..7]; e.g. for the first pair
+  //   xtmp[0] = x0 x4 x8  x12 x1 x5 x9  x13
+  //   xtmp[4] = x2 x6 x10 x14 x3 x7 x11 x15
+  for (k=0; k<4; k++) {
+    xtmp[k]   = _mm256_unpacklo_epi64(xintl[2*k],xintl[2*k+1]);
+    xtmp[k+4] = _mm256_unpackhi_epi64(xintl[2*k],xintl[2*k+1]);
+  }
+
+  // Two 16-point passes, one per group of four vectors.
+  idft16_simd256((int16_t*)(xtmp),(int16_t*)ytmp);
+  idft16_simd256((int16_t*)(xtmp+4),(int16_t*)(ytmp+4));
+
+#ifdef D64STATS
+  stop_meas(&ts_d);
+  start_meas(&ts_b);
+#endif
+
+  // Radix-4 recombination: pass k reads ytmp[k], ytmp[k+2], ytmp[k+4],
+  // ytmp[k+6] and writes the same positions of y256, with twiddle vectors
+  // taken at offsets k, k+2 and k+4 of each table.
+  for (k=0; k<2; k++) {
+    ibfly4_16_256(ytmp+k,ytmp+k+2,ytmp+k+4,ytmp+k+6,
+                  y256+k,y256+k+2,y256+k+4,y256+k+6,
+                  tw64a_256+k,tw64a_256+k+2,tw64a_256+k+4,
+                  tw64b_256+k,tw64b_256+k+2,tw64b_256+k+4);
+  }
+
+#ifdef D64STATS
+  stop_meas(&ts_b);
+  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
+#endif
+
+  if (scale>0) {
+    for (k=0; k<8; k++) {
+      y256[k] = shiftright_int16_simd256(y256[k],3);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+#endif
+
+int16_t tw128[128] __attribute__((aligned(32))) = {  32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608};
+
+int16_t tw128a[128] __attribute__((aligned(32))) = { 32767,0,32727,1608,32609,3212,32412,4808,32137,6393,31785,7962,31356,9512,30851,11039,30272,12540,29621,14010,28897,15447,28105,16846,27244,18205,26318,19520,25329,20788,24278,22005,23169,23170,22004,24279,20787,25330,19519,26319,18204,27245,16845,28106,15446,28898,14009,29622,12539,30273,11038,30852,9511,31357,7961,31786,6392,32138,4807,32413,3211,32610,1607,32728,0,32767,-1608,32728,-3212,32610,-4808,32413,-6393,32138,-7962,31786,-9512,31357,-11039,30852,-12540,30273,-14010,29622,-15447,28898,-16846,28106,-18205,27245,-19520,26319,-20788,25330,-22005,24279,-23170,23170,-24279,22005,-25330,20788,-26319,19520,-27245,18205,-28106,16846,-28898,15447,-29622,14010,-30273,12540,-30852,11039,-31357,9512,-31786,7962,-32138,6393,-32413,4808,-32610,3212,-32728,1608};
+
+int16_t tw128b[128] __attribute__((aligned(32))) = {0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,-32767,0,-32728,-1608,-32610,-3212,-32413,-4808,-32138,-6393,-31786,-7962,-31357,-9512,-30852,-11039,-30273,-12540,-29622,-14010,-28898,-15447,-28106,-16846,-27245,-18205,-26319,-19520,-25330,-20788,-24279,-22005,-23170,-23170,-22005,-24279,-20788,-25330,-19520,-26319,-18205,-27245,-16846,-28106,-15447,-28898,-14010,-29622,-12540,-30273,-11039,-30852,-9512,-31357,-7962,-31786,-6393,-32138,-4808,-32413,-3212,-32610,-1608,-32728};
+
+int16_t tw128c[128] __attribute__((aligned(32))) = {0,32767,1608,32727,3212,32609,4808,32412,6393,32137,7962,31785,9512,31356,11039,30851,12540,30272,14010,29621,15447,28897,16846,28105,18205,27244,19520,26318,20788,25329,22005,24278,23170,23169,24279,22004,25330,20787,26319,19519,27245,18204,28106,16845,28898,15446,29622,14009,30273,12539,30852,11038,31357,9511,31786,7961,32138,6392,32413,4807,32610,3211,32728,1607,32767,0,32728,-1608,32610,-3212,32413,-4808,32138,-6393,31786,-7962,31357,-9512,30852,-11039,30273,-12540,29622,-14010,28898,-15447,28106,-16846,27245,-18205,26319,-19520,25330,-20788,24279,-22005,23170,-23170,22005,-24279,20788,-25330,19520,-26319,18205,-27245,16846,-28106,15447,-28898,14010,-29622,12540,-30273,11039,-30852,9512,-31357,7962,-31786,6393,-32138,4808,-32413,3212,-32610,1608,-32728};
+
+#ifndef __AVX2__
+// 128-point forward DFT, non-AVX2 build.
+//
+// x     : input samples, accessed through a simdshort_q15_t pointer
+// y     : output, written as 32 simd_q15_t vectors
+// scale : when > 0, all 32 output vectors are multiplied by
+//         ONE_OVER_SQRT2_Q15 through mulhi_int16()
+//
+// Flow: 32 transpose4_ooff() calls fill xtmp, two dft64() calls (scale
+// argument 1) transform its two halves, and 16 bfly2_16() butterflies combine
+// them using the tw128a / tw128b tables.
+//
+// NOTE: when LOG_DUMPFLAG(DEBUG_DFT) is set, intermediate and final buffers are
+// dumped with LOG_M() and the process is then terminated with exit(-1).
+void dft128(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simdshort_q15_t xtmp[64];
+  simdshort_q15_t *x64 = (simdshort_q15_t *)x;
+  simd_q15_t ytmp[32];
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *half_in = &ytmp[0];
+  simd_q15_t *half_out = (simd_q15_t *)y;
+  simd_q15_t *twa = (simd_q15_t *)tw128a;
+  simd_q15_t *twb = (simd_q15_t *)tw128b;
+  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int i;
+
+  // Input advances two elements per call, output one, with offset 32.
+  for (i=0; i<32; i++) {
+    transpose4_ooff(x64+2*i,xtmp+i,32);
+  }
+
+  dft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1);
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("dft128a.m","dfta",ytmp,64,1,1);
+    LOG_M("dft128b.m","dftb",ytmp+16,64,1,1);
+  }
+
+  // Each butterfly pairs vector i with vector i+16 on both input and output.
+  for (i=0; i<16; i++,half_in++,half_out++,twa++,twb++) {
+    bfly2_16(half_in,half_in+16,
+             half_out,half_out+16,
+             twa,
+             twb);
+  }
+
+  if (scale>0) {
+    for (i=0; i<32; i++) {
+      y128[i] = mulhi_int16(y128[i],ONE_OVER_SQRT2_Q15_128);
+    }
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+     LOG_M("dft128out.m","dft128",y,128,1,1);
+     exit(-1);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else // __AVX2__
+// 128-point forward DFT, AVX2 build.
+//
+// x     : input, read as simd256_q15_t vectors (cast and dereferenced
+//         directly; presumably requires 32-byte alignment - TODO confirm)
+// y     : output, written as 16 simd256_q15_t vectors
+// scale : when > 0, all 16 output vectors are multiplied by
+//         ONE_OVER_SQRT2_Q15 through mulhi_int16_simd256()
+//
+// Flow: 8 transpose4_ooff_simd256() calls fill xtmp, two dft64() calls (scale
+// argument 1) transform its two halves, and 8 bfly2_16_256() butterflies
+// combine them using the tw128a / tw128b tables.
+//
+// NOTE: when LOG_DUMPFLAG(DEBUG_DFT) is set, the buffers are dumped with
+// LOG_M() at each stage and the process is terminated with exit(-1).
+void dft128(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd256_q15_t xtmp[16],ytmp[16];
+  simd256_q15_t *x256 = (simd256_q15_t *)x;
+  simd256_q15_t *y256 = (simd256_q15_t *)y;
+  simd256_q15_t *half_in = &ytmp[0];
+  simd256_q15_t *half_out = (simd256_q15_t *)y;
+  simd256_q15_t *twa = (simd256_q15_t *)tw128a;
+  simd256_q15_t *twb = (simd256_q15_t *)tw128b;
+  simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  int i;
+
+  // Input advances two vectors per call, output one, with offset 8.
+  for (i=0; i<8; i++) {
+    transpose4_ooff_simd256(x256+2*i,xtmp+i,8);
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+     LOG_M("dft128ina_256.m","dftina",xtmp,64,1,1);
+     LOG_M("dft128inb_256.m","dftinb",xtmp+8,64,1,1);
+  }
+
+  dft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1);
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("dft128outa_256.m","dftouta",ytmp,64,1,1);
+    LOG_M("dft128outb_256.m","dftoutb",ytmp+8,64,1,1);
+  }
+
+  // Each butterfly pairs vector i with vector i+8 on both input and output.
+  for (i=0; i<8; i++,half_in++,half_out++,twa++,twb++) {
+    bfly2_16_256(half_in,half_in+8,
+                 half_out,half_out+8,
+                 twa,
+                 twb);
+  }
+
+  if (scale>0) {
+    for (i=0; i<16; i++) {
+      y256[i] = mulhi_int16_simd256(y256[i],ONE_OVER_SQRT2_Q15_256);
+    }
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+   LOG_M("dft128.m","dft",y256,128,1,1);
+   exit(-1);
+  }
+}
+
+#endif
+
+#ifndef __AVX2__
+// 128-point inverse DFT, non-AVX2 build.
+//
+// x     : input samples, accessed through a simdshort_q15_t pointer
+// y     : output, written as 32 simd_q15_t vectors
+// scale : when > 0, all 32 output vectors are multiplied by
+//         ONE_OVER_SQRT2_Q15 through mulhi_int16()
+//
+// Flow: 32 transpose4_ooff() calls fill xtmp, two idft64() calls (scale
+// argument 1) transform its two halves, and 16 ibfly2() butterflies combine
+// them using the tw128 table.
+void idft128(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simdshort_q15_t xtmp[64];
+  simdshort_q15_t *x64 = (simdshort_q15_t *)x;
+  simd_q15_t ytmp[32];
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *half_in = &ytmp[0];
+  simd_q15_t *half_out = (simd_q15_t *)y;
+  simd_q15_t *tw = (simd_q15_t *)tw128;
+  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int i;
+
+  // Input advances two elements per call, output one, with offset 32.
+  for (i=0; i<32; i++) {
+    transpose4_ooff(x64+2*i,xtmp+i,32);
+  }
+
+  idft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1);
+
+  // Each butterfly pairs vector i with vector i+16 on both input and output.
+  for (i=0; i<16; i++,half_in++,half_out++,tw++) {
+    ibfly2(half_in,half_in+16,
+           half_out,half_out+16,
+           tw);
+  }
+
+  if (scale>0) {
+    for (i=0; i<32; i++) {
+      y128[i] = mulhi_int16(y128[i],ONE_OVER_SQRT2_Q15_128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else // __AVX2__
+// 128-point inverse DFT, AVX2 build.
+//
+// x     : input, read as simd256_q15_t vectors (cast and dereferenced
+//         directly; presumably requires 32-byte alignment - TODO confirm)
+// y     : output, written as 16 simd256_q15_t vectors
+// scale : when > 0, all 16 output vectors are multiplied by
+//         ONE_OVER_SQRT2_Q15 through mulhi_int16_simd256()
+//
+// Flow: 8 transpose4_ooff_simd256() calls fill xtmp, two idft64() calls (scale
+// argument 1) transform its two halves, and 8 ibfly2_256() butterflies combine
+// them using the tw128 table.
+void idft128(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd256_q15_t xtmp[16],ytmp[16];
+  simd256_q15_t *x256 = (simd256_q15_t *)x;
+  simd256_q15_t *y256 = (simd256_q15_t *)y;
+  simd256_q15_t *half_in = &ytmp[0];
+  simd256_q15_t *half_out = (simd256_q15_t *)y;
+  simd256_q15_t *tw = (simd256_q15_t *)tw128;
+  simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  int i;
+
+  // Input advances two vectors per call, output one, with offset 8.
+  for (i=0; i<8; i++) {
+    transpose4_ooff_simd256(x256+2*i,xtmp+i,8);
+  }
+
+  idft64((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1);
+
+  // Each butterfly pairs vector i with vector i+8 on both input and output.
+  for (i=0; i<8; i++,half_in++,half_out++,tw++) {
+    ibfly2_256(half_in,half_in+8,
+               half_out,half_out+8,
+               tw);
+  }
+
+  if (scale>0) {
+    for (i=0; i<16; i++) {
+      y256[i] = mulhi_int16_simd256(y256[i],ONE_OVER_SQRT2_Q15_256);
+    }
+  }
+
+}
+
+#endif
+
+int16_t tw256[384] __attribute__((aligned(32))) = {  32767,0,32757,-805,32727,-1608,32678,-2411,32609,-3212,32520,-4012,32412,-4808,32284,-5602,32137,-6393,31970,-7180,31785,-7962,31580,-8740,31356,-9512,31113,-10279,30851,-11039,30571,-11793,30272,-12540,29955,-13279,29621,-14010,29268,-14733,28897,-15447,28510,-16151,28105,-16846,27683,-17531,27244,-18205,26789,-18868,26318,-19520,25831,-20160,25329,-20788,24811,-21403,24278,-22005,23731,-22595,23169,-23170,22594,-23732,22004,-24279,21402,-24812,20787,-25330,20159,-25832,19519,-26319,18867,-26790,18204,-27245,17530,-27684,16845,-28106,16150,-28511,15446,-28898,14732,-29269,14009,-29622,13278,-29956,12539,-30273,11792,-30572,11038,-30852,10278,-31114,9511,-31357,8739,-31581,7961,-31786,7179,-31971,6392,-32138,5601,-32285,4807,-32413,4011,-32521,3211,-32610,2410,-32679,1607,-32728,804,-32758,
+                                                     32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608,
+                                                     32767,0,32678,-2411,32412,-4808,31970,-7180,31356,-9512,30571,-11793,29621,-14010,28510,-16151,27244,-18205,25831,-20160,24278,-22005,22594,-23732,20787,-25330,18867,-26790,16845,-28106,14732,-29269,12539,-30273,10278,-31114,7961,-31786,5601,-32285,3211,-32610,804,-32758,-1608,-32728,-4012,-32521,-6393,-32138,-8740,-31581,-11039,-30852,-13279,-29956,-15447,-28898,-17531,-27684,-19520,-26319,-21403,-24812,-23170,-23170,-24812,-21403,-26319,-19520,-27684,-17531,-28898,-15447,-29956,-13279,-30852,-11039,-31581,-8740,-32138,-6393,-32521,-4012,-32728,-1608,-32758,804,-32610,3211,-32285,5601,-31786,7961,-31114,10278,-30273,12539,-29269,14732,-28106,16845,-26790,18867,-25330,20787,-23732,22594,-22005,24278,-20160,25831,-18205,27244,-16151,28510,-14010,29621,-11793,30571,-9512,31356,-7180,31970,-4808,32412,-2411,32678
+                                                  };
+
+int16_t tw256a[384] __attribute__((aligned(32))) = { 32767,0,32757,804,32727,1607,32678,2410,32609,3211,32520,4011,32412,4807,32284,5601,32137,6392,31970,7179,31785,7961,31580,8739,31356,9511,31113,10278,30851,11038,30571,11792,30272,12539,29955,13278,29621,14009,29268,14732,28897,15446,28510,16150,28105,16845,27683,17530,27244,18204,26789,18867,26318,19519,25831,20159,25329,20787,24811,21402,24278,22004,23731,22594,23169,23169,22594,23731,22004,24278,21402,24811,20787,25329,20159,25831,19519,26318,18867,26789,18204,27244,17530,27683,16845,28105,16150,28510,15446,28897,14732,29268,14009,29621,13278,29955,12539,30272,11792,30571,11038,30851,10278,31113,9511,31356,8739,31580,7961,31785,7179,31970,6392,32137,5601,32284,4807,32412,4011,32520,3211,32609,2410,32678,1607,32727,804,32757,
+                                                     32767,0,32727,1607,32609,3211,32412,4807,32137,6392,31785,7961,31356,9511,30851,11038,30272,12539,29621,14009,28897,15446,28105,16845,27244,18204,26318,19519,25329,20787,24278,22004,23169,23169,22004,24278,20787,25329,19519,26318,18204,27244,16845,28105,15446,28897,14009,29621,12539,30272,11038,30851,9511,31356,7961,31785,6392,32137,4807,32412,3211,32609,1607,32727,0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,
+                                                     32767,0,32678,2410,32412,4807,31970,7179,31356,9511,30571,11792,29621,14009,28510,16150,27244,18204,25831,20159,24278,22004,22594,23731,20787,25329,18867,26789,16845,28105,14732,29268,12539,30272,10278,31113,7961,31785,5601,32284,3211,32609,804,32757,-1608,32727,-4012,32520,-6393,32137,-8740,31580,-11039,30851,-13279,29955,-15447,28897,-17531,27683,-19520,26318,-21403,24811,-23170,23169,-24812,21402,-26319,19519,-27684,17530,-28898,15446,-29956,13278,-30852,11038,-31581,8739,-32138,6392,-32521,4011,-32728,1607,-32758,-805,-32610,-3212,-32285,-5602,-31786,-7962,-31114,-10279,-30273,-12540,-29269,-14733,-28106,-16846,-26790,-18868,-25330,-20788,-23732,-22595,-22005,-24279,-20160,-25832,-18205,-27245,-16151,-28511,-14010,-29622,-11793,-30572,-9512,-31357,-7180,-31971,-4808,-32413,-2411,-32679
+                                                   };
+
+int16_t tw256b[384] __attribute__((aligned(32))) = {0,32767,-805,32757,-1608,32727,-2411,32678,-3212,32609,-4012,32520,-4808,32412,-5602,32284,-6393,32137,-7180,31970,-7962,31785,-8740,31580,-9512,31356,-10279,31113,-11039,30851,-11793,30571,-12540,30272,-13279,29955,-14010,29621,-14733,29268,-15447,28897,-16151,28510,-16846,28105,-17531,27683,-18205,27244,-18868,26789,-19520,26318,-20160,25831,-20788,25329,-21403,24811,-22005,24278,-22595,23731,-23170,23169,-23732,22594,-24279,22004,-24812,21402,-25330,20787,-25832,20159,-26319,19519,-26790,18867,-27245,18204,-27684,17530,-28106,16845,-28511,16150,-28898,15446,-29269,14732,-29622,14009,-29956,13278,-30273,12539,-30572,11792,-30852,11038,-31114,10278,-31357,9511,-31581,8739,-31786,7961,-31971,7179,-32138,6392,-32285,5601,-32413,4807,-32521,4011,-32610,3211,-32679,2410,-32728,1607,-32758,804,
+                                                    0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,-32767,0,-32728,-1608,-32610,-3212,-32413,-4808,-32138,-6393,-31786,-7962,-31357,-9512,-30852,-11039,-30273,-12540,-29622,-14010,-28898,-15447,-28106,-16846,-27245,-18205,-26319,-19520,-25330,-20788,-24279,-22005,-23170,-23170,-22005,-24279,-20788,-25330,-19520,-26319,-18205,-27245,-16846,-28106,-15447,-28898,-14010,-29622,-12540,-30273,-11039,-30852,-9512,-31357,-7962,-31786,-6393,-32138,-4808,-32413,-3212,-32610,-1608,-32728,
+                                                    0,32767,-2411,32678,-4808,32412,-7180,31970,-9512,31356,-11793,30571,-14010,29621,-16151,28510,-18205,27244,-20160,25831,-22005,24278,-23732,22594,-25330,20787,-26790,18867,-28106,16845,-29269,14732,-30273,12539,-31114,10278,-31786,7961,-32285,5601,-32610,3211,-32758,804,-32728,-1608,-32521,-4012,-32138,-6393,-31581,-8740,-30852,-11039,-29956,-13279,-28898,-15447,-27684,-17531,-26319,-19520,-24812,-21403,-23170,-23170,-21403,-24812,-19520,-26319,-17531,-27684,-15447,-28898,-13279,-29956,-11039,-30852,-8740,-31581,-6393,-32138,-4012,-32521,-1608,-32728,804,-32758,3211,-32610,5601,-32285,7961,-31786,10278,-31114,12539,-30273,14732,-29269,16845,-28106,18867,-26790,20787,-25330,22594,-23732,24278,-22005,25831,-20160,27244,-18205,28510,-16151,29621,-14010,30571,-11793,31356,-9512,31970,-7180,32412,-4808,32678,-2411
+                                                   };
+#ifndef __AVX2__
+// 256-point forward DFT, non-AVX2 build.
+//
+// x     : input, read as 64 simd_q15_t vectors
+// y     : output, written as 64 simd_q15_t vectors
+// scale : when > 0, all 64 output vectors go through shiftright_int16(...,1)
+//
+// Flow: 16 transpose16_ooff() calls fill xtmp, four dft64() calls (scale
+// argument 1) transform its four quarters, and 16 bfly4_16() butterflies
+// combine them using the tw256a / tw256b tables. With D256STATS defined the
+// three stages are timed and the cycle counts printed.
+void dft256(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd_q15_t xtmp[64],ytmp[64];
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa = (simd_q15_t *)tw256a;
+  simd_q15_t *twb = (simd_q15_t *)tw256b;
+  int i;
+
+#ifdef D256STATS
+  time_stats_t ts_t,ts_d,ts_b;
+
+  reset_meas(&ts_t);
+  reset_meas(&ts_d);
+  reset_meas(&ts_b);
+  start_meas(&ts_t);
+#endif
+
+  // Input advances four vectors per call, output one, with offset 16.
+  for (i=0; i<16; i++) {
+    transpose16_ooff(x128+4*i,xtmp+i,16);
+  }
+
+#ifdef D256STATS
+  stop_meas(&ts_t);
+  start_meas(&ts_d);
+#endif
+
+  // One 64-point transform per quarter (16 vectors each) of xtmp.
+  for (i=0; i<64; i+=16) {
+    dft64((int16_t*)(xtmp+i),(int16_t*)(ytmp+i),1);
+  }
+
+#ifdef D256STATS
+  stop_meas(&ts_d);
+  start_meas(&ts_b);
+#endif
+
+  // Butterfly i combines vectors i, i+16, i+32, i+48; the twiddle vectors sit
+  // at offsets i, i+16 and i+32 of each table.
+  for (i=0; i<16; i++) {
+    bfly4_16(ytmp+i,ytmp+i+16,ytmp+i+32,ytmp+i+48,
+             y128+i,y128+i+16,y128+i+32,y128+i+48,
+             twa+i,twa+i+16,twa+i+32,
+             twb+i,twb+i+16,twb+i+32);
+  }
+
+#ifdef D256STATS
+  stop_meas(&ts_b);
+  printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff);
+#endif
+
+  if (scale>0) {
+    for (i=0; i<64; i++) {
+      y128[i] = shiftright_int16(y128[i],1);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+
+// 256-point inverse DFT, non-AVX2 build.
+//
+// x     : input, read as 64 simd_q15_t vectors
+// y     : output, written as 64 simd_q15_t vectors
+// scale : when > 0, all 64 output vectors go through shiftright_int16(...,1)
+//
+// Flow: 16 transpose16_ooff() calls fill xtmp, four idft64() calls (scale
+// argument 1) transform its four quarters, and 16 ibfly4() butterflies combine
+// them using the tw256 table.
+void idft256(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd_q15_t xtmp[64],ytmp[64];
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *quarter_in = &ytmp[0];
+  simd_q15_t *quarter_out = (simd_q15_t *)y;
+  simd_q15_t *tw = (simd_q15_t *)tw256;
+  int i;
+
+  // Input advances four vectors per call, output one, with offset 16.
+  for (i=0; i<16; i++) {
+    transpose16_ooff(x128+4*i,xtmp+i,16);
+  }
+
+  // One 64-point inverse transform per quarter (16 vectors each) of xtmp.
+  for (i=0; i<64; i+=16) {
+    idft64((int16_t*)(xtmp+i),(int16_t*)(ytmp+i),1);
+  }
+
+  // Butterfly i combines vectors i, i+16, i+32, i+48; the twiddle vectors sit
+  // at offsets i, i+16 and i+32 of the table.
+  for (i=0; i<16; i++,quarter_in++,quarter_out++,tw++) {
+    ibfly4(quarter_in,quarter_in+16,quarter_in+32,quarter_in+48,
+           quarter_out,quarter_out+16,quarter_out+32,quarter_out+48,
+           tw,tw+16,tw+32);
+  }
+
+  if (scale>0) {
+    for (i=0; i<64; i++) {
+      y128[i] = shiftright_int16(y128[i],1);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else //__AVX2__
+
+/*
+ * dft256 (AVX2 build): 256-point forward DFT built from four 64-point
+ * DFTs (dft64) followed by eight bfly4_16_256 butterfly passes that use
+ * the tw256a and tw256b twiddle tables.
+ *
+ *   x     : input samples, accessed as simd256_q15_t vectors
+ *   y     : output samples, written as 32 simd256_q15_t vectors
+ *   scale : when > 0, every output vector is additionally passed through
+ *           shiftright_int16_simd256(.,1)
+ *
+ * The four dft64 sub-transforms are always invoked with their own scale
+ * argument set to 1.
+ */
+void dft256(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simd256_q15_t in_t[32];                    /* transposed input         */
+  simd256_q15_t stage[32];                   /* outputs of the 4 dft64's */
+  simd256_q15_t *src = (simd256_q15_t *)x;
+  simd256_q15_t *dst = (simd256_q15_t *)y;
+  simd256_q15_t *twa = (simd256_q15_t *)tw256a;
+  simd256_q15_t *twb = (simd256_q15_t *)tw256b;
+  int k;
+
+  /* 8 transposes: input advances 4 vectors per call, output 1 vector,
+     with an output offset argument of 8. */
+  for (k=0; k<8; k++)
+    transpose16_ooff_simd256(src+4*k,in_t+k,8);
+
+  /* Four independent 64-point transforms, 8 vectors apart. */
+  for (k=0; k<32; k+=8)
+    dft64((int16_t*)(in_t+k),(int16_t*)(stage+k),1);
+
+  /* Butterfly pass k combines vector k of each of the four sub-transforms. */
+  for (k=0; k<8; k++)
+    bfly4_16_256(stage+k,stage+k+8,stage+k+16,stage+k+24,
+                 dst+k,dst+k+8,dst+k+16,dst+k+24,
+                 twa+k,twa+k+8,twa+k+16,
+                 twb+k,twb+k+8,twb+k+16);
+
+  /* Optional extra scaling of all 32 output vectors. */
+  if (scale>0) {
+    for (k=0; k<32; k++)
+      dst[k] = shiftright_int16_simd256(dst[k],1);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * idft256 (AVX2 build): 256-point inverse DFT built from four 64-point
+ * inverse DFTs (idft64) followed by eight ibfly4_256 butterfly passes
+ * that use the tw256 twiddle table.
+ *
+ *   x     : input samples, accessed as simd256_q15_t vectors
+ *   y     : output samples, written as 32 simd256_q15_t vectors
+ *   scale : when > 0, every output vector is additionally passed through
+ *           shiftright_int16_simd256(.,1)
+ *
+ * The four idft64 sub-transforms are always invoked with their own scale
+ * argument set to 1.
+ */
+void idft256(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simd256_q15_t in_t[32];                    /* transposed input          */
+  simd256_q15_t stage[32];                   /* outputs of the 4 idft64's */
+  simd256_q15_t *src = (simd256_q15_t *)x;
+  simd256_q15_t *dst = (simd256_q15_t *)y;
+  simd256_q15_t *tw  = (simd256_q15_t *)tw256;
+  int k;
+
+  /* 8 transposes: input advances 4 vectors per call, output 1 vector,
+     with an output offset argument of 8. */
+  for (k=0; k<8; k++)
+    transpose16_ooff_simd256(src+4*k,in_t+k,8);
+
+  /* Four independent 64-point inverse transforms, 8 vectors apart. */
+  for (k=0; k<32; k+=8)
+    idft64((int16_t*)(in_t+k),(int16_t*)(stage+k),1);
+
+  /* Butterfly pass k combines vector k of each of the four sub-transforms. */
+  for (k=0; k<8; k++)
+    ibfly4_256(stage+k,stage+k+8,stage+k+16,stage+k+24,
+               dst+k,dst+k+8,dst+k+16,dst+k+24,
+               tw+k,tw+k+8,tw+k+16);
+
+  /* Optional extra scaling of all 32 output vectors. */
+  if (scale>0) {
+    for (k=0; k<32; k++)
+      dst[k] = shiftright_int16_simd256(dst[k],1);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#endif
+/*
+ * tw512: twiddle table for the 512-point transforms, 512 int16_t values,
+ * 32-byte aligned.  The entries read as 256 interleaved pairs starting
+ * (32767,0), (32764,-403), ... -- they appear to be
+ * (cos(2*pi*k/512), -sin(2*pi*k/512)) scaled to Q15 for k = 0..255
+ * (TODO confirm against the table generator).
+ * Used in this file by idft512 (both builds) and by the AVX2 dft512.
+ */
+int16_t tw512[512] __attribute__((aligned(32))) = {
+  32767,0,32764,-403,32757,-805,32744,-1207,32727,-1608,32705,-2010,32678,-2411,32646,-2812,32609,-3212,32567,-3612,32520,-4012,32468,-4410,32412,-4808,32350,-5206,32284,-5602,32213,-5998,32137,-6393,32056,-6787,31970,-7180,31880,-7572,31785,-7962,31684,-8352,31580,-8740,31470,-9127,31356,-9512,31236,-9896,31113,-10279,30984,-10660,30851,-11039,30713,-11417,30571,-11793,30424,-12167,30272,-12540,30116,-12910,29955,-13279,29790,-13646,29621,-14010,29446,-14373,29268,-14733,29085,-15091,28897,-15447,28706,-15800,28510,-16151,28309,-16500,28105,-16846,27896,-17190,27683,-17531,27466,-17869,27244,-18205,27019,-18538,26789,-18868,26556,-19195,26318,-19520,26077,-19841,25831,-20160,25582,-20475,25329,-20788,25072,-21097,24811,-21403,24546,-21706,24278,-22005,24006,-22302,23731,-22595,23452,-22884,23169,-23170,22883,-23453,22594,-23732,22301,-24007,22004,-24279,21705,-24547,21402,-24812,21096,-25073,20787,-25330,20474,-25583,20159,-25832,19840,-26078,19519,-26319,19194,-26557,18867,-26790,18537,-27020,18204,-27245,17868,-27467,17530,-27684,17189,-27897,16845,-28106,16499,-28310,16150,-28511,15799,-28707,15446,-28898,15090,-29086,14732,-29269,14372,-29447,14009,-29622,13645,-29791,13278,-29956,12909,-30117,12539,-30273,12166,-30425,11792,-30572,11416,-30714,11038,-30852,10659,-30985,10278,-31114,9895,-31237,9511,-31357,9126,-31471,8739,-31581,8351,-31685,7961,-31786,7571,-31881,7179,-31971,6786,-32057,6392,-32138,5997,-32214,5601,-32285,5205,-32351,4807,-32413,4409,-32469,4011,-32521,3611,-32568,3211,-32610,2811,-32647,2410,-32679,2009,-32706,1607,-32728,1206,-32745,804,-32758,402,-32765,0,-32767,-403,-32765,-805,-32758,-1207,-32745,-1608,-32728,-2010,-32706,-2411,-32679,-2812,-32647,-3212,-32610,-3612,-32568,-4012,-32521,-4410,-32469,-4808,-32413,-5206,-32351,-5602,-32285,-5998,-32214,-6393,-32138,-6787,-32057,-7180,-31971,-7572,-31881,-7962,-31786,-8352,-31685,-8740,-31581,-9127,-31471,-9512,-31357,-9896,-31237,-10279,-31114,-10660,-30985,-11039,-30852,-11417,-30714,-117
93,-30572,-12167,-30425,-12540,-30273,-12910,-30117,-13279,-29956,-13646,-29791,-14010,-29622,-14373,-29447,-14733,-29269,-15091,-29086,-15447,-28898,-15800,-28707,-16151,-28511,-16500,-28310,-16846,-28106,-17190,-27897,-17531,-27684,-17869,-27467,-18205,-27245,-18538,-27020,-18868,-26790,-19195,-26557,-19520,-26319,-19841,-26078,-20160,-25832,-20475,-25583,-20788,-25330,-21097,-25073,-21403,-24812,-21706,-24547,-22005,-24279,-22302,-24007,-22595,-23732,-22884,-23453,-23170,-23170,-23453,-22884,-23732,-22595,-24007,-22302,-24279,-22005,-24547,-21706,-24812,-21403,-25073,-21097,-25330,-20788,-25583,-20475,-25832,-20160,-26078,-19841,-26319,-19520,-26557,-19195,-26790,-18868,-27020,-18538,-27245,-18205,-27467,-17869,-27684,-17531,-27897,-17190,-28106,-16846,-28310,-16500,-28511,-16151,-28707,-15800,-28898,-15447,-29086,-15091,-29269,-14733,-29447,-14373,-29622,-14010,-29791,-13646,-29956,-13279,-30117,-12910,-30273,-12540,-30425,-12167,-30572,-11793,-30714,-11417,-30852,-11039,-30985,-10660,-31114,-10279,-31237,-9896,-31357,-9512,-31471,-9127,-31581,-8740,-31685,-8352,-31786,-7962,-31881,-7572,-31971,-7180,-32057,-6787,-32138,-6393,-32214,-5998,-32285,-5602,-32351,-5206,-32413,-4808,-32469,-4410,-32521,-4012,-32568,-3612,-32610,-3212,-32647,-2812,-32679,-2411,-32706,-2010,-32728,-1608,-32745,-1207,-32758,-805,-32765,-403
+};
+
+/*
+ * tw512a: twiddle table, 512 int16_t values, 32-byte aligned.  Same
+ * magnitudes as tw512 but the second element of each pair has the opposite
+ * sign: pairs start (32767,0), (32764,403), ... -- apparently
+ * (cos, +sin) of 2*pi*k/512 in Q15 (TODO confirm).
+ * Passed, together with tw512b, to bfly2_16 by the non-AVX2 dft512.
+ */
+int16_t tw512a[512] __attribute__((aligned(32))) = {
+  32767,0,32764,403,32757,805,32744,1207,32727,1608,32705,2010,32678,2411,32646,2812,32609,3212,32567,3612,32520,4012,32468,4410,32412,4808,32350,5206,32284,5602,32213,5998,32137,6393,32056,6787,31970,7180,31880,7572,31785,7962,31684,8352,31580,8740,31470,9127,31356,9512,31236,9896,31113,10279,30984,10660,30851,11039,30713,11417,30571,11793,30424,12167,30272,12540,30116,12910,29955,13279,29790,13646,29621,14010,29446,14373,29268,14733,29085,15091,28897,15447,28706,15800,28510,16151,28309,16500,28105,16846,27896,17190,27683,17531,27466,17869,27244,18205,27019,18538,26789,18868,26556,19195,26318,19520,26077,19841,25831,20160,25582,20475,25329,20788,25072,21097,24811,21403,24546,21706,24278,22005,24006,22302,23731,22595,23452,22884,23169,23170,22883,23453,22594,23732,22301,24007,22004,24279,21705,24547,21402,24812,21096,25073,20787,25330,20474,25583,20159,25832,19840,26078,19519,26319,19194,26557,18867,26790,18537,27020,18204,27245,17868,27467,17530,27684,17189,27897,16845,28106,16499,28310,16150,28511,15799,28707,15446,28898,15090,29086,14732,29269,14372,29447,14009,29622,13645,29791,13278,29956,12909,30117,12539,30273,12166,30425,11792,30572,11416,30714,11038,30852,10659,30985,10278,31114,9895,31237,9511,31357,9126,31471,8739,31581,8351,31685,7961,31786,7571,31881,7179,31971,6786,32057,6392,32138,5997,32214,5601,32285,5205,32351,4807,32413,4409,32469,4011,32521,3611,32568,3211,32610,2811,32647,2410,32679,2009,32706,1607,32728,1206,32745,804,32758,402,32765,0,32767,-403,32765,-805,32758,-1207,32745,-1608,32728,-2010,32706,-2411,32679,-2812,32647,-3212,32610,-3612,32568,-4012,32521,-4410,32469,-4808,32413,-5206,32351,-5602,32285,-5998,32214,-6393,32138,-6787,32057,-7180,31971,-7572,31881,-7962,31786,-8352,31685,-8740,31581,-9127,31471,-9512,31357,-9896,31237,-10279,31114,-10660,30985,-11039,30852,-11417,30714,-11793,30572,-12167,30425,-12540,30273,-12910,30117,-13279,29956,-13646,29791,-14010,29622,-14373,29447,-14733,29269,-15091,29086,-15447,28898,-15800,28707,-1615
1,28511,-16500,28310,-16846,28106,-17190,27897,-17531,27684,-17869,27467,-18205,27245,-18538,27020,-18868,26790,-19195,26557,-19520,26319,-19841,26078,-20160,25832,-20475,25583,-20788,25330,-21097,25073,-21403,24812,-21706,24547,-22005,24279,-22302,24007,-22595,23732,-22884,23453,-23170,23170,-23453,22884,-23732,22595,-24007,22302,-24279,22005,-24547,21706,-24812,21403,-25073,21097,-25330,20788,-25583,20475,-25832,20160,-26078,19841,-26319,19520,-26557,19195,-26790,18868,-27020,18538,-27245,18205,-27467,17869,-27684,17531,-27897,17190,-28106,16846,-28310,16500,-28511,16151,-28707,15800,-28898,15447,-29086,15091,-29269,14733,-29447,14373,-29622,14010,-29791,13646,-29956,13279,-30117,12910,-30273,12540,-30425,12167,-30572,11793,-30714,11417,-30852,11039,-30985,10660,-31114,10279,-31237,9896,-31357,9512,-31471,9127,-31581,8740,-31685,8352,-31786,7962,-31881,7572,-31971,7180,-32057,6787,-32138,6393,-32214,5998,-32285,5602,-32351,5206,-32413,4808,-32469,4410,-32521,4012,-32568,3612,-32610,3212,-32647,2812,-32679,2411,-32706,2010,-32728,1608,-32745,1207,-32758,805,-32765,403
+};
+
+
+
+/*
+ * tw512b: twiddle table, 512 int16_t values, 32-byte aligned.  Pairs start
+ * (0,32767), (-403,32764), ... -- apparently (-sin, cos) of 2*pi*k/512 in
+ * Q15, i.e. the tw512a pairs swapped with the sine negated (TODO confirm).
+ * Passed, together with tw512a, to bfly2_16 by the non-AVX2 dft512.
+ */
+int16_t tw512b[512] __attribute__((aligned(32))) = {
+  0,32767,-403,32764,-805,32757,-1207,32744,-1608,32727,-2010,32705,-2411,32678,-2812,32646,-3212,32609,-3612,32567,-4012,32520,-4410,32468,-4808,32412,-5206,32350,-5602,32284,-5998,32213,-6393,32137,-6787,32056,-7180,31970,-7572,31880,-7962,31785,-8352,31684,-8740,31580,-9127,31470,-9512,31356,-9896,31236,-10279,31113,-10660,30984,-11039,30851,-11417,30713,-11793,30571,-12167,30424,-12540,30272,-12910,30116,-13279,29955,-13646,29790,-14010,29621,-14373,29446,-14733,29268,-15091,29085,-15447,28897,-15800,28706,-16151,28510,-16500,28309,-16846,28105,-17190,27896,-17531,27683,-17869,27466,-18205,27244,-18538,27019,-18868,26789,-19195,26556,-19520,26318,-19841,26077,-20160,25831,-20475,25582,-20788,25329,-21097,25072,-21403,24811,-21706,24546,-22005,24278,-22302,24006,-22595,23731,-22884,23452,-23170,23169,-23453,22883,-23732,22594,-24007,22301,-24279,22004,-24547,21705,-24812,21402,-25073,21096,-25330,20787,-25583,20474,-25832,20159,-26078,19840,-26319,19519,-26557,19194,-26790,18867,-27020,18537,-27245,18204,-27467,17868,-27684,17530,-27897,17189,-28106,16845,-28310,16499,-28511,16150,-28707,15799,-28898,15446,-29086,15090,-29269,14732,-29447,14372,-29622,14009,-29791,13645,-29956,13278,-30117,12909,-30273,12539,-30425,12166,-30572,11792,-30714,11416,-30852,11038,-30985,10659,-31114,10278,-31237,9895,-31357,9511,-31471,9126,-31581,8739,-31685,8351,-31786,7961,-31881,7571,-31971,7179,-32057,6786,-32138,6392,-32214,5997,-32285,5601,-32351,5205,-32413,4807,-32469,4409,-32521,4011,-32568,3611,-32610,3211,-32647,2811,-32679,2410,-32706,2009,-32728,1607,-32745,1206,-32758,804,-32765,402,-32767,0,-32765,-403,-32758,-805,-32745,-1207,-32728,-1608,-32706,-2010,-32679,-2411,-32647,-2812,-32610,-3212,-32568,-3612,-32521,-4012,-32469,-4410,-32413,-4808,-32351,-5206,-32285,-5602,-32214,-5998,-32138,-6393,-32057,-6787,-31971,-7180,-31881,-7572,-31786,-7962,-31685,-8352,-31581,-8740,-31471,-9127,-31357,-9512,-31237,-9896,-31114,-10279,-30985,-10660,-30852,-11039,-30714,-11417,-305
72,-11793,-30425,-12167,-30273,-12540,-30117,-12910,-29956,-13279,-29791,-13646,-29622,-14010,-29447,-14373,-29269,-14733,-29086,-15091,-28898,-15447,-28707,-15800,-28511,-16151,-28310,-16500,-28106,-16846,-27897,-17190,-27684,-17531,-27467,-17869,-27245,-18205,-27020,-18538,-26790,-18868,-26557,-19195,-26319,-19520,-26078,-19841,-25832,-20160,-25583,-20475,-25330,-20788,-25073,-21097,-24812,-21403,-24547,-21706,-24279,-22005,-24007,-22302,-23732,-22595,-23453,-22884,-23170,-23170,-22884,-23453,-22595,-23732,-22302,-24007,-22005,-24279,-21706,-24547,-21403,-24812,-21097,-25073,-20788,-25330,-20475,-25583,-20160,-25832,-19841,-26078,-19520,-26319,-19195,-26557,-18868,-26790,-18538,-27020,-18205,-27245,-17869,-27467,-17531,-27684,-17190,-27897,-16846,-28106,-16500,-28310,-16151,-28511,-15800,-28707,-15447,-28898,-15091,-29086,-14733,-29269,-14373,-29447,-14010,-29622,-13646,-29791,-13279,-29956,-12910,-30117,-12540,-30273,-12167,-30425,-11793,-30572,-11417,-30714,-11039,-30852,-10660,-30985,-10279,-31114,-9896,-31237,-9512,-31357,-9127,-31471,-8740,-31581,-8352,-31685,-7962,-31786,-7572,-31881,-7180,-31971,-6787,-32057,-6393,-32138,-5998,-32214,-5602,-32285,-5206,-32351,-4808,-32413,-4410,-32469,-4012,-32521,-3612,-32568,-3212,-32610,-2812,-32647,-2411,-32679,-2010,-32706,-1608,-32728,-1207,-32745,-805,-32758,-403,-32765
+};
+
+/*
+ * tw512c: twiddle table, 512 int16_t values, 32-byte aligned.  Pairs start
+ * (0,32767), (403,32764), ... -- apparently (+sin, cos) of 2*pi*k/512 in
+ * Q15 (TODO confirm).
+ * NOTE(review): no user of this table is visible in the nearby 512-point
+ * routines; check the rest of the file before removing it.
+ */
+int16_t tw512c[512] __attribute__((aligned(32))) = {
+  0,32767,403,32764,805,32757,1207,32744,1608,32727,2010,32705,2411,32678,2812,32646,3212,32609,3612,32567,4012,32520,4410,32468,4808,32412,5206,32350,5602,32284,5998,32213,6393,32137,6787,32056,7180,31970,7572,31880,7962,31785,8352,31684,8740,31580,9127,31470,9512,31356,9896,31236,10279,31113,10660,30984,11039,30851,11417,30713,11793,30571,12167,30424,12540,30272,12910,30116,13279,29955,13646,29790,14010,29621,14373,29446,14733,29268,15091,29085,15447,28897,15800,28706,16151,28510,16500,28309,16846,28105,17190,27896,17531,27683,17869,27466,18205,27244,18538,27019,18868,26789,19195,26556,19520,26318,19841,26077,20160,25831,20475,25582,20788,25329,21097,25072,21403,24811,21706,24546,22005,24278,22302,24006,22595,23731,22884,23452,23170,23169,23453,22883,23732,22594,24007,22301,24279,22004,24547,21705,24812,21402,25073,21096,25330,20787,25583,20474,25832,20159,26078,19840,26319,19519,26557,19194,26790,18867,27020,18537,27245,18204,27467,17868,27684,17530,27897,17189,28106,16845,28310,16499,28511,16150,28707,15799,28898,15446,29086,15090,29269,14732,29447,14372,29622,14009,29791,13645,29956,13278,30117,12909,30273,12539,30425,12166,30572,11792,30714,11416,30852,11038,30985,10659,31114,10278,31237,9895,31357,9511,31471,9126,31581,8739,31685,8351,31786,7961,31881,7571,31971,7179,32057,6786,32138,6392,32214,5997,32285,5601,32351,5205,32413,4807,32469,4409,32521,4011,32568,3611,32610,3211,32647,2811,32679,2410,32706,2009,32728,1607,32745,1206,32758,804,32765,402,32767,0,32765,-403,32758,-805,32745,-1207,32728,-1608,32706,-2010,32679,-2411,32647,-2812,32610,-3212,32568,-3612,32521,-4012,32469,-4410,32413,-4808,32351,-5206,32285,-5602,32214,-5998,32138,-6393,32057,-6787,31971,-7180,31881,-7572,31786,-7962,31685,-8352,31581,-8740,31471,-9127,31357,-9512,31237,-9896,31114,-10279,30985,-10660,30852,-11039,30714,-11417,30572,-11793,30425,-12167,30273,-12540,30117,-12910,29956,-13279,29791,-13646,29622,-14010,29447,-14373,29269,-14733,29086,-15091,28898,-15447,28707,-15800,28511
,-16151,28310,-16500,28106,-16846,27897,-17190,27684,-17531,27467,-17869,27245,-18205,27020,-18538,26790,-18868,26557,-19195,26319,-19520,26078,-19841,25832,-20160,25583,-20475,25330,-20788,25073,-21097,24812,-21403,24547,-21706,24279,-22005,24007,-22302,23732,-22595,23453,-22884,23170,-23170,22884,-23453,22595,-23732,22302,-24007,22005,-24279,21706,-24547,21403,-24812,21097,-25073,20788,-25330,20475,-25583,20160,-25832,19841,-26078,19520,-26319,19195,-26557,18868,-26790,18538,-27020,18205,-27245,17869,-27467,17531,-27684,17190,-27897,16846,-28106,16500,-28310,16151,-28511,15800,-28707,15447,-28898,15091,-29086,14733,-29269,14373,-29447,14010,-29622,13646,-29791,13279,-29956,12910,-30117,12540,-30273,12167,-30425,11793,-30572,11417,-30714,11039,-30852,10660,-30985,10279,-31114,9896,-31237,9512,-31357,9127,-31471,8740,-31581,8352,-31685,7962,-31786,7572,-31881,7180,-31971,6787,-32057,6393,-32138,5998,-32214,5602,-32285,5206,-32351,4808,-32413,4410,-32469,4012,-32521,3612,-32568,3212,-32610,2812,-32647,2411,-32679,2010,-32706,1608,-32728,1207,-32745,805,-32758,403,-32765
+};
+
+#ifndef __AVX2__
+/*
+ * dft512 (128-bit SIMD build): 512-point forward DFT built from two
+ * 256-point DFTs (dft256) followed by 64 bfly2_16 butterfly passes that
+ * use the tw512a and tw512b twiddle tables.
+ *
+ *   x     : input samples, accessed as simdshort_q15_t vectors
+ *   y     : output samples, written as 128 simd_q15_t vectors
+ *   scale : when > 0, every output vector is additionally multiplied by
+ *           ONE_OVER_SQRT2_Q15 via mulhi_int16
+ *
+ * The two dft256 sub-transforms are always invoked with their own scale
+ * argument set to 1.
+ */
+void dft512(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simdshort_q15_t in_t[256];                 /* transposed input          */
+  simdshort_q15_t *src = (simdshort_q15_t *)x;
+  simd_q15_t stage[128];                     /* outputs of the 2 dft256's */
+  simd_q15_t *dst = (simd_q15_t *)y;
+  simd_q15_t *twa = (simd_q15_t *)tw512a;
+  simd_q15_t *twb = (simd_q15_t *)tw512b;
+  simd_q15_t inv_sqrt2 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int k;
+
+  /* 128 transposes: input advances 2 short vectors per call, output 1,
+     with an output offset argument of 128. */
+  for (k=0; k<128; k++)
+    transpose4_ooff(src+2*k,in_t+k,128);
+
+  /* Two independent 256-point transforms over the two halves. */
+  dft256((int16_t*)(in_t),(int16_t*)stage,1);
+  dft256((int16_t*)(in_t+128),(int16_t*)(stage+64),1);
+
+  /* Butterfly pass k combines vector k of the two sub-transforms. */
+  for (k=0; k<64; k++)
+    bfly2_16(stage+k,stage+k+64,
+             dst+k,dst+k+64,
+             twa+k,
+             twb+k);
+
+  /* Optional extra scaling of all 128 output vectors. */
+  if (scale>0) {
+    for (k=0; k<128; k++)
+      dst[k] = mulhi_int16(dst[k],inv_sqrt2);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * idft512 (128-bit SIMD build): 512-point inverse DFT built from two
+ * 256-point inverse DFTs (idft256) followed by 64 ibfly2 butterfly passes
+ * that use the tw512 twiddle table.
+ *
+ *   x     : input samples, accessed as simdshort_q15_t vectors
+ *   y     : output samples, written as 128 simd_q15_t vectors
+ *   scale : when > 0, every output vector is additionally multiplied by
+ *           ONE_OVER_SQRT2_Q15 via mulhi_int16
+ *
+ * The two idft256 sub-transforms are always invoked with their own scale
+ * argument set to 1.
+ */
+void idft512(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simdshort_q15_t in_t[256];                 /* transposed input           */
+  simdshort_q15_t *src = (simdshort_q15_t *)x;
+  simd_q15_t stage[128];                     /* outputs of the 2 idft256's */
+  simd_q15_t *dst = (simd_q15_t *)y;
+  simd_q15_t *tw  = (simd_q15_t *)tw512;
+  simd_q15_t inv_sqrt2 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int k;
+
+  /* 128 transposes: input advances 2 short vectors per call, output 1,
+     with an output offset argument of 128. */
+  for (k=0; k<128; k++)
+    transpose4_ooff(src+2*k,in_t+k,128);
+
+  /* Two independent 256-point inverse transforms over the two halves. */
+  idft256((int16_t*)(in_t),(int16_t*)stage,1);
+  idft256((int16_t*)(in_t+128),(int16_t*)(stage+64),1);
+
+  /* Butterfly pass k combines vector k of the two sub-transforms. */
+  for (k=0; k<64; k++)
+    ibfly2(stage+k,stage+k+64,
+           dst+k,dst+k+64,
+           tw+k);
+
+  /* Optional extra scaling of all 128 output vectors. */
+  if (scale>0) {
+    for (k=0; k<128; k++)
+      dst[k] = mulhi_int16(dst[k],inv_sqrt2);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else //__AVX2__
+
+/*
+ * dft512 (AVX2 build): 512-point forward DFT built from two 256-point
+ * DFTs (dft256) followed by 32 bfly2_256 butterfly passes that use the
+ * tw512 twiddle table.
+ *
+ *   x     : input samples, accessed as simd256_q15_t vectors
+ *   y     : output samples, written as 64 simd256_q15_t vectors
+ *   scale : when > 0, every output vector is additionally multiplied by
+ *           ONE_OVER_SQRT2_Q15 via mulhi_int16_simd256
+ *
+ * The two dft256 sub-transforms are always invoked with their own scale
+ * argument set to 1.
+ */
+void dft512(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simd256_q15_t in_t[64];                    /* transposed input          */
+  simd256_q15_t stage[64];                   /* outputs of the 2 dft256's */
+  simd256_q15_t *src = (simd256_q15_t *)x;
+  simd256_q15_t *dst = (simd256_q15_t *)y;
+  simd256_q15_t *tw  = (simd256_q15_t *)tw512;
+  simd256_q15_t inv_sqrt2 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  int k;
+
+  /* 32 transposes: input advances 2 vectors per call, output 1 vector,
+     with an output offset argument of 32. */
+  for (k=0; k<32; k++)
+    transpose4_ooff_simd256(src+2*k,in_t+k,32);
+
+  /* Two independent 256-point transforms over the two halves. */
+  dft256((int16_t*)(in_t),(int16_t*)stage,1);
+  dft256((int16_t*)(in_t+32),(int16_t*)(stage+32),1);
+
+  /* Butterfly pass k combines vector k of the two sub-transforms. */
+  for (k=0; k<32; k++)
+    bfly2_256(stage+k,stage+k+32,
+              dst+k,dst+k+32,
+              tw+k);
+
+  /* Optional extra scaling of all 64 output vectors. */
+  if (scale>0) {
+    for (k=0; k<64; k++)
+      dst[k] = mulhi_int16_simd256(dst[k],inv_sqrt2);
+  }
+
+}
+
+void idft512(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft512, 256-bit SIMD variant: two idft256 sub-transforms merged by 32 ibfly2_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[64],*x256 = (simd256_q15_t *)x;
+  simd256_q15_t ytmp[64],*y256=(simd256_q15_t*)y;
+  simd256_q15_t *tw512_256p=(simd256_q15_t *)tw512,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i;
+  simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  // Input reordering: call k reads from x256+2k and writes starting at xtmp+k, for k = 0..31.
+  // NOTE(review): the last argument (32) is presumably the offset of the second output half; transpose4_ooff_simd256 is defined outside this section - confirm.
+  transpose4_ooff_simd256(x256  ,xtmp,32);
+  transpose4_ooff_simd256(x256+2,xtmp+1,32);
+  transpose4_ooff_simd256(x256+4,xtmp+2,32);
+  transpose4_ooff_simd256(x256+6,xtmp+3,32);
+  transpose4_ooff_simd256(x256+8,xtmp+4,32);
+  transpose4_ooff_simd256(x256+10,xtmp+5,32);
+  transpose4_ooff_simd256(x256+12,xtmp+6,32);
+  transpose4_ooff_simd256(x256+14,xtmp+7,32);
+  transpose4_ooff_simd256(x256+16,xtmp+8,32);
+  transpose4_ooff_simd256(x256+18,xtmp+9,32);
+  transpose4_ooff_simd256(x256+20,xtmp+10,32);
+  transpose4_ooff_simd256(x256+22,xtmp+11,32);
+  transpose4_ooff_simd256(x256+24,xtmp+12,32);
+  transpose4_ooff_simd256(x256+26,xtmp+13,32);
+  transpose4_ooff_simd256(x256+28,xtmp+14,32);
+  transpose4_ooff_simd256(x256+30,xtmp+15,32);
+  transpose4_ooff_simd256(x256+32,xtmp+16,32);
+  transpose4_ooff_simd256(x256+34,xtmp+17,32);
+  transpose4_ooff_simd256(x256+36,xtmp+18,32);
+  transpose4_ooff_simd256(x256+38,xtmp+19,32);
+  transpose4_ooff_simd256(x256+40,xtmp+20,32);
+  transpose4_ooff_simd256(x256+42,xtmp+21,32);
+  transpose4_ooff_simd256(x256+44,xtmp+22,32);
+  transpose4_ooff_simd256(x256+46,xtmp+23,32);
+  transpose4_ooff_simd256(x256+48,xtmp+24,32);
+  transpose4_ooff_simd256(x256+50,xtmp+25,32);
+  transpose4_ooff_simd256(x256+52,xtmp+26,32);
+  transpose4_ooff_simd256(x256+54,xtmp+27,32);
+  transpose4_ooff_simd256(x256+56,xtmp+28,32);
+  transpose4_ooff_simd256(x256+58,xtmp+29,32);
+  transpose4_ooff_simd256(x256+60,xtmp+30,32);
+  transpose4_ooff_simd256(x256+62,xtmp+31,32);
+  // Transform each 32-vector half of xtmp into the matching half of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft256((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1);
+  // Merge step: iteration i hands ibfly2_256 the pair ytmp[i], ytmp[i+32], the pair y[i], y[i+32] and twiddle vector i of tw512
+  // (presumably inputs, outputs, twiddle in that order - ibfly2_256 is defined outside this section).
+  for (i=0; i<32; i++) {
+    ibfly2_256(ytmpp,ytmpp+32,
+	       y256p,y256p+32,
+	       tw512_256p);
+    tw512_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and the scaling is a fixed multiply by ONE_OVER_SQRT2_Q15.
+  if (scale>0) {
+    // 4 iterations x 16 vectors = the 64 vectors y[0..63] addressed by the butterflies above.
+    for (i=0;i<4;i++) {
+      y256[0] = mulhi_int16_simd256(y256[0],ONE_OVER_SQRT2_Q15_256);
+      y256[1] = mulhi_int16_simd256(y256[1],ONE_OVER_SQRT2_Q15_256);
+      y256[2] = mulhi_int16_simd256(y256[2],ONE_OVER_SQRT2_Q15_256);
+      y256[3] = mulhi_int16_simd256(y256[3],ONE_OVER_SQRT2_Q15_256);
+      y256[4] = mulhi_int16_simd256(y256[4],ONE_OVER_SQRT2_Q15_256);
+      y256[5] = mulhi_int16_simd256(y256[5],ONE_OVER_SQRT2_Q15_256);
+      y256[6] = mulhi_int16_simd256(y256[6],ONE_OVER_SQRT2_Q15_256);
+      y256[7] = mulhi_int16_simd256(y256[7],ONE_OVER_SQRT2_Q15_256);
+      y256[8] = mulhi_int16_simd256(y256[8],ONE_OVER_SQRT2_Q15_256);
+      y256[9] = mulhi_int16_simd256(y256[9],ONE_OVER_SQRT2_Q15_256);
+      y256[10] = mulhi_int16_simd256(y256[10],ONE_OVER_SQRT2_Q15_256);
+      y256[11] = mulhi_int16_simd256(y256[11],ONE_OVER_SQRT2_Q15_256);
+      y256[12] = mulhi_int16_simd256(y256[12],ONE_OVER_SQRT2_Q15_256);
+      y256[13] = mulhi_int16_simd256(y256[13],ONE_OVER_SQRT2_Q15_256);
+      y256[14] = mulhi_int16_simd256(y256[14],ONE_OVER_SQRT2_Q15_256);
+      y256[15] = mulhi_int16_simd256(y256[15],ONE_OVER_SQRT2_Q15_256);
+      y256+=16;
+    }
+  }
+
+}
+
+#endif
+
+int16_t tw1024[1536] __attribute__((aligned(32))); // twiddle table read by dft1024/idft1024 as three sub-tables through SIMD vector pointers; not initialised in this section
+
+#ifndef __AVX2__
+void dft1024(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft1024, 128-bit SIMD variant: four dft256 sub-transforms merged by 64 bfly4 butterflies; reads x, writes y.
+  simd_q15_t xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 64 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<256; i+=4,j++) {
+    transpose16_ooff(x128+i,xtmp+j,64);
+  }
+  // NOTE(review): the argument 64 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff is defined outside this section).
+  // Transform each 64-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  dft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
+  dft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  dft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1);
+  // Merge step: iteration i passes bfly4 four ytmp vectors and four y vectors spaced 64 apart, plus three twiddle vectors spaced 64 apart in tw1024.
+  for (i=0; i<64; i++) {
+    bfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192,
+          y128p,y128p+64,y128p+128,y128p+192,
+          tw1024_128p,tw1024_128p+64,tw1024_128p+128);
+    tw1024_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 16 iterations x 16 vectors = the 256 vectors y[0..255] addressed by the butterflies above.
+    for (i=0; i<16; i++) {
+      y128[0]  = shiftright_int16(y128[0],1);
+      y128[1]  = shiftright_int16(y128[1],1);
+      y128[2]  = shiftright_int16(y128[2],1);
+      y128[3]  = shiftright_int16(y128[3],1);
+      y128[4]  = shiftright_int16(y128[4],1);
+      y128[5]  = shiftright_int16(y128[5],1);
+      y128[6]  = shiftright_int16(y128[6],1);
+      y128[7]  = shiftright_int16(y128[7],1);
+      y128[8]  = shiftright_int16(y128[8],1);
+      y128[9]  = shiftright_int16(y128[9],1);
+      y128[10] = shiftright_int16(y128[10],1);
+      y128[11] = shiftright_int16(y128[11],1);
+      y128[12] = shiftright_int16(y128[12],1);
+      y128[13] = shiftright_int16(y128[13],1);
+      y128[14] = shiftright_int16(y128[14],1);
+      y128[15] = shiftright_int16(y128[15],1);
+
+      y128+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+void idft1024(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft1024, 128-bit SIMD variant: four idft256 sub-transforms merged by 64 ibfly4 butterflies; reads x, writes y.
+  simd_q15_t xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 64 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<256; i+=4,j++) {
+    transpose16_ooff(x128+i,xtmp+j,64);
+  }
+  // NOTE(review): the argument 64 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff is defined outside this section).
+  // Transform each 64-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  idft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
+  idft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  idft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1);
+  // Merge step: iteration i passes ibfly4 four ytmp vectors and four y vectors spaced 64 apart, plus three twiddle vectors spaced 64 apart in tw1024.
+  for (i=0; i<64; i++) {
+    ibfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192,
+           y128p,y128p+64,y128p+128,y128p+192,
+           tw1024_128p,tw1024_128p+64,tw1024_128p+128);
+    tw1024_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 16 iterations x 16 vectors = the 256 vectors y[0..255] addressed by the butterflies above.
+    for (i=0; i<16; i++) {
+      y128[0]  = shiftright_int16(y128[0],1);
+      y128[1]  = shiftright_int16(y128[1],1);
+      y128[2]  = shiftright_int16(y128[2],1);
+      y128[3]  = shiftright_int16(y128[3],1);
+      y128[4]  = shiftright_int16(y128[4],1);
+      y128[5]  = shiftright_int16(y128[5],1);
+      y128[6]  = shiftright_int16(y128[6],1);
+      y128[7]  = shiftright_int16(y128[7],1);
+      y128[8]  = shiftright_int16(y128[8],1);
+      y128[9]  = shiftright_int16(y128[9],1);
+      y128[10] = shiftright_int16(y128[10],1);
+      y128[11] = shiftright_int16(y128[11],1);
+      y128[12] = shiftright_int16(y128[12],1);
+      y128[13] = shiftright_int16(y128[13],1);
+      y128[14] = shiftright_int16(y128[14],1);
+      y128[15] = shiftright_int16(y128[15],1);
+
+      y128+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+#else //__AVX2__
+void dft1024(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft1024, 256-bit SIMD variant: four dft256 sub-transforms merged by 32 bfly4_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[128],ytmp[128],*tw1024_256p=(simd256_q15_t *)tw1024,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 32 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<128; i+=4,j++) {
+    transpose16_ooff_simd256(x256+i,xtmp+j,32);
+  }
+  // NOTE(review): the argument 32 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff_simd256 is defined outside this section).
+  // Transform each 32-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  dft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1);
+  dft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
+  dft256((int16_t*)(xtmp+96),(int16_t*)(ytmp+96),1);
+  // Merge step: iteration i passes bfly4_256 four ytmp vectors and four y vectors spaced 32 apart, plus three twiddle vectors spaced 32 apart in tw1024.
+  for (i=0; i<32; i++) {
+    bfly4_256(ytmpp,ytmpp+32,ytmpp+64,ytmpp+96,
+	      y256p,y256p+32,y256p+64,y256p+96,
+	      tw1024_256p,tw1024_256p+32,tw1024_256p+64);
+    tw1024_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 8 iterations x 16 vectors = the 128 vectors y[0..127] addressed by the butterflies above.
+    for (i=0; i<8; i++) {
+      y256[0]  = shiftright_int16_simd256(y256[0],1);
+      y256[1]  = shiftright_int16_simd256(y256[1],1);
+      y256[2]  = shiftright_int16_simd256(y256[2],1);
+      y256[3]  = shiftright_int16_simd256(y256[3],1);
+      y256[4]  = shiftright_int16_simd256(y256[4],1);
+      y256[5]  = shiftright_int16_simd256(y256[5],1);
+      y256[6]  = shiftright_int16_simd256(y256[6],1);
+      y256[7]  = shiftright_int16_simd256(y256[7],1);
+      y256[8]  = shiftright_int16_simd256(y256[8],1);
+      y256[9]  = shiftright_int16_simd256(y256[9],1);
+      y256[10] = shiftright_int16_simd256(y256[10],1);
+      y256[11] = shiftright_int16_simd256(y256[11],1);
+      y256[12] = shiftright_int16_simd256(y256[12],1);
+      y256[13] = shiftright_int16_simd256(y256[13],1);
+      y256[14] = shiftright_int16_simd256(y256[14],1);
+      y256[15] = shiftright_int16_simd256(y256[15],1);
+
+      y256+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+void idft1024(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft1024, 256-bit SIMD variant: four idft256 sub-transforms merged by 32 ibfly4_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[128],ytmp[128],*tw1024_256p=(simd256_q15_t *)tw1024,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 32 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<128; i+=4,j++) {
+    transpose16_ooff_simd256(x256+i,xtmp+j,32);
+  }
+  // NOTE(review): the argument 32 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff_simd256 is defined outside this section).
+  // Transform each 32-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft256((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  idft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1);
+  idft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1);
+  idft256((int16_t*)(xtmp+96),(int16_t*)(ytmp+96),1);
+  // Merge step: iteration i passes ibfly4_256 four ytmp vectors and four y vectors spaced 32 apart, plus three twiddle vectors spaced 32 apart in tw1024.
+  for (i=0; i<32; i++) {
+    ibfly4_256(ytmpp,ytmpp+32,ytmpp+64,ytmpp+96,
+	       y256p,y256p+32,y256p+64,y256p+96,
+	       tw1024_256p,tw1024_256p+32,tw1024_256p+64);
+    tw1024_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 8 iterations x 16 vectors = the 128 vectors y[0..127] addressed by the butterflies above.
+    for (i=0; i<8; i++) {
+      y256[0]  = shiftright_int16_simd256(y256[0],1);
+      y256[1]  = shiftright_int16_simd256(y256[1],1);
+      y256[2]  = shiftright_int16_simd256(y256[2],1);
+      y256[3]  = shiftright_int16_simd256(y256[3],1);
+      y256[4]  = shiftright_int16_simd256(y256[4],1);
+      y256[5]  = shiftright_int16_simd256(y256[5],1);
+      y256[6]  = shiftright_int16_simd256(y256[6],1);
+      y256[7]  = shiftright_int16_simd256(y256[7],1);
+      y256[8]  = shiftright_int16_simd256(y256[8],1);
+      y256[9]  = shiftright_int16_simd256(y256[9],1);
+      y256[10] = shiftright_int16_simd256(y256[10],1);
+      y256[11] = shiftright_int16_simd256(y256[11],1);
+      y256[12] = shiftright_int16_simd256(y256[12],1);
+      y256[13] = shiftright_int16_simd256(y256[13],1);
+      y256[14] = shiftright_int16_simd256(y256[14],1);
+      y256[15] = shiftright_int16_simd256(y256[15],1);
+
+      y256+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+#endif
+
+int16_t tw2048[2048] __attribute__((aligned(32))); // twiddle table read by dft2048/idft2048 through SIMD vector pointers, one vector per butterfly; not initialised in this section
+
+#ifndef __AVX2__
+void dft2048(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft2048, 128-bit SIMD variant: two dft1024 sub-transforms merged by 256 bfly2 butterflies; reads x, writes y.
+  simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x;
+  simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i;
+  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
+  // xtmp (1024 simdshort_q15_t) and ytmp (512 simd_q15_t) are each split into halves below: offsets 512 and 256 respectively.
+  xtmpp = xtmp;
+  // Input reordering, 16 x 32 calls: x64 advances by 2 per call and xtmpp by 1. NOTE(review): the argument 512 is presumably the offset of the second output half (transpose4_ooff is defined outside this section).
+  for (i=0; i<16; i++) {
+    transpose4_ooff(x64  ,xtmpp,512);
+    transpose4_ooff(x64+2,xtmpp+1,512);
+    transpose4_ooff(x64+4,xtmpp+2,512);
+    transpose4_ooff(x64+6,xtmpp+3,512);
+    transpose4_ooff(x64+8,xtmpp+4,512);
+    transpose4_ooff(x64+10,xtmpp+5,512);
+    transpose4_ooff(x64+12,xtmpp+6,512);
+    transpose4_ooff(x64+14,xtmpp+7,512);
+    transpose4_ooff(x64+16,xtmpp+8,512);
+    transpose4_ooff(x64+18,xtmpp+9,512);
+    transpose4_ooff(x64+20,xtmpp+10,512);
+    transpose4_ooff(x64+22,xtmpp+11,512);
+    transpose4_ooff(x64+24,xtmpp+12,512);
+    transpose4_ooff(x64+26,xtmpp+13,512);
+    transpose4_ooff(x64+28,xtmpp+14,512);
+    transpose4_ooff(x64+30,xtmpp+15,512);
+    transpose4_ooff(x64+32,xtmpp+16,512);
+    transpose4_ooff(x64+34,xtmpp+17,512);
+    transpose4_ooff(x64+36,xtmpp+18,512);
+    transpose4_ooff(x64+38,xtmpp+19,512);
+    transpose4_ooff(x64+40,xtmpp+20,512);
+    transpose4_ooff(x64+42,xtmpp+21,512);
+    transpose4_ooff(x64+44,xtmpp+22,512);
+    transpose4_ooff(x64+46,xtmpp+23,512);
+    transpose4_ooff(x64+48,xtmpp+24,512);
+    transpose4_ooff(x64+50,xtmpp+25,512);
+    transpose4_ooff(x64+52,xtmpp+26,512);
+    transpose4_ooff(x64+54,xtmpp+27,512);
+    transpose4_ooff(x64+56,xtmpp+28,512);
+    transpose4_ooff(x64+58,xtmpp+29,512);
+    transpose4_ooff(x64+60,xtmpp+30,512);
+    transpose4_ooff(x64+62,xtmpp+31,512);
+    x64+=64;
+    xtmpp+=32;
+  }
+  // Transform each half of xtmp into the matching half of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1);
+  // Merge step: iteration i hands bfly2 the pair ytmp[i], ytmp[i+256], the pair y[i], y[i+256] and twiddle vector i of tw2048
+  // (presumably inputs, outputs, twiddle in that order - bfly2 is defined outside this section).
+  for (i=0; i<256; i++) {
+    bfly2(ytmpp,ytmpp+256,
+          y128p,y128p+256,
+          tw2048_128p);
+    tw2048_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and the scaling is a fixed multiply by ONE_OVER_SQRT2_Q15.
+  if (scale>0) {
+    y128p = y128;
+    // y128p was advanced by the butterfly loop, hence the rewind above; 32 iterations x 16 vectors = y[0..511].
+    for (i=0; i<32; i++) {
+      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
+      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
+      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
+      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
+      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
+      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
+      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
+      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
+      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
+      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
+      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
+      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
+      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
+      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
+      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
+      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
+      y128p+=16;
+    }
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+
+}
+
+void idft2048(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft2048, 128-bit SIMD variant: two idft1024 sub-transforms merged by 256 ibfly2 butterflies; reads x, writes y.
+  simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x;
+  simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i;
+  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
+  // xtmp (1024 simdshort_q15_t) and ytmp (512 simd_q15_t) are each split into halves below: offsets 512 and 256 respectively.
+  xtmpp = xtmp;
+  // Input reordering, 16 x 32 calls: x64 advances by 2 per call and xtmpp by 1. NOTE(review): the argument 512 is presumably the offset of the second output half (transpose4_ooff is defined outside this section).
+  for (i=0; i<16; i++) {
+    transpose4_ooff(x64  ,xtmpp,512);
+    transpose4_ooff(x64+2,xtmpp+1,512);
+    transpose4_ooff(x64+4,xtmpp+2,512);
+    transpose4_ooff(x64+6,xtmpp+3,512);
+    transpose4_ooff(x64+8,xtmpp+4,512);
+    transpose4_ooff(x64+10,xtmpp+5,512);
+    transpose4_ooff(x64+12,xtmpp+6,512);
+    transpose4_ooff(x64+14,xtmpp+7,512);
+    transpose4_ooff(x64+16,xtmpp+8,512);
+    transpose4_ooff(x64+18,xtmpp+9,512);
+    transpose4_ooff(x64+20,xtmpp+10,512);
+    transpose4_ooff(x64+22,xtmpp+11,512);
+    transpose4_ooff(x64+24,xtmpp+12,512);
+    transpose4_ooff(x64+26,xtmpp+13,512);
+    transpose4_ooff(x64+28,xtmpp+14,512);
+    transpose4_ooff(x64+30,xtmpp+15,512);
+    transpose4_ooff(x64+32,xtmpp+16,512);
+    transpose4_ooff(x64+34,xtmpp+17,512);
+    transpose4_ooff(x64+36,xtmpp+18,512);
+    transpose4_ooff(x64+38,xtmpp+19,512);
+    transpose4_ooff(x64+40,xtmpp+20,512);
+    transpose4_ooff(x64+42,xtmpp+21,512);
+    transpose4_ooff(x64+44,xtmpp+22,512);
+    transpose4_ooff(x64+46,xtmpp+23,512);
+    transpose4_ooff(x64+48,xtmpp+24,512);
+    transpose4_ooff(x64+50,xtmpp+25,512);
+    transpose4_ooff(x64+52,xtmpp+26,512);
+    transpose4_ooff(x64+54,xtmpp+27,512);
+    transpose4_ooff(x64+56,xtmpp+28,512);
+    transpose4_ooff(x64+58,xtmpp+29,512);
+    transpose4_ooff(x64+60,xtmpp+30,512);
+    transpose4_ooff(x64+62,xtmpp+31,512);
+    x64+=64;
+    xtmpp+=32;
+  }
+  // Transform each half of xtmp into the matching half of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1);
+  // Merge step: iteration i hands ibfly2 the pair ytmp[i], ytmp[i+256], the pair y[i], y[i+256] and twiddle vector i of tw2048
+  // (presumably inputs, outputs, twiddle in that order - ibfly2 is defined outside this section).
+  for (i=0; i<256; i++) {
+    ibfly2(ytmpp,ytmpp+256,
+           y128p,y128p+256,
+           tw2048_128p);
+    tw2048_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and the scaling is a fixed multiply by ONE_OVER_SQRT2_Q15.
+  if (scale>0) {
+    y128p = y128;
+    // y128p was advanced by the butterfly loop, hence the rewind above; 32 iterations x 16 vectors = y[0..511].
+    for (i=0; i<32; i++) {
+      y128p[0]  = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128);
+      y128p[1]  = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128);
+      y128p[2]  = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128);
+      y128p[3]  = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128);
+      y128p[4]  = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128);
+      y128p[5]  = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128);
+      y128p[6]  = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128);
+      y128p[7]  = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128);
+      y128p[8]  = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128);
+      y128p[9]  = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128);
+      y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128);
+      y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128);
+      y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128);
+      y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128);
+      y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128);
+      y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128);
+      y128p+=16;
+    }
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+#else // __AVX2__
+
+void dft2048(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft2048, 256-bit SIMD variant: two dft1024 sub-transforms merged by 128 bfly2_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[256],*xtmpp,*x256 = (simd256_q15_t *)x;
+  simd256_q15_t ytmp[256],*tw2048_256p=(simd256_q15_t *)tw2048,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i;
+  simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  // Despite the _128 suffix, the constant above is a 256-bit vector (simd256_q15_t).
+  // xtmp and ytmp (256 vectors each) are split into halves at offset 128 below.
+  xtmpp = xtmp;
+  // Input reordering, 4 x 32 calls: x256 advances by 2 per call and xtmpp by 1. NOTE(review): the argument 128 is presumably the offset of the second output half (transpose4_ooff_simd256 is defined outside this section).
+  for (i=0; i<4; i++) {
+    transpose4_ooff_simd256(x256  ,xtmpp,128);
+    transpose4_ooff_simd256(x256+2,xtmpp+1,128);
+    transpose4_ooff_simd256(x256+4,xtmpp+2,128);
+    transpose4_ooff_simd256(x256+6,xtmpp+3,128);
+    transpose4_ooff_simd256(x256+8,xtmpp+4,128);
+    transpose4_ooff_simd256(x256+10,xtmpp+5,128);
+    transpose4_ooff_simd256(x256+12,xtmpp+6,128);
+    transpose4_ooff_simd256(x256+14,xtmpp+7,128);
+    transpose4_ooff_simd256(x256+16,xtmpp+8,128);
+    transpose4_ooff_simd256(x256+18,xtmpp+9,128);
+    transpose4_ooff_simd256(x256+20,xtmpp+10,128);
+    transpose4_ooff_simd256(x256+22,xtmpp+11,128);
+    transpose4_ooff_simd256(x256+24,xtmpp+12,128);
+    transpose4_ooff_simd256(x256+26,xtmpp+13,128);
+    transpose4_ooff_simd256(x256+28,xtmpp+14,128);
+    transpose4_ooff_simd256(x256+30,xtmpp+15,128);
+    transpose4_ooff_simd256(x256+32,xtmpp+16,128);
+    transpose4_ooff_simd256(x256+34,xtmpp+17,128);
+    transpose4_ooff_simd256(x256+36,xtmpp+18,128);
+    transpose4_ooff_simd256(x256+38,xtmpp+19,128);
+    transpose4_ooff_simd256(x256+40,xtmpp+20,128);
+    transpose4_ooff_simd256(x256+42,xtmpp+21,128);
+    transpose4_ooff_simd256(x256+44,xtmpp+22,128);
+    transpose4_ooff_simd256(x256+46,xtmpp+23,128);
+    transpose4_ooff_simd256(x256+48,xtmpp+24,128);
+    transpose4_ooff_simd256(x256+50,xtmpp+25,128);
+    transpose4_ooff_simd256(x256+52,xtmpp+26,128);
+    transpose4_ooff_simd256(x256+54,xtmpp+27,128);
+    transpose4_ooff_simd256(x256+56,xtmpp+28,128);
+    transpose4_ooff_simd256(x256+58,xtmpp+29,128);
+    transpose4_ooff_simd256(x256+60,xtmpp+30,128);
+    transpose4_ooff_simd256(x256+62,xtmpp+31,128);
+    x256+=64;
+    xtmpp+=32;
+  }
+  // Transform each half of xtmp into the matching half of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  // Merge step: iteration i hands bfly2_256 the pair ytmp[i], ytmp[i+128], the pair y[i], y[i+128] and twiddle vector i of tw2048
+  // (presumably inputs, outputs, twiddle in that order - bfly2_256 is defined outside this section).
+  for (i=0; i<128; i++) {
+    bfly2_256(ytmpp,ytmpp+128,
+	      y256p,y256p+128,
+	      tw2048_256p);
+    tw2048_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and the scaling is a fixed multiply by ONE_OVER_SQRT2_Q15.
+  if (scale>0) {
+    y256p = y256;
+    // y256p was advanced by the butterfly loop, hence the rewind above; 16 iterations x 16 vectors = y[0..255].
+    for (i=0; i<16; i++) {
+      y256p[0]  = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
+      y256p[1]  = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
+      y256p[2]  = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
+      y256p[3]  = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
+      y256p[4]  = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
+      y256p[5]  = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
+      y256p[6]  = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
+      y256p[7]  = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
+      y256p[8]  = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
+      y256p[9]  = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
+      y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
+      y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
+      y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
+      y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
+      y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
+      y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
+      y256p+=16;
+    }
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+void idft2048(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft2048, 256-bit SIMD variant: two idft1024 sub-transforms merged by 128 ibfly2_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[256],*xtmpp,*x256 = (simd256_q15_t *)x;
+  simd256_q15_t ytmp[256],*tw2048_256p=(simd256_q15_t *)tw2048,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i;
+  simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  // Despite the _128 suffix, the constant above is a 256-bit vector; xtmp and ytmp (256 vectors each) are split into halves at offset 128 below.
+  xtmpp = xtmp;
+  // Input reordering, 4 x 32 calls: x256 advances by 2 per call and xtmpp by 1. NOTE(review): the argument 128 is presumably the offset of the second output half (transpose4_ooff_simd256 is defined outside this section).
+  for (i=0; i<4; i++) {
+    transpose4_ooff_simd256(x256  ,xtmpp,128);
+    transpose4_ooff_simd256(x256+2,xtmpp+1,128);
+    transpose4_ooff_simd256(x256+4,xtmpp+2,128);
+    transpose4_ooff_simd256(x256+6,xtmpp+3,128);
+    transpose4_ooff_simd256(x256+8,xtmpp+4,128);
+    transpose4_ooff_simd256(x256+10,xtmpp+5,128);
+    transpose4_ooff_simd256(x256+12,xtmpp+6,128);
+    transpose4_ooff_simd256(x256+14,xtmpp+7,128);
+    transpose4_ooff_simd256(x256+16,xtmpp+8,128);
+    transpose4_ooff_simd256(x256+18,xtmpp+9,128);
+    transpose4_ooff_simd256(x256+20,xtmpp+10,128);
+    transpose4_ooff_simd256(x256+22,xtmpp+11,128);
+    transpose4_ooff_simd256(x256+24,xtmpp+12,128);
+    transpose4_ooff_simd256(x256+26,xtmpp+13,128);
+    transpose4_ooff_simd256(x256+28,xtmpp+14,128);
+    transpose4_ooff_simd256(x256+30,xtmpp+15,128);
+    transpose4_ooff_simd256(x256+32,xtmpp+16,128);
+    transpose4_ooff_simd256(x256+34,xtmpp+17,128);
+    transpose4_ooff_simd256(x256+36,xtmpp+18,128);
+    transpose4_ooff_simd256(x256+38,xtmpp+19,128);
+    transpose4_ooff_simd256(x256+40,xtmpp+20,128);
+    transpose4_ooff_simd256(x256+42,xtmpp+21,128);
+    transpose4_ooff_simd256(x256+44,xtmpp+22,128);
+    transpose4_ooff_simd256(x256+46,xtmpp+23,128);
+    transpose4_ooff_simd256(x256+48,xtmpp+24,128);
+    transpose4_ooff_simd256(x256+50,xtmpp+25,128);
+    transpose4_ooff_simd256(x256+52,xtmpp+26,128);
+    transpose4_ooff_simd256(x256+54,xtmpp+27,128);
+    transpose4_ooff_simd256(x256+56,xtmpp+28,128);
+    transpose4_ooff_simd256(x256+58,xtmpp+29,128);
+    transpose4_ooff_simd256(x256+60,xtmpp+30,128);
+    transpose4_ooff_simd256(x256+62,xtmpp+31,128);
+    x256+=64;
+    xtmpp+=32;
+  }
+  // Transform each half of xtmp into the matching half of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft1024((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  // Merge step: iteration i hands ibfly2_256 the pair ytmp[i], ytmp[i+128], the pair y[i], y[i+128] and twiddle vector i of tw2048
+  // (presumably inputs, outputs, twiddle in that order - ibfly2_256 is defined outside this section).
+  for (i=0; i<128; i++) {
+    ibfly2_256(ytmpp,ytmpp+128,
+	       y256p,y256p+128,
+	       tw2048_256p);
+    tw2048_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and the scaling is a fixed multiply by ONE_OVER_SQRT2_Q15.
+  if (scale>0) {
+    y256p = y256;
+    // y256p was advanced by the butterfly loop, hence the rewind above; 16 iterations x 16 vectors = y[0..255].
+    for (i=0; i<16; i++) {
+      y256p[0]  = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128);
+      y256p[1]  = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128);
+      y256p[2]  = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128);
+      y256p[3]  = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128);
+      y256p[4]  = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128);
+      y256p[5]  = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128);
+      y256p[6]  = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128);
+      y256p[7]  = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128);
+      y256p[8]  = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128);
+      y256p[9]  = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128);
+      y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128);
+      y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128);
+      y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128);
+      y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128);
+      y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128);
+      y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128);
+      y256p+=16;
+    }
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+#endif
+
+
+
+int16_t tw4096[3*2*1024] __attribute__((aligned(32))); // twiddle table read by dft4096/idft4096 through simd_q15_t / simd256_q15_t pointers, so it is 32-byte aligned like tw1024, tw2048 and tw8192
+
+#ifndef __AVX2__
+void dft4096(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft4096, 128-bit SIMD variant: four dft1024 sub-transforms merged by 256 bfly4 butterflies; reads x, writes y.
+  simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 256 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<1024; i+=4,j++) {
+    transpose16_ooff(x128+i,xtmp+j,256);
+  }
+  // NOTE(review): the argument 256 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff is defined outside this section).
+  // Transform each 256-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  dft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
+  dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
+  dft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1);
+  // Merge step: iteration i passes bfly4 four ytmp vectors and four y vectors spaced 256 apart, plus three twiddle vectors spaced 256 apart in tw4096.
+  for (i=0; i<256; i++) {
+    bfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768,
+          y128p,y128p+256,y128p+512,y128p+768,
+          tw4096_128p,tw4096_128p+256,tw4096_128p+512);
+    tw4096_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 64 iterations x 16 vectors = the 1024 vectors y[0..1023] addressed by the butterflies above.
+    for (i=0; i<64; i++) {
+      y128[0]  = shiftright_int16(y128[0],1);
+      y128[1]  = shiftright_int16(y128[1],1);
+      y128[2]  = shiftright_int16(y128[2],1);
+      y128[3]  = shiftright_int16(y128[3],1);
+      y128[4]  = shiftright_int16(y128[4],1);
+      y128[5]  = shiftright_int16(y128[5],1);
+      y128[6]  = shiftright_int16(y128[6],1);
+      y128[7]  = shiftright_int16(y128[7],1);
+      y128[8]  = shiftright_int16(y128[8],1);
+      y128[9]  = shiftright_int16(y128[9],1);
+      y128[10] = shiftright_int16(y128[10],1);
+      y128[11] = shiftright_int16(y128[11],1);
+      y128[12] = shiftright_int16(y128[12],1);
+      y128[13] = shiftright_int16(y128[13],1);
+      y128[14] = shiftright_int16(y128[14],1);
+      y128[15] = shiftright_int16(y128[15],1);
+
+      y128+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+ 
+
+void idft4096(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft4096, 128-bit SIMD variant: four idft1024 sub-transforms merged by 256 ibfly4 butterflies; reads x, writes y.
+  simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y;
+  simd_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 256 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<1024; i+=4,j++) {
+    transpose16_ooff(x128+i,xtmp+j,256);
+  }
+  // NOTE(review): the argument 256 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff is defined outside this section).
+  // Transform each 256-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  idft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
+  idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
+  idft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1);
+  // Merge step: iteration i passes ibfly4 four ytmp vectors and four y vectors spaced 256 apart, plus three twiddle vectors spaced 256 apart in tw4096.
+  for (i=0; i<256; i++) {
+    ibfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768,
+           y128p,y128p+256,y128p+512,y128p+768,
+           tw4096_128p,tw4096_128p+256,tw4096_128p+512);
+    tw4096_128p++;
+    y128p++;
+    ytmpp++;
+  }
+  // Here the shift count is scale itself, whereas dft4096 and the 256-bit idft4096 always shift by 1.
+  if (scale>0) {
+    // NOTE(review): results match the other variants only for scale==1; confirm which behaviour is intended for scale>1. 64 x 16 = y[0..1023].
+    for (i=0; i<64; i++) {
+      y128[0]  = shiftright_int16(y128[0],scale);
+      y128[1]  = shiftright_int16(y128[1],scale);
+      y128[2]  = shiftright_int16(y128[2],scale);
+      y128[3]  = shiftright_int16(y128[3],scale);
+      y128[4]  = shiftright_int16(y128[4],scale);
+      y128[5]  = shiftright_int16(y128[5],scale);
+      y128[6]  = shiftright_int16(y128[6],scale);
+      y128[7]  = shiftright_int16(y128[7],scale);
+      y128[8]  = shiftright_int16(y128[8],scale);
+      y128[9]  = shiftright_int16(y128[9],scale);
+      y128[10] = shiftright_int16(y128[10],scale);
+      y128[11] = shiftright_int16(y128[11],scale);
+      y128[12] = shiftright_int16(y128[12],scale);
+      y128[13] = shiftright_int16(y128[13],scale);
+      y128[14] = shiftright_int16(y128[14],scale);
+      y128[15] = shiftright_int16(y128[15],scale);
+
+      y128+=16;
+    }
+ 
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+#else //__AVX2__
+void dft4096(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // dft4096, 256-bit SIMD variant: four dft1024 sub-transforms merged by 128 bfly4_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[512],ytmp[512],*tw4096_256p=(simd256_q15_t *)tw4096,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 128 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<512; i+=4,j++) {
+    transpose16_ooff_simd256(x256+i,xtmp+j,128);
+  }
+  // NOTE(review): the argument 128 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff_simd256 is defined outside this section).
+  // Transform each 128-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  dft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  dft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  dft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
+  dft1024((int16_t*)(xtmp+384),(int16_t*)(ytmp+384),1);
+  // Merge step: iteration i passes bfly4_256 four ytmp vectors and four y vectors spaced 128 apart, plus three twiddle vectors spaced 128 apart in tw4096.
+  for (i=0; i<128; i++) {
+    bfly4_256(ytmpp,ytmpp+128,ytmpp+256,ytmpp+384,
+	      y256p,y256p+128,y256p+256,y256p+384,
+	      tw4096_256p,tw4096_256p+128,tw4096_256p+256);
+    tw4096_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 32 iterations x 16 vectors = the 512 vectors y[0..511] addressed by the butterflies above.
+    for (i=0; i<32; i++) {
+      y256[0]  = shiftright_int16_simd256(y256[0],1);
+      y256[1]  = shiftright_int16_simd256(y256[1],1);
+      y256[2]  = shiftright_int16_simd256(y256[2],1);
+      y256[3]  = shiftright_int16_simd256(y256[3],1);
+      y256[4]  = shiftright_int16_simd256(y256[4],1);
+      y256[5]  = shiftright_int16_simd256(y256[5],1);
+      y256[6]  = shiftright_int16_simd256(y256[6],1);
+      y256[7]  = shiftright_int16_simd256(y256[7],1);
+      y256[8]  = shiftright_int16_simd256(y256[8],1);
+      y256[9]  = shiftright_int16_simd256(y256[9],1);
+      y256[10] = shiftright_int16_simd256(y256[10],1);
+      y256[11] = shiftright_int16_simd256(y256[11],1);
+      y256[12] = shiftright_int16_simd256(y256[12],1);
+      y256[13] = shiftright_int16_simd256(y256[13],1);
+      y256[14] = shiftright_int16_simd256(y256[14],1);
+      y256[15] = shiftright_int16_simd256(y256[15],1);
+
+      y256+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+void idft4096(int16_t *x,int16_t *y,unsigned char scale)
+{
+  // idft4096, 256-bit SIMD variant: four idft1024 sub-transforms merged by 128 ibfly4_256 butterflies; reads x, writes y.
+  simd256_q15_t xtmp[512],ytmp[512],*tw4096_256p=(simd256_q15_t *)tw4096,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y;
+  simd256_q15_t *ytmpp = &ytmp[0];
+  int i,j;
+  // Input reordering, 128 calls: the input pointer advances 4 vectors per call, the output pointer 1.
+  for (i=0,j=0; i<512; i+=4,j++) {
+    transpose16_ooff_simd256(x256+i,xtmp+j,128);
+  }
+  // NOTE(review): the argument 128 equals the quarter size used below, presumably the stride between the helper's four outputs (transpose16_ooff_simd256 is defined outside this section).
+  // Transform each 128-vector quarter of xtmp into the matching quarter of ytmp, with the sub-transform's scaling enabled (last argument 1).
+  idft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1);
+  idft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1);
+  idft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1);
+  idft1024((int16_t*)(xtmp+384),(int16_t*)(ytmp+384),1);
+  // Merge step: iteration i passes ibfly4_256 four ytmp vectors and four y vectors spaced 128 apart, plus three twiddle vectors spaced 128 apart in tw4096.
+  for (i=0; i<128; i++) {
+    ibfly4_256(ytmpp,ytmpp+128,ytmpp+256,ytmpp+384,
+	       y256p,y256p+128,y256p+256,y256p+384,
+	       tw4096_256p,tw4096_256p+128,tw4096_256p+256);
+    tw4096_256p++;
+    y256p++;
+    ytmpp++;
+  }
+  // scale is only tested for >0; its magnitude is ignored and every output vector is shifted right by exactly one bit.
+  if (scale>0) {
+    // 32 iterations x 16 vectors = the 512 vectors y[0..511] addressed by the butterflies above.
+    for (i=0; i<32; i++) {
+      y256[0]  = shiftright_int16_simd256(y256[0],1);
+      y256[1]  = shiftright_int16_simd256(y256[1],1);
+      y256[2]  = shiftright_int16_simd256(y256[2],1);
+      y256[3]  = shiftright_int16_simd256(y256[3],1);
+      y256[4]  = shiftright_int16_simd256(y256[4],1);
+      y256[5]  = shiftright_int16_simd256(y256[5],1);
+      y256[6]  = shiftright_int16_simd256(y256[6],1);
+      y256[7]  = shiftright_int16_simd256(y256[7],1);
+      y256[8]  = shiftright_int16_simd256(y256[8],1);
+      y256[9]  = shiftright_int16_simd256(y256[9],1);
+      y256[10] = shiftright_int16_simd256(y256[10],1);
+      y256[11] = shiftright_int16_simd256(y256[11],1);
+      y256[12] = shiftright_int16_simd256(y256[12],1);
+      y256[13] = shiftright_int16_simd256(y256[13],1);
+      y256[14] = shiftright_int16_simd256(y256[14],1);
+      y256[15] = shiftright_int16_simd256(y256[15],1);
+
+      y256+=16;
+    }
+
+  }
+
+  _mm_empty(); // EMMS intrinsic; _m_empty below is the alternate spelling of the same intrinsic
+  _m_empty();
+
+}
+
+#endif //__AVX2__
+
+
+int16_t tw8192[2*4096] __attribute__((aligned(32))); // twiddle table read by dft8192 through a simd_q15_t pointer, one vector per bfly2 call; not initialised in this section
+
+#ifndef __AVX2__
+/*
+ * dft8192 (build without __AVX2__)
+ *
+ * Computed as two dft4096() calls on the two halves of a re-ordered copy of
+ * the input, merged by 1024 bfly2() butterflies that use the tw8192 table.
+ *
+ *   x     : input samples (read as simdshort_q15_t vectors)
+ *   y     : output buffer (written as 2048 simd_q15_t vectors)
+ *   scale : when > 0, every output vector is multiplied by
+ *           ONE_OVER_SQRT2_Q15 with mulhi_int16()
+ */
+void dft8192(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simdshort_q15_t xtmp[4096];
+  // Holds BOTH dft4096() results: the second call writes starting at
+  // ytmp+1024 and bfly2() reads ytmp[i+1024] for i<1024, so 2048 entries are
+  // needed (the previous size of 1024 overflowed the stack buffer; idft8192
+  // already uses 2048).
+  simd_q15_t ytmp[2048];
+  simdshort_q15_t *x64 = (simdshort_q15_t *)x;
+  simd_q15_t *tw8192_128p = (simd_q15_t *)tw8192;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int i;
+
+  // Re-order the input: input vector pair i feeds xtmp[i] and, through the
+  // 2048 offset argument, the second half of xtmp.
+  // NOTE(review): presumably an even/odd sample split; transpose4_ooff is
+  // defined outside this section - confirm.
+  for (i=0; i<2048; i++) {
+    transpose4_ooff(x64+2*i,xtmp+i,2048);
+  }
+
+  // Two half-size transforms, always called with scale argument 1.
+  dft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1);
+
+  // Merge the two halves into y.
+  for (i=0; i<1024; i++) {
+    bfly2(ytmp+i,ytmp+1024+i,
+          y128+i,y128+1024+i,
+          tw8192_128p+i);
+  }
+
+  if (scale>0) {
+    for (i=0; i<2048; i++) {
+      y128[i] = mulhi_int16(y128[i],ONE_OVER_SQRT2_Q15_128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * idft8192 (build without __AVX2__)
+ *
+ * Inverse counterpart of dft8192: two idft4096() calls on a re-ordered copy
+ * of the input, merged by 1024 ibfly2() butterflies using tw8192.
+ *
+ *   x     : input samples (read as simdshort_q15_t vectors)
+ *   y     : output buffer (written as 2048 simd_q15_t vectors)
+ *   scale : when > 0, every output vector is multiplied by
+ *           ONE_OVER_SQRT2_Q15 with mulhi_int16()
+ */
+void idft8192(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simdshort_q15_t xtmp[4096];
+  simd_q15_t ytmp[2048];
+  simdshort_q15_t *src = (simdshort_q15_t *)x;
+  simd_q15_t *twiddle = (simd_q15_t *)tw8192;
+  simd_q15_t *out = (simd_q15_t *)y;
+  simd_q15_t inv_sqrt2 = set1_int16(ONE_OVER_SQRT2_Q15);
+  int n;
+
+  // Input vector pair n goes to xtmp[n] and (via the 2048 offset argument)
+  // to the second half of xtmp.
+  for (n=0; n<2048; n++)
+    transpose4_ooff(src+2*n,xtmp+n,2048);
+
+  // Half-size inverse transforms, always called with scale argument 1.
+  idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1);
+
+  // Combine ytmp[n] and ytmp[n+1024] into out[n] and out[n+1024].
+  for (n=0; n<1024; n++)
+    ibfly2(ytmp+n,ytmp+1024+n,out+n,out+1024+n,twiddle+n);
+
+  if (scale>0) {
+    for (n=0; n<2048; n++)
+      out[n] = mulhi_int16(out[n],inv_sqrt2);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+#else // __AVX2__
+/*
+ * dft8192 (__AVX2__ build)
+ *
+ * Same structure as the non-AVX2 variant but on simd256_q15_t vectors:
+ * re-order the input into two halves of xtmp, run dft4096() on each half,
+ * then merge with 512 bfly2_256() butterflies using tw8192.
+ *
+ *   x     : input samples (read as 1024 simd256_q15_t vectors)
+ *   y     : output buffer (written as 1024 simd256_q15_t vectors)
+ *   scale : when > 0, every output vector is multiplied by
+ *           ONE_OVER_SQRT2_Q15 with mulhi_int16_simd256()
+ */
+void dft8192(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simd256_q15_t xtmp[1024];
+  simd256_q15_t ytmp[1024];
+  simd256_q15_t *src = (simd256_q15_t *)x;
+  simd256_q15_t *twiddle = (simd256_q15_t *)tw8192;
+  simd256_q15_t *out = (simd256_q15_t *)y;
+  simd256_q15_t inv_sqrt2 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  int n;
+
+  // Input vector pair n goes to xtmp[n] and (via the 512 offset argument)
+  // to the second half of xtmp.
+  for (n=0; n<512; n++)
+    transpose4_ooff_simd256(src+2*n,xtmp+n,512);
+
+  // Half-size transforms, always called with scale argument 1.
+  dft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  dft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
+
+  // Combine ytmp[n] and ytmp[n+512] into out[n] and out[n+512].
+  for (n=0; n<512; n++)
+    bfly2_256(ytmp+n,ytmp+512+n,out+n,out+512+n,twiddle+n);
+
+  if (scale>0) {
+    for (n=0; n<1024; n++)
+      out[n] = mulhi_int16_simd256(out[n],inv_sqrt2);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * idft8192 (__AVX2__ build)
+ *
+ * Inverse transform on simd256_q15_t vectors: re-order the input into two
+ * halves of xtmp, run idft4096() on each half, then merge with 512
+ * ibfly2_256() butterflies using tw8192.
+ *
+ *   x     : input samples (read as 1024 simd256_q15_t vectors)
+ *   y     : output buffer (written as 1024 simd256_q15_t vectors)
+ *   scale : when > 0, every output vector is multiplied by
+ *           ONE_OVER_SQRT2_Q15 with mulhi_int16_simd256()
+ */
+void idft8192(int16_t *x,int16_t *y,unsigned char scale)
+{
+  simd256_q15_t xtmp[1024];
+  simd256_q15_t ytmp[1024];
+  simd256_q15_t *src = (simd256_q15_t *)x;
+  simd256_q15_t *twiddle = (simd256_q15_t *)tw8192;
+  simd256_q15_t *out = (simd256_q15_t *)y;
+  simd256_q15_t inv_sqrt2 = set1_int16_simd256(ONE_OVER_SQRT2_Q15);
+  int n;
+
+  // Input vector pair n goes to xtmp[n] and (via the 512 offset argument)
+  // to the second half of xtmp.
+  for (n=0; n<512; n++)
+    transpose4_ooff_simd256(src+2*n,xtmp+n,512);
+
+  // Half-size inverse transforms, always called with scale argument 1.
+  idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1);
+  idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1);
+
+  // Combine ytmp[n] and ytmp[n+512] into out[n] and out[n+512].
+  for (n=0; n<512; n++)
+    ibfly2_256(ytmp+n,ytmp+512+n,out+n,out+512+n,twiddle+n);
+
+  if (scale>0) {
+    for (n=0; n<1024; n++)
+      out[n] = mulhi_int16_simd256(out[n],inv_sqrt2);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+#endif
+
+// Twiddle tables for dft1536()/idft1536(). Both are accessed through
+// simd_q15_t pointers, so they get the same explicit alignment as the other
+// twiddle tables in this file instead of relying on the compiler default.
+int16_t twa1536[1024] __attribute__((aligned(32)));
+int16_t twb1536[1024] __attribute__((aligned(32)));
+
+// 512 x 3
+/*
+ * idft1536: inverse transform built as 3 x idft512() followed by ibfly3().
+ *
+ *   input  : 1536 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 512-entry rows
+ *   output : receives three consecutive 1024-int16 sections
+ *   scale  : when == 1, all 384 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ */
+void idft1536(int16_t *input, int16_t *output, unsigned char scale)
+{
+  uint32_t tmp[3][512] __attribute__((aligned(32)));
+  uint32_t tmpo[3][512] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<512; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 512-point inverse transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++)
+    idft512((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<128; n++) {
+    ibfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+           (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+1024+8*n),(simd_q15_t*)(output+2048+8*n),
+           (simd_q15_t*)(twa1536+8*n),(simd_q15_t*)(twb1536+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<384; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * dft1536: forward transform built as 3 x dft512() followed by bfly3().
+ *
+ *   input  : 1536 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 512-entry rows
+ *   output : receives three consecutive 1024-int16 sections
+ *   scale  : when == 1, all 384 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ */
+void dft1536(int16_t *input, int16_t *output, unsigned char scale)
+{
+  int i,k;
+  uint32_t tmp[3][512] __attribute__((aligned(32)));
+  uint32_t tmpo[3][512] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *y128p=(simd_q15_t*)output;
+  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
+
+  // Word 3*i+k of the input goes to row k, position i.
+  for (i=0; i<512; i++) {
+    for (k=0; k<3; k++) {
+      tmp[k][i] = in32[3*i+k];
+    }
+  }
+
+  // One 512-point transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++) {
+    dft512((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    // Each tmpo row has 512 entries. The length used to be 2048 (the row
+    // length of the 6144-point variant), which read past the end of tmpo.
+    LOG_M("dft1536out0.m","o0",tmpo[0],512,1,1);
+    LOG_M("dft1536out1.m","o1",tmpo[1],512,1,1);
+    LOG_M("dft1536out2.m","o2",tmpo[2],512,1,1);
+  }
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (i=0; i<128; i++) {
+    bfly3((simd_q15_t*)(&tmpo[0][4*i]),(simd_q15_t*)(&tmpo[1][4*i]),(simd_q15_t*)(&tmpo[2][4*i]),
+          (simd_q15_t*)(output+8*i),(simd_q15_t*)(output+1024+8*i),(simd_q15_t*)(output+2048+8*i),
+          (simd_q15_t*)(twa1536+8*i),(simd_q15_t*)(twb1536+8*i));
+  }
+
+  if (scale==1) {
+    for (i=0; i<384; i++) {
+      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+int16_t twa3072[2048] __attribute__((aligned(32))); // first twiddle table passed to bfly3()/ibfly3() by dft3072()/idft3072()
+int16_t twb3072[2048] __attribute__((aligned(32))); // second twiddle table passed to bfly3()/ibfly3() by dft3072()/idft3072()
+// 1024 x 3
+/*
+ * dft3072: forward transform built as 3 x dft1024() followed by bfly3().
+ *
+ *   input  : 3072 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 1024-entry rows
+ *   output : receives three consecutive 2048-int16 sections
+ *   scale  : when == 1, all 768 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ */
+void dft3072(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t tmp[3][1024] __attribute__((aligned(32)));
+  uint32_t tmpo[3][1024] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<1024; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 1024-point transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++)
+    dft1024((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<256; n++) {
+    bfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+          (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+2048+8*n),(simd_q15_t*)(output+4096+8*n),
+          (simd_q15_t*)(twa3072+8*n),(simd_q15_t*)(twb3072+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<768; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+/*
+ * idft3072: inverse transform built as 3 x idft1024() followed by ibfly3().
+ *
+ *   input  : 3072 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 1024-entry rows
+ *   output : receives three consecutive 2048-int16 sections
+ *   scale  : when == 1, all 768 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ */
+void idft3072(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t tmp[3][1024] __attribute__((aligned(32)));
+  uint32_t tmpo[3][1024] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<1024; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 1024-point inverse transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++)
+    idft1024((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<256; n++) {
+    ibfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+           (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+2048+8*n),(simd_q15_t*)(output+4096+8*n),
+           (simd_q15_t*)(twa3072+8*n),(simd_q15_t*)(twb3072+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<768; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+
+int16_t twa6144[4096] __attribute__((aligned(32))); // first twiddle table passed to bfly3()/ibfly3() by dft6144()/idft6144()
+int16_t twb6144[4096] __attribute__((aligned(32))); // second twiddle table passed to bfly3()/ibfly3() by dft6144()/idft6144()
+
+/*
+ * idft6144: inverse transform built as 3 x idft2048() followed by ibfly3().
+ *
+ *   input  : 6144 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 2048-entry rows
+ *   output : receives three consecutive 4096-int16 sections
+ *   scale  : when == 1, all 1536 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ *
+ * When LOG_DUMPFLAG(DEBUG_DFT) is set, the input and the three intermediate
+ * rows are dumped with LOG_M().
+ */
+void idft6144(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t tmp[3][2048] __attribute__((aligned(32)));
+  uint32_t tmpo[3][2048] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<2048; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 2048-point inverse transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++)
+    idft2048((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("idft6144in.m","in",input,6144,1,1);
+    LOG_M("idft6144out0.m","o0",tmpo[0],2048,1,1);
+    LOG_M("idft6144out1.m","o1",tmpo[1],2048,1,1);
+    LOG_M("idft6144out2.m","o2",tmpo[2],2048,1,1);
+  }
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<512; n++) {
+    ibfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+           (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+4096+8*n),(simd_q15_t*)(output+8192+8*n),
+           (simd_q15_t*)(twa6144+8*n),(simd_q15_t*)(twb6144+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<1536; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+/*
+ * dft6144: forward transform built as 3 x dft2048() followed by bfly3().
+ *
+ *   input  : 6144 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 2048-entry rows
+ *   output : receives three consecutive 4096-int16 sections
+ *   scale  : when == 1, all 1536 output vectors are multiplied by
+ *            ONE_OVER_SQRT3_Q15 with mulhi_int16(); other values skip this
+ *
+ * When LOG_DUMPFLAG(DEBUG_DFT) is set, the three intermediate rows are
+ * dumped with LOG_M().
+ */
+void dft6144(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t tmp[3][2048] __attribute__((aligned(32)));
+  uint32_t tmpo[3][2048] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<2048; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 2048-point transform per row (scale argument fixed to 1).
+  for (k=0; k<3; k++)
+    dft2048((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),1);
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    // NOTE(review): file names start with "ft", not "dft" like the other
+    // forward transforms - looks like a typo, left untouched here.
+    LOG_M("ft6144out0.m","o0",tmpo[0],2048,1,1);
+    LOG_M("ft6144out1.m","o1",tmpo[1],2048,1,1);
+    LOG_M("ft6144out2.m","o2",tmpo[2],2048,1,1);
+  }
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<512; n++) {
+    bfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+          (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+4096+8*n),(simd_q15_t*)(output+8192+8*n),
+          (simd_q15_t*)(twa6144+8*n),(simd_q15_t*)(twb6144+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<1536; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+int16_t twa9216[6144] __attribute__((aligned(32))); // table reserved for the 9216-point transform; not referenced by the dft9216()/idft9216() placeholders
+int16_t twb9216[6144] __attribute__((aligned(32))); // second table reserved for the 9216-point transform; likewise unreferenced by the placeholders
+// 3072 x 3
+void dft9216(int16_t *input, int16_t *output,uint8_t scale) { // placeholder: input, output and scale are never used
+  // Not implemented: the body is a single AssertFatal whose condition (1==0) is always false, so any call trips the assertion.
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+void idft9216(int16_t *input, int16_t *output,uint8_t scale) { // placeholder: input, output and scale are never used
+  // Not implemented: the body is a single AssertFatal whose condition (1==0) is always false, so any call trips the assertion.
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+int16_t twa12288[8192] __attribute__((aligned(32))); // first twiddle table passed to bfly3()/ibfly3() by dft12288()/idft12288()
+int16_t twb12288[8192] __attribute__((aligned(32))); // second twiddle table passed to bfly3()/ibfly3() by dft12288()/idft12288()
+// 4096 x 3
+/*
+ * dft12288: forward transform built as 3 x dft4096() followed by bfly3().
+ *
+ *   input  : 12288 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 4096-entry rows
+ *   output : receives three consecutive 8192-int16 sections
+ *   scale  : forwarded unchanged to dft4096(); additionally, when == 1, all
+ *            3072 output vectors are multiplied by ONE_OVER_SQRT3_Q15 with
+ *            mulhi_int16()
+ *
+ * NOTE(review): the smaller 3 x N transforms in this file call their
+ * sub-transform with a fixed scale of 1, this one passes `scale` through -
+ * confirm that the difference is intended.
+ *
+ * When LOG_DUMPFLAG(DEBUG_DFT) is set, the three intermediate rows are
+ * dumped with LOG_M().
+ */
+void dft12288(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t tmp[3][4096] __attribute__((aligned(32)));
+  uint32_t tmpo[3][4096] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t*)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n,k;
+
+  // Word 3*n+k of the input goes to row k, position n.
+  for (n=0; n<4096; n++) {
+    for (k=0; k<3; k++)
+      tmp[k][n] = in32[3*n+k];
+  }
+
+  // One 4096-point transform per row.
+  for (k=0; k<3; k++)
+    dft4096((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),scale);
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("dft12288out0.m","o0",tmpo[0],4096,1,1);
+    LOG_M("dft12288out1.m","o1",tmpo[1],4096,1,1);
+    LOG_M("dft12288out2.m","o2",tmpo[2],4096,1,1);
+  }
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (n=0; n<1024; n++) {
+    bfly3((simd_q15_t*)(&tmpo[0][4*n]),(simd_q15_t*)(&tmpo[1][4*n]),(simd_q15_t*)(&tmpo[2][4*n]),
+          (simd_q15_t*)(output+8*n),(simd_q15_t*)(output+8192+8*n),(simd_q15_t*)(output+16384+8*n),
+          (simd_q15_t*)(twa12288+8*n),(simd_q15_t*)(twb12288+8*n));
+  }
+
+  if (scale==1) {
+    for (n=0; n<3072; n++)
+      out128[n] = mulhi_int16(out128[n],inv_sqrt3);
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * idft12288: inverse transform built as 3 x idft4096() followed by ibfly3().
+ *
+ *   input  : 12288 32-bit words (two int16 each), de-interleaved modulo 3
+ *            into three 4096-entry rows
+ *   output : receives three consecutive 8192-int16 sections
+ *   scale  : forwarded unchanged to idft4096(); additionally, when == 1, all
+ *            3072 output vectors are multiplied by ONE_OVER_SQRT3_Q15 with
+ *            mulhi_int16()
+ *
+ * When LOG_DUMPFLAG(DEBUG_DFT) is set, the input, the three intermediate
+ * rows and the final output are dumped with LOG_M().
+ */
+void idft12288(int16_t *input, int16_t *output,unsigned char scale)
+{
+  int i,k;
+  uint32_t tmp[3][4096] __attribute__((aligned(32)));
+  uint32_t tmpo[3][4096] __attribute__((aligned(32)));
+  uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *y128p=(simd_q15_t*)output;
+  simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15);
+
+  // Word 3*i+k of the input goes to row k, position i.
+  for (i=0; i<4096; i++) {
+    for (k=0; k<3; k++) {
+      tmp[k][i] = in32[3*i+k];
+    }
+  }
+
+  // One 4096-point inverse transform per row.
+  for (k=0; k<3; k++) {
+    idft4096((int16_t*)(tmp[k]),(int16_t*)(tmpo[k]),scale);
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("idft12288in.m","in",input,12288,1,1);
+    LOG_M("idft12288out0.m","o0",tmpo[0],4096,1,1);
+    LOG_M("idft12288out1.m","o1",tmpo[1],4096,1,1);
+    LOG_M("idft12288out2.m","o2",tmpo[2],4096,1,1);
+  }
+
+  // Each step consumes 4 words from every row and 8 int16 of each table.
+  for (i=0; i<1024; i++) {
+    ibfly3((simd_q15_t*)(&tmpo[0][4*i]),(simd_q15_t*)(&tmpo[1][4*i]),(simd_q15_t*)(&tmpo[2][4*i]),
+           (simd_q15_t*)(output+8*i),(simd_q15_t*)(output+8192+8*i),(simd_q15_t*)(output+16384+8*i),
+           (simd_q15_t*)(twa12288+8*i),(simd_q15_t*)(twb12288+8*i));
+  }
+
+  if (scale==1) {
+    for (i=0; i<3072; i++) {
+      y128p[i] = mulhi_int16(y128p[i],ONE_OVER_SQRT3_Q15_128);
+    }
+  }
+  _mm_empty();
+  _m_empty();
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+     // Dump the whole result: the length was 6144, i.e. only the first half
+     // of the 12288 entries written above (the input dump already uses 12288).
+     LOG_M("idft12288out.m","out",output,12288,1,1);
+  }
+}
+
+// Twiddle tables read by the radix-3 stage of dft18432()/idft18432().
+// Each holds 12288 int16 and is indexed in steps of 8 int16 (one SIMD vector).
+// Nothing in this part of the file fills them -- presumably an init routine
+// elsewhere does; verify.
+int16_t twa18432[12288] __attribute__((aligned(32)));
+int16_t twb18432[12288] __attribute__((aligned(32)));
+// 6144 x 3
+// 18432-point transform: three 6144-point DFTs on the de-interleaved input,
+// recombined by a radix-3 butterfly stage.
+//   input  : read as 18432 32-bit words (two int16 each)
+//   output : 36864 int16 are written
+//   scale  : forwarded to dft6144(); when equal to 1 the whole output is also
+//            passed through mulhi_int16() with ONE_OVER_SQRT3_Q15
+void dft18432(int16_t *input, int16_t *output,unsigned char scale) {
+  uint32_t sub_in[3][6144] __attribute__((aligned(32)));
+  uint32_t sub_out[3][6144] __attribute__((aligned(32)));
+  const uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t *)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n, b, k, k2;
+
+  // De-interleave: 32-bit word 3n+b of the input goes to branch b.
+  for (n = 0; n < 6144; n++) {
+    for (b = 0; b < 3; b++) {
+      sub_in[b][n] = in32[3 * n + b];
+    }
+  }
+
+  for (b = 0; b < 3; b++) {
+    dft6144((int16_t *)sub_in[b], (int16_t *)sub_out[b], scale);
+  }
+
+  // Radix-3 recombination; k walks int16 offsets, k2 the matching 32-bit ones.
+  for (k = 0, k2 = 0; k < 12288; k += 8, k2 += 4) {
+    bfly3((simd_q15_t *)&sub_out[0][k2],
+          (simd_q15_t *)&sub_out[1][k2],
+          (simd_q15_t *)&sub_out[2][k2],
+          (simd_q15_t *)&output[k],
+          (simd_q15_t *)&output[12288 + k],
+          (simd_q15_t *)&output[24576 + k],
+          (simd_q15_t *)&twa18432[k],
+          (simd_q15_t *)&twb18432[k]);
+  }
+
+  if (scale == 1) {
+    // 288 * 16 = 4608 vectors, i.e. the full 36864-int16 output.
+    for (k = 0; k < 4608; k++) {
+      out128[k] = mulhi_int16(out128[k], inv_sqrt3);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+// Inverse counterpart of the 18432-point transform: three 6144-point IDFTs on
+// the de-interleaved input, recombined by ibfly3().
+//   input  : read as 18432 32-bit words (two int16 each)
+//   output : 36864 int16 are written
+//   scale  : forwarded to idft6144(); when equal to 1 the whole output is also
+//            passed through mulhi_int16() with ONE_OVER_SQRT3_Q15
+void idft18432(int16_t *input, int16_t *output,unsigned char scale) {
+  uint32_t sub_in[3][6144] __attribute__((aligned(32)));
+  uint32_t sub_out[3][6144] __attribute__((aligned(32)));
+  const uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t *)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n, b, k, k2;
+
+  // De-interleave: 32-bit word 3n+b of the input goes to branch b.
+  for (n = 0; n < 6144; n++) {
+    for (b = 0; b < 3; b++) {
+      sub_in[b][n] = in32[3 * n + b];
+    }
+  }
+
+  for (b = 0; b < 3; b++) {
+    idft6144((int16_t *)sub_in[b], (int16_t *)sub_out[b], scale);
+  }
+
+  // Radix-3 recombination; k walks int16 offsets, k2 the matching 32-bit ones.
+  for (k = 0, k2 = 0; k < 12288; k += 8, k2 += 4) {
+    ibfly3((simd_q15_t *)&sub_out[0][k2],
+           (simd_q15_t *)&sub_out[1][k2],
+           (simd_q15_t *)&sub_out[2][k2],
+           (simd_q15_t *)&output[k],
+           (simd_q15_t *)&output[12288 + k],
+           (simd_q15_t *)&output[24576 + k],
+           (simd_q15_t *)&twa18432[k],
+           (simd_q15_t *)&twb18432[k]);
+  }
+
+  if (scale == 1) {
+    // 288 * 16 = 4608 vectors, i.e. the full 36864-int16 output.
+    for (k = 0; k < 4608; k++) {
+      out128[k] = mulhi_int16(out128[k], inv_sqrt3);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+
+// Twiddle tables read by the radix-3 stage of dft24576()/idft24576().
+// Each holds 16384 int16 and is indexed in steps of 8 int16 (one SIMD vector).
+// Nothing in this part of the file fills them -- presumably an init routine
+// elsewhere does; verify.
+int16_t twa24576[16384] __attribute__((aligned(32)));
+int16_t twb24576[16384] __attribute__((aligned(32)));
+// 8192 x 3
+// 24576-point transform: three 8192-point DFTs on the de-interleaved input,
+// recombined by a radix-3 butterfly stage.
+//   input  : read as 24576 32-bit words (two int16 each)
+//   output : 49152 int16 are written
+//   scale  : when equal to 1 the whole output is passed through mulhi_int16()
+//            with ONE_OVER_SQRT3_Q15.  It is NOT forwarded to dft8192(), which
+//            is always called with its scale flag set to 1.
+// Intermediate and final results are dumped with LOG_M when DEBUG_DFT is on.
+void dft24576(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t sub_in[3][8192] __attribute__((aligned(32)));
+  uint32_t sub_out[3][8192] __attribute__((aligned(32)));
+  const uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t *)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n, b, k, k2;
+
+  // De-interleave: 32-bit word 3n+b of the input goes to branch b.
+  for (n = 0; n < 8192; n++) {
+    for (b = 0; b < 3; b++) {
+      sub_in[b][n] = in32[3 * n + b];
+    }
+  }
+
+  for (b = 0; b < 3; b++) {
+    dft8192((int16_t *)sub_in[b], (int16_t *)sub_out[b], 1);
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("dft24576out0.m","o0",sub_out[0],8192,1,1);
+    LOG_M("dft24576out1.m","o1",sub_out[1],8192,1,1);
+    LOG_M("dft24576out2.m","o2",sub_out[2],8192,1,1);
+  }
+
+  // Radix-3 recombination; k walks int16 offsets, k2 the matching 32-bit ones.
+  for (k = 0, k2 = 0; k < 16384; k += 8, k2 += 4) {
+    bfly3((simd_q15_t *)&sub_out[0][k2],
+          (simd_q15_t *)&sub_out[1][k2],
+          (simd_q15_t *)&sub_out[2][k2],
+          (simd_q15_t *)&output[k],
+          (simd_q15_t *)&output[16384 + k],
+          (simd_q15_t *)&output[32768 + k],
+          (simd_q15_t *)&twa24576[k],
+          (simd_q15_t *)&twb24576[k]);
+  }
+
+  if (scale == 1) {
+    // 384 * 16 = 6144 vectors, i.e. the full 49152-int16 output.
+    for (k = 0; k < 6144; k++) {
+      out128[k] = mulhi_int16(out128[k], inv_sqrt3);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("out.m","out",output,24576,1,1);
+  }
+}
+
+// Inverse counterpart of the 24576-point transform: three 8192-point IDFTs on
+// the de-interleaved input, recombined by ibfly3().
+//   input  : read as 24576 32-bit words (two int16 each)
+//   output : 49152 int16 are written
+//   scale  : when equal to 1 the whole output is passed through mulhi_int16()
+//            with ONE_OVER_SQRT3_Q15.  It is NOT forwarded to idft8192(), which
+//            is always called with its scale flag set to 1.
+// Input, intermediate and final data are dumped with LOG_M when DEBUG_DFT is on.
+void idft24576(int16_t *input, int16_t *output,unsigned char scale)
+{
+  uint32_t sub_in[3][8192] __attribute__((aligned(32)));
+  uint32_t sub_out[3][8192] __attribute__((aligned(32)));
+  const uint32_t *in32 = (uint32_t *)input;
+  simd_q15_t *out128 = (simd_q15_t *)output;
+  simd_q15_t inv_sqrt3 = set1_int16(ONE_OVER_SQRT3_Q15);
+  int n, b, k, k2;
+
+  // De-interleave: 32-bit word 3n+b of the input goes to branch b.
+  for (n = 0; n < 8192; n++) {
+    for (b = 0; b < 3; b++) {
+      sub_in[b][n] = in32[3 * n + b];
+    }
+  }
+
+  for (b = 0; b < 3; b++) {
+    idft8192((int16_t *)sub_in[b], (int16_t *)sub_out[b], 1);
+  }
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("idft24576in.m","in",input,24576,1,1);
+    LOG_M("idft24576out0.m","o0",sub_out[0],8192,1,1);
+    LOG_M("idft24576out1.m","o1",sub_out[1],8192,1,1);
+    LOG_M("idft24576out2.m","o2",sub_out[2],8192,1,1);
+  }
+
+  // Radix-3 recombination; k walks int16 offsets, k2 the matching 32-bit ones.
+  for (k = 0, k2 = 0; k < 16384; k += 8, k2 += 4) {
+    ibfly3((simd_q15_t *)&sub_out[0][k2],
+           (simd_q15_t *)&sub_out[1][k2],
+           (simd_q15_t *)&sub_out[2][k2],
+           (simd_q15_t *)&output[k],
+           (simd_q15_t *)&output[16384 + k],
+           (simd_q15_t *)&output[32768 + k],
+           (simd_q15_t *)&twa24576[k],
+           (simd_q15_t *)&twb24576[k]);
+  }
+
+  if (scale == 1) {
+    // 384 * 16 = 6144 vectors, i.e. the full 49152-int16 output.
+    for (k = 0; k < 6144; k++) {
+      out128[k] = mulhi_int16(out128[k], inv_sqrt3);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+  if (LOG_DUMPFLAG(DEBUG_DFT)) {
+    LOG_M("idft24576out.m","out",output,24576,1,1);
+  }
+}
+
+// Twiddle storage reserved for the (not yet implemented) 36864-point transform.
+// NOTE(review): "twb36884" looks like a typo for "twb36864".  It is a global
+// symbol, so check for external references before renaming.
+int16_t twa36864[24576] __attribute__((aligned(32)));
+int16_t twb36884[24576] __attribute__((aligned(32)));
+// 12288 x 3
+// Placeholder for the 36864-point DFT: not implemented.  All arguments are
+// ignored and the always-false AssertFatal fires on every call.
+void dft36864(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+// Placeholder for the 36864-point inverse DFT: not implemented.  All arguments
+// are ignored and the always-false AssertFatal fires on every call.
+void idft36864(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+// Twiddle storage reserved for the (not yet implemented) 49152-point transform.
+int16_t twa49152[32768] __attribute__((aligned(32)));
+int16_t twb49152[32768] __attribute__((aligned(32)));
+// 16384 x 3
+// Placeholder for the 49152-point DFT: not implemented.  All arguments are
+// ignored and the always-false AssertFatal fires on every call.
+void dft49152(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+// Placeholder for the 49152-point inverse DFT: not implemented.  All arguments
+// are ignored and the always-false AssertFatal fires on every call.
+void idft49152(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+// Twiddle storage reserved for the (not yet implemented) 73728-point transform.
+int16_t twa73728[49152] __attribute__((aligned(32)));
+int16_t twb73728[49152] __attribute__((aligned(32)));
+// 24576 x 3
+// Placeholder for the 73728-point DFT: not implemented.  All arguments are
+// ignored and the always-false AssertFatal fires on every call.
+void dft73728(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+// Placeholder for the 73728-point inverse DFT: not implemented.  All arguments
+// are ignored and the always-false AssertFatal fires on every call.
+void idft73728(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+
+// Twiddle storage reserved for the (not yet implemented) 98304-point transform.
+// NOTE(review): the other x3 tables in this file hold 2 int16 per point of the
+// sub-transform (e.g. 16384 for 8192 x 3); by that pattern 32768 x 3 would need
+// 65536 entries, not 49152 -- confirm the size before implementing.
+int16_t twa98304[49152] __attribute__((aligned(32)));
+int16_t twb98304[49152] __attribute__((aligned(32)));
+// 32768 x 3
+// Placeholder for the 98304-point DFT: not implemented.  All arguments are
+// ignored and the always-false AssertFatal fires on every call.
+void dft98304(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+// Placeholder for the 98304-point inverse DFT: not implemented.  All arguments
+// are ignored and the always-false AssertFatal fires on every call.
+void idft98304(int16_t *input, int16_t *output,uint8_t scale) {
+
+  AssertFatal(1==0,"Need to do this ..\n");
+}
+
+ 
+///  THIS SECTION IS FOR ALL PUSCH DFTS (i.e. radix 2^a * 3^b * 4^c * 5^d)
+///  They use twiddles for 4-way parallel DFTS (i.e. 4 DFTS with interleaved input/output)
+
+// Q15 twiddles W_12^k = exp(-j*2*pi*k/12) for k = 1, 2, 3, 4, 6, stored as
+// (re, im) int16 pairs.  Each pair is repeated four times so that one vector
+// applies the same factor to all four interleaved DFTs.
+static int16_t W1_12s[8]__attribute__((aligned(32))) = {28377,-16383,28377,-16383,28377,-16383,28377,-16383};
+static int16_t W2_12s[8]__attribute__((aligned(32))) = {16383,-28377,16383,-28377,16383,-28377,16383,-28377};
+static int16_t W3_12s[8]__attribute__((aligned(32))) = {0,-32767,0,-32767,0,-32767,0,-32767};
+static int16_t W4_12s[8]__attribute__((aligned(32))) = {-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377};
+static int16_t W6_12s[8]__attribute__((aligned(32))) = {-32767,0,-32767,0,-32767,0,-32767,0};
+
+// SIMD-typed views of the tables above, as passed to bfly3() by dft12f().
+simd_q15_t *W1_12=(simd_q15_t *)W1_12s;
+simd_q15_t *W2_12=(simd_q15_t *)W2_12s;
+simd_q15_t *W3_12=(simd_q15_t *)W3_12s;
+simd_q15_t *W4_12=(simd_q15_t *)W4_12s;
+simd_q15_t *W6_12=(simd_q15_t *)W6_12s;
+
+
+// File-scope scratch vector for a broadcast output-scaling factor
+// (set1_int16(dft_norm_table[n])).
+// NOTE(review): being shared and mutable, it looks unsafe if DFT routines that
+// write it can run concurrently -- confirm.
+static simd_q15_t norm128;
+
+// 12-point DFT kernel on SIMD vectors, built as 4 x 3:
+// three radix-4 butterflies (bfly4_tw1) followed by four radix-3 butterflies,
+// the last three of which apply the W*_12 twiddles.
+//   x0..x11 : the twelve input vectors
+//   y0..y11 : the twelve output vectors
+// The always_inline attribute is attached directly to the definition.
+static inline __attribute__((always_inline))
+void dft12f(simd_q15_t *x0, simd_q15_t *x1, simd_q15_t *x2, simd_q15_t *x3,
+            simd_q15_t *x4, simd_q15_t *x5, simd_q15_t *x6, simd_q15_t *x7,
+            simd_q15_t *x8, simd_q15_t *x9, simd_q15_t *x10, simd_q15_t *x11,
+            simd_q15_t *y0, simd_q15_t *y1, simd_q15_t *y2, simd_q15_t *y3,
+            simd_q15_t *y4, simd_q15_t *y5, simd_q15_t *y6, simd_q15_t *y7,
+            simd_q15_t *y8, simd_q15_t *y9, simd_q15_t *y10, simd_q15_t *y11)
+{
+  simd_q15_t stage[12];
+
+  // Radix-4 stage over inputs {0,3,6,9}, {1,4,7,10} and {2,5,8,11}.
+  bfly4_tw1(x0, x3, x6, x9,  &stage[0], &stage[3], &stage[6], &stage[9]);
+  bfly4_tw1(x1, x4, x7, x10, &stage[1], &stage[4], &stage[7], &stage[10]);
+  bfly4_tw1(x2, x5, x8, x11, &stage[2], &stage[5], &stage[8], &stage[11]);
+
+  // Radix-3 stage, k2 = 0: no twiddles.
+  bfly3_tw1(&stage[0], &stage[1], &stage[2], y0, y4, y8);
+
+  // k2 = 1
+  bfly3(&stage[3], &stage[4], &stage[5], y1, y5, y9, W1_12, W2_12);
+
+  // k2 = 2
+  bfly3(&stage[6], &stage[7], &stage[8], y2, y6, y10, W2_12, W4_12);
+
+  // k2 = 3
+  bfly3(&stage[9], &stage[10], &stage[11], y3, y7, y11, W3_12, W6_12);
+}
+
+
+
+
+// 12-point DFT entry point: views x and y as arrays of 12 SIMD vectors and
+// hands them to dft12f().  scale_flag is accepted but not used; no scaling is
+// applied here.
+void dft12(int16_t *x,int16_t *y ,unsigned char scale_flag)
+{
+  simd_q15_t *in = (simd_q15_t *)x;
+  simd_q15_t *out = (simd_q15_t *)y;
+
+  dft12f(in,     in + 1,  in + 2,  in + 3,  in + 4,   in + 5,
+         in + 6, in + 7,  in + 8,  in + 9,  in + 10,  in + 11,
+         out,     out + 1, out + 2, out + 3, out + 4,  out + 5,
+         out + 6, out + 7, out + 8, out + 9, out + 10, out + 11);
+
+  _mm_empty();
+  _m_empty();
+}
+
+#ifdef __AVX2__
+
+// 256-bit variants of the W_12^k twiddles: the same Q15 (re, im) pairs as the
+// 8-entry tables, repeated eight times per table instead of four.
+static int16_t W1_12s_256[16]__attribute__((aligned(32))) = {28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383};
+static int16_t W2_12s_256[16]__attribute__((aligned(32))) = {16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377};
+static int16_t W3_12s_256[16]__attribute__((aligned(32))) = {0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767};
+static int16_t W4_12s_256[16]__attribute__((aligned(32))) = {-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377};
+static int16_t W6_12s_256[16]__attribute__((aligned(32))) = {-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0};
+
+// SIMD-typed views of the tables above, as passed to bfly3_256() by
+// dft12f_simd256().
+simd256_q15_t *W1_12_256=(simd256_q15_t *)W1_12s_256;
+simd256_q15_t *W2_12_256=(simd256_q15_t *)W2_12s_256;
+simd256_q15_t *W3_12_256=(simd256_q15_t *)W3_12s_256;
+simd256_q15_t *W4_12_256=(simd256_q15_t *)W4_12s_256;
+simd256_q15_t *W6_12_256=(simd256_q15_t *)W6_12s_256;
+
+
+
+// 12-point DFT kernel on 256-bit SIMD vectors, built as 4 x 3:
+// three radix-4 butterflies (bfly4_tw1_256) followed by four radix-3
+// butterflies, the last three of which apply the W*_12_256 twiddles.
+//   x0..x11 : the twelve input vectors
+//   y0..y11 : the twelve output vectors
+// The always_inline attribute is attached directly to the definition.
+static inline __attribute__((always_inline))
+void dft12f_simd256(simd256_q15_t *x0, simd256_q15_t *x1, simd256_q15_t *x2,
+                    simd256_q15_t *x3, simd256_q15_t *x4, simd256_q15_t *x5,
+                    simd256_q15_t *x6, simd256_q15_t *x7, simd256_q15_t *x8,
+                    simd256_q15_t *x9, simd256_q15_t *x10, simd256_q15_t *x11,
+                    simd256_q15_t *y0, simd256_q15_t *y1, simd256_q15_t *y2,
+                    simd256_q15_t *y3, simd256_q15_t *y4, simd256_q15_t *y5,
+                    simd256_q15_t *y6, simd256_q15_t *y7, simd256_q15_t *y8,
+                    simd256_q15_t *y9, simd256_q15_t *y10, simd256_q15_t *y11)
+{
+  simd256_q15_t stage[12];
+
+  // Radix-4 stage over inputs {0,3,6,9}, {1,4,7,10} and {2,5,8,11}.
+  bfly4_tw1_256(x0, x3, x6, x9,  &stage[0], &stage[3], &stage[6], &stage[9]);
+  bfly4_tw1_256(x1, x4, x7, x10, &stage[1], &stage[4], &stage[7], &stage[10]);
+  bfly4_tw1_256(x2, x5, x8, x11, &stage[2], &stage[5], &stage[8], &stage[11]);
+
+  // Radix-3 stage, k2 = 0: no twiddles.
+  bfly3_tw1_256(&stage[0], &stage[1], &stage[2], y0, y4, y8);
+
+  // k2 = 1
+  bfly3_256(&stage[3], &stage[4], &stage[5], y1, y5, y9, W1_12_256, W2_12_256);
+
+  // k2 = 2
+  bfly3_256(&stage[6], &stage[7], &stage[8], y2, y6, y10, W2_12_256, W4_12_256);
+
+  // k2 = 3
+  bfly3_256(&stage[9], &stage[10], &stage[11], y3, y7, y11, W3_12_256, W6_12_256);
+}
+
+
+
+
+// 12-point DFT entry point for 256-bit vectors: views x and y as arrays of 12
+// simd256_q15_t and hands them to dft12f_simd256().  No scaling is applied.
+void dft12_simd256(int16_t *x,int16_t *y)
+{
+  simd256_q15_t *in = (simd256_q15_t *)x;
+  simd256_q15_t *out = (simd256_q15_t *)y;
+
+  dft12f_simd256(in,     in + 1,  in + 2,  in + 3,  in + 4,   in + 5,
+                 in + 6, in + 7,  in + 8,  in + 9,  in + 10,  in + 11,
+                 out,     out + 1, out + 2, out + 3, out + 4,  out + 5,
+                 out + 6, out + 7, out + 8, out + 9, out + 10, out + 11);
+
+  _mm_empty();
+  _m_empty();
+}
+
+#endif
+
+// Twiddles for the radix-2 stage of dft24(): 88 int16 = 11 vectors of 8, one
+// per non-trivial butterfly.  Not filled in this part of the file.
+static int16_t tw24[88]__attribute__((aligned(32)));
+
+// 24-point DFT built as 12 x 2: two dft12f() passes over the even- and
+// odd-indexed input vectors, then a radix-2 butterfly stage.
+//   x          : input, read as 24 SIMD vectors
+//   y          : output, written as 24 SIMD vectors
+//   scale_flag : when 1, every output vector goes through mulhi_int16() with
+//                the broadcast value dft_norm_table[1]
+void dft24(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *tw128=(simd_q15_t *)&tw24[0];
+  simd_q15_t ytmp128[24];
+  int i,j,k;
+
+  // Even-indexed inputs -> even slots of ytmp128.
+  dft12f(x128,       x128+2,     x128+4,     x128+6,
+         x128+8,     x128+10,    x128+12,    x128+14,
+         x128+16,    x128+18,    x128+20,    x128+22,
+         ytmp128,    ytmp128+2,  ytmp128+4,  ytmp128+6,
+         ytmp128+8,  ytmp128+10, ytmp128+12, ytmp128+14,
+         ytmp128+16, ytmp128+18, ytmp128+20, ytmp128+22);
+
+  // Odd-indexed inputs -> odd slots of ytmp128.
+  dft12f(x128+1,     x128+3,     x128+5,     x128+7,
+         x128+9,     x128+11,    x128+13,    x128+15,
+         x128+17,    x128+19,    x128+21,    x128+23,
+         ytmp128+1,  ytmp128+3,  ytmp128+5,  ytmp128+7,
+         ytmp128+9,  ytmp128+11, ytmp128+13, ytmp128+15,
+         ytmp128+17, ytmp128+19, ytmp128+21, ytmp128+23);
+
+  // First butterfly needs no twiddle.
+  bfly2_tw1(ytmp128,
+            ytmp128+1,
+            y128,
+            y128+12);
+
+  // Remaining 11 butterflies, one twiddle vector each.
+  for (i=2,j=1,k=0; i<24; i+=2,j++,k++) {
+
+    bfly2(ytmp128+i,
+          ytmp128+i+1,
+          y128+j,
+          y128+j+12,
+          tw128+k);
+  }
+
+  if (scale_flag==1) {
+    // Keep the scaling vector local so this routine has no shared mutable
+    // state and stays re-entrant.
+    simd_q15_t norm = set1_int16(dft_norm_table[1]);
+
+    for (i=0; i<24; i++) {
+      y128[i] = mulhi_int16(y128[i],norm);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddles for the radix-3 stage of dft36(): 88 int16 = 11 vectors of 8 per
+// table, one per non-trivial butterfly.  Not filled in this part of the file.
+static int16_t twa36[88]__attribute__((aligned(32)));
+static int16_t twb36[88]__attribute__((aligned(32)));
+
+// 36-point DFT built as 12 x 3: three dft12f() passes over the input vectors
+// taken with stride 3, then a radix-3 butterfly stage.
+//   x          : input, read as 36 SIMD vectors
+//   y          : output, written as 36 SIMD vectors
+//   scale_flag : when 1, every output vector goes through mulhi_int16() with
+//                the broadcast value dft_norm_table[2]
+void dft36(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa36[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb36[0];
+  simd_q15_t ytmp128[36];
+  int i,j,k;
+
+  // Inputs 0,3,6,... -> ytmp128 slots 0,3,6,...
+  dft12f(x128,       x128+3,     x128+6,     x128+9,
+         x128+12,    x128+15,    x128+18,    x128+21,
+         x128+24,    x128+27,    x128+30,    x128+33,
+         ytmp128,    ytmp128+3,  ytmp128+6,  ytmp128+9,
+         ytmp128+12, ytmp128+15, ytmp128+18, ytmp128+21,
+         ytmp128+24, ytmp128+27, ytmp128+30, ytmp128+33);
+
+  // Inputs 1,4,7,... -> ytmp128 slots 1,4,7,...
+  dft12f(x128+1,     x128+4,     x128+7,     x128+10,
+         x128+13,    x128+16,    x128+19,    x128+22,
+         x128+25,    x128+28,    x128+31,    x128+34,
+         ytmp128+1,  ytmp128+4,  ytmp128+7,  ytmp128+10,
+         ytmp128+13, ytmp128+16, ytmp128+19, ytmp128+22,
+         ytmp128+25, ytmp128+28, ytmp128+31, ytmp128+34);
+
+  // Inputs 2,5,8,... -> ytmp128 slots 2,5,8,...
+  dft12f(x128+2,     x128+5,     x128+8,     x128+11,
+         x128+14,    x128+17,    x128+20,    x128+23,
+         x128+26,    x128+29,    x128+32,    x128+35,
+         ytmp128+2,  ytmp128+5,  ytmp128+8,  ytmp128+11,
+         ytmp128+14, ytmp128+17, ytmp128+20, ytmp128+23,
+         ytmp128+26, ytmp128+29, ytmp128+32, ytmp128+35);
+
+  // First butterfly needs no twiddles.
+  bfly3_tw1(ytmp128,
+            ytmp128+1,
+            ytmp128+2,
+            y128,
+            y128+12,
+            y128+24);
+
+  // Remaining 11 butterflies, one twiddle vector per table each.
+  for (i=3,j=1,k=0; i<36; i+=3,j++,k++) {
+
+    bfly3(ytmp128+i,
+          ytmp128+i+1,
+          ytmp128+i+2,
+          y128+j,
+          y128+j+12,
+          y128+j+24,
+          twa128+k,
+          twb128+k);
+  }
+
+  if (scale_flag==1) {
+    // Keep the scaling vector local so this routine has no shared mutable
+    // state and stays re-entrant.
+    simd_q15_t norm = set1_int16(dft_norm_table[2]);
+
+    for (i=0; i<36; i++) {
+      y128[i] = mulhi_int16(y128[i],norm);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddles for the radix-4 stage of dft48(): 88 int16 = 11 vectors of 8 per
+// table, one per non-trivial butterfly.  Not filled in this part of the file.
+static int16_t twa48[88]__attribute__((aligned(32)));
+static int16_t twb48[88]__attribute__((aligned(32)));
+static int16_t twc48[88]__attribute__((aligned(32)));
+
+// 48-point DFT built as 12 x 4: four dft12f() passes over the input vectors
+// taken with stride 4, then a radix-4 butterfly stage.
+//   x          : input, read as 48 SIMD vectors
+//   y          : output, written as 48 SIMD vectors
+//   scale_flag : when 1, every output vector goes through mulhi_int16() with
+//                the broadcast value dft_norm_table[3]
+void dft48(int16_t *x, int16_t *y,unsigned char scale_flag)
+{
+
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa48[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb48[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc48[0];
+  simd_q15_t ytmp128[48];
+  int i,j,k;
+
+  // Inputs 0,4,8,... -> ytmp128 slots 0,4,8,...
+  dft12f(x128,       x128+4,     x128+8,     x128+12,
+         x128+16,    x128+20,    x128+24,    x128+28,
+         x128+32,    x128+36,    x128+40,    x128+44,
+         ytmp128,    ytmp128+4,  ytmp128+8,  ytmp128+12,
+         ytmp128+16, ytmp128+20, ytmp128+24, ytmp128+28,
+         ytmp128+32, ytmp128+36, ytmp128+40, ytmp128+44);
+
+  // Inputs 1,5,9,... -> ytmp128 slots 1,5,9,...
+  dft12f(x128+1,     x128+5,     x128+9,     x128+13,
+         x128+17,    x128+21,    x128+25,    x128+29,
+         x128+33,    x128+37,    x128+41,    x128+45,
+         ytmp128+1,  ytmp128+5,  ytmp128+9,  ytmp128+13,
+         ytmp128+17, ytmp128+21, ytmp128+25, ytmp128+29,
+         ytmp128+33, ytmp128+37, ytmp128+41, ytmp128+45);
+
+  // Inputs 2,6,10,... -> ytmp128 slots 2,6,10,...
+  dft12f(x128+2,     x128+6,     x128+10,    x128+14,
+         x128+18,    x128+22,    x128+26,    x128+30,
+         x128+34,    x128+38,    x128+42,    x128+46,
+         ytmp128+2,  ytmp128+6,  ytmp128+10, ytmp128+14,
+         ytmp128+18, ytmp128+22, ytmp128+26, ytmp128+30,
+         ytmp128+34, ytmp128+38, ytmp128+42, ytmp128+46);
+
+  // Inputs 3,7,11,... -> ytmp128 slots 3,7,11,...
+  dft12f(x128+3,     x128+7,     x128+11,    x128+15,
+         x128+19,    x128+23,    x128+27,    x128+31,
+         x128+35,    x128+39,    x128+43,    x128+47,
+         ytmp128+3,  ytmp128+7,  ytmp128+11, ytmp128+15,
+         ytmp128+19, ytmp128+23, ytmp128+27, ytmp128+31,
+         ytmp128+35, ytmp128+39, ytmp128+43, ytmp128+47);
+
+  // First butterfly needs no twiddles.
+  bfly4_tw1(ytmp128,
+            ytmp128+1,
+            ytmp128+2,
+            ytmp128+3,
+            y128,
+            y128+12,
+            y128+24,
+            y128+36);
+
+  // Remaining 11 butterflies, one twiddle vector per table each.
+  for (i=4,j=1,k=0; i<48; i+=4,j++,k++) {
+
+    bfly4(ytmp128+i,
+          ytmp128+i+1,
+          ytmp128+i+2,
+          ytmp128+i+3,
+          y128+j,
+          y128+j+12,
+          y128+j+24,
+          y128+j+36,
+          twa128+k,
+          twb128+k,
+          twc128+k);
+
+  }
+
+  if (scale_flag == 1) {
+    // Keep the scaling vector local so this routine has no shared mutable
+    // state and stays re-entrant.
+    simd_q15_t norm = set1_int16(dft_norm_table[3]);
+
+    for (i=0; i<48; i++) {
+      y128[i] = mulhi_int16(y128[i],norm);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddles for the radix-5 stage of dft60(): 88 int16 = 11 vectors of 8 per
+// table, one per non-trivial butterfly.  Not filled in this part of the file.
+static int16_t twa60[88]__attribute__((aligned(32)));
+static int16_t twb60[88]__attribute__((aligned(32)));
+static int16_t twc60[88]__attribute__((aligned(32)));
+static int16_t twd60[88]__attribute__((aligned(32)));
+
+// 60-point DFT built as 12 x 5: five dft12f() passes over the input vectors
+// taken with stride 5, then a radix-5 butterfly stage.
+//   x     : input, read as 60 SIMD vectors
+//   y     : output, written as 60 SIMD vectors
+//   scale : when 1, every output vector goes through mulhi_int16() with the
+//           broadcast value dft_norm_table[4]
+void dft60(int16_t *x,int16_t *y,unsigned char scale)
+{
+
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa60[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb60[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc60[0];
+  simd_q15_t *twd128=(simd_q15_t *)&twd60[0];
+  simd_q15_t ytmp128[60];
+  int i,j,k;
+
+  // Inputs 0,5,10,... -> ytmp128 slots 0,5,10,...
+  dft12f(x128,       x128+5,     x128+10,    x128+15,
+         x128+20,    x128+25,    x128+30,    x128+35,
+         x128+40,    x128+45,    x128+50,    x128+55,
+         ytmp128,    ytmp128+5,  ytmp128+10, ytmp128+15,
+         ytmp128+20, ytmp128+25, ytmp128+30, ytmp128+35,
+         ytmp128+40, ytmp128+45, ytmp128+50, ytmp128+55);
+
+  // Inputs 1,6,11,... -> ytmp128 slots 1,6,11,...
+  dft12f(x128+1,     x128+6,     x128+11,    x128+16,
+         x128+21,    x128+26,    x128+31,    x128+36,
+         x128+41,    x128+46,    x128+51,    x128+56,
+         ytmp128+1,  ytmp128+6,  ytmp128+11, ytmp128+16,
+         ytmp128+21, ytmp128+26, ytmp128+31, ytmp128+36,
+         ytmp128+41, ytmp128+46, ytmp128+51, ytmp128+56);
+
+  // Inputs 2,7,12,... -> ytmp128 slots 2,7,12,...
+  dft12f(x128+2,     x128+7,     x128+12,    x128+17,
+         x128+22,    x128+27,    x128+32,    x128+37,
+         x128+42,    x128+47,    x128+52,    x128+57,
+         ytmp128+2,  ytmp128+7,  ytmp128+12, ytmp128+17,
+         ytmp128+22, ytmp128+27, ytmp128+32, ytmp128+37,
+         ytmp128+42, ytmp128+47, ytmp128+52, ytmp128+57);
+
+  // Inputs 3,8,13,... -> ytmp128 slots 3,8,13,...
+  dft12f(x128+3,     x128+8,     x128+13,    x128+18,
+         x128+23,    x128+28,    x128+33,    x128+38,
+         x128+43,    x128+48,    x128+53,    x128+58,
+         ytmp128+3,  ytmp128+8,  ytmp128+13, ytmp128+18,
+         ytmp128+23, ytmp128+28, ytmp128+33, ytmp128+38,
+         ytmp128+43, ytmp128+48, ytmp128+53, ytmp128+58);
+
+  // Inputs 4,9,14,... -> ytmp128 slots 4,9,14,...
+  dft12f(x128+4,     x128+9,     x128+14,    x128+19,
+         x128+24,    x128+29,    x128+34,    x128+39,
+         x128+44,    x128+49,    x128+54,    x128+59,
+         ytmp128+4,  ytmp128+9,  ytmp128+14, ytmp128+19,
+         ytmp128+24, ytmp128+29, ytmp128+34, ytmp128+39,
+         ytmp128+44, ytmp128+49, ytmp128+54, ytmp128+59);
+
+  // First butterfly needs no twiddles.
+  bfly5_tw1(ytmp128,
+            ytmp128+1,
+            ytmp128+2,
+            ytmp128+3,
+            ytmp128+4,
+            y128,
+            y128+12,
+            y128+24,
+            y128+36,
+            y128+48);
+
+  // Remaining 11 butterflies, one twiddle vector per table each.
+  for (i=5,j=1,k=0; i<60; i+=5,j++,k++) {
+
+    bfly5(ytmp128+i,
+          ytmp128+i+1,
+          ytmp128+i+2,
+          ytmp128+i+3,
+          ytmp128+i+4,
+          y128+j,
+          y128+j+12,
+          y128+j+24,
+          y128+j+36,
+          y128+j+48,
+          twa128+k,
+          twb128+k,
+          twc128+k,
+          twd128+k);
+  }
+
+  if (scale == 1) {
+    // Keep the scaling vector local so this routine has no shared mutable
+    // state and stays re-entrant.
+    simd_q15_t norm = set1_int16(dft_norm_table[4]);
+
+    for (i=0; i<60; i++) {
+      y128[i] = mulhi_int16(y128[i],norm);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddles for the radix-2 stage of dft72(): 280 int16 = 35 vectors of 8, one
+// per non-trivial butterfly.  Not filled in this part of the file.
+static int16_t tw72[280]__attribute__((aligned(32)));
+
+// 72-point DFT built as 36 x 2: the input vectors are split into even and odd
+// halves, each half goes through dft36(), and a radix-2 butterfly stage
+// recombines them.
+//   x          : input, read as 72 SIMD vectors
+//   y          : output, written as 72 SIMD vectors
+//   scale_flag : when 1, every output vector goes through mulhi_int16() with
+//                the broadcast value dft_norm_table[5]
+// The two dft36() calls always run with their own scale flag set to 1,
+// whatever scale_flag is.
+void dft72(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *tw128=(simd_q15_t *)&tw72[0];
+  simd_q15_t x2128[72];
+  simd_q15_t ytmp128[72];
+
+  for (i=0,j=0; i<36; i++,j+=2) {
+    x2128[i]    = x128[j];    // even inputs
+    x2128[i+36] = x128[j+1];  // odd inputs
+  }
+
+  dft36((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft36((int16_t *)(x2128+36),(int16_t *)(ytmp128+36),1);
+
+  // First butterfly needs no twiddle.
+  bfly2_tw1(ytmp128,ytmp128+36,y128,y128+36);
+
+  // Remaining 35 butterflies, one twiddle vector each.
+  for (i=1,j=0; i<36; i++,j++) {
+    bfly2(ytmp128+i,
+          ytmp128+36+i,
+          y128+i,
+          y128+36+i,
+          tw128+j);
+  }
+
+  if (scale_flag==1) {
+    // Keep the scaling vector local so this routine has no shared mutable
+    // state and stays re-entrant.
+    simd_q15_t norm = set1_int16(dft_norm_table[5]);
+
+    for (i=0; i<72; i++) {
+      y128[i] = mulhi_int16(y128[i],norm);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t tw96[376] __attribute__((aligned(32)));  // twiddle table for dft96, read through a simd_q15_t pointer
+// dft96: 96-point DFT computed as 2 x 48 (even/odd input split, two dft48 calls, bfly2 recombination).
+void dft96(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 96 simd_q15_t elements.
+  // scale_flag==1 multiplies every output element (mulhi_int16) by dft_norm_table[6].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *tw128 = (simd_q15_t *)tw96;
+  simd_q15_t x2128[96];    // de-interleaved input: [0..47] even-indexed, [48..95] odd-indexed
+  simd_q15_t ytmp128[96];  // results of the two 48-point sub-transforms
+
+  // split the input into its even- and odd-indexed elements
+  for (n=0; n<48; n++) {
+    x2128[n]    = x128[2*n];
+    x2128[48+n] = x128[2*n+1];
+  }
+  // transform each half with dft48 (third argument 0)
+  dft48((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],0);
+  dft48((int16_t *)&x2128[48],(int16_t *)&ytmp128[48],0);
+
+  // element 0 of each half goes through the butterfly variant that takes no twiddle argument
+  bfly2_tw1(&ytmp128[0],&ytmp128[48],&y128[0],&y128[48]);
+  // remaining elements: butterfly n is given twiddle entry n-1
+  for (n=1; n<48; n++) {
+    bfly2(&ytmp128[n],
+          &ytmp128[48+n],
+          &y128[n],
+          &y128[48+n],
+          &tw128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[6]);
+
+    for (n=0; n<96; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa108[280] __attribute__((aligned(32)));  // first twiddle table handed to bfly3 by dft108
+static int16_t twb108[280] __attribute__((aligned(32)));  // second twiddle table handed to bfly3 by dft108
+// dft108: 108-point DFT computed as 3 x 36 (stride-3 input split, three dft36 calls, bfly3 recombination).
+void dft108(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa108;
+  simd_q15_t *twb128 = (simd_q15_t *)twb108;
+  simd_q15_t x2128[108];    // input regrouped into three contiguous blocks of 36
+  simd_q15_t ytmp128[108];  // results of the three 36-point sub-transforms
+  // x is read and y is written as 108 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[7].
+  // Regroup: block m (m=0,1,2) collects the elements x128[3*n+m].
+  for (n=0; n<36; n++) {
+    x2128[n]    = x128[3*n];
+    x2128[36+n] = x128[3*n+1];
+    x2128[72+n] = x128[3*n+2];
+  }
+  // transform each block with dft36 (third argument 0)
+  dft36((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],0);
+  dft36((int16_t *)&x2128[36],(int16_t *)&ytmp128[36],0);
+  dft36((int16_t *)&x2128[72],(int16_t *)&ytmp128[72],0);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(&ytmp128[0],&ytmp128[36],&ytmp128[72],&y128[0],&y128[36],&y128[72]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<36; n++) {
+    bfly3(&ytmp128[n],
+          &ytmp128[36+n],
+          &ytmp128[72+n],
+          &y128[n],
+          &y128[36+n],
+          &y128[72+n],
+          &twa128[n-1],
+          &twb128[n-1]);
+
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[7]);
+
+    for (n=0; n<108; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t tw120[472] __attribute__((aligned(32)));  // twiddle table for dft120, read through a simd_q15_t pointer
+void dft120(int16_t *x,int16_t *y, unsigned char scale_flag)  // 60 x 2
+{
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *tw128 = (simd_q15_t *)tw120;
+  simd_q15_t x2128[120];    // de-interleaved input: [0..59] even-indexed, [60..119] odd-indexed
+  simd_q15_t ytmp128[120];  // results of the two 60-point sub-transforms
+  // 120-point DFT: split x (120 simd_q15_t elements) into even/odd halves, run dft60 on each, recombine into y.
+  for (n=0; n<60; n++) {
+    x2128[n]    = x128[2*n];
+    x2128[60+n] = x128[2*n+1];
+  }
+  // transform each half with dft60 (third argument 0)
+  dft60((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],0);
+  dft60((int16_t *)&x2128[60],(int16_t *)&ytmp128[60],0);
+
+  // element 0 of each half goes through the butterfly variant that takes no twiddle argument
+  bfly2_tw1(&ytmp128[0],&ytmp128[60],&y128[0],&y128[60]);
+  // remaining elements: butterfly n is given twiddle entry n-1
+  for (n=1; n<60; n++) {
+    bfly2(&ytmp128[n],
+          &ytmp128[60+n],
+          &y128[n],
+          &y128[60+n],
+          &tw128[n-1]);
+  }
+  // scale_flag==1: multiply all 120 output elements by dft_norm_table[8]
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[8]);
+
+    for (n=0; n<120; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa144[376] __attribute__((aligned(32)));  // first twiddle table handed to bfly3 by dft144
+static int16_t twb144[376] __attribute__((aligned(32)));  // second twiddle table handed to bfly3 by dft144
+// dft144: 144-point DFT computed as 3 x 48 (stride-3 input split, three dft48 calls, bfly3 recombination).
+void dft144(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa144;
+  simd_q15_t *twb128 = (simd_q15_t *)twb144;
+  simd_q15_t x2128[144];    // input regrouped into three contiguous blocks of 48
+  simd_q15_t ytmp128[144];  // results of the three 48-point sub-transforms
+  // x is read and y is written as 144 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[9].
+
+  // Regroup: block m (m=0,1,2) collects the elements x128[3*n+m].
+  for (n=0; n<48; n++) {
+    x2128[n]    = x128[3*n];
+    x2128[48+n] = x128[3*n+1];
+    x2128[96+n] = x128[3*n+2];
+  }
+  // transform each block with dft48 (third argument 1)
+  dft48((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft48((int16_t *)&x2128[48],(int16_t *)&ytmp128[48],1);
+  dft48((int16_t *)&x2128[96],(int16_t *)&ytmp128[96],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(&ytmp128[0],&ytmp128[48],&ytmp128[96],&y128[0],&y128[48],&y128[96]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<48; n++) {
+    bfly3(&ytmp128[n],
+          &ytmp128[48+n],
+          &ytmp128[96+n],
+          &y128[n],
+          &y128[48+n],
+          &y128[96+n],
+          &twa128[n-1],
+          &twb128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[9]);
+
+    for (n=0; n<144; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa180[472] __attribute__((aligned(32)));  // first twiddle table handed to bfly3 by dft180
+static int16_t twb180[472] __attribute__((aligned(32)));  // second twiddle table handed to bfly3 by dft180
+// dft180: 180-point DFT computed as 3 x 60 (stride-3 input split, three dft60 calls, bfly3 recombination).
+void dft180(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 180 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[10].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa180;
+  simd_q15_t *twb128 = (simd_q15_t *)twb180;
+  simd_q15_t x2128[180];    // input regrouped into three contiguous blocks of 60
+  simd_q15_t ytmp128[180];  // results of the three 60-point sub-transforms
+
+
+  // Regroup: block m (m=0,1,2) collects the elements x128[3*n+m].
+  for (n=0; n<60; n++) {
+    x2128[n]     = x128[3*n];
+    x2128[60+n]  = x128[3*n+1];
+    x2128[120+n] = x128[3*n+2];
+  }
+  // transform each block with dft60 (third argument 1)
+  dft60((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft60((int16_t *)&x2128[60],(int16_t *)&ytmp128[60],1);
+  dft60((int16_t *)&x2128[120],(int16_t *)&ytmp128[120],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(&ytmp128[0],&ytmp128[60],&ytmp128[120],&y128[0],&y128[60],&y128[120]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<60; n++) {
+    bfly3(&ytmp128[n],
+          &ytmp128[60+n],
+          &ytmp128[120+n],
+          &y128[n],
+          &y128[60+n],
+          &y128[120+n],
+          &twa128[n-1],
+          &twb128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[10]);
+
+    for (n=0; n<180; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa192[376] __attribute__((aligned(32)));  // first twiddle table handed to bfly4 by dft192
+static int16_t twb192[376] __attribute__((aligned(32)));  // second twiddle table handed to bfly4 by dft192
+static int16_t twc192[376] __attribute__((aligned(32)));  // third twiddle table handed to bfly4 by dft192
+// dft192: 192-point DFT computed as 4 x 48 (stride-4 input split, four dft48 calls, bfly4 recombination).
+void dft192(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 192 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[11].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa192;
+  simd_q15_t *twb128 = (simd_q15_t *)twb192;
+  simd_q15_t *twc128 = (simd_q15_t *)twc192;
+  simd_q15_t x2128[192];    // input regrouped into four contiguous blocks of 48
+  simd_q15_t ytmp128[192];  // results of the four 48-point sub-transforms
+
+
+  // Regroup: block m (m=0..3) collects the elements x128[4*n+m].
+  for (n=0; n<48; n++) {
+    x2128[n]     = x128[4*n];
+    x2128[48+n]  = x128[4*n+1];
+    x2128[96+n]  = x128[4*n+2];
+    x2128[144+n] = x128[4*n+3];
+  }
+  // transform each block with dft48 (third argument 1)
+  dft48((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft48((int16_t *)&x2128[48],(int16_t *)&ytmp128[48],1);
+  dft48((int16_t *)&x2128[96],(int16_t *)&ytmp128[96],1);
+  dft48((int16_t *)&x2128[144],(int16_t *)&ytmp128[144],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(&ytmp128[0],&ytmp128[48],&ytmp128[96],&ytmp128[144],&y128[0],&y128[48],&y128[96],&y128[144]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<48; n++) {
+    bfly4(&ytmp128[n],
+          &ytmp128[48+n],
+          &ytmp128[96+n],
+          &ytmp128[144+n],
+          &y128[n],
+          &y128[48+n],
+          &y128[96+n],
+          &y128[144+n],
+          &twa128[n-1],
+          &twb128[n-1],
+          &twc128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[11]);
+
+    for (n=0; n<192; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa216[568] __attribute__((aligned(32)));  // first twiddle table handed to bfly3 by dft216
+static int16_t twb216[568] __attribute__((aligned(32)));  // second twiddle table handed to bfly3 by dft216
+// dft216: 216-point DFT computed as 3 x 72 (stride-3 input split, three dft72 calls, bfly3 recombination).
+void dft216(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 216 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[12].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa216;
+  simd_q15_t *twb128 = (simd_q15_t *)twb216;
+  simd_q15_t x2128[216];    // input regrouped into three contiguous blocks of 72
+  simd_q15_t ytmp128[216];  // results of the three 72-point sub-transforms
+
+
+  // Regroup: block m (m=0,1,2) collects the elements x128[3*n+m].
+  for (n=0; n<72; n++) {
+    x2128[n]     = x128[3*n];
+    x2128[72+n]  = x128[3*n+1];
+    x2128[144+n] = x128[3*n+2];
+  }
+  // transform each block with dft72 (scale_flag argument 1)
+  dft72((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft72((int16_t *)&x2128[72],(int16_t *)&ytmp128[72],1);
+  dft72((int16_t *)&x2128[144],(int16_t *)&ytmp128[144],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(&ytmp128[0],&ytmp128[72],&ytmp128[144],&y128[0],&y128[72],&y128[144]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<72; n++) {
+    bfly3(&ytmp128[n],
+          &ytmp128[72+n],
+          &ytmp128[144+n],
+          &y128[n],
+          &y128[72+n],
+          &y128[144+n],
+          &twa128[n-1],
+          &twb128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[12]);
+
+    for (n=0; n<216; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa240[472] __attribute__((aligned(32)));  // first twiddle table handed to bfly4 by dft240
+static int16_t twb240[472] __attribute__((aligned(32)));  // second twiddle table handed to bfly4 by dft240
+static int16_t twc240[472] __attribute__((aligned(32)));  // third twiddle table handed to bfly4 by dft240
+// dft240: 240-point DFT computed as 4 x 60 (stride-4 input split, four dft60 calls, bfly4 recombination).
+void dft240(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 240 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[13].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa240;
+  simd_q15_t *twb128 = (simd_q15_t *)twb240;
+  simd_q15_t *twc128 = (simd_q15_t *)twc240;
+  simd_q15_t x2128[240];    // input regrouped into four contiguous blocks of 60
+  simd_q15_t ytmp128[240];  // results of the four 60-point sub-transforms
+
+
+  // Regroup: block m (m=0..3) collects the elements x128[4*n+m].
+  for (n=0; n<60; n++) {
+    x2128[n]     = x128[4*n];
+    x2128[60+n]  = x128[4*n+1];
+    x2128[120+n] = x128[4*n+2];
+    x2128[180+n] = x128[4*n+3];
+  }
+  // transform each block with dft60 (third argument 1)
+  dft60((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft60((int16_t *)&x2128[60],(int16_t *)&ytmp128[60],1);
+  dft60((int16_t *)&x2128[120],(int16_t *)&ytmp128[120],1);
+  dft60((int16_t *)&x2128[180],(int16_t *)&ytmp128[180],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(&ytmp128[0],&ytmp128[60],&ytmp128[120],&ytmp128[180],&y128[0],&y128[60],&y128[120],&y128[180]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<60; n++) {
+    bfly4(&ytmp128[n],
+          &ytmp128[60+n],
+          &ytmp128[120+n],
+          &ytmp128[180+n],
+          &y128[n],
+          &y128[60+n],
+          &y128[120+n],
+          &y128[180+n],
+          &twa128[n-1],
+          &twb128[n-1],
+          &twc128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[13]);
+
+    for (n=0; n<240; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa288[760] __attribute__((aligned(32)));  // first twiddle table handed to bfly3 by dft288
+static int16_t twb288[760] __attribute__((aligned(32)));  // second twiddle table handed to bfly3 by dft288
+// dft288: 288-point DFT computed as 3 x 96 (stride-3 input split, three dft96 calls, bfly3 recombination).
+void dft288(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 288 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa288;
+  simd_q15_t *twb128 = (simd_q15_t *)twb288;
+  simd_q15_t x2128[288];    // input regrouped into three contiguous blocks of 96
+  simd_q15_t ytmp128[288];  // results of the three 96-point sub-transforms
+
+
+  // Regroup: block m (m=0,1,2) collects the elements x128[3*n+m].
+  for (n=0; n<96; n++) {
+    x2128[n]     = x128[3*n];
+    x2128[96+n]  = x128[3*n+1];
+    x2128[192+n] = x128[3*n+2];
+  }
+  // transform each block with dft96 (scale_flag argument 1)
+  dft96((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft96((int16_t *)&x2128[96],(int16_t *)&ytmp128[96],1);
+  dft96((int16_t *)&x2128[192],(int16_t *)&ytmp128[192],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(&ytmp128[0],&ytmp128[96],&ytmp128[192],&y128[0],&y128[96],&y128[192]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<96; n++) {
+    bfly3(&ytmp128[n],
+          &ytmp128[96+n],
+          &ytmp128[192+n],
+          &y128[n],
+          &y128[96+n],
+          &y128[192+n],
+          &twa128[n-1],
+          &twb128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (n=0; n<288; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa300[472] __attribute__((aligned(32)));  // first twiddle table handed to bfly5 by dft300
+static int16_t twb300[472] __attribute__((aligned(32)));  // second twiddle table handed to bfly5 by dft300
+static int16_t twc300[472] __attribute__((aligned(32)));  // third twiddle table handed to bfly5 by dft300
+static int16_t twd300[472] __attribute__((aligned(32)));  // fourth twiddle table handed to bfly5 by dft300
+// dft300: 300-point DFT computed as 5 x 60 (stride-5 input split, five dft60 calls, bfly5 recombination).
+void dft300(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  // x is read and y is written as 300 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[15].
+  int n;
+  simd_q15_t *x128 = (simd_q15_t *)x;
+  simd_q15_t *y128 = (simd_q15_t *)y;
+  simd_q15_t *twa128 = (simd_q15_t *)twa300;
+  simd_q15_t *twb128 = (simd_q15_t *)twb300;
+  simd_q15_t *twc128 = (simd_q15_t *)twc300;
+  simd_q15_t *twd128 = (simd_q15_t *)twd300;
+  simd_q15_t x2128[300];    // input regrouped into five contiguous blocks of 60
+  simd_q15_t ytmp128[300];  // results of the five 60-point sub-transforms
+
+
+  // Regroup: block m (m=0..4) collects the elements x128[5*n+m].
+  for (n=0; n<60; n++) {
+    x2128[n]     = x128[5*n];
+    x2128[60+n]  = x128[5*n+1];
+    x2128[120+n] = x128[5*n+2];
+    x2128[180+n] = x128[5*n+3];
+    x2128[240+n] = x128[5*n+4];
+  }
+  // transform each block with dft60 (third argument 1)
+  dft60((int16_t *)&x2128[0],(int16_t *)&ytmp128[0],1);
+  dft60((int16_t *)&x2128[60],(int16_t *)&ytmp128[60],1);
+  dft60((int16_t *)&x2128[120],(int16_t *)&ytmp128[120],1);
+  dft60((int16_t *)&x2128[180],(int16_t *)&ytmp128[180],1);
+  dft60((int16_t *)&x2128[240],(int16_t *)&ytmp128[240],1);
+  // element 0 of each block goes through the butterfly variant that takes no twiddle arguments
+  bfly5_tw1(&ytmp128[0],&ytmp128[60],&ytmp128[120],&ytmp128[180],&ytmp128[240],&y128[0],&y128[60],&y128[120],&y128[180],&y128[240]);
+  // remaining elements: butterfly n is given twiddle entries n-1
+  for (n=1; n<60; n++) {
+    bfly5(&ytmp128[n],
+          &ytmp128[60+n],
+          &ytmp128[120+n],
+          &ytmp128[180+n],
+          &ytmp128[240+n],
+          &y128[n],
+          &y128[60+n],
+          &y128[120+n],
+          &y128[180+n],
+          &y128[240+n],
+          &twa128[n-1],
+          &twb128[n-1],
+          &twc128[n-1],
+          &twd128[n-1]);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[15]);
+
+    for (n=0; n<300; n++) {
+      y128[n] = mulhi_int16(y128[n],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa324[107*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb324[107*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft324: 324-point DFT (stride-3 input split, three dft108 calls, bfly3 recombination with twa324/twb324).
+void dft324(int16_t *x,int16_t *y,unsigned char scale_flag)  // 108 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa324[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb324[0];
+  simd_q15_t x2128[324];    // input regrouped into three contiguous blocks of 108
+  simd_q15_t ytmp128[324];  // results of the three 108-point sub-transforms
+  // x is read and y is written as 324 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<108; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+108] = x128[j+1];
+    x2128[i+216] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft108((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft108((int16_t *)(x2128+108),(int16_t *)(ytmp128+108),1);
+  dft108((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+108,ytmp128+216,y128,y128+108,y128+216);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<108; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+108+i,
+          ytmp128+216+i,
+          y128+i,
+          y128+108+i,
+          y128+216+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<324; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa360[119*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb360[119*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft360: 360-point DFT (stride-3 input split, three dft120 calls, bfly3 recombination with twa360/twb360).
+void dft360(int16_t *x,int16_t *y,unsigned char scale_flag)  // 120 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa360[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb360[0];
+  simd_q15_t x2128[360];    // input regrouped into three contiguous blocks of 120
+  simd_q15_t ytmp128[360];  // results of the three 120-point sub-transforms
+  // x is read and y is written as 360 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<120; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+120] = x128[j+1];
+    x2128[i+240] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft120((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft120((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1);
+  dft120((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+120,ytmp128+240,y128,y128+120,y128+240);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<120; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+120+i,
+          ytmp128+240+i,
+          y128+i,
+          y128+120+i,
+          y128+240+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<360; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa384[95*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb384[95*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc384[95*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft384: 384-point DFT (stride-4 input split, four dft96 calls, bfly4 recombination with twa/twb/twc384).
+void dft384(int16_t *x,int16_t *y,unsigned char scale_flag)  // 96 x 4
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa384[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb384[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc384[0];
+  simd_q15_t x2128[384];    // input regrouped into four contiguous blocks of 96
+  simd_q15_t ytmp128[384];  // results of the four 96-point sub-transforms
+  // x is read and y is written as 384 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<96; i++,j+=4) {
+    x2128[i]    = x128[j];
+    x2128[i+96] = x128[j+1];
+    x2128[i+192] = x128[j+2];
+    x2128[i+288] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft96((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft96((int16_t *)(x2128+96),(int16_t *)(ytmp128+96),1);
+  dft96((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1);
+  dft96((int16_t *)(x2128+288),(int16_t *)(ytmp128+288),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+96,ytmp128+192,ytmp128+288,y128,y128+96,y128+192,y128+288);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<96; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+96+i,
+          ytmp128+192+i,
+          ytmp128+288+i,
+          y128+i,
+          y128+96+i,
+          y128+192+i,
+          y128+288+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<384; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa432[107*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb432[107*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc432[107*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft432: 432-point DFT (stride-4 input split, four dft108 calls, bfly4 recombination with twa/twb/twc432).
+void dft432(int16_t *x,int16_t *y,unsigned char scale_flag)  // 108 x 4
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa432[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb432[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc432[0];
+  simd_q15_t x2128[432];    // input regrouped into four contiguous blocks of 108
+  simd_q15_t ytmp128[432];  // results of the four 108-point sub-transforms
+  // x is read and y is written as 432 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<108; i++,j+=4) {
+    x2128[i]    = x128[j];
+    x2128[i+108] = x128[j+1];
+    x2128[i+216] = x128[j+2];
+    x2128[i+324] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft108((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft108((int16_t *)(x2128+108),(int16_t *)(ytmp128+108),1);
+  dft108((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1);
+  dft108((int16_t *)(x2128+324),(int16_t *)(ytmp128+324),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+108,ytmp128+216,ytmp128+324,y128,y128+108,y128+216,y128+324);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<108; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+108+i,
+          ytmp128+216+i,
+          ytmp128+324+i,
+          y128+i,
+          y128+108+i,
+          y128+216+i,
+          y128+324+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<432; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+static int16_t twa480[119*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb480[119*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc480[119*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft480: 480-point DFT (stride-4 input split, four dft120 calls, bfly4 recombination with twa/twb/twc480).
+void dft480(int16_t *x,int16_t *y,unsigned char scale_flag)  // 120 x 4
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa480[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb480[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc480[0];
+  simd_q15_t x2128[480];    // input regrouped into four contiguous blocks of 120
+  simd_q15_t ytmp128[480];  // results of the four 120-point sub-transforms
+  // x is read and y is written as 480 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<120; i++,j+=4) {
+    x2128[i]    = x128[j];
+    x2128[i+120] = x128[j+1];
+    x2128[i+240] = x128[j+2];
+    x2128[i+360] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft120((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft120((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1);
+  dft120((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1);
+  dft120((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+120,ytmp128+240,ytmp128+360,y128,y128+120,y128+240,y128+360);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<120; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+120+i,
+          ytmp128+240+i,
+          ytmp128+360+i,
+          y128+i,
+          y128+120+i,
+          y128+240+i,
+          y128+360+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<480; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+static int16_t twa540[179*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb540[179*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft540: 540-point DFT (stride-3 input split, three dft180 calls, bfly3 recombination with twa540/twb540).
+void dft540(int16_t *x,int16_t *y,unsigned char scale_flag)  // 180 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa540[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb540[0];
+  simd_q15_t x2128[540];    // input regrouped into three contiguous blocks of 180
+  simd_q15_t ytmp128[540];  // results of the three 180-point sub-transforms
+  // x is read and y is written as 540 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<180; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+180] = x128[j+1];
+    x2128[i+360] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft180((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft180((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1);
+  dft180((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+180,ytmp128+360,y128,y128+180,y128+360);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<180; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+180+i,
+          ytmp128+360+i,
+          y128+i,
+          y128+180+i,
+          y128+360+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<540; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa576[191*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb576[191*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft576: 576-point DFT (stride-3 input split, three dft192 calls, bfly3 recombination with twa576/twb576).
+void dft576(int16_t *x,int16_t *y,unsigned char scale_flag)  // 192 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa576[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb576[0];
+  simd_q15_t x2128[576];    // input regrouped into three contiguous blocks of 192
+  simd_q15_t ytmp128[576];  // results of the three 192-point sub-transforms
+  // x is read and y is written as 576 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<192; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+192] = x128[j+1];
+    x2128[i+384] = x128[j+2];
+  }
+
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft192((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft192((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1);
+  dft192((int16_t *)(x2128+384),(int16_t *)(ytmp128+384),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+192,ytmp128+384,y128,y128+192,y128+384);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<192; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+192+i,
+          ytmp128+384+i,
+          y128+i,
+          y128+192+i,
+          y128+384+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<576; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+
+static int16_t twa600[299*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft600: 600-point DFT (even/odd input split, two dft300 calls, bfly2 recombination with twa600).
+void dft600(int16_t *x,int16_t *y,unsigned char scale_flag)  // 300 x 2
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *tw128=(simd_q15_t *)&twa600[0];
+  simd_q15_t x2128[600];    // de-interleaved input: [0..299] even-indexed, [300..599] odd-indexed
+  simd_q15_t ytmp128[600];  // results of the two 300-point sub-transforms
+  // x is read and y is written as 600 simd_q15_t elements; scale_flag==1 multiplies y by ONE_OVER_SQRT2_Q15.
+  // split the input into its even- and odd-indexed elements
+  for (i=0,j=0; i<300; i++,j+=2) {
+    x2128[i]    = x128[j];
+    x2128[i+300] = x128[j+1];
+  }
+  // transform each half (sub-transforms called with scale_flag 1)
+  dft300((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1);
+
+  // element 0 of each half uses the butterfly variant that takes no twiddle argument
+  bfly2_tw1(ytmp128,ytmp128+300,y128,y128+300);
+  // remaining elements: butterfly i is given twiddle entry j = i-1
+  for (i=1,j=0; i<300; i++,j++) {
+    bfly2(ytmp128+i,
+          ytmp128+300+i,
+          y128+i,
+          y128+300+i,
+          tw128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(ONE_OVER_SQRT2_Q15);
+
+    for (i=0; i<600; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+
+static int16_t twa648[215*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb648[215*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft648: 648-point DFT (stride-3 input split, three dft216 calls, bfly3 recombination with twa648/twb648).
+void dft648(int16_t *x,int16_t *y,unsigned char scale_flag)  // 216 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa648[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb648[0];
+  simd_q15_t x2128[648];    // input regrouped into three contiguous blocks of 216
+  simd_q15_t ytmp128[648];  // results of the three 216-point sub-transforms
+  // x is read and y is written as 648 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<216; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+216] = x128[j+1];
+    x2128[i+432] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft216((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft216((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1);
+  dft216((int16_t *)(x2128+432),(int16_t *)(ytmp128+432),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+216,ytmp128+432,y128,y128+216,y128+432);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<216; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+216+i,
+          ytmp128+432+i,
+          y128+i,
+          y128+216+i,
+          y128+432+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<648; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+static int16_t twa720[179*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb720[179*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc720[179*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+
+// dft720: 720-point DFT (stride-4 input split, four dft180 calls, bfly4 recombination with twa/twb/twc720).
+void dft720(int16_t *x,int16_t *y,unsigned char scale_flag)  // 180 x 4
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa720[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb720[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc720[0];
+  simd_q15_t x2128[720];    // input regrouped into four contiguous blocks of 180
+  simd_q15_t ytmp128[720];  // results of the four 180-point sub-transforms
+  // x is read and y is written as 720 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<180; i++,j+=4) {
+    x2128[i]    = x128[j];
+    x2128[i+180] = x128[j+1];
+    x2128[i+360] = x128[j+2];
+    x2128[i+540] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft180((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft180((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1);
+  dft180((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1);
+  dft180((int16_t *)(x2128+540),(int16_t *)(ytmp128+540),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+180,ytmp128+360,ytmp128+540,y128,y128+180,y128+360,y128+540);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<180; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+180+i,
+          ytmp128+360+i,
+          ytmp128+540+i,
+          y128+i,
+          y128+180+i,
+          y128+360+i,
+          y128+540+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<720; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa768[191*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb768[191*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc768[191*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft768: 768-point DFT (stride-4 input split, four dft192 calls, bfly4 recombination with twa/twb/twc768).
+void dft768(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4;
+  // x is read and y is written as 768 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa768[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb768[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc768[0];
+  simd_q15_t x2128[768];    // input regrouped into four contiguous blocks of 192
+  simd_q15_t ytmp128[768];  // results of the four 192-point sub-transforms
+
+
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<192; i++,j+=4) {
+    x2128[i]     = x128[j];
+    x2128[i+192] = x128[j+1];
+    x2128[i+384] = x128[j+2];
+    x2128[i+576] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft192((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft192((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1);
+  dft192((int16_t *)(x2128+384),(int16_t *)(ytmp128+384),1);
+  dft192((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+192,ytmp128+384,ytmp128+576,y128,y128+192,y128+384,y128+576);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<192; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+192+i,
+          ytmp128+384+i,
+          ytmp128+576+i,
+          y128+i,
+          y128+192+i,
+          y128+384+i,
+          y128+576+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<768; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+
+}
+
+
+static int16_t twa864[287*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb864[287*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft864: 864-point DFT (stride-3 input split, three dft288 calls, bfly3 recombination with twa864/twb864).
+void dft864(int16_t *x,int16_t *y,unsigned char scale_flag)  // 288 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa864[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb864[0];
+  simd_q15_t x2128[864];    // input regrouped into three contiguous blocks of 288
+  simd_q15_t ytmp128[864];  // results of the three 288-point sub-transforms
+  // x is read and y is written as 864 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<288; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+288] = x128[j+1];
+    x2128[i+576] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft288((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft288((int16_t *)(x2128+288),(int16_t *)(ytmp128+288),1);
+  dft288((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+288,ytmp128+576,y128,y128+288,y128+576);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<288; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+288+i,
+          ytmp128+576+i,
+          y128+i,
+          y128+288+i,
+          y128+576+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<864; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+static int16_t twa900[299*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb900[299*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+// dft900: 900-point DFT (stride-3 input split, three dft300 calls, bfly3 recombination with twa900/twb900).
+void dft900(int16_t *x,int16_t *y,unsigned char scale_flag)  // 300 x 3
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa900[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb900[0];
+  simd_q15_t x2128[900];    // input regrouped into three contiguous blocks of 300
+  simd_q15_t ytmp128[900];  // results of the three 300-point sub-transforms
+  // x is read and y is written as 900 simd_q15_t elements; scale_flag==1 multiplies y by dft_norm_table[14].
+
+  // regroup: block m (m=0,1,2) collects the elements x128[3*i+m]
+  for (i=0,j=0; i<300; i++,j+=3) {
+    x2128[i]    = x128[j];
+    x2128[i+300] = x128[j+1];
+    x2128[i+600] = x128[j+2];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft300((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1);
+  dft300((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly3_tw1(ytmp128,ytmp128+300,ytmp128+600,y128,y128+300,y128+600);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<300; i++,j++) {
+    bfly3(ytmp128+i,
+          ytmp128+300+i,
+          ytmp128+600+i,
+          y128+i,
+          y128+300+i,
+          y128+600+i,
+          twa128+j,
+          twb128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (i=0; i<900; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+static int16_t twa960[239*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twb960[239*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+static int16_t twc960[239*2*4] __attribute__((aligned(32)));  // aligned: accessed through a simd_q15_t pointer
+
+// dft960: 960-point DFT (stride-4 input split, four dft240 calls, bfly4 recombination with twa/twb/twc960).
+void dft960(int16_t *x,int16_t *y,unsigned char scale_flag)  // 240 x 4
+{
+  int i,j;
+  simd_q15_t *x128=(simd_q15_t *)x;
+  simd_q15_t *y128=(simd_q15_t *)y;
+  simd_q15_t *twa128=(simd_q15_t *)&twa960[0];
+  simd_q15_t *twb128=(simd_q15_t *)&twb960[0];
+  simd_q15_t *twc128=(simd_q15_t *)&twc960[0];
+  simd_q15_t x2128[960];    // input regrouped into four contiguous blocks of 240
+  simd_q15_t ytmp128[960];  // results of the four 240-point sub-transforms
+  // x is read and y is written as 960 simd_q15_t elements; scale_flag==1 multiplies y by the constant 16384.
+
+  // regroup: block m (m=0..3) collects the elements x128[4*i+m]
+  for (i=0,j=0; i<240; i++,j+=4) {
+    x2128[i]    = x128[j];
+    x2128[i+240] = x128[j+1];
+    x2128[i+480] = x128[j+2];
+    x2128[i+720] = x128[j+3];
+  }
+  // transform each block (sub-transforms called with scale_flag 1)
+  dft240((int16_t *)x2128,(int16_t *)ytmp128,1);
+  dft240((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1);
+  dft240((int16_t *)(x2128+480),(int16_t *)(ytmp128+480),1);
+  dft240((int16_t *)(x2128+720),(int16_t *)(ytmp128+720),1);
+  // element 0 of each block uses the butterfly variant that takes no twiddle arguments
+  bfly4_tw1(ytmp128,ytmp128+240,ytmp128+480,ytmp128+720,y128,y128+240,y128+480,y128+720);
+  // remaining elements: butterfly i is given twiddle entries j = i-1
+  for (i=1,j=0; i<240; i++,j++) {
+    bfly4(ytmp128+i,
+          ytmp128+240+i,
+          ytmp128+480+i,
+          ytmp128+720+i,
+          y128+i,
+          y128+240+i,
+          y128+480+i,
+          y128+720+i,
+          twa128+j,
+          twb128+j,
+          twc128+j);
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (i=0; i<960; i++) {
+      y128[i] = mulhi_int16(y128[i],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+
+// Twiddle tables for the radix-3 combining stage of dft972: 323 entries of
+// 8 int16_t each, filled by init_rad3_rep(972,...) in dfts_autoinit().
+static int16_t twa972[323*2*4];
+static int16_t twb972[323*2*4];
+
+/*
+ * dft972: size-972 transform assembled from three size-324 transforms (324 x 3).
+ *
+ * x           input, read as 972 simd_q15_t elements
+ * y           output, written as 972 simd_q15_t elements
+ * scale_flag  when equal to 1 every output element is multiplied by
+ *             dft_norm_table[14] through mulhi_int16()
+ *
+ * Input element 3*k+b is routed to decimated sequence b, each sequence goes
+ * through dft324(), and the results are combined with bfly3_tw1() for
+ * column 0 and bfly3() with the twa972/twb972 twiddles for columns 1..323.
+ */
+void dft972(int16_t *x,int16_t *y,unsigned char scale_flag)  // 324 x 3
+{
+  const int sub = 324;   // length of each decimated sequence
+  simd_q15_t *in   = (simd_q15_t *)x;
+  simd_q15_t *out  = (simd_q15_t *)y;
+  simd_q15_t *tw_a = (simd_q15_t *)&twa972[0];
+  simd_q15_t *tw_b = (simd_q15_t *)&twb972[0];
+  simd_q15_t split[972];    // de-interleaved input, three runs of 324
+  simd_q15_t partial[972];  // outputs of the three 324-point transforms
+  int k,b;
+
+  // de-interleave: sequence b collects elements b, b+3, b+6, ...
+  for (k=0; k<sub; k++) {
+    for (b=0; b<3; b++) {
+      split[k+(b*sub)] = in[(3*k)+b];
+    }
+  }
+
+  for (b=0; b<3; b++) {
+    dft324((int16_t *)(split+(b*sub)),(int16_t *)(partial+(b*sub)),1);
+  }
+
+  // column 0 needs no twiddle multiplication
+  bfly3_tw1(partial,partial+sub,partial+(2*sub),out,out+sub,out+(2*sub));
+
+  // column k uses twiddle entry k-1 (the tables start at index 1)
+  for (k=1; k<sub; k++) {
+    bfly3(partial+k,
+          partial+sub+k,
+          partial+(2*sub)+k,
+          out+k,
+          out+sub+k,
+          out+(2*sub)+k,
+          tw_a+(k-1),
+          tw_b+(k-1));
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (k=0; k<972; k++) {
+      out[k] = mulhi_int16(out[k],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddle tables for the radix-3 combining stage of dft1080: 359 entries of
+// 8 int16_t each, filled by init_rad3_rep(1080,...) in dfts_autoinit().
+static int16_t twa1080[359*2*4];
+static int16_t twb1080[359*2*4];
+
+/*
+ * dft1080: size-1080 transform assembled from three size-360 transforms
+ * (360 x 3).
+ *
+ * x           input, read as 1080 simd_q15_t elements
+ * y           output, written as 1080 simd_q15_t elements
+ * scale_flag  when equal to 1 every output element is multiplied by
+ *             dft_norm_table[14] through mulhi_int16()
+ *
+ * Input element 3*k+b is routed to decimated sequence b, each sequence goes
+ * through dft360(), and the results are combined with bfly3_tw1() for
+ * column 0 and bfly3() with the twa1080/twb1080 twiddles for columns 1..359.
+ */
+void dft1080(int16_t *x,int16_t *y,unsigned char scale_flag)  // 360 x 3
+{
+  const int sub = 360;   // length of each decimated sequence
+  simd_q15_t *in   = (simd_q15_t *)x;
+  simd_q15_t *out  = (simd_q15_t *)y;
+  simd_q15_t *tw_a = (simd_q15_t *)&twa1080[0];
+  simd_q15_t *tw_b = (simd_q15_t *)&twb1080[0];
+  simd_q15_t split[1080];    // de-interleaved input, three runs of 360
+  simd_q15_t partial[1080];  // outputs of the three 360-point transforms
+  int k,b;
+
+  // de-interleave: sequence b collects elements b, b+3, b+6, ...
+  for (k=0; k<sub; k++) {
+    for (b=0; b<3; b++) {
+      split[k+(b*sub)] = in[(3*k)+b];
+    }
+  }
+
+  for (b=0; b<3; b++) {
+    dft360((int16_t *)(split+(b*sub)),(int16_t *)(partial+(b*sub)),1);
+  }
+
+  // column 0 needs no twiddle multiplication
+  bfly3_tw1(partial,partial+sub,partial+(2*sub),out,out+sub,out+(2*sub));
+
+  // column k uses twiddle entry k-1 (the tables start at index 1)
+  for (k=1; k<sub; k++) {
+    bfly3(partial+k,
+          partial+sub+k,
+          partial+(2*sub)+k,
+          out+k,
+          out+sub+k,
+          out+(2*sub)+k,
+          tw_a+(k-1),
+          tw_b+(k-1));
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(dft_norm_table[14]);
+
+    for (k=0; k<1080; k++) {
+      out[k] = mulhi_int16(out[k],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+// Twiddle tables for the radix-4 combining stage of dft1152: 287 entries of
+// 8 int16_t each, filled by init_rad4_rep(1152,...) in dfts_autoinit().
+static int16_t twa1152[287*2*4];
+static int16_t twb1152[287*2*4];
+static int16_t twc1152[287*2*4];
+
+/*
+ * dft1152: size-1152 transform assembled from four size-288 transforms
+ * (288 x 4).
+ *
+ * x           input, read as 1152 simd_q15_t elements
+ * y           output, written as 1152 simd_q15_t elements
+ * scale_flag  when equal to 1 every output element is multiplied by 16384
+ *             through mulhi_int16()
+ *
+ * Input element 4*k+b is routed to decimated sequence b, each sequence goes
+ * through dft288(), and the results are combined with bfly4_tw1() for
+ * column 0 and bfly4() with the twa1152/twb1152/twc1152 twiddles for
+ * columns 1..287.
+ */
+void dft1152(int16_t *x,int16_t *y,unsigned char scale_flag)  // 288 x 4
+{
+  const int sub = 288;   // length of each decimated sequence
+  simd_q15_t *in   = (simd_q15_t *)x;
+  simd_q15_t *out  = (simd_q15_t *)y;
+  simd_q15_t *tw_a = (simd_q15_t *)&twa1152[0];
+  simd_q15_t *tw_b = (simd_q15_t *)&twb1152[0];
+  simd_q15_t *tw_c = (simd_q15_t *)&twc1152[0];
+  simd_q15_t split[1152];    // de-interleaved input, four runs of 288
+  simd_q15_t partial[1152];  // outputs of the four 288-point transforms
+  int k,b;
+
+  // de-interleave: sequence b collects elements b, b+4, b+8, ...
+  for (k=0; k<sub; k++) {
+    for (b=0; b<4; b++) {
+      split[k+(b*sub)] = in[(4*k)+b];
+    }
+  }
+
+  for (b=0; b<4; b++) {
+    dft288((int16_t *)(split+(b*sub)),(int16_t *)(partial+(b*sub)),1);
+  }
+
+  // column 0 needs no twiddle multiplication
+  bfly4_tw1(partial,partial+sub,partial+(2*sub),partial+(3*sub),
+            out,out+sub,out+(2*sub),out+(3*sub));
+
+  // column k uses twiddle entry k-1 (the tables start at index 1)
+  for (k=1; k<sub; k++) {
+    bfly4(partial+k,
+          partial+sub+k,
+          partial+(2*sub)+k,
+          partial+(3*sub)+k,
+          out+k,
+          out+sub+k,
+          out+(2*sub)+k,
+          out+(3*sub)+k,
+          tw_a+(k-1),
+          tw_b+(k-1),
+          tw_c+(k-1));
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+
+    for (k=0; k<1152; k++) {
+      out[k] = mulhi_int16(out[k],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+}
+
+// Twiddle tables for the radix-4 combining stage of dft1200, filled by
+// init_rad4_rep(1200,...) in dfts_autoinit(). Unlike the smaller sizes these
+// have external linkage.
+int16_t twa1200[4784];
+int16_t twb1200[4784];
+int16_t twc1200[4784];
+
+/*
+ * dft1200: size-1200 transform assembled from four size-300 transforms
+ * (300 x 4).
+ *
+ * x           input, read as 1200 simd_q15_t elements
+ * y           output, written as 1200 simd_q15_t elements
+ * scale_flag  when equal to 1 every output element is multiplied by 16384
+ *             through mulhi_int16()
+ *
+ * Input element 4*k+b is routed to decimated sequence b, each sequence goes
+ * through dft300(), and the results are combined with bfly4_tw1() for
+ * column 0 and bfly4() with the twa1200/twb1200/twc1200 twiddles for
+ * columns 1..299.
+ */
+void dft1200(int16_t *x,int16_t *y,unsigned char scale_flag)
+{
+  const int sub = 300;   // length of each decimated sequence
+  simd_q15_t *in   = (simd_q15_t *)x;
+  simd_q15_t *out  = (simd_q15_t *)y;
+  simd_q15_t *tw_a = (simd_q15_t *)&twa1200[0];
+  simd_q15_t *tw_b = (simd_q15_t *)&twb1200[0];
+  simd_q15_t *tw_c = (simd_q15_t *)&twc1200[0];
+  simd_q15_t split[1200];    // de-interleaved input, four runs of 300
+  simd_q15_t partial[1200];  // outputs of the four 300-point transforms
+  int k,b;
+
+  // de-interleave: sequence b collects elements b, b+4, b+8, ...
+  for (k=0; k<sub; k++) {
+    for (b=0; b<4; b++) {
+      split[k+(b*sub)] = in[(4*k)+b];
+    }
+  }
+
+  for (b=0; b<4; b++) {
+    dft300((int16_t *)(split+(b*sub)),(int16_t *)(partial+(b*sub)),1);
+  }
+
+  // column 0 needs no twiddle multiplication
+  bfly4_tw1(partial,partial+sub,partial+(2*sub),partial+(3*sub),
+            out,out+sub,out+(2*sub),out+(3*sub));
+
+  // column k uses twiddle entry k-1 (the tables start at index 1)
+  for (k=1; k<sub; k++) {
+    bfly4(partial+k,
+          partial+sub+k,
+          partial+(2*sub)+k,
+          partial+(3*sub)+k,
+          out+k,
+          out+sub+k,
+          out+(2*sub)+k,
+          out+(3*sub)+k,
+          tw_a+(k-1),
+          tw_b+(k-1),
+          tw_c+(k-1));
+  }
+
+  if (scale_flag==1) {
+    norm128 = set1_int16(16384);//dft_norm_table[13]);
+    for (k=0; k<1200; k++) {
+      out[k] = mulhi_int16(out[k],norm128);
+    }
+  }
+
+  _mm_empty();
+  _m_empty();
+
+}
+
+/*
+ * init_rad4: fill a single buffer with the three twiddle sequences used by a
+ * radix-4 stage of size N.
+ *
+ * tw is treated as three back-to-back tables of N/2 int16_t each. For
+ * i = 0 .. N/4-1, table m (m = 1,2,3) receives the pair
+ *   ( round(32767*cos(2*pi*m*i/N)), -round(32767*sin(2*pi*m*i/N)) )
+ * at positions 2*i and 2*i+1. tw must therefore hold 3*(N/2) int16_t.
+ */
+void init_rad4(int N,int16_t *tw) {
+
+  const int count = N/4;
+  int16_t *w1 = tw;
+  int16_t *w2 = w1+(N/2);
+  int16_t *w3 = w2+(N/2);
+  int k;
+
+  for (k=0; k<count; k++) {
+    const int re = 2*k;      // slot of the cosine term
+    const int im = re+1;     // slot of the negated sine term
+
+    w1[re] = (int16_t)round(32767.0*cos(2*M_PI*k/N));
+    w1[im] = -(int16_t)round(32767.0*sin(2*M_PI*k/N));
+    w2[re] = (int16_t)round(32767.0*cos(2*M_PI*2*k/N));
+    w2[im] = -(int16_t)round(32767.0*sin(2*M_PI*2*k/N));
+    w3[re] = (int16_t)round(32767.0*cos(2*M_PI*3*k/N));
+    w3[im] = -(int16_t)round(32767.0*sin(2*M_PI*3*k/N));
+  }
+}
+/*
+ * init_rad4_rep: generate the replicated twiddle tables for the radix-4
+ * combining stage of a size-N transform.
+ *
+ * For i = 1 .. N/4-1 and m = 1,2,3 (twa, twb, twc respectively) the pair
+ *   ( round(32767*cos(2*pi*m*i/N)), -round(32767*sin(2*pi*m*i/N)) )
+ * is stored four times in a row, so each i occupies 8 consecutive int16_t
+ * in every table. The i = 0 entry is not stored. Each table must hold at
+ * least 8*(N/4-1) int16_t.
+ *
+ * The replication copies int16_t values directly. The previous version wrote
+ * through an int32_t* alias of the int16_t buffer, which violates the C
+ * effective-type (strict aliasing) rule and assumes 4-byte alignment; the
+ * stored values are unchanged.
+ */
+void init_rad4_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc) {
+
+  int i,j;
+
+  for (i=1;i<(N/4);i++) {
+    const int16_t a_re = (int16_t)round(32767.0*cos(2*M_PI*i/N));
+    const int16_t a_im = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
+    const int16_t b_re = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
+    const int16_t b_im = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
+    const int16_t c_re = (int16_t)round(32767.0*cos(2*M_PI*3*i/N));
+    const int16_t c_im = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N));
+
+    // four identical (re,im) pairs per entry
+    for (j=0;j<4;j++) {
+      twa[2*j]   = a_re;
+      twa[2*j+1] = a_im;
+      twb[2*j]   = b_re;
+      twb[2*j+1] = b_im;
+      twc[2*j]   = c_re;
+      twc[2*j+1] = c_im;
+    }
+    twa+=8;
+    twb+=8;
+    twc+=8;
+  }
+}
+
+/*
+ * init_rad2: fill tw with the twiddle sequence of a radix-2 stage of size N.
+ *
+ * For i = 0 .. (N>>1)-1 the pair
+ *   ( round(32767*cos(2*pi*i/N)), -round(32767*sin(2*pi*i/N)) )
+ * is stored at tw[2*i], tw[2*i+1]. tw must hold 2*(N>>1) int16_t.
+ */
+void init_rad2(int N,int16_t *tw) {
+
+  const int half = N>>1;
+  int k;
+
+  for (k=0; k<half; k++) {
+    tw[2*k]   = (int16_t)round(32767.0*cos(2*M_PI*k/N));
+    tw[2*k+1] = -(int16_t)round(32767.0*sin(2*M_PI*k/N));
+  }
+}
+
+/*
+ * init_rad2_rep: generate the replicated twiddle table for the radix-2
+ * combining stage of a size-N transform.
+ *
+ * For i = 1 .. N/2-1 the pair
+ *   ( round(32767*cos(2*pi*i/N)), -round(32767*sin(2*pi*i/N)) )
+ * is stored four times in a row, so each i occupies 8 consecutive int16_t.
+ * The i = 0 entry is not stored. twa must hold at least 8*(N/2-1) int16_t.
+ *
+ * The replication copies int16_t values directly. The previous version wrote
+ * through an int32_t* alias of the int16_t buffer, which violates the C
+ * effective-type (strict aliasing) rule and assumes 4-byte alignment; the
+ * stored values are unchanged.
+ */
+void init_rad2_rep(int N,int16_t *twa) {
+
+  int i,j;
+
+  for (i=1;i<(N/2);i++) {
+    const int16_t re = (int16_t)round(32767.0*cos(2*M_PI*i/N));
+    const int16_t im = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
+
+    // four identical (re,im) pairs per entry
+    for (j=0;j<4;j++) {
+      twa[2*j]   = re;
+      twa[2*j+1] = im;
+    }
+    twa+=8;
+  }
+}
+
+/*
+ * init_rad3: fill the two twiddle sequences used by a radix-3 stage of
+ * size N.
+ *
+ * For i = 0 .. N/3-1, twa receives
+ *   ( round(32767*cos(2*pi*i/N)),   -round(32767*sin(2*pi*i/N)) )
+ * and twb receives
+ *   ( round(32767*cos(2*pi*2*i/N)), -round(32767*sin(2*pi*2*i/N)) )
+ * at positions 2*i and 2*i+1. Each table must hold 2*(N/3) int16_t.
+ */
+void init_rad3(int N,int16_t *twa,int16_t *twb) {
+
+  const int third = N/3;
+  int k;
+
+  for (k=0; k<third; k++) {
+    const int re = 2*k;      // slot of the cosine term
+    const int im = re+1;     // slot of the negated sine term
+
+    twa[re] = (int16_t)round(32767.0*cos(2*M_PI*k/N));
+    twa[im] = -(int16_t)round(32767.0*sin(2*M_PI*k/N));
+    twb[re] = (int16_t)round(32767.0*cos(2*M_PI*2*k/N));
+    twb[im] = -(int16_t)round(32767.0*sin(2*M_PI*2*k/N));
+  }
+}
+
+/*
+ * init_rad3_rep: generate the replicated twiddle tables for the radix-3
+ * combining stage of a size-N transform.
+ *
+ * For i = 1 .. N/3-1 and m = 1,2 (twa, twb respectively) the pair
+ *   ( round(32767*cos(2*pi*m*i/N)), -round(32767*sin(2*pi*m*i/N)) )
+ * is stored four times in a row, so each i occupies 8 consecutive int16_t
+ * in every table. The i = 0 entry is not stored. Each table must hold at
+ * least 8*(N/3-1) int16_t.
+ *
+ * The replication copies int16_t values directly. The previous version wrote
+ * through an int32_t* alias of the int16_t buffer, which violates the C
+ * effective-type (strict aliasing) rule and assumes 4-byte alignment; the
+ * stored values are unchanged.
+ */
+void init_rad3_rep(int N,int16_t *twa,int16_t *twb) {
+
+  int i,j;
+
+  for (i=1;i<(N/3);i++) {
+    const int16_t a_re = (int16_t)round(32767.0*cos(2*M_PI*i/N));
+    const int16_t a_im = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
+    const int16_t b_re = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
+    const int16_t b_im = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
+
+    // four identical (re,im) pairs per entry
+    for (j=0;j<4;j++) {
+      twa[2*j]   = a_re;
+      twa[2*j+1] = a_im;
+      twb[2*j]   = b_re;
+      twb[2*j+1] = b_im;
+    }
+    twa+=8;
+    twb+=8;
+  }
+}
+
+/*
+ * init_rad5_rep: generate the replicated twiddle tables for the radix-5
+ * combining stage of a size-N transform.
+ *
+ * For i = 1 .. N/5-1 and m = 1,2,3,4 (twa, twb, twc, twd respectively) the
+ * pair
+ *   ( round(32767*cos(2*pi*m*i/N)), -round(32767*sin(2*pi*m*i/N)) )
+ * is stored four times in a row, so each i occupies 8 consecutive int16_t
+ * in every table. The i = 0 entry is not stored. Each table must hold at
+ * least 8*(N/5-1) int16_t.
+ *
+ * The replication copies int16_t values directly. The previous version wrote
+ * through an int32_t* alias of the int16_t buffer, which violates the C
+ * effective-type (strict aliasing) rule and assumes 4-byte alignment; the
+ * stored values are unchanged.
+ */
+void init_rad5_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc,int16_t *twd) {
+
+  int i,j;
+
+  for (i=1;i<(N/5);i++) {
+    const int16_t a_re = (int16_t)round(32767.0*cos(2*M_PI*i/N));
+    const int16_t a_im = -(int16_t)round(32767.0*sin(2*M_PI*i/N));
+    const int16_t b_re = (int16_t)round(32767.0*cos(2*M_PI*2*i/N));
+    const int16_t b_im = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N));
+    const int16_t c_re = (int16_t)round(32767.0*cos(2*M_PI*3*i/N));
+    const int16_t c_im = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N));
+    const int16_t d_re = (int16_t)round(32767.0*cos(2*M_PI*4*i/N));
+    const int16_t d_im = -(int16_t)round(32767.0*sin(2*M_PI*4*i/N));
+
+    // four identical (re,im) pairs per entry
+    for (j=0;j<4;j++) {
+      twa[2*j]   = a_re;
+      twa[2*j+1] = a_im;
+      twb[2*j]   = b_re;
+      twb[2*j+1] = b_im;
+      twc[2*j]   = c_re;
+      twc[2*j+1] = c_im;
+      twd[2*j]   = d_re;
+      twd[2*j+1] = d_im;
+    }
+    twa+=8;
+    twb+=8;
+    twc+=8;
+    twd+=8;
+  }
+}
+/*----------------------------------------------------------------*/
+/* dft library entry points:                                      */
+
+/*
+ * dfts_autoinit: populate the twiddle-factor tables named below by calling
+ * the init_rad* generator that matches each table set, passing the transform
+ * size as first argument. Takes no input and always returns 0.
+ * NOTE(review): presumably this must run once before any dft/idft routine
+ * that reads these tables is used -- confirm against callers.
+ */
+int dfts_autoinit(void)
+{
+  // single-buffer tables (init_rad4 / init_rad2 pack everything into one array)
+  init_rad4(1024,tw1024);
+  init_rad2(2048,tw2048);
+  init_rad4(4096,tw4096);
+  init_rad2(8192,tw8192);
+  
+  // radix-3 tables, non-replicated layout
+  init_rad3(1536,twa1536,twb1536);
+  init_rad3(3072,twa3072,twb3072);
+  init_rad3(6144,twa6144,twb6144);
+  init_rad3(12288,twa12288,twb12288);
+  init_rad3(18432,twa18432,twb18432);
+  init_rad3(24576,twa24576,twb24576);
+
+  // replicated-layout tables (each twiddle stored four times, index 0 omitted);
+  // the generator chosen fixes the radix of the final combining stage
+  init_rad2_rep(24,tw24);
+  init_rad3_rep(36,twa36,twb36);
+  init_rad4_rep(48,twa48,twb48,twc48);
+  init_rad5_rep(60,twa60,twb60,twc60,twd60);
+  init_rad2_rep(72,tw72);
+  init_rad2_rep(96,tw96);
+  init_rad3_rep(108,twa108,twb108);
+  init_rad2_rep(120,tw120);
+  init_rad3_rep(144,twa144,twb144);
+  init_rad3_rep(180,twa180,twb180);
+  init_rad4_rep(192,twa192,twb192,twc192);
+  init_rad3_rep(216,twa216,twb216);
+  init_rad4_rep(240,twa240,twb240,twc240);
+  init_rad3_rep(288,twa288,twb288);
+  init_rad5_rep(300,twa300,twb300,twc300,twd300);
+  init_rad3_rep(324,twa324,twb324);
+  init_rad3_rep(360,twa360,twb360);
+  init_rad4_rep(384,twa384,twb384,twc384);
+  init_rad4_rep(432,twa432,twb432,twc432);
+  init_rad4_rep(480,twa480,twb480,twc480);
+  init_rad3_rep(540,twa540,twb540);
+  init_rad3_rep(576,twa576,twb576);
+  init_rad2_rep(600,twa600);
+  init_rad3_rep(648,twa648,twb648);
+  init_rad4_rep(720,twa720,twb720,twc720);
+  init_rad4_rep(768,twa768,twb768,twc768);
+  init_rad3_rep(864,twa864,twb864);
+  init_rad3_rep(900,twa900,twb900);
+  init_rad4_rep(960,twa960,twb960,twc960);
+  init_rad3_rep(972,twa972,twb972);
+  init_rad3_rep(1080,twa1080,twb1080);
+  init_rad4_rep(1152,twa1152,twb1152,twc1152);
+  init_rad4_rep(1200,twa1200,twb1200,twc1200);
+  return 0;
+}
+
+
+
+
+/*
+ * dft: table-driven entry point. Asserts that sizeidx is below
+ * DFT_SIZE_IDXTABLESIZE, then calls dft_ftab[sizeidx] with
+ * (sigF, sig, scale_flag) in that order.
+ * NOTE(review): which of sigF/sig is the input and which the output is
+ * decided by the routine stored in dft_ftab -- confirm against the table.
+ */
+void dft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
+	// sizeidx is uint8_t, so the >=0 half of the check is always true;
+	// only the upper bound can actually fail.
+	AssertFatal((sizeidx>=0 && sizeidx<(int)DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx);
+	dft_ftab[sizeidx](sigF,sig,scale_flag);
+};
+
+/*
+ * idft: table-driven entry point. Asserts that sizeidx is below
+ * IDFT_SIZE_IDXTABLESIZE, then calls idft_ftab[sizeidx] with
+ * (sigF, sig, scale_flag) in that order.
+ * NOTE(review): which of sigF/sig is the input and which the output is
+ * decided by the routine stored in idft_ftab -- confirm against the table.
+ */
+void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){
+	// sizeidx is uint8_t, so the >=0 half of the check is always true;
+	// only the upper bound can actually fail.
+	AssertFatal((sizeidx>=0 && sizeidx<(int)IDFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx);
+	idft_ftab[sizeidx](sigF,sig,scale_flag);
+};
+
+/*---------------------------------------------------------------------------------------*/
+
+#ifdef MR_MAIN
+#include <string.h>
+#include <stdio.h>
+
+#define LOG_M write_output
+/*
+ * write_output: dump an array to a text file, by default wrapped as
+ * "vname = [ ... ];".
+ *
+ * fname   file to write
+ * vname   variable name used in the "vname = [" header
+ * data    buffer to dump; its element type is selected by format
+ * length  number of elements (complex formats read 2*length scalars)
+ * dec     decimation step: every dec-th element is written
+ * format  element type / layout selector:
+ *           0  real 16-bit            1  complex 16-bit
+ *           2  real 32-bit            3  complex 32-bit
+ *           4  real 8-bit             5  complex 8-bit
+ *           6  real 64-bit            7  real double
+ *           8  complex double         9  real unsigned 8-bit
+ *          10  complex 16-bit, comma separated, appended
+ *          11  real 16-bit, comma separated, appended
+ *          12  single unsigned 8-bit value, appended
+ *          13  complex 16-bit, appended, no header and no footer
+ *          14  complex 16-bit, appended, no header, "];" footer
+ *          15  complex 16-bit, new file with header, no footer
+ *
+ * Returns 0 on success and -1 when the file cannot be opened.
+ */
+int write_output(const char *fname,const char *vname,void *data,int length,int dec,char format)
+{
+  // formats 10..14 append to the file and emit no "vname = [" header
+  const int appending = (format >= 10 && format <= 14);
+  // formats 10,11,12,13 and 15 emit no closing "];"
+  const int no_footer = ((format >= 10 && format <= 13) || format == 15);
+
+  // typed views of the same buffer, one per supported element type
+  const short         *s16 = (const short *)data;
+  const int           *s32 = (const int *)data;
+  const char          *s8  = (const char *)data;
+  const unsigned char *u8  = (const unsigned char *)data;
+  const long long     *s64 = (const long long *)data;
+  const double        *f64 = (const double *)data;
+
+  FILE *out;
+  int k;
+
+  printf("Writing %d elements of type %d to %s\n",length,format,fname);
+
+  out = fopen(fname, appending ? "a+" : "w+");
+
+  if (out == NULL) {
+    printf("[OPENAIR][FILE OUTPUT] Cannot open file %s\n",fname);
+    return(-1);
+  }
+
+  if (!appending)
+    fprintf(out,"%s = [",vname);
+
+  switch (format) {
+  case 0:   // real 16-bit
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%d\n",s16[k]);
+    break;
+
+  case 1:  // complex 16-bit
+  case 13:
+  case 14:
+  case 15:
+    for (k=0; k<2*length; k+=2*dec)
+      fprintf(out,"%d + j*(%d)\n",s16[k],s16[k+1]);
+    break;
+
+  case 2:  // real 32-bit
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%d\n",s32[k]);
+    break;
+
+  case 3: // complex 32-bit
+    for (k=0; k<2*length; k+=2*dec)
+      fprintf(out,"%d + j*(%d)\n",s32[k],s32[k+1]);
+    break;
+
+  case 4: // real 8-bit
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%d\n",s8[k]);
+    break;
+
+  case 5: // complex 8-bit
+    for (k=0; k<2*length; k+=2*dec)
+      fprintf(out,"%d + j*(%d)\n",s8[k],s8[k+1]);
+    break;
+
+  case 6:  // real 64-bit
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%lld\n",s64[k]);
+    break;
+
+  case 7: // real double
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%g\n",f64[k]);
+    break;
+
+  case 8: // complex double
+    for (k=0; k<2*length; k+=2*dec)
+      fprintf(out,"%g + j*(%g)\n",f64[k],f64[k+1]);
+    break;
+
+  case 9: // real unsigned 8-bit
+    for (k=0; k<length; k+=dec)
+      fprintf(out,"%d\n",u8[k]);
+    break;
+
+  case 10: { // complex 16-bit on one line: leading newline, ',' separators, ';' after the last
+    const int last = 2*(length-1);
+
+    for (k=0; k<2*length; k+=2*dec) {
+      if ((k > 0) && (k < last))
+        fprintf(out,"%d + j*(%d),",s16[k],s16[k+1]);
+      else if (k == last)
+        fprintf(out,"%d + j*(%d);",s16[k],s16[k+1]);
+      else if (k == 0)
+        fprintf(out,"\n%d + j*(%d),",s16[k],s16[k+1]);
+    }
+
+    break;
+  }
+
+  case 11: { // real 16-bit on one line, same separator scheme as format 10
+    const int last = length-1;
+
+    for (k=0; k<length; k+=dec) {
+      if ((k > 0) && (k < last))
+        fprintf(out,"%d,",s16[k]);
+      else if (k == last)
+        fprintf(out,"%d;",s16[k]);
+      else if (k == 0)
+        fprintf(out,"\n%d,",s16[k]);
+    }
+
+    printf("\n erennnnnnnnnnnnnnn: length :%d",length);
+    break;
+  }
+
+  case 12: // single unsigned 8-bit value
+    // NOTE(review): this reads the first byte of the `data` pointer variable
+    // itself (note the &), not of the buffer it points to -- confirm intended.
+    fprintf(out,"%d \n",((unsigned char *)&data)[0]);
+    break;
+
+  }
+
+  if (!no_footer)
+    fprintf(out,"];\n");
+
+  fclose(out);
+  return(0);
+}
+
+
+int main(int argc, char**argv)
+{
+
+
+  time_stats_t ts;
+#ifdef __AVX2__
+  simd256_q15_t x[4096],x2[4096],y[4096],tw0,tw1,tw2,tw3;
+#else
+  simd_q15_t x[8192],y[8192],tw0,tw1,tw2,tw3;
+#endif
+  int i;
+  simd_q15_t *x128=(simd_q15_t*)x,*y128=(simd_q15_t*)y;
+
+  dfts_autoinit();
+
+  set_taus_seed(0);
+  opp_enabled = 1;
+ /* 
+    ((int16_t *)&tw0)[0] = 32767;
+    ((int16_t *)&tw0)[1] = 0;
+    ((int16_t *)&tw0)[2] = 32767;
+    ((int16_t *)&tw0)[3] = 0;
+    ((int16_t *)&tw0)[4] = 32767;
+    ((int16_t *)&tw0)[5] = 0;
+    ((int16_t *)&tw0)[6] = 32767;
+    ((int16_t *)&tw0)[7] = 0;
+
+    ((int16_t *)&tw1)[0] = 32767;
+    ((int16_t *)&tw1)[1] = 0;
+    ((int16_t *)&tw1)[2] = 32767;
+    ((int16_t *)&tw1)[3] = 0;
+    ((int16_t *)&tw1)[4] = 32767;
+    ((int16_t *)&tw1)[5] = 0;
+    ((int16_t *)&tw1)[6] = 32767;
+    ((int16_t *)&tw1)[7] = 0;
+
+    ((int16_t *)&tw2)[0] = 32767;
+    ((int16_t *)&tw2)[1] = 0;
+    ((int16_t *)&tw2)[2] = 32767;
+    ((int16_t *)&tw2)[3] = 0;
+    ((int16_t *)&tw2)[4] = 32767;
+    ((int16_t *)&tw2)[5] = 0;
+    ((int16_t *)&tw2)[6] = 32767;
+    ((int16_t *)&tw2)[7] = 0;
+
+    ((int16_t *)&tw3)[0] = 32767;
+    ((int16_t *)&tw3)[1] = 0;
+    ((int16_t *)&tw3)[2] = 32767;
+    ((int16_t *)&tw3)[3] = 0;
+    ((int16_t *)&tw3)[4] = 32767;
+    ((int16_t *)&tw3)[5] = 0;
+    ((int16_t *)&tw3)[6] = 32767;
+    ((int16_t *)&tw3)[7] = 0;
+ */
+    for (i=0;i<300;i++) {
+#if defined(__x86_64__) || defined(__i386__)
+#ifndef __AVX2__
+      x[i] = _mm_set1_epi32(taus());
+      x[i] = _mm_srai_epi16(x[i],4);
+#else
+      x[i] = _mm256_set1_epi32(taus());
+      x[i] = _mm256_srai_epi16(x[i],4);
+#endif
+#elif defined(__arm__)
+      x[i] = (int16x8_t)vdupq_n_s32(taus());
+      x[i] = vshrq_n_s16(x[i],4);
+#endif
+    }
+      /*
+    bfly2_tw1(x,x+1,y,y+1);
+    printf("(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1]);
+    printf("(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[2],((int16_t*)&y[0])[3],((int16_t*)&y[1])[2],((int16_t*)&y[1])[3]);
+    printf("(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[4],((int16_t*)&y[0])[5],((int16_t*)&y[1])[4],((int16_t*)&y[1])[5]);
+    printf("(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[6],((int16_t*)&y[0])[7],((int16_t*)&y[1])[6],((int16_t*)&y[1])[7]);
+    bfly2(x,x+1,y,y+1, &tw0);
+    printf("0(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1]);
+    printf("1(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[2],((int16_t*)&y[0])[3],((int16_t*)&y[1])[2],((int16_t*)&y[1])[3]);
+    printf("2(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[4],((int16_t*)&y[0])[5],((int16_t*)&y[1])[4],((int16_t*)&y[1])[5]);
+    printf("3(%d,%d) (%d,%d) => (%d,%d) (%d,%d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&y[0])[6],((int16_t*)&y[0])[7],((int16_t*)&y[1])[6],((int16_t*)&y[1])[7]);
+    bfly2(x,x+1,y,y+1, &tw0);
+
+    bfly3_tw1(x,x+1,x+2,y, y+1,y+2);
+    printf("0(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1],((int16_t*)&y[2])[0],((int16_t*)&y[2])[1]);
+    printf("1(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[2],((int16_t*)&y[0])[3],((int16_t*)&y[1])[2],((int16_t*)&y[1])[3],((int16_t*)&y[2])[2],((int16_t*)&y[2])[3]);
+    printf("2(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[4],((int16_t*)&y[0])[5],((int16_t*)&y[1])[4],((int16_t*)&y[1])[5],((int16_t*)&y[2])[4],((int16_t*)&y[2])[5]);
+    printf("3(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[6],((int16_t*)&y[0])[7],((int16_t*)&y[1])[6],((int16_t*)&y[1])[7],((int16_t*)&y[2])[6],((int16_t*)&y[2])[7]);
+    bfly3(x,x+1,x+2,y, y+1,y+2,&tw0,&tw1);
+
+    printf("0(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1],((int16_t*)&y[2])[0],((int16_t*)&y[2])[1]);
+    printf("1(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[2],((int16_t*)&y[0])[3],((int16_t*)&y[1])[2],((int16_t*)&y[1])[3],((int16_t*)&y[2])[2],((int16_t*)&y[2])[3]);
+    printf("2(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[4],((int16_t*)&y[0])[5],((int16_t*)&y[1])[4],((int16_t*)&y[1])[5],((int16_t*)&y[2])[4],((int16_t*)&y[2])[5]);
+    printf("3(%d,%d) (%d,%d) (%d %d) => (%d,%d) (%d,%d) (%d %d)\n",((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&y[0])[6],((int16_t*)&y[0])[7],((int16_t*)&y[1])[6],((int16_t*)&y[1])[7],((int16_t*)&y[2])[6],((int16_t*)&y[2])[7]);
+
+
+    bfly4_tw1(x,x+1,x+2,x+3,y, y+1,y+2,y+3);
+    printf("(%d,%d) (%d,%d) (%d %d) (%d,%d) => (%d,%d) (%d,%d) (%d %d) (%d,%d)\n",
+     ((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],
+     ((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&x[3])[0],((int16_t*)&x[3])[1],
+     ((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1],
+     ((int16_t*)&y[2])[0],((int16_t*)&y[2])[1],((int16_t*)&y[3])[0],((int16_t*)&y[3])[1]);
+
+    bfly4(x,x+1,x+2,x+3,y, y+1,y+2,y+3,&tw0,&tw1,&tw2);
+    printf("0(%d,%d) (%d,%d) (%d %d) (%d,%d) => (%d,%d) (%d,%d) (%d %d) (%d,%d)\n",
+     ((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],
+     ((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&x[3])[0],((int16_t*)&x[3])[1],
+     ((int16_t*)&y[0])[0],((int16_t*)&y[0])[1],((int16_t*)&y[1])[0],((int16_t*)&y[1])[1],
+     ((int16_t*)&y[2])[0],((int16_t*)&y[2])[1],((int16_t*)&y[3])[0],((int16_t*)&y[3])[1]);
+    printf("1(%d,%d) (%d,%d) (%d %d) (%d,%d) => (%d,%d) (%d,%d) (%d %d) (%d,%d)\n",
+     ((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],
+     ((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&x[3])[0],((int16_t*)&x[3])[1],
+     ((int16_t*)&y[0])[2],((int16_t*)&y[0])[3],((int16_t*)&y[1])[2],((int16_t*)&y[1])[3],
+     ((int16_t*)&y[2])[2],((int16_t*)&y[2])[3],((int16_t*)&y[3])[2],((int16_t*)&y[3])[3]);
+    printf("2(%d,%d) (%d,%d) (%d %d) (%d,%d) => (%d,%d) (%d,%d) (%d %d) (%d,%d)\n",
+     ((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],
+     ((int16_t*)&x[2])[0],((int16_t*)&x[2])[1],((int16_t*)&x[3])[0],((int16_t*)&x[3])[1],
+     ((int16_t*)&y[0])[4],((int16_t*)&y[0])[5],((int16_t*)&y[1])[4],((int16_t*)&y[1])[5],
+     ((int16_t*)&y[2])[4],((int16_t*)&y[2])[5],((int16_t*)&y[3])[4],((int16_t*)&y[3])[5]);
+    printf("3(%d,%d) (%d,%d) (%d %d) (%d,%d) => (%d,%d) (%d,%d) (%d %d) (%d,%d)\n",
+     ((int16_t*)&x[0])[0],((int16_t*)&x[0])[1],((int16_t*)&x[1])[0],((int16_t*)&x[1])[1],
+     ((int16_t*)&x[2])[6],((int16_t*)&x[2])[7],((int16_t*)&x[3])[6],((int16_t*)&x[3])[7],
+     ((int16_t*)&y[0])[6],((int16_t*)&y[0])[7],((int16_t*)&y[1])[6],((int16_t*)&y[1])[7],
+     ((int16_t*)&y[2])[0],((int16_t*)&y[2])[1],((int16_t*)&y[3])[0],((int16_t*)&y[3])[1]);
+
+    bfly5_tw1(x,x+1,x+2,x+3,x+4,y,y+1,y+2,y+3,y+4);
+
+    for (i=0;i<5;i++)
+      printf("%d,%d,",
+       ((int16_t*)&x[i])[0],((int16_t*)&x[i])[1]);
+    printf("\n");
+    for (i=0;i<5;i++)
+      printf("%d,%d,",
+       ((int16_t*)&y[i])[0],((int16_t*)&y[i])[1]);
+    printf("\n");
+
+    bfly5(x,x+1,x+2,x+3,x+4,y, y+1,y+2,y+3,y+4,&tw0,&tw1,&tw2,&tw3);
+    for (i=0;i<5;i++)
+      printf("%d,%d,",
+       ((int16_t*)&x[i])[0],((int16_t*)&x[i])[1]);
+    printf("\n");
+    for (i=0;i<5;i++)
+      printf("%d,%d,",
+       ((int16_t*)&y[i])[0],((int16_t*)&y[i])[1]);
+    printf("\n");
+
+
+    printf("\n\n12-point\n");
+    dft12f(x,
+     x+1,
+     x+2,
+     x+3,
+     x+4,
+     x+5,
+     x+6,
+     x+7,
+     x+8,
+     x+9,
+     x+10,
+     x+11,
+     y,
+     y+1,
+     y+2,
+     y+3,
+     y+4,
+     y+5,
+     y+6,
+     y+7,
+     y+8,
+     y+9,
+     y+10,
+     y+11);
+
+
+    printf("X: ");
+    for (i=0;i<12;i++)
+      printf("%d,%d,",((int16_t*)(&x[i]))[0],((int16_t *)(&x[i]))[1]);
+    printf("\nY:");
+    for (i=0;i<12;i++)
+      printf("%d,%d,",((int16_t*)(&y[i]))[0],((int16_t *)(&y[i]))[1]);
+    printf("\n");
+
+ */
+
+    for (i=0;i<32;i++) {
+      ((int16_t*)x)[i] = (int16_t)((taus()&0xffff))>>5;
+    }
+    memset((void*)&y[0],0,16*4);
+    idft16((int16_t *)x,(int16_t *)y);
+    printf("\n\n16-point\n");
+    printf("X: ");
+    for (i=0;i<4;i++)
+      printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&x[i])[0],((int16_t *)&x[i])[1],((int16_t*)&x[i])[2],((int16_t *)&x[i])[3],((int16_t*)&x[i])[4],((int16_t*)&x[i])[5],((int16_t*)&x[i])[6],((int16_t*)&x[i])[7]);
+    printf("\nY:");
+
+    for (i=0;i<4;i++)
+      printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&y[i])[0],((int16_t *)&y[i])[1],((int16_t*)&y[i])[2],((int16_t *)&y[i])[3],((int16_t*)&y[i])[4],((int16_t *)&y[i])[5],((int16_t*)&y[i])[6],((int16_t *)&y[i])[7]);
+    printf("\n");
+ 
+  memset((void*)&x[0],0,2048*4);
+      
+  for (i=0; i<2048; i+=4) {
+     ((int16_t*)x)[i<<1] = 1024;
+     ((int16_t*)x)[1+(i<<1)] = 0;
+     ((int16_t*)x)[2+(i<<1)] = 0;
+     ((int16_t*)x)[3+(i<<1)] = 1024;
+     ((int16_t*)x)[4+(i<<1)] = -1024;
+     ((int16_t*)x)[5+(i<<1)] = 0;
+     ((int16_t*)x)[6+(i<<1)] = 0;
+     ((int16_t*)x)[7+(i<<1)] = -1024;
+     }
+  /*
+  for (i=0; i<2048; i+=2) {
+     ((int16_t*)x)[i<<1] = 1024;
+     ((int16_t*)x)[1+(i<<1)] = 0;
+     ((int16_t*)x)[2+(i<<1)] = -1024;
+     ((int16_t*)x)[3+(i<<1)] = 0;
+     }
+       
+  for (i=0;i<2048*2;i++) {
+    ((int16_t*)x)[i] = i/2;//(int16_t)((taus()&0xffff))>>5;
+  }
+     */
+  memset((void*)&x[0],0,64*sizeof(int32_t));
+  for (i=2;i<36;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=(128-36);i<128;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  idft64((int16_t *)x,(int16_t *)y,1);
+  
+
+  printf("64-point\n");
+  printf("X: ");
+  for (i=0;i<8;i++)
+    print_shorts256("",((int16_t *)x)+(i*16));
+
+  printf("\nY:");
+
+  for (i=0;i<8;i++)
+    print_shorts256("",((int16_t *)y)+(i*16));
+  printf("\n");
+
+  
+
+
+  idft64((int16_t *)x,(int16_t *)y,1);
+  idft64((int16_t *)x,(int16_t *)y,1);
+  idft64((int16_t *)x,(int16_t *)y,1);
+  reset_meas(&ts);
+
+  for (i=0; i<10000000; i++) {
+    start_meas(&ts);
+    idft64((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+
+  }
+  /*
+  printf("\n\n64-point (%f cycles, #trials %d)\n",(double)ts.diff/(double)ts.trials,ts.trials);
+  //  LOG_M("x64.m","x64",x,64,1,1);
+  LOG_M("y64.m","y64",y,64,1,1);
+  LOG_M("x64.m","x64",x,64,1,1);
+  */
+/*
+  printf("X: ");
+  for (i=0;i<16;i++)
+    printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&x[i])[0],((int16_t *)&x[i])[1],((int16_t*)&x[i])[2],((int16_t *)&x[i])[3],((int16_t*)&x[i])[4],((int16_t*)&x[i])[5],((int16_t*)&x[i])[6],((int16_t*)&x[i])[7]);
+  printf("\nY:");
+
+  for (i=0;i<16;i++)
+    printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&y[i])[0],((int16_t *)&y[i])[1],((int16_t*)&y[i])[2],((int16_t *)&y[i])[3],((int16_t*)&y[i])[4],((int16_t *)&y[i])[5],((int16_t*)&y[i])[6],((int16_t *)&y[i])[7]);
+  printf("\n");
+
+  idft64((int16_t*)y,(int16_t*)x,1);
+  printf("X: ");
+  for (i=0;i<16;i++)
+    printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&x[i])[0],((int16_t *)&x[i])[1],((int16_t*)&x[i])[2],((int16_t *)&x[i])[3],((int16_t*)&x[i])[4],((int16_t*)&x[i])[5],((int16_t*)&x[i])[6],((int16_t*)&x[i])[7]);
+ 
+  for (i=0; i<256; i++) {
+    ((int16_t*)x)[i] = (int16_t)((taus()&0xffff))>>5;
+  }
+*/
+  
+  memset((void*)&x[0],0,128*4);
+  for (i=2;i<72;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=(256-72);i<256;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft128((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n128-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y128.m","y128",y,128,1,1);
+  LOG_M("x128.m","x128",x,128,1,1);
+/*
+  printf("X: ");
+   for (i=0;i<32;i++)
+     printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&x[i])[0],((int16_t *)&x[i])[1],((int16_t*)&x[i])[2],((int16_t *)&x[i])[3],((int16_t*)&x[i])[4],((int16_t*)&x[i])[5],((int16_t*)&x[i])[6],((int16_t*)&x[i])[7]);
+   printf("\nY:");
+
+   for (i=0;i<32;i++)
+     printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&y[i])[0],((int16_t *)&y[i])[1],((int16_t*)&y[i])[2],((int16_t *)&y[i])[3],((int16_t*)&y[i])[4],((int16_t *)&y[i])[5],((int16_t*)&y[i])[6],((int16_t *)&y[i])[7]);
+   printf("\n");
+*/
+
+  /*
+  for (i=0; i<512; i++) {
+    ((int16_t*)x)[i] = (int16_t)((taus()&0xffff))>>5;
+  }
+  
+  memset((void*)&y[0],0,256*4);
+  */
+  memset((void*)&x[0],0,256*sizeof(int32_t));
+  for (i=2;i<144;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=(512-144);i<512;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft256((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n256-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y256.m","y256",y,256,1,1);
+  LOG_M("x256.m","x256",x,256,1,1);
+
+  memset((void*)&x[0],0,512*sizeof(int32_t));
+  for (i=2;i<302;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=(1024-300);i<1024;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft512((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n512-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y512.m","y512",y,512,1,1);
+  LOG_M("x512.m","x512",x,512,1,1);
+  /*
+  printf("X: ");
+  for (i=0;i<64;i++)
+    printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&x[i])[0],((int16_t *)&x[i])[1],((int16_t*)&x[i])[2],((int16_t *)&x[i])[3],((int16_t*)&x[i])[4],((int16_t*)&x[i])[5],((int16_t*)&x[i])[6],((int16_t*)&x[i])[7]);
+  printf("\nY:");
+
+  for (i=0;i<64;i++)
+    printf("%d,%d,%d,%d,%d,%d,%d,%d,",((int16_t*)&y[i])[0],((int16_t *)&y[i])[1],((int16_t*)&y[i])[2],((int16_t *)&y[i])[3],((int16_t*)&y[i])[4],((int16_t *)&y[i])[5],((int16_t*)&y[i])[6],((int16_t *)&y[i])[7]);
+  printf("\n");
+  */
+
+  memset((void*)x,0,1024*sizeof(int32_t));
+  for (i=2;i<602;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*724;i<2048;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft1024((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n1024-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y1024.m","y1024",y,1024,1,1);
+  LOG_M("x1024.m","x1024",x,1024,1,1);
+
+
+  memset((void*)x,0,1536*sizeof(int32_t));
+  for (i=2;i<1202;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(1536-600);i<3072;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft1536((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  write_output("y1536.m","y1536",y,1536,1,1);
+  write_output("x1536.m","x1536",x,1536,1,1);
+
+
+  memset((void*)x,0,2048*sizeof(int32_t));
+  for (i=2;i<1202;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(2048-600);i<4096;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    dft2048((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n2048-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y2048.m","y2048",y,2048,1,1);
+  LOG_M("x2048.m","x2048",x,2048,1,1);
+
+// NR 80Mhz, 217 PRB, 3/4 sampling
+  memset((void*)x, 0, 3072*sizeof(int32_t));
+  for (i=2;i<2506;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(3072-1252);i<6144;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft3072((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n3072-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  write_output("y3072.m","y3072",y,3072,1,1);
+  write_output("x3072.m","x3072",x,3072,1,1);
+
+
+  memset((void*)x,0,4096*sizeof(int32_t));
+  for (i=0;i<2400;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(4096-1200);i<8192;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft4096((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n4096-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y4096.m","y4096",y,4096,1,1);
+  LOG_M("x4096.m","x4096",x,4096,1,1);
+
+  dft4096((int16_t *)y,(int16_t *)x2,1);
+  LOG_M("x4096_2.m","x4096_2",x2,4096,1,1);
+
+// NR 160Mhz, 434 PRB, 3/4 sampling
+  memset((void*)x, 0, 6144*sizeof(int32_t));
+  for (i=2;i<5010;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(6144-2504);i<12288;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+
+  reset_meas(&ts);
+
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft6144((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n6144-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  write_output("y6144.m","y6144",y,6144,1,1);
+  write_output("x6144.m","x6144",x,6144,1,1);
+
+  memset((void*)x,0,8192*sizeof(int32_t));
+  for (i=2;i<4802;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(8192-2400);i<16384;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft8192((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y8192.m","y8192",y,8192,1,1);
+  LOG_M("x8192.m","x8192",x,8192,1,1);
+
+  memset((void*)x,0,1536*sizeof(int32_t));
+  for (i=2;i<1202;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(1536-600);i<3072;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft1536((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y1536.m","y1536",y,1536,1,1);
+  LOG_M("x1536.m","x1536",x,1536,1,1);
+
+  printf("\n\n1536-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y8192.m","y8192",y,8192,1,1);
+  LOG_M("x8192.m","x8192",x,8192,1,1);
+
+  memset((void*)x,0,3072*sizeof(int32_t));
+  for (i=2;i<1202;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(3072-600);i<3072;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft3072((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n3072-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y3072.m","y3072",y,3072,1,1);
+  LOG_M("x3072.m","x3072",x,3072,1,1);
+
+  memset((void*)x,0,6144*sizeof(int32_t));
+  for (i=2;i<4802;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(6144-2400);i<12288;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft6144((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n6144-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y6144.m","y6144",y,6144,1,1);
+  LOG_M("x6144.m","x6144",x,6144,1,1);
+
+  memset((void*)x,0,12288*sizeof(int32_t));
+  for (i=2;i<9602;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(12288-4800);i<24576;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft12288((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n12288-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y12288.m","y12288",y,12288,1,1);
+  LOG_M("x12288.m","x12288",x,12288,1,1);
+
+  memset((void*)x,0,18432*sizeof(int32_t));
+  for (i=2;i<14402;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(18432-7200);i<36864;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft18432((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n18432-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y18432.m","y18432",y,18432,1,1);
+  LOG_M("x18432.m","x18432",x,18432,1,1);
+
+  memset((void*)x,0,24576*sizeof(int32_t));
+  for (i=2;i<19202;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  for (i=2*(24576-19200);i<49152;i++) {
+    if ((taus() & 1)==0)
+      ((int16_t*)x)[i] = 364;
+    else
+      ((int16_t*)x)[i] = -364;
+  }
+  reset_meas(&ts);
+  for (i=0; i<10000; i++) {
+    start_meas(&ts);
+    idft24576((int16_t *)x,(int16_t *)y,1);
+    stop_meas(&ts);
+  }
+
+  printf("\n\n24576-point(%f cycles)\n",(double)ts.diff/(double)ts.trials);
+  LOG_M("y24576.m","y24576",y,24576,1,1);
+  LOG_M("x24576.m","x24576",x,24576,1,1);
+
+  int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200};
+  void (*dft)(int16_t *x,int16_t *y,uint8_t scale)[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200};
+  for (int n=0;n<33;n++) {
+    // 4xN-point DFT
+    memset((void*)x,0,dftsizes[n]*8*sizeof(int16_t));
+    for (i=0;i<dftsizes[n]*8;i+=8) {
+      if ((taus() & 1)==0)
+	((int16_t*)x)[i]   = 364;
+      else
+	((int16_t*)x)[i]   = -364;
+      if ((taus() & 1)==0)
+	((int16_t*)x)[i+1] = 364;
+      else
+	((int16_t*)x)[i+1] = -364;
+    }
+    
+    reset_meas(&ts);
+    for (i=0; i<10000; i++) {
+      start_meas(&ts);
+      (dft[n])((int16_t *)x,(int16_t *)y,1);
+      stop_meas(&ts);
+    }
+    
+    printf("\n\n4x%d-point(%f cycles)\n",dftsizes[n],(double)ts.diff/(double)ts.trials);
+    char ystr[5],xstr[5],ystr2[5],xstr2[5];
+    sprintf(ystr,"y%d.m",dftsizes[n]);
+    sprintf(xstr,"x%d.m",dftsizes[n]);
+    sprintf(ystr2,"y%d",dftsizes[n]);
+    sprintf(xstr2,"x%d",dftsizes[n]);
+    LOG_M(ystr,ystr2,y,dftsizes[n]*4,1,1);
+    LOG_M(xstr,xstr2,x,dftsizes[n]*4,1,1);
+  }
+
+
+  return(0);
+}
+
+
+#endif
diff --git a/openair2/LAYER2/rlc_v2/TODO b/openair2/LAYER2/rlc_v2/TODO
new file mode 100644
index 0000000000000000000000000000000000000000..0778d4320b888ac2cf9b695f0e3129863656fa2a
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/TODO
@@ -0,0 +1,18 @@
+RLC AM
+======
+
+- 36.322 5.4 Re-establishment procedure
+  when possible, reassemble RLC SDUs from any byte segments of AMD PDUs
+  with SN < VR(MR) in the receiving side, remove RLC headers when doing
+  so and deliver all reassembled RLC SDUs to upper layer in ascending order
+  of the RLC SN, if not delivered before;
+
+- 36.322 5.2.3 Status reporting
+  delay triggering the STATUS report until x < VR(MS) or x >= VR(MR)
+
+- 36.322 5.1.3.2.3 Actions when a RLC data PDU is placed in the reception
+  buffer
+  [...] and in-sequence byte segments of the AMD PDU with SN = VR(R) [...]
+
+- use SOstart/SOend in NACK reporting, do not NACK full PDU if
+  parts of it have been received
diff --git a/openair2/LAYER2/rlc_v2/asn1_utils.c b/openair2/LAYER2/rlc_v2/asn1_utils.c
new file mode 100644
index 0000000000000000000000000000000000000000..46f7d90da57d2cb7d15cee8c60614a49a832e955
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/asn1_utils.c
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc.h"
+
+/* Translate index v (0..31) into the matching t_reordering table entry.
+ * An index outside the table is logged as fatal and ends the process. */
+int decode_t_reordering(int v)
+{
+  static const int t_reordering_values[32] = {
+    0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
+    90, 95, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 1600
+  };
+  if (!(v >= 0 && v <= 31)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return t_reordering_values[v];
+}
+
+/* Translate index v (0..61) into the matching t_status_prohibit table entry.
+ * An index outside the table is logged as fatal and ends the process. */
+int decode_t_status_prohibit(int v)
+{
+  static const int t_status_prohibit_values[62] = {
+    0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90,
+    95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165,
+    170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240,
+    245, 250, 300, 350, 400, 450, 500, 800, 1000, 1200, 1600, 2000, 2400
+  };
+  if (!(v >= 0 && v <= 61)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return t_status_prohibit_values[v];
+}
+
+/* Translate index v (0..58) into the matching t_poll_retransmit table entry.
+ * An index outside the table is logged as fatal and ends the process. */
+int decode_t_poll_retransmit(int v)
+{
+  static const int t_poll_retransmit_values[59] = {
+    5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95,
+    100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165, 170,
+    175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, 245,
+    250, 300, 350, 400, 450, 500, 800, 1000, 2000, 4000
+  };
+  if (!(v >= 0 && v <= 58)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return t_poll_retransmit_values[v];
+}
+
+/* Translate index v (0..7) into the matching poll_pdu table entry (-1 stands
+ * for infinity). An out-of-table index is logged as fatal and ends the process. */
+int decode_poll_pdu(int v)
+{
+  static const int poll_pdu_values[8] = {
+    4, 8, 16, 32, 64, 128, 256, -1 /* -1 means infinity */
+  };
+  if (!(v >= 0 && v <= 7)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return poll_pdu_values[v];
+}
+
+/* Translate index v (0..14) into the poll_byte table entry multiplied by
+ * 1024; the last entry (-1, infinity) is returned as -1. An index outside
+ * the table is logged as fatal and ends the process. */
+int decode_poll_byte(int v)
+{
+  static const int poll_byte_values[15] = {
+    25, 50, 75, 100, 125, 250, 375, 500, 750, 1000, 1250, 1500, 2000, 3000,
+    -1 /* -1 means infinity */
+  };
+  if (!(v >= 0 && v <= 14)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return poll_byte_values[v] == -1 ? -1 : poll_byte_values[v] * 1024;
+}
+
+/* Translate index v (0..7) into the matching max_retx_threshold table entry.
+ * An index outside the table is logged as fatal and ends the process. */
+int decode_max_retx_threshold(int v)
+{
+  static const int max_retx_threshold_values[8] = {
+    1, 2, 3, 4, 6, 8, 16, 32
+  };
+  if (!(v >= 0 && v <= 7)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return max_retx_threshold_values[v];
+}
+
+/* Translate index v (0 or 1) into the sn_field_length table entry (5 or 10).
+ * Any other index is logged as fatal and ends the process. */
+int decode_sn_field_length(int v)
+{
+  static const int sn_field_length_values[2] = {
+    5, 10
+  };
+  if (!(v >= 0 && v <= 1)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  return sn_field_length_values[v];
+}
diff --git a/openair2/LAYER2/rlc_v2/asn1_utils.h b/openair2/LAYER2/rlc_v2/asn1_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..61394c9c6991ccdc32722bfb039bfdac82a741ae
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/asn1_utils.h
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _ASN1_UTILS_H_
+#define _ASN1_UTILS_H_
+
+int decode_t_reordering(int v);        /* v in 0..31; like every decoder below: table lookup by index, an out-of-range v is logged and calls exit(1) */
+int decode_t_status_prohibit(int v);   /* v in 0..61 */
+int decode_t_poll_retransmit(int v);   /* v in 0..58 */
+int decode_poll_pdu(int v);            /* v in 0..7; returns -1 for infinity */
+int decode_poll_byte(int v);           /* v in 0..14; returns table value * 1024, or -1 for infinity */
+int decode_max_retx_threshold(int v);  /* v in 0..7 */
+int decode_sn_field_length(int v);     /* v in 0..1; returns 5 or 10 */
+
+#endif /* _ASN1_UTILS_H_ */
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity.c b/openair2/LAYER2/rlc_v2/rlc_entity.c
new file mode 100644
index 0000000000000000000000000000000000000000..d774e2b7e17788f71a0edc178295f1a682488469
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity.c
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc_entity.h"
+
+#include <stdlib.h>
+
+#include "rlc_entity_am.h"
+#include "rlc_entity_um.h"
+
+#include "LOG/log.h"
+
+/* Allocate an AM RLC entity, plug in the rlc_entity_am_* operations and store
+ * the caller's callbacks and configuration values unchanged.
+ * The process is terminated with exit(1) if the allocation fails.
+ * The new entity is returned through its generic rlc_entity_t view. */
+rlc_entity_t *new_rlc_entity_am(
+    int rx_maxsize,
+    int tx_maxsize,
+    void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity,
+                      char *buf, int size),
+    void *deliver_sdu_data,
+    void (*sdu_successful_delivery)(void *sdu_successful_delivery_data,
+                                    struct rlc_entity_t *entity,
+                                    int sdu_id),
+    void *sdu_successful_delivery_data,
+    void (*max_retx_reached)(void *max_retx_reached_data,
+                             struct rlc_entity_t *entity),
+    void *max_retx_reached_data,
+    int t_reordering,
+    int t_status_prohibit,
+    int t_poll_retransmit,
+    int poll_pdu,
+    int poll_byte,
+    int max_retx_threshold)
+{
+  /* calloc: every field not set explicitly below starts out as zero */
+  rlc_entity_am_t *am = calloc(1, sizeof(*am));
+
+  if (am == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* generic entity operations, AM flavour */
+  am->common.recv_pdu        = rlc_entity_am_recv_pdu;
+  am->common.buffer_status   = rlc_entity_am_buffer_status;
+  am->common.generate_pdu    = rlc_entity_am_generate_pdu;
+  am->common.recv_sdu        = rlc_entity_am_recv_sdu;
+  am->common.set_time        = rlc_entity_am_set_time;
+  am->common.discard_sdu     = rlc_entity_am_discard_sdu;
+  am->common.reestablishment = rlc_entity_am_reestablishment;
+  am->common.delete          = rlc_entity_am_delete;
+
+  /* caller-supplied callbacks, each paired with its opaque data pointer */
+  am->common.deliver_sdu                  = deliver_sdu;
+  am->common.deliver_sdu_data             = deliver_sdu_data;
+  am->common.sdu_successful_delivery      = sdu_successful_delivery;
+  am->common.sdu_successful_delivery_data = sdu_successful_delivery_data;
+  am->common.max_retx_reached             = max_retx_reached;
+  am->common.max_retx_reached_data        = max_retx_reached_data;
+
+  /* AM parameters, copied as given */
+  am->rx_maxsize         = rx_maxsize;
+  am->tx_maxsize         = tx_maxsize;
+  am->t_reordering       = t_reordering;
+  am->t_status_prohibit  = t_status_prohibit;
+  am->t_poll_retransmit  = t_poll_retransmit;
+  am->poll_pdu           = poll_pdu;
+  am->poll_byte          = poll_byte;
+  am->max_retx_threshold = max_retx_threshold;
+
+  return (rlc_entity_t *)am;
+}
+
+/* Allocate a UM RLC entity, plug in the rlc_entity_um_* operations and store
+ * the delivery callback and configuration. sn_field_length must be 5 or 10;
+ * any other value, or an allocation failure, ends the process with exit(1). */
+rlc_entity_t *new_rlc_entity_um(
+    int rx_maxsize,
+    int tx_maxsize,
+    void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity,
+                      char *buf, int size),
+    void *deliver_sdu_data,
+    int t_reordering,
+    int sn_field_length)
+{
+  rlc_entity_um_t *um = calloc(1, sizeof(*um));
+
+  if (um == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* generic entity operations, UM flavour */
+  um->common.recv_pdu        = rlc_entity_um_recv_pdu;
+  um->common.buffer_status   = rlc_entity_um_buffer_status;
+  um->common.generate_pdu    = rlc_entity_um_generate_pdu;
+  um->common.recv_sdu        = rlc_entity_um_recv_sdu;
+  um->common.set_time        = rlc_entity_um_set_time;
+  um->common.discard_sdu     = rlc_entity_um_discard_sdu;
+  um->common.reestablishment = rlc_entity_um_reestablishment;
+  um->common.delete          = rlc_entity_um_delete;
+
+  /* SDU delivery callback and its opaque data pointer */
+  um->common.deliver_sdu      = deliver_sdu;
+  um->common.deliver_sdu_data = deliver_sdu_data;
+
+  /* UM parameters, copied as given */
+  um->rx_maxsize      = rx_maxsize;
+  um->tx_maxsize      = tx_maxsize;
+  um->t_reordering    = t_reordering;
+  um->sn_field_length = sn_field_length;
+
+  /* sequence number modulus is 2^sn_field_length, the window is half of it */
+  switch (sn_field_length) {
+  case 5:  um->sn_modulus = 32;   break;
+  case 10: um->sn_modulus = 1024; break;
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  um->window_size = um->sn_modulus / 2;
+
+  return (rlc_entity_t *)um;
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity.h b/openair2/LAYER2/rlc_v2/rlc_entity.h
new file mode 100644
index 0000000000000000000000000000000000000000..c9b35204f03e92d305dc0bba1b40e4d36bd8964e
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity.h
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_ENTITY_H_
+#define _RLC_ENTITY_H_
+
+#include <stdint.h>
+
+#define SDU_MAX 16000   /* maximum PDCP SDU size is 8188, let's take more */
+
+typedef struct {     /* value returned by the buffer_status() entity function */
+  int status_size;   /* NOTE(review): presumably size of pending status data - confirm in the AM/UM code */
+  int tx_size;       /* NOTE(review): presumably size of data awaiting first transmission - confirm */
+  int retx_size;     /* NOTE(review): presumably size of data awaiting retransmission - confirm */
+} rlc_entity_buffer_status_t;
+
+typedef struct rlc_entity_t {   /* interface shared by the AM and UM entities */
+  /* functions provided by the RLC module; new_rlc_entity_am() and
+   * new_rlc_entity_um() point them at the AM or UM implementation */
+  void (*recv_pdu)(struct rlc_entity_t *entity, char *buffer, int size);
+  rlc_entity_buffer_status_t (*buffer_status)(
+      struct rlc_entity_t *entity, int maxsize);
+  int (*generate_pdu)(struct rlc_entity_t *entity, char *buffer, int size);
+  void (*recv_sdu)(struct rlc_entity_t *entity, char *buffer, int size,
+                   int sdu_id);
+
+  void (*set_time)(struct rlc_entity_t *entity, uint64_t now);
+
+  void (*discard_sdu)(struct rlc_entity_t *entity, int sdu_id);
+
+  void (*reestablishment)(struct rlc_entity_t *entity);
+
+  void (*delete)(struct rlc_entity_t *entity);
+
+  /* callbacks provided to the RLC module */
+  void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity,
+                      char *buf, int size);
+  void *deliver_sdu_data;               /* passed back as first argument of deliver_sdu */
+
+  void (*sdu_successful_delivery)(void *sdu_successful_delivery_data,
+                                  struct rlc_entity_t *entity,
+                                  int sdu_id);
+  void *sdu_successful_delivery_data;   /* both left NULL by new_rlc_entity_um() */
+
+  void (*max_retx_reached)(void *max_retx_reached_data,
+                           struct rlc_entity_t *entity);
+  void *max_retx_reached_data;          /* both left NULL by new_rlc_entity_um() */
+} rlc_entity_t;
+
+rlc_entity_t *new_rlc_entity_am(       /* allocates an AM entity; calls exit(1) if out of memory */
+    int rx_maxsize,
+    int tx_maxsize,
+    void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity,
+                      char *buf, int size),
+    void *deliver_sdu_data,              /* opaque pointer stored next to deliver_sdu */
+    void (*sdu_successful_delivery)(void *sdu_successful_delivery_data,
+                                    struct rlc_entity_t *entity,
+                                    int sdu_id),
+    void *sdu_successful_delivery_data,  /* opaque pointer stored next to sdu_successful_delivery */
+    void (*max_retx_reached)(void *max_retx_reached_data,
+                             struct rlc_entity_t *entity),
+    void *max_retx_reached_data,         /* opaque pointer stored next to max_retx_reached */
+    int t_reordering,                    /* this and the parameters below are stored unchanged */
+    int t_status_prohibit,
+    int t_poll_retransmit,
+    int poll_pdu,
+    int poll_byte,
+    int max_retx_threshold);
+
+rlc_entity_t *new_rlc_entity_um(       /* allocates a UM entity; calls exit(1) if out of memory */
+    int rx_maxsize,
+    int tx_maxsize,
+    void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity,
+                      char *buf, int size),
+    void *deliver_sdu_data,              /* opaque pointer stored next to deliver_sdu */
+    int t_reordering,
+    int sn_field_length);                /* must be 5 or 10, any other value is fatal (exit(1)) */
+
+#endif /* _RLC_ENTITY_H_ */
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_am.c b/openair2/LAYER2/rlc_v2/rlc_entity_am.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4f3d2f47c86508d628edd0c5468a4ac96269004
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity_am.c
@@ -0,0 +1,1700 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc_entity_am.h"
+#include "rlc_pdu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "LOG/log.h"
+
+/*************************************************************************/
+/* PDU RX functions                                                      */
+/*************************************************************************/
+
+/* Offset of 'a' from vr(r); a negative difference is wrapped by adding 1024. */
+static int modulus_rx(rlc_entity_am_t *entity, int a)
+{
+  /* as per 36.322 7.1, modulus base is vr(r) and modulus is 1024 for rx */
+  int offset = a - entity->vr_r;
+  return offset < 0 ? offset + 1024 : offset;
+}
+
+/* Offset of 'a' from vt(a); a negative difference is wrapped by adding 1024.
+ * used in both RX and TX processing */
+static int modulus_tx(rlc_entity_am_t *entity, int a)
+{
+  /* as per 36.322 7.1, modulus base is vt(a) and modulus is 1024 for tx */
+  int offset = a - entity->vt_a;
+  return offset < 0 ? offset + 1024 : offset;
+}
+
+/* Return 1 when sn lies in the receive window [vr(r), vr(r) + 512), else 0. */
+static int sn_in_recv_window(void *_entity, int sn)
+{
+  /* with vr(r) as modulus base and vr(mr) = vr(r) + 512, the test
+   * vr(r)<=sn<vr(mr) reduces to a single comparison */
+  return modulus_rx((rlc_entity_am_t *)_entity, sn) < 512;
+}
+
+/* Order a and b relative to vr(r): the result is <0, 0 or >0. */
+static int sn_compare_rx(void *_entity, int a, int b)
+{
+  return modulus_rx(_entity, a) - modulus_rx(_entity, b);
+}
+
+/* Order a and b relative to vt(a): the result is <0, 0 or >0.
+ * used in both RX and TX processing */
+static int sn_compare_tx(void *_entity, int a, int b)
+{
+  return modulus_tx(_entity, a) - modulus_tx(_entity, b);
+}
+
+/* Return 1 if a single stored segment of PDU 'sn' already covers the whole
+ * byte range [so, so + data_size), 0 otherwise. */
+static int segment_already_received(rlc_entity_am_t *entity,
+    int sn, int so, int data_size)
+{
+  /* TODO: optimize */
+  rlc_rx_pdu_segment_t *seg;
+
+  for (seg = entity->rx_list; seg != NULL; seg = seg->next) {
+    int seg_end = seg->so + seg->size - seg->data_offset;
+    if (seg->sn == sn && seg->so <= so && seg_end >= so + data_size)
+      return 1;
+  }
+  return 0;
+}
+
+/* Return 1 if the rx list holds PDU 'sn' completely: its consecutive
+ * segments must start at byte 0, leave no gap, and reach a segment
+ * flagged is_last. Return 0 otherwise. */
+static int rlc_am_segment_full(rlc_entity_am_t *entity, int sn)
+{
+  rlc_rx_pdu_segment_t *seg = entity->rx_list;
+  int covered_end = -1;     /* highest byte offset covered so far */
+  int seg_end;
+
+  /* skip to the first segment carrying this sn */
+  while (seg != NULL && seg->sn != sn)
+    seg = seg->next;
+
+  for (; seg != NULL && seg->sn == sn; seg = seg->next) {
+    if (seg->so > covered_end + 1)
+      return 0;             /* hole before this segment */
+    if (seg->is_last)
+      return 1;
+    seg_end = seg->so + seg->size - seg->data_offset - 1;
+    if (seg_end > covered_end)
+      covered_end = seg_end;
+  }
+  return 0;
+}
+
+/* Parse the header of the segment at r->start and set up r to read it.
+ * return 1 if the new segment has some data to consume, 0 if not */
+static int rlc_am_reassemble_next_segment(rlc_am_reassemble_t *r)
+{
+  int rf;
+  int sn;
+
+  r->sdu_offset = r->start->data_offset;
+  rlc_pdu_decoder_init(&r->dec, r->start->data, r->start->size);
+  /* header bits: dc(1) rf(1) p(1) fi(2) e(1) sn(10) [+ lsf(1) so(15) if rf] */
+  rlc_pdu_decoder_get_bits(&r->dec, 1);            /* dc */
+  rf    = rlc_pdu_decoder_get_bits(&r->dec, 1);
+  rlc_pdu_decoder_get_bits(&r->dec, 1);            /* p */
+  r->fi = rlc_pdu_decoder_get_bits(&r->dec, 2);
+  r->e  = rlc_pdu_decoder_get_bits(&r->dec, 1);
+  sn    = rlc_pdu_decoder_get_bits(&r->dec, 10);
+  if (rf) {
+    rlc_pdu_decoder_get_bits(&r->dec, 1);          /* lsf */
+    r->so = rlc_pdu_decoder_get_bits(&r->dec, 15);
+  } else {
+    r->so = 0;
+  }
+  /* e != 0: an (e, 11-bit length) pair follows; else the SDU ends with the segment */
+  if (r->e) {
+    r->e       = rlc_pdu_decoder_get_bits(&r->dec, 1);
+    r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11);
+  } else
+    r->sdu_len = r->start->size - r->sdu_offset;
+
+  /* new sn: read starts from PDU byte 0 */
+  if (sn != r->sn) {
+    r->pdu_byte = 0;
+    r->sn = sn;
+  }
+
+  r->data_pos = r->start->data_offset + r->pdu_byte - r->so;   /* index in data[] of PDU byte pdu_byte */
+
+  /* TODO: remove this check, it is useless, data has been validated before */
+  if (r->pdu_byte < r->so) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* if pdu_byte is not in [so .. so+len-1] then all bytes from this segment
+   * have already been consumed
+   */
+  if (r->pdu_byte >= r->so + r->start->size - r->start->data_offset)
+    return 0;
+
+  /* go to correct SDU */
+  while (r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset) + r->sdu_len) {
+    r->sdu_offset += r->sdu_len;
+    if (r->e) {
+      r->e       = rlc_pdu_decoder_get_bits(&r->dec, 1);
+      r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11);
+    } else {
+      r->sdu_len = r->start->size - r->sdu_offset;
+    }
+  }
+
+  return 1;
+}
+
+/* Drain the reassembly list: copy segment bytes into r->sdu, hand each complete
+ * SDU to the deliver_sdu callback and free every segment once fully read. */
+static void rlc_am_reassemble(rlc_entity_am_t *entity)
+{
+  rlc_am_reassemble_t *r = &entity->reassemble;
+
+  while (r->start != NULL) {
+    if (r->sdu_pos >= SDU_MAX) {
+      /* TODO: proper error handling (discard PDUs with current sn from
+       * reassembly queue? something else?) */
+      LOG_E(RLC, "%s:%d:%s: bad RLC PDU\n", __FILE__, __LINE__, __FUNCTION__);
+      exit(1);
+    }
+    r->sdu[r->sdu_pos] = r->start->data[r->data_pos];   /* one byte per iteration */
+    r->sdu_pos++;
+    r->data_pos++;
+    r->pdu_byte++;
+    if (r->data_pos == r->sdu_offset + r->sdu_len) {
+      /* all bytes of SDU are consumed, check if SDU is fully there.
+       * It is if the data pointer is not at the end of the PDU segment
+       * or if 'fi' & 1 == 0 */
+      if (r->data_pos != r->start->size ||
+          (r->fi & 1) == 0) {
+        /* SDU is full - deliver to higher layer */
+        entity->common.deliver_sdu(entity->common.deliver_sdu_data,
+                                   (rlc_entity_t *)entity,
+                                   r->sdu, r->sdu_pos);
+        r->sdu_pos = 0;
+      }
+      if (r->data_pos != r->start->size) {
+        /* not at the end, process next SDU */
+        r->sdu_offset += r->sdu_len;
+        if (r->e) {
+          r->e       = rlc_pdu_decoder_get_bits(&r->dec, 1);
+          r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11);
+        } else
+          r->sdu_len = r->start->size - r->sdu_offset;
+      } else {
+        /* all bytes are consumed, go to next segment not already fully
+         * processed, if any (each segment left behind is freed)
+         */
+        do {
+          rlc_rx_pdu_segment_t *e = r->start;
+          entity->rx_size -= e->size;
+          r->start = r->start->next;
+          rlc_rx_free_pdu_segment(e);
+        } while (r->start != NULL && !rlc_am_reassemble_next_segment(r));
+      }
+    }
+  }
+}
+
+/* Reception actions run after 'pdu_segment' has been inserted in the rx
+ * list (the caller refers to 36.322 5.1.3.2.3):
+ *  - advance vr_h past the received SN if needed,
+ *  - advance vr_ms over every SN reported full by rlc_am_segment_full(),
+ *  - if the received SN is vr_r, move the segments of each full SN from the
+ *    rx list to the end of the reassembly list and advance vr_r,
+ *  - run the reassembly,
+ *  - stop and/or (re)start t_reordering.
+ * SN arithmetic is done modulo 1024.
+ */
+static void rlc_am_reception_actions(rlc_entity_am_t *entity,
+    rlc_rx_pdu_segment_t *pdu_segment)
+{
+  int x = pdu_segment->sn;
+  int vr_ms;
+  int vr_r;
+
+  /* received SN is at or above vr_h: vr_h becomes x + 1 */
+  if (modulus_rx(entity, x) >= modulus_rx(entity, entity->vr_h))
+    entity->vr_h = (x + 1) % 1024;
+
+  /* skip every SN for which rlc_am_segment_full() is true */
+  vr_ms = entity->vr_ms;
+  while (rlc_am_segment_full(entity, vr_ms))
+    vr_ms = (vr_ms + 1) % 1024;
+  entity->vr_ms = vr_ms;
+
+  if (x == entity->vr_r) {
+    vr_r = entity->vr_r;
+    while (rlc_am_segment_full(entity, vr_r)) {
+      /* move segments with sn=vr(r) from rx list to end of reassembly list */
+      while (entity->rx_list != NULL && entity->rx_list->sn == vr_r) {
+        rlc_rx_pdu_segment_t *e = entity->rx_list;
+        entity->rx_list = e->next;
+        e->next = NULL;
+        if (entity->reassemble.start == NULL) {
+          entity->reassemble.start = e;
+          /* the list was empty, we need to init decoder */
+          entity->reassemble.sn = -1;
+          if (!rlc_am_reassemble_next_segment(&entity->reassemble)) {
+            /* TODO: proper error recovery (or remove the test, it should not happen) */
+            LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+            exit(1);
+          }
+        } else {
+          entity->reassemble.end->next = e;
+        }
+        entity->reassemble.end = e;
+      }
+
+      /* update vr_r */
+      vr_r = (vr_r + 1) % 1024;
+    }
+    entity->vr_r = vr_r;
+  }
+
+  rlc_am_reassemble(entity);
+
+  /* t_reordering is running: stop it if vr_x equals vr_r or is more than
+   * 512 above vr_r (vr_x is unwrapped to compare on a linear scale)
+   */
+  if (entity->t_reordering_start) {
+    int vr_x = entity->vr_x;
+    if (vr_x < entity->vr_r) vr_x += 1024;
+    if (vr_x == entity->vr_r || vr_x > entity->vr_r + 512)
+      entity->t_reordering_start = 0;
+  }
+
+  /* t_reordering is not running (possibly just stopped above): start it if
+   * vr_h is above vr_r, and remember vr_h in vr_x
+   */
+  if (entity->t_reordering_start == 0) {
+    if (sn_compare_rx(entity, entity->vr_h, entity->vr_r) > 0) {
+      entity->t_reordering_start = entity->t_current;
+      entity->vr_x = entity->vr_h;
+    }
+  }
+}
+
+/* Handle the ACK SN of a received status PDU: every PDU segment of the wait
+ * list and of the retransmit list for which sn_compare_tx() reports an SN
+ * lower than 'sn' is unlinked from its list and inserted in the ack list.
+ */
+static void process_received_ack(rlc_entity_am_t *entity, int sn)
+{
+  rlc_tx_pdu_segment_t anchor;      /* dummy head, only 'next' is used */
+  rlc_tx_pdu_segment_t *last_kept;
+  rlc_tx_pdu_segment_t *seg;
+  rlc_tx_pdu_segment_t *following;
+
+  /* wait list */
+  anchor.next = entity->wait_list;
+  last_kept = &anchor;
+  for (seg = entity->wait_list; seg != NULL; seg = following) {
+    /* read the successor first: inserting 'seg' elsewhere rewrites its link */
+    following = seg->next;
+    if (sn_compare_tx(entity, seg->sn, sn) >= 0) {
+      /* not covered by the ACK, stays where it is */
+      last_kept = seg;
+      continue;
+    }
+    /* unlink from the wait list and move to the ack list */
+    last_kept->next = following;
+    entity->ack_list = rlc_tx_pdu_list_add(sn_compare_tx, entity,
+                                           entity->ack_list, seg);
+  }
+  entity->wait_list = anchor.next;
+
+  /* retransmit list */
+  anchor.next = entity->retransmit_list;
+  last_kept = &anchor;
+  for (seg = entity->retransmit_list; seg != NULL; seg = following) {
+    following = seg->next;
+    if (sn_compare_tx(entity, seg->sn, sn) >= 0) {
+      last_kept = seg;
+      continue;
+    }
+    /* decrement retx_count: if 'process_received_nack' puts this segment
+     * back in the retransmit list the counter gets incremented again
+     */
+    seg->retx_count--;
+    /* unlink from the retransmit list and move to the ack list */
+    last_kept->next = following;
+    entity->ack_list = rlc_tx_pdu_list_add(sn_compare_tx, entity,
+                                           entity->ack_list, seg);
+  }
+  entity->retransmit_list = anchor.next;
+}
+
+/* Account for one more retransmission of the segment 'cur' and queue it in
+ * the retransmit list.
+ */
+static void consider_retransmission(rlc_entity_am_t *entity,
+    rlc_tx_pdu_segment_t *cur)
+{
+  int max_reached;
+
+  cur->retx_count++;
+
+  /* The specs ask to report only when retx_count == max_retx_threshold; here
+   * the report is sent for every retx_count >= max_retx_threshold. Upper
+   * layers are expected to react (radio link failure), so the repeated
+   * report is harmless.
+   */
+  max_reached = cur->retx_count >= entity->max_retx_threshold;
+  if (max_reached)
+    entity->common.max_retx_reached(entity->common.max_retx_reached_data,
+                                    (rlc_entity_t *)entity);
+
+  /* The segment is queued for retransmission even above the threshold:
+   * upper layers deal with that condition, and the RLC code only drops a
+   * segment once it has been ACKed.
+   */
+  entity->retransmit_list = rlc_tx_pdu_list_add(sn_compare_tx, entity,
+                                                entity->retransmit_list, cur);
+}
+
+/* Return 1 if the byte ranges [s1..e1] and [s2..e2] overlap, 0 otherwise.
+ * An end value of -1 means 'up to the end' (unbounded range).
+ */
+static int so_overlap(int s1, int e1, int s2, int e2)
+{
+  /* the range starting last overlaps the other one only if it starts
+   * at or before the end of the other one
+   */
+  if (s1 >= s2)
+    return e2 == -1 || s1 <= e2;
+  return e1 == -1 || e1 >= s2;
+}
+
+/* Handle one NACK of a received status PDU: every PDU segment with
+ * SN == 'sn' whose bytes overlap [so_start..so_end] (so_end == -1 meaning
+ * 'to the end') is taken out of the wait list or the ack list and handed to
+ * consider_retransmission().
+ *
+ * The wait list is processed too although the peer is probably supposed to
+ * NACK only SNs lower than the ACK SN of the same status PDU (in which case
+ * all candidates would already be in the ack list); in doubt, anything is
+ * accepted.
+ */
+static void process_received_nack(rlc_entity_am_t *entity, int sn,
+    int so_start, int so_end)
+{
+  rlc_tx_pdu_segment_t anchor;      /* dummy head, only 'next' is used */
+  rlc_tx_pdu_segment_t *last_kept;
+  rlc_tx_pdu_segment_t *seg;
+  rlc_tx_pdu_segment_t *following;
+
+  /* ignore the NACK unless VT(A) <= sn < VT(S) */
+  if (sn_compare_tx(entity, entity->vt_a, sn) > 0 ||
+      sn_compare_tx(entity, sn, entity->vt_s) >= 0)
+    return;
+
+  /* wait list */
+  anchor.next = entity->wait_list;
+  last_kept = &anchor;
+  for (seg = entity->wait_list; seg != NULL; seg = following) {
+    /* read the successor first: requeuing 'seg' rewrites its link */
+    following = seg->next;
+    if (seg->sn != sn ||
+        !so_overlap(so_start, so_end, seg->so, seg->so + seg->data_size - 1)) {
+      last_kept = seg;
+      continue;
+    }
+    /* unlink from the wait list, then consider for retransmission */
+    last_kept->next = following;
+    consider_retransmission(entity, seg);
+  }
+  entity->wait_list = anchor.next;
+
+  /* ack list */
+  anchor.next = entity->ack_list;
+  last_kept = &anchor;
+  for (seg = entity->ack_list; seg != NULL; seg = following) {
+    following = seg->next;
+    if (seg->sn != sn ||
+        !so_overlap(so_start, so_end, seg->so, seg->so + seg->data_size - 1)) {
+      last_kept = seg;
+      continue;
+    }
+    /* unlink from the ack list, then consider for retransmission */
+    last_kept->next = following;
+    consider_retransmission(entity, seg);
+  }
+  entity->ack_list = anchor.next;
+}
+
+/* 'pdu' is the first element, in a list, of a run of segments sharing the
+ * same SN. Return 1 if those segments cover the bytes from offset 0 without
+ * any gap and one of them is flagged 'is_last', 0 otherwise.
+ */
+int tx_pdu_in_ack_list_full(rlc_tx_pdu_segment_t *pdu)
+{
+  const int wanted_sn = pdu->sn;
+  int covered_up_to = -1;        /* highest byte offset covered so far */
+  int saw_last = 0;
+  rlc_tx_pdu_segment_t *seg;
+
+  for (seg = pdu; seg != NULL && seg->sn == wanted_sn; seg = seg->next) {
+    int seg_end;
+
+    /* hole between what is covered and the start of this segment */
+    if (seg->so > covered_up_to + 1)
+      return 0;
+
+    if (seg->is_last)
+      saw_last = 1;
+
+    seg_end = seg->so + seg->data_size - 1;
+    if (seg_end > covered_up_to)
+      covered_up_to = seg_end;
+  }
+
+  return saw_last;
+}
+
+/* Return the sum of 'data_size' over the run of consecutive list elements,
+ * starting at 'pdu', that have the same SN as 'pdu'.
+ */
+int tx_pdu_in_ack_list_size(rlc_tx_pdu_segment_t *pdu)
+{
+  const int wanted_sn = pdu->sn;
+  int total = 0;
+  rlc_tx_pdu_segment_t *seg;
+
+  for (seg = pdu; seg != NULL && seg->sn == wanted_sn; seg = seg->next)
+    total += seg->data_size;
+
+  return total;
+}
+
+/* Mark 'sdu_size' bytes as acked, starting at byte 'start_byte' of the SDU
+ * 'start' and spilling over the following SDUs of the list as needed
+ * (the 'acked_bytes' counter of each touched SDU is increased).
+ */
+void ack_sdu_bytes(rlc_sdu_t *start, int start_byte, int sdu_size)
+{
+  rlc_sdu_t *sdu;
+  int left = sdu_size;
+  /* the offset only applies to the first SDU, it is 0 for the next ones */
+  int offset = start_byte;
+
+  for (sdu = start; left != 0; sdu = sdu->next) {
+    int chunk = sdu->size - offset;
+    if (chunk > left)
+      chunk = left;
+    sdu->acked_bytes += chunk;
+    left -= chunk;
+    offset = 0;
+  }
+}
+
+/* Unlink and free (rlc_tx_free_pdu) every element of 'list' whose SN is
+ * 'sn'. Returns the new head of the list.
+ */
+rlc_tx_pdu_segment_t *tx_list_remove_sn(rlc_tx_pdu_segment_t *list, int sn)
+{
+  /* 'link' always points to the pointer that references the element under
+   * examination (the head variable first, then a 'next' field)
+   */
+  rlc_tx_pdu_segment_t **link = &list;
+
+  while (*link != NULL) {
+    rlc_tx_pdu_segment_t *seg = *link;
+
+    if (seg->sn != sn) {
+      link = &seg->next;
+      continue;
+    }
+
+    *link = seg->next;
+    rlc_tx_free_pdu(seg);
+  }
+
+  return list;
+}
+
+/* Walk the TX SDU list and remove every SDU whose bytes are all acked
+ * (acked_bytes == size). For each removed SDU, tx_size is decreased by the
+ * SDU size, the sdu_successful_delivery callback is called with the SDU's
+ * upper_layer_id and the SDU is freed. tx_list and tx_end are updated to
+ * describe the remaining list (both NULL if nothing remains).
+ */
+void cleanup_sdu_list(rlc_entity_am_t *entity)
+{
+  /* 'head' is a dummy element used so that removing the first SDU needs no
+   * special case; only its 'next' field is used
+   */
+  rlc_sdu_t head;
+  rlc_sdu_t *cur;
+  rlc_sdu_t *prev;
+
+  /* remove fully acked SDUs, indicate successful delivery to upper layer */
+  head.next = entity->tx_list;
+  cur = entity->tx_list;
+  prev = &head;
+
+  while (cur != NULL) {
+    if (cur->acked_bytes == cur->size) {
+      prev->next = cur->next;
+      entity->tx_size -= cur->size;
+      entity->common.sdu_successful_delivery(
+          entity->common.sdu_successful_delivery_data,
+          (rlc_entity_t *)entity, cur->upper_layer_id);
+      rlc_free_sdu(cur);
+      /* tx_end tracks the last element kept so far (may be the dummy head) */
+      entity->tx_end = prev;
+      cur = prev->next;
+    } else {
+      entity->tx_end = cur;
+      prev = cur;
+      cur = cur->next;
+    }
+  }
+
+  entity->tx_list = head.next;
+
+  /* if tx_end == head then it means that the list is now empty */
+  if (entity->tx_end == &head)
+    entity->tx_end = NULL;
+}
+
+/* Last step of status PDU processing, run once the ACK and all the NACKs
+ * have been applied: PDUs of the ack list that are complete are removed,
+ * starting at SN == VT(A); for each of them VT(A) is advanced by one
+ * (modulo 1024) and the SDU bytes they carry are marked as acked. Fully
+ * acked SDUs are then removed by cleanup_sdu_list().
+ * Nothing is done (not even the SDU cleanup) if the ack list is empty.
+ */
+static void finalize_ack_nack_processing(rlc_entity_am_t *entity)
+{
+  int sn;
+  rlc_tx_pdu_segment_t *cur = entity->ack_list;
+  int pdu_size;
+
+  if (cur == NULL)
+    return;
+
+  /* Remove full PDUs and ack the SDU bytes they cover. Start from SN == VT(A)
+   * and process increasing SNs until end of list or missing ACK or PDU not
+   * fully ACKed.
+   */
+  while (cur != NULL && cur->sn == entity->vt_a &&
+         tx_pdu_in_ack_list_full(cur)) {
+    sn = cur->sn;
+    entity->vt_a = (entity->vt_a + 1) % 1024;
+    pdu_size = tx_pdu_in_ack_list_size(cur);
+    ack_sdu_bytes(cur->start_sdu, cur->sdu_start_byte, pdu_size);
+    /* move 'cur' past all segments of this SN before they are freed */
+    while (cur != NULL && cur->sn == sn)
+      cur = cur->next;
+    entity->ack_list = tx_list_remove_sn(entity->ack_list, sn);
+  }
+
+  cleanup_sdu_list(entity);
+}
+
+/* Process a PDU ('size' bytes at 'buffer') received for this AM entity.
+ *
+ * The first bit (D/C) selects the processing: 0 jumps to the parsing of a
+ * control (status) PDU, anything else continues with a data PDU.
+ *
+ * Data PDU: the header is decoded (RF, P, FI, E, SN, then LSF and SO if RF
+ * is set, then the E/LI pairs). The PDU is discarded if the RX buffer would
+ * exceed rx_maxsize, if the SN is out of the receive window, if an LI is 0,
+ * if the LIs do not fit the data size or if the segment was already
+ * received. Otherwise it is stored in the rx list, the reception actions
+ * are run and, if P is set, a status report is triggered.
+ *
+ * Control PDU: only CPT == 0 is accepted. The NACK list is parsed once for
+ * consistency, then the ACK SN and each NACK are applied, and
+ * t_poll_retransmit is stopped when POLL_SN is covered.
+ *
+ * A decoding error (detected by the local macro R) discards the PDU. On
+ * discard a status report is still triggered if the P bit was decoded as 1.
+ */
+void rlc_entity_am_recv_pdu(rlc_entity_t *_entity, char *buffer, int size)
+{
+/* R(d): bail out to 'err' if decoder 'd' is in error */
+#define R(d) do { if (rlc_pdu_decoder_in_error(&d)) goto err; } while (0)
+  rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity;
+  rlc_pdu_decoder_t decoder;
+  rlc_pdu_decoder_t data_decoder;
+  rlc_pdu_decoder_t control_decoder;
+
+  /* data PDU header fields ('p' is read at 'discard', so it is initialized) */
+  int dc;
+  int rf;
+  int p = 0;
+  int fi;
+  int e;
+  int sn;
+  int lsf;
+  int so;
+
+  /* control PDU fields */
+  int cpt;
+  int e1;
+  int e2;
+  int ack_sn;
+  int nack_sn;
+  int so_start;
+  int so_end;
+  int control_e1;
+  int control_e2;
+
+  int data_e;
+  int data_li;
+
+  int packet_count;
+  int data_size;
+  int data_start;
+  int indicated_data_size;
+
+  rlc_rx_pdu_segment_t *pdu_segment;
+
+  rlc_pdu_decoder_init(&decoder, buffer, size);
+  dc = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+  if (dc == 0) goto control;
+
+  /* data PDU */
+  rf = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+  p  = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+  fi = rlc_pdu_decoder_get_bits(&decoder, 2); R(decoder);
+  e  = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+  sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder);
+
+  /* discard PDU if rx buffer is full */
+  if (entity->rx_size + size > entity->rx_maxsize) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, RX buffer full\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    goto discard;
+  }
+
+  if (!sn_in_recv_window(entity, sn)) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, sn out of window (sn %d vr_r %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+           sn, entity->vr_r);
+    goto discard;
+  }
+
+  if (rf) {
+    /* PDU segment: LSF and SO are present in the header */
+    lsf = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+    so  = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder);
+  } else {
+    /* full PDU: handled as a last segment starting at offset 0 */
+    lsf = 1;
+    so = 0;
+  }
+
+  packet_count = 1;
+
+  /* go to start of data */
+  /* the E/LI pairs are walked with a copy of the decoder; 'decoder' itself
+   * is not used anymore for this data PDU
+   */
+  indicated_data_size = 0;
+  data_decoder = decoder;
+  data_e = e;
+  while (data_e) {
+    data_e = rlc_pdu_decoder_get_bits(&data_decoder, 1); R(data_decoder);
+    data_li = rlc_pdu_decoder_get_bits(&data_decoder, 11); R(data_decoder);
+    if (data_li == 0) {
+      LOG_D(RLC, "%s:%d:%s: warning: discard PDU, li == 0\n",
+            __FILE__, __LINE__, __FUNCTION__);
+      goto discard;
+    }
+    indicated_data_size += data_li;
+    packet_count++;
+  }
+  rlc_pdu_decoder_align(&data_decoder);
+
+  data_start = data_decoder.byte;
+  data_size = size - data_start;
+
+  if (data_size <= 0) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, wrong data size (sum of LI %d data size %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+           indicated_data_size, data_size);
+    goto discard;
+  }
+  /* the last SDU has no LI, so the sum of the LIs has to be strictly less
+   * than the data size
+   */
+  if (indicated_data_size >= data_size) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, bad LIs (sum of LI %d data size %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+           indicated_data_size, data_size);
+    goto discard;
+  }
+
+  /* discard segment if all the bytes of the segment are already there */
+  if (segment_already_received(entity, sn, so, data_size)) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, already received\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    goto discard;
+  }
+
+  /* textual description of the 2-bit FI field, for the debug log only */
+  char *fi_str[] = {
+    "first byte: YES  last byte: YES",
+    "first byte: YES  last byte: NO",
+    "first byte: NO   last byte: YES",
+    "first byte: NO   last byte: NO",
+  };
+
+  LOG_D(RLC, "found %d packets, data size %d data start %d [fi %d %s] (sn %d) (p %d)\n",
+        packet_count, data_size, data_decoder.byte, fi, fi_str[fi], sn, p);
+
+  /* put in pdu reception list */
+  entity->rx_size += size;
+  pdu_segment = rlc_rx_new_pdu_segment(sn, so, size, lsf, buffer, data_start);
+  entity->rx_list = rlc_rx_pdu_segment_list_add(sn_compare_rx, entity,
+                                                entity->rx_list, pdu_segment);
+
+  /* do reception actions (36.322 5.1.3.2.3) */
+  rlc_am_reception_actions(entity, pdu_segment);
+
+  if (p) {
+    /* 36.322 5.2.3 says status triggering should be delayed
+     * until x < VR(MS) or x >= VR(MR). This is not clear (what
+     * is x then? we keep the same?). So let's trigger no matter what.
+     */
+    int vr_mr = (entity->vr_r + 512) % 1024;
+    entity->status_triggered = 1;
+    if (!(sn_compare_rx(entity, sn, entity->vr_ms) < 0 ||
+          sn_compare_rx(entity, sn, vr_mr) >= 0)) {
+      LOG_D(RLC, "%s:%d:%s: warning: STATUS trigger should be delayed, according to specs\n",
+            __FILE__, __LINE__, __FUNCTION__);
+    }
+  }
+
+  return;
+
+control:
+  cpt = rlc_pdu_decoder_get_bits(&decoder, 3); R(decoder);
+  if (cpt != 0) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, CPT not 0 (%d)\n",
+          __FILE__, __LINE__, __FUNCTION__, cpt);
+    goto discard;
+  }
+  ack_sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder);
+  e1 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+
+  /* let's try to parse the control PDU once to check consistency */
+  /* (done on a copy of the decoder, before any state of the entity is
+   * modified, so that a truncated PDU is discarded as a whole)
+   */
+  control_decoder = decoder;
+  control_e1 = e1;
+  while (control_e1) {
+    rlc_pdu_decoder_get_bits(&control_decoder, 10); R(control_decoder); /* NACK_SN */
+    control_e1 = rlc_pdu_decoder_get_bits(&control_decoder, 1); R(control_decoder);
+    control_e2 = rlc_pdu_decoder_get_bits(&control_decoder, 1); R(control_decoder);
+    if (control_e2) {
+      rlc_pdu_decoder_get_bits(&control_decoder, 15); R(control_decoder); /* SOstart */
+      rlc_pdu_decoder_get_bits(&control_decoder, 15); R(control_decoder); /* SOend */
+    }
+  }
+
+  /* 36.322 5.2.2.2 says to stop t_poll_retransmit if a ACK or NACK is
+   * received for the SN 'poll_sn'
+   */
+  if (sn_compare_tx(entity, entity->poll_sn, ack_sn) < 0)
+    entity->t_poll_retransmit_start = 0;
+
+  /* at this point, accept the PDU even if the actual values
+   * may be incorrect (eg. if so_start > so_end)
+   */
+  process_received_ack(entity, ack_sn);
+
+  while (e1) {
+    nack_sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder);
+    e1 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+    e2 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+    if (e2) {
+      so_start = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder);
+      so_end = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder);
+      if (so_end < so_start) {
+        LOG_W(RLC, "%s:%d:%s: warning, bad so start/end, NACK the whole PDU (sn %d)\n",
+              __FILE__, __LINE__, __FUNCTION__, nack_sn);
+        so_start = 0;
+        so_end = -1;
+      }
+      /* special value 0x7fff indicates 'all bytes to the end' */
+      if (so_end == 0x7fff)
+        so_end = -1;
+    } else {
+      /* no SO start/end: the whole PDU is NACKed */
+      so_start = 0;
+      so_end = -1;
+    }
+    process_received_nack(entity, nack_sn, so_start, so_end);
+
+    /* 36.322 5.2.2.2 says to stop t_poll_retransmit if a ACK or NACK is
+     * received for the SN 'poll_sn'
+     */
+    if (entity->poll_sn == nack_sn)
+      entity->t_poll_retransmit_start = 0;
+  }
+
+  finalize_ack_nack_processing(entity);
+
+  return;
+
+err:
+  LOG_W(RLC, "%s:%d:%s: error decoding PDU, discarding\n", __FILE__, __LINE__, __FUNCTION__);
+  goto discard;
+
+discard:
+  /* 'p' is 0 unless the P bit of a data PDU was decoded as 1 */
+  if (p)
+    entity->status_triggered = 1;
+
+#undef R
+}
+
+/*************************************************************************/
+/* TX functions                                                          */
+/*************************************************************************/
+
+/* Return the size in bytes (header + data) of the PDU or PDU segment 'pdu'
+ * once serialized: 2 bytes of fixed header (4 for a segment), 12 bits per
+ * SDU after the first one (rounded up to a full byte), then the data.
+ * 'entity' is not used.
+ */
+static int pdu_size(rlc_entity_am_t *entity, rlc_tx_pdu_segment_t *pdu)
+{
+  rlc_sdu_t *sdu = pdu->start_sdu;
+  int fixed_header = pdu->is_segment ? 4 : 2;
+  int sdus = 1;
+  /* data bytes not accounted for once the first SDU has been taken */
+  int left = pdu->data_size - (sdu->size - pdu->sdu_start_byte);
+
+  /* count how many more SDUs are needed to cover the data */
+  for (sdu = sdu->next; left > 0; sdu = sdu->next) {
+    sdus++;
+    left -= sdu->size;
+  }
+
+  return fixed_header + (12 * (sdus - 1) + 7) / 8 + pdu->data_size;
+}
+
+/* Return the size in bytes of the header of a (non segment) data PDU
+ * carrying 'sdu_count' SDUs.
+ */
+static int header_size(int sdu_count)
+{
+  /* 16 bits of fixed header, 12 more bits per SDU after the first one */
+  int li_bits = (sdu_count - 1) * 12;
+
+  /* round up to a whole number of bytes (padding if needed) */
+  return (16 + li_bits + 7) / 8;
+}
+
+/* Description of a new data PDU as computed by compute_new_pdu_size().
+ * All fields are 0 when no PDU can be built.
+ */
+typedef struct {
+  int sdu_count;    /* number of SDUs (or parts of SDUs) in the PDU */
+  int data_size;    /* number of data bytes in the PDU */
+  int header_size;  /* header size in bytes, header_size(sdu_count) */
+} tx_pdu_size_t;
+
+/* Compute the biggest new data PDU that can be built, within 'maxsize'
+ * bytes (header included), from the SDU bytes of the TX list not yet put
+ * in a PDU. No state of the entity is modified.
+ * Returns the SDU count, data size and header size of that PDU; all three
+ * are 0 if VT(S) is out of the transmit window or if nothing fits.
+ */
+static tx_pdu_size_t compute_new_pdu_size(rlc_entity_am_t *entity, int maxsize)
+{
+  tx_pdu_size_t ret;
+  int sdu_count;
+  int sdu_size;
+  int pdu_data_size;
+  rlc_sdu_t *sdu;
+
+  /* upper bound (excluded) of the transmit window */
+  int vt_ms = (entity->vt_a + 512) % 1024;
+
+  ret.sdu_count = 0;
+  ret.data_size = 0;
+  ret.header_size = 0;
+
+  /* sn out of window? nothing to do */
+  if (!(sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 &&
+        sn_compare_tx(entity, entity->vt_s, vt_ms) < 0))
+    return ret;
+
+  /* TX PDU - let's make the biggest PDU we can with the SDUs we have */
+  sdu_count = 0;
+  pdu_data_size = 0;
+  sdu = entity->tx_list;
+  while (sdu != NULL) {
+    /* include SDU only if it has not been fully included in PDUs already */
+    if (sdu->next_byte != sdu->size) {
+      /* header size if this SDU is added to the PDU */
+      int new_header_size = header_size(sdu_count + 1);
+      /* if we cannot put new header + at least 1 byte of data then over */
+      if (new_header_size + pdu_data_size + 1 > maxsize)
+        break;
+      sdu_count++;
+      /* only include the bytes of this SDU not included in PDUs already */
+      sdu_size = sdu->size - sdu->next_byte;
+      /* don't feed more than 'maxsize' bytes */
+      if (new_header_size + pdu_data_size + sdu_size > maxsize)
+        sdu_size = maxsize - new_header_size - pdu_data_size;
+      pdu_data_size += sdu_size;
+      /* if we put more than 2^11-1 bytes then the LI field cannot be used,
+       * so this is the last SDU we can put
+       */
+      if (sdu_size > 2047)
+        break;
+    }
+    sdu = sdu->next;
+  }
+
+  if (sdu_count) {
+    ret.sdu_count = sdu_count;
+    ret.data_size = pdu_data_size;
+    ret.header_size = header_size(sdu_count);
+  }
+
+  return ret;
+}
+
+/* Return the size in bytes of a status PDU limited to 'maxsize' bytes:
+ * 15 bits for header + ACK SN + E1, plus 12 bits for each SN in
+ * [VR(R) .. VR(MS)[ that rlc_am_segment_full() reports as not full, as long
+ * as there is room. Returns 0 if even the minimal 15 bits do not fit.
+ * The bit counting mirrors the one done in generate_status().
+ */
+static int status_size(rlc_entity_am_t *entity, int maxsize)
+{
+  /* let's count bits */
+  int bits = 15;               /* minimum size is 15 (header+ack_sn+e1) */
+  int sn;
+
+  /* from now on 'maxsize' is in bits */
+  maxsize *= 8;
+
+  if (bits > maxsize) {
+    LOG_W(RLC, "%s:%d:%s: warning: cannot generate status PDU, not enough room\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return 0;
+  }
+
+  /* each NACK adds 12 bits */
+  sn = entity->vr_r;
+  while (bits + 12 <= maxsize && sn_compare_rx(entity, sn, entity->vr_ms) < 0) {
+    if (!(rlc_am_segment_full(entity, sn)))
+      bits += 12;
+    sn = (sn + 1) % 1024;
+  }
+
+  /* round up to a whole number of bytes */
+  return (bits + 7) / 8;
+}
+
+/* Encode a status PDU in 'buffer' ('size' bytes available).
+ * The PDU contains D/C = 0, CPT = 0, the ACK SN and one NACK SN (without
+ * SO start/end, E2 is always 0) for each SN in [VR(R) .. VR(MS)[ that
+ * rlc_am_segment_full() reports as not full, as long as there is room.
+ * On success the status trigger is reset and t_status_prohibit is started.
+ * Returns encoder.byte after alignment (presumably the number of bytes
+ * written), or 0 if the minimal 15 bits do not fit in 'size' bytes.
+ */
+static int generate_status(rlc_entity_am_t *entity, char *buffer, int size)
+{
+  /* let's count bits */
+  int bits = 15;               /* minimum size is 15 (header+ack_sn+e1) */
+  int sn;
+  rlc_pdu_encoder_t encoder;
+  int has_nack = 0;
+  int ack;
+
+  rlc_pdu_encoder_init(&encoder, buffer, size);
+
+  /* from now on 'size' is in bits */
+  size *= 8;
+
+  if (bits > size) {
+    LOG_W(RLC, "%s:%d:%s: warning: cannot generate status PDU, not enough room\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return 0;
+  }
+
+  /* header */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 1);   /* D/C */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 3);   /* CPT */
+
+  /* reserve room for ACK (it will be set after putting the NACKs) */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 10);
+
+  /* at this point, ACK is VR(R) */
+  ack = entity->vr_r;
+
+  /* each NACK adds 12 bits */
+  /* The E1/E2 bits of an entry are only known when the next entry (or the
+   * end) is reached, so each NACK first emits the pending bits of what
+   * precedes it, then its own SN.
+   */
+  sn = entity->vr_r;
+  while (bits + 12 <= size && sn_compare_rx(entity, sn, entity->vr_ms) < 0) {
+    if (!(rlc_am_segment_full(entity, sn))) {
+      /* put previous e1 (is 1) */
+      rlc_pdu_encoder_put_bits(&encoder, 1, 1);
+      /* if previous was NACK, put previous e2 (0, we don't do 'so' thing) */
+      if (has_nack)
+        rlc_pdu_encoder_put_bits(&encoder, 0, 1);
+      /* put NACKed sn */
+      rlc_pdu_encoder_put_bits(&encoder, sn, 10);
+      has_nack = 1;
+      bits += 12;
+    } else {
+      /* this sn is full and we put all NACKs before it, use it for ACK */
+      ack = (sn + 1) % 1024;
+    }
+    sn = (sn + 1) % 1024;
+  }
+
+  /* go to highest full sn+1 for ACK, VR(MS) is the limit */
+  while (sn_compare_rx(entity, sn, entity->vr_ms) < 0 &&
+         rlc_am_segment_full(entity, sn)) {
+    ack = (sn + 1) % 1024;
+    sn = (sn + 1) % 1024;
+  }
+
+  /* at this point, if last put was NACK then put 2 bits else put 1 bit */
+  if (has_nack)
+    rlc_pdu_encoder_put_bits(&encoder, 0, 2);
+  else
+    rlc_pdu_encoder_put_bits(&encoder, 0, 1);
+
+  rlc_pdu_encoder_align(&encoder);
+
+  /* let's put the ACK */
+  /* 10 bits following D/C and CPT: the 4 high bits go in the low nibble of
+   * byte 0, the 6 low bits in the high bits of byte 1
+   */
+  buffer[0] |= ack >> 6;
+  buffer[1] |= (ack & 0x3f) << 2;
+
+  /* reset the trigger */
+  entity->status_triggered = 0;
+
+  /* start t_status_prohibit */
+  entity->t_status_prohibit_start = entity->t_current;
+
+  return encoder.byte;
+}
+
+/* Return 1 if every SDU of the TX list has all its bytes already put in
+ * PDUs (next_byte == size), 0 if at least one SDU still has bytes to send.
+ */
+int transmission_buffer_empty(rlc_entity_am_t *entity)
+{
+  rlc_sdu_t *cursor;
+
+  for (cursor = entity->tx_list; cursor != NULL; cursor = cursor->next)
+    if (cursor->next_byte != cursor->size)
+      return 0;
+
+  return 1;
+}
+
+/* Tell whether a poll has to be included in the PDU just assembled.
+ * Returns 1 if both the transmission buffer and the retransmission buffer
+ * are empty, or if the transmit window is stalling (VT(S) is not in
+ * [VT(A) .. VT(A) + 512[); returns 0 otherwise.
+ */
+int check_poll_after_pdu_assembly(rlc_entity_am_t *entity)
+{
+  /* upper bound (excluded) of the transmit window */
+  int vt_ms = (entity->vt_a + 512) % 1024;
+
+  int in_window = sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 &&
+                  sn_compare_tx(entity, entity->vt_s, vt_ms) < 0;
+
+  int nothing_left = transmission_buffer_empty(entity) &&
+                     entity->retransmit_list == NULL;
+
+  return nothing_left || !in_window;
+}
+
+/* Set the poll bit in the data PDU whose serialized header starts at
+ * 'buffer' and do the associated bookkeeping: the counters of PDUs and
+ * bytes sent without poll are reset, POLL_SN is set to VT(S) - 1
+ * (modulo 1024) and t_poll_retransmit is (re)started at the current time.
+ */
+void include_poll(rlc_entity_am_t *entity, char *buffer)
+{
+  /* set the P bit to 1 */
+  /* (third bit of the first byte, after D/C and RF) */
+  buffer[0] |= 0x20;
+
+  entity->pdu_without_poll = 0;
+  entity->byte_without_poll = 0;
+
+  /* set POLL_SN to VT(S) - 1 */
+  entity->poll_sn = (entity->vt_s + 1023) % 1024;
+
+  /* start t_poll_retransmit */
+  entity->t_poll_retransmit_start = entity->t_current;
+}
+
+/* Write the data PDU (or PDU segment) described by 'pdu' into 'buffer'.
+ * The header is made of D/C, RF, P, FI, E, SN, then LSF and SO for a
+ * segment, then one E/LI pair per SDU except the last; it is followed by
+ * the data bytes copied from the SDUs. If 'p' is non-zero include_poll()
+ * is called to set the P bit and update the polling state.
+ * 'bufsize' is only passed to the encoder initialization; the data copy
+ * itself is not checked against it.
+ * Returns the size in bytes of the serialized PDU (header + data).
+ */
+static int serialize_pdu(rlc_entity_am_t *entity, char *buffer, int bufsize,
+                         rlc_tx_pdu_segment_t *pdu, int p)
+{
+  int                  first_sdu_full;
+  int                  last_sdu_full;
+  int                  sdu_next_byte;
+  rlc_sdu_t            *sdu;
+  int                  i;
+  int                  cursize;
+  rlc_pdu_encoder_t    encoder;
+  int                  fi;
+  int                  e;
+  int                  li;
+  char                 *out;
+  int                  outpos;
+  int                  sdu_count;
+  int                  header_size;
+  int                  sdu_start_byte;
+
+  /* the PDU data starts at the first byte of an SDU */
+  first_sdu_full = pdu->sdu_start_byte == 0;
+
+  /* is last SDU full? (and also compute sdu_count) */
+  last_sdu_full = 1;
+  sdu = pdu->start_sdu;
+  sdu_next_byte = pdu->sdu_start_byte;
+  cursize = 0;
+  sdu_count = 0;
+  while (cursize != pdu->data_size) {
+    int sdu_size = sdu->size - sdu_next_byte;
+    sdu_count++;
+    if (cursize + sdu_size > pdu->data_size) {
+      /* the PDU data ends before the end of this SDU */
+      last_sdu_full = 0;
+      break;
+    }
+    cursize += sdu_size;
+    sdu = sdu->next;
+    sdu_next_byte = 0;
+  }
+
+  /* generate header */
+  rlc_pdu_encoder_init(&encoder, buffer, bufsize);
+
+  rlc_pdu_encoder_put_bits(&encoder, 1, 1);                /* D/C: 1 = data */
+  rlc_pdu_encoder_put_bits(&encoder, pdu->is_segment, 1);             /* RF */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 1);        /* P: reserve, set later */
+
+  /* FI: bit 1 set if the data does not start at the beginning of an SDU,
+   * bit 0 set if it does not end at the end of an SDU
+   */
+  fi = 0;
+  if (!first_sdu_full)
+    fi |= 0x02;
+  if (!last_sdu_full)
+    fi |= 0x01;
+  rlc_pdu_encoder_put_bits(&encoder, fi, 2);                          /* FI */
+
+  /* to understand the logic for Es and LIs:
+   * If we have:
+   *   1 SDU:   E=0
+   *
+   *   2 SDUs:  E=1
+   *     then:  E=0 LI(sdu[0])
+   *
+   *   3 SDUs:  E=1
+   *     then:  E=1 LI(sdu[0])
+   *     then:  E=0 LI(sdu[1])
+   *
+   *   4 SDUs:  E=1
+   *     then:  E=1 LI(sdu[0])
+   *     then:  E=1 LI(sdu[1])
+   *     then:  E=0 LI(sdu[2])
+   */
+  if (sdu_count >= 2)
+    e = 1;
+  else
+    e = 0;
+  rlc_pdu_encoder_put_bits(&encoder, e, 1);                            /* E */
+
+  rlc_pdu_encoder_put_bits(&encoder, pdu->sn, 10);                    /* SN */
+
+  if (pdu->is_segment) {
+    rlc_pdu_encoder_put_bits(&encoder, pdu->is_last, 1);             /* LSF */
+    rlc_pdu_encoder_put_bits(&encoder, pdu->so, 15);                  /* SO */
+  }
+
+  /* put LIs */
+  sdu = pdu->start_sdu;
+  /* first SDU */
+  /* (only the bytes from sdu_start_byte belong to this PDU) */
+  li = sdu->size - pdu->sdu_start_byte;
+  /* put E+LI only if at least 2 SDUs */
+  if (sdu_count >= 2) {
+    /* E is 1 if at least 3 SDUs */
+    if (sdu_count >= 3)
+      e = 1;
+    else
+      e = 0;
+    rlc_pdu_encoder_put_bits(&encoder, e, 1);                          /* E */
+    rlc_pdu_encoder_put_bits(&encoder, li, 11);                       /* LI */
+  }
+  /* next SDUs, but not the last (no LI for the last) */
+  sdu = sdu->next;
+  for (i = 2; i < sdu_count; i++, sdu = sdu->next) {
+    if (i != sdu_count - 1)
+      e = 1;
+    else
+      e = 0;
+    li = sdu->size;
+    rlc_pdu_encoder_put_bits(&encoder, e, 1);                          /* E */
+    rlc_pdu_encoder_put_bits(&encoder, li, 11);                       /* LI */
+  }
+
+  rlc_pdu_encoder_align(&encoder);
+
+  header_size = encoder.byte;
+
+  /* generate data */
+  out = buffer + header_size;
+  sdu = pdu->start_sdu;
+  sdu_start_byte = pdu->sdu_start_byte;
+  outpos = 0;
+  for (i = 0; i < sdu_count; i++, sdu = sdu->next) {
+    li = sdu->size - sdu_start_byte;
+    /* do not copy more than what remains of the PDU data */
+    if (outpos + li >= pdu->data_size)
+      li = pdu->data_size - outpos;
+    memcpy(out+outpos, sdu->data + sdu_start_byte, li);
+    outpos += li;
+    sdu_start_byte = 0;
+  }
+
+  if (p)
+    include_poll(entity, buffer);
+
+  return header_size + pdu->data_size;
+}
+
+/* Build a new data PDU with SN = VT(S) from the SDU bytes of the TX list
+ * not yet put in a PDU, and serialize it in 'buffer' (at most 'bufsize'
+ * bytes). VT(S) is incremented, the SDU bytes taken are reserved
+ * (next_byte advanced), the PDU is appended to the wait list and the
+ * polling counters are updated to decide whether the P bit has to be set.
+ * Returns the size of the serialized PDU, or 0 if VT(S) is out of the
+ * transmit window or if no PDU can be built.
+ */
+static int generate_tx_pdu(rlc_entity_am_t *entity, char *buffer, int bufsize)
+{
+  int                  vt_ms;
+  tx_pdu_size_t        pdu_size;
+  rlc_sdu_t            *sdu;
+  int                  i;
+  int                  cursize;
+  int                  p;
+  rlc_tx_pdu_segment_t *pdu;
+
+  /* sn out of window? do nothing */
+  vt_ms = (entity->vt_a + 512) % 1024;
+  if (!(sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 &&
+        sn_compare_tx(entity, entity->vt_s, vt_ms) < 0))
+    return 0;
+
+  pdu_size = compute_new_pdu_size(entity, bufsize);
+  if (pdu_size.sdu_count == 0)
+    return 0;
+
+  pdu = rlc_tx_new_pdu();
+
+  pdu->sn = entity->vt_s;
+  entity->vt_s = (entity->vt_s + 1) % 1024;
+
+  /* go to first SDU (skip those already fully processed) */
+  /* (sdu_count != 0 means at least one SDU still has bytes to send) */
+  sdu = entity->tx_list;
+  while (sdu->next_byte == sdu->size)
+    sdu = sdu->next;
+
+  pdu->start_sdu = sdu;
+
+  pdu->sdu_start_byte = sdu->next_byte;
+
+  /* a new PDU is not a segment: it starts at offset 0 and is complete */
+  pdu->so = 0;
+  pdu->is_segment = 0;
+  pdu->is_last = 1;
+  /* to conform to specs' logic, put -1 (specs say "for 1st retransmission
+   * put 0 otherwise increase", let's put -1 and always increase when the
+   * segment goes to retransmit list)
+   */
+  pdu->retx_count = -1;
+
+  /* reserve SDU bytes */
+  cursize = 0;
+  for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
+    int sdu_size = sdu->size - sdu->next_byte;
+    /* the last SDU may be only partially included */
+    if (cursize + sdu_size > pdu_size.data_size)
+      sdu_size = pdu_size.data_size - cursize;
+    sdu->next_byte += sdu_size;
+    cursize += sdu_size;
+  }
+
+  pdu->data_size = cursize;
+
+  /* put PDU at the end of the wait list */
+  entity->wait_list = rlc_tx_pdu_list_append(entity->wait_list, pdu);
+
+  /* polling actions for a new PDU */
+  /* (a value of -1 for poll_pdu or poll_byte disables that criterion) */
+  entity->pdu_without_poll++;
+  entity->byte_without_poll += pdu_size.data_size;
+  if ((entity->poll_pdu != -1 &&
+       entity->pdu_without_poll >= entity->poll_pdu) ||
+      (entity->poll_byte != -1 &&
+       entity->byte_without_poll >= entity->poll_byte))
+    p = 1;
+  else
+    p = check_poll_after_pdu_assembly(entity);
+
+  /* a forced poll is consumed by the first PDU sent */
+  if (entity->force_poll) {
+    p = 1;
+    entity->force_poll = 0;
+  }
+
+  return serialize_pdu(entity, buffer, bufsize, pdu, p);
+}
+
+/* Split the PDU (or PDU segment) 'pdu' in two segments so that the first
+ * one, once serialized with a segment header, fits in 'size' bytes.
+ * 'pdu' is modified in place to become the first part; a newly allocated
+ * segment describing the remaining bytes is linked right after it
+ * (pdu->next). Both parts are flagged as segments; the second part keeps
+ * the 'is_last' value 'pdu' had, the first part gets is_last = 0.
+ */
+static void resegment(rlc_tx_pdu_segment_t *pdu, int size)
+{
+  rlc_tx_pdu_segment_t *new_pdu;
+  rlc_sdu_t *sdu;
+  int sdu_count;
+  int pdu_header_size;
+  int pdu_data_size;
+  int sdu_pos;
+  int sdu_bytes_to_take;
+
+  /* PDU segment too big, cut in two parts so that first part fits into
+   * size bytes (including header)
+   */
+  sdu = pdu->start_sdu;
+  pdu_data_size = 0;
+  sdu_pos = pdu->sdu_start_byte;
+  sdu_count = 0;
+  while (1) {
+    /* can we put a new header and at least one byte of data? */
+    /* header has 2 more bytes for SO */
+    pdu_header_size = 2 + header_size(sdu_count + 1);
+    if (pdu_header_size + pdu_data_size + 1 > size) {
+      /* no we can't, stop here */
+      break;
+    }
+    /* yes we can, go ahead */
+    sdu_count++;
+    sdu_bytes_to_take = sdu->size - sdu_pos;
+    if (pdu_header_size + pdu_data_size + sdu_bytes_to_take > size) {
+      sdu_bytes_to_take = size - (pdu_header_size + pdu_data_size);
+    }
+    sdu_pos += sdu_bytes_to_take;
+    if (sdu_pos == sdu->size) {
+      /* this SDU is fully taken, continue with the next one */
+      sdu = sdu->next;
+      sdu_pos = 0;
+    }
+    pdu_data_size += sdu_bytes_to_take;
+  }
+
+  /* at this point 'sdu'/'sdu_pos' is the position of the first byte that is
+   * not in the first part
+   */
+  new_pdu = rlc_tx_new_pdu();
+  pdu->is_segment = 1;
+  /* the second part starts as a copy of the original (sn, is_last, next,
+   * retx_count, ...), then the fields that differ are set
+   */
+  *new_pdu = *pdu;
+
+  new_pdu->so = pdu->so + pdu_data_size;
+  new_pdu->data_size = pdu->data_size - pdu_data_size;
+  new_pdu->start_sdu = sdu;
+  new_pdu->sdu_start_byte = sdu_pos;
+
+  pdu->is_last = 0;
+  pdu->data_size = pdu_data_size;
+  pdu->next = new_pdu;
+}
+
+/* Generate a retransmission PDU from the head of the retransmit list.
+ *
+ * entity: the AM entity; its retransmit list must not be empty (the caller
+ *         visible in this file checks it before calling)
+ * buffer: where to serialize the PDU
+ * size:   number of bytes available in 'buffer'
+ *
+ * If the head of the retransmit list does not fit into 'size' bytes it is
+ * resegmented; the first part is sent and the remaining part becomes the new
+ * head of the retransmit list. The sent PDU (segment) is moved to the wait
+ * list.
+ *
+ * Returns what serialize_pdu returns, or 0 if nothing was generated because
+ * 'size' is too small to hold even the smallest PDU segment.
+ */
+static int generate_retx_pdu(rlc_entity_am_t *entity, char *buffer, int size)
+{
+  rlc_tx_pdu_segment_t *pdu;
+  int orig_size;
+  int p;
+
+  pdu = entity->retransmit_list;
+  orig_size = pdu_size(entity, pdu);
+
+  if (orig_size > size) {
+    /* we can't resegment if size is less than 5
+     * (4 bytes for header, 1 byte for data)
+     */
+    if (size < 5)
+      return 0;
+    resegment(pdu, size);
+  }
+
+  /* remove from retransmit list and put in wait list */
+  /* (after a resegmentation pdu->next is the remaining part, which thus
+   * becomes the head of the retransmit list)
+   */
+  entity->retransmit_list = pdu->next;
+  entity->wait_list = rlc_tx_pdu_list_add(sn_compare_tx, entity,
+                                          entity->wait_list, pdu);
+
+  p = check_poll_after_pdu_assembly(entity);
+
+  /* a poll was requested (expiry of t_poll_retransmit), honor it now */
+  if (entity->force_poll) {
+    p = 1;
+    entity->force_poll = 0;
+  }
+
+  /* give the serializer the real capacity of 'buffer' (as done for new PDUs)
+   * and not the size of the PDU before resegmentation, which may be bigger
+   * than the buffer
+   */
+  return serialize_pdu(entity, buffer, size, pdu, p);
+}
+
+/* Tell if a status PDU has to be sent now: a status report must have been
+ * triggered and t_status_prohibit must be either not running or expired.
+ * Returns 1 if a status PDU is to be sent, 0 otherwise.
+ */
+static int status_to_report(rlc_entity_am_t *entity)
+{
+  /* nothing triggered, nothing to report */
+  if (!entity->status_triggered)
+    return 0;
+
+  /* t_status_prohibit is not running (0 means not active) */
+  if (entity->t_status_prohibit_start == 0)
+    return 1;
+
+  /* t_status_prohibit is running, report only if it has expired */
+  return entity->t_current - entity->t_status_prohibit_start >
+             entity->t_status_prohibit;
+}
+
+/* Compute the size of the PDU that would be generated from the head of the
+ * retransmit list if at most 'maxsize' bytes are available.
+ * Returns 0 if the retransmit list is empty or if 'maxsize' is too small.
+ */
+static int retx_pdu_size(rlc_entity_am_t *entity, int maxsize)
+{
+  rlc_tx_pdu_segment_t *head = entity->retransmit_list;
+  int head_size;
+
+  /* nothing to retransmit */
+  if (head == NULL)
+    return 0;
+
+  head_size = pdu_size(entity, head);
+
+  if (head_size > maxsize) {
+    /* the head of the retransmit list has to be segmented, which is
+     * possible only if maxsize can hold a PDU segment with at least 1 data
+     * byte (so 5 bytes: 4 bytes header + 1 byte data). The later
+     * segmentation will generate a PDU of maximum size 'maxsize' (can be
+     * less).
+     */
+    return maxsize < 5 ? 0 : maxsize;
+  }
+
+  /* the head of the retransmit list fits as is */
+  return head_size;
+}
+
+/* Report what the entity could send if 'maxsize' bytes were available:
+ * size of the status PDU (if one is to be reported now), size of a new TX PDU
+ * and size of the first PDU to retransmit.
+ */
+rlc_entity_buffer_status_t rlc_entity_am_buffer_status(
+    rlc_entity_t *_entity, int maxsize)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+  rlc_entity_buffer_status_t status;
+  tx_pdu_size_t new_pdu;
+
+  /* status PDU, only if there is one to report */
+  status.status_size = status_to_report(am) ? status_size(am, maxsize) : 0;
+
+  /* TX PDU: header + data */
+  /* todo: if an SDU has size >2047 in the tx list then processing
+   * stops and computed size will not be accurate. Change the computation
+   * to be more accurate (if needed).
+   */
+  new_pdu = compute_new_pdu_size(am, maxsize);
+  status.tx_size = new_pdu.data_size + new_pdu.header_size;
+
+  /* reTX PDU */
+  /* todo: report size of all available data, not just first PDU */
+  status.retx_size = retx_pdu_size(am, maxsize);
+
+  return status;
+}
+
+/* Generate one PDU into 'buffer' ('size' bytes available).
+ * Candidates are tried in this order, the first one that produces something
+ * (non-zero return value) wins: status PDU (if one is to be reported),
+ * retransmission PDU (if the retransmit list is not empty), new TX PDU.
+ * Returns the value returned by the generator that was used.
+ */
+int rlc_entity_am_generate_pdu(rlc_entity_t *_entity, char *buffer, int size)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+  int len = 0;
+
+  /* first choice: status PDU */
+  if (status_to_report(am))
+    len = generate_status(am, buffer, size);
+
+  /* second choice: retransmission */
+  if (len == 0 && am->retransmit_list != NULL)
+    len = generate_retx_pdu(am, buffer, size);
+
+  /* last choice: new data */
+  if (len == 0)
+    len = generate_tx_pdu(am, buffer, size);
+
+  return len;
+}
+
+/*************************************************************************/
+/* SDU RX functions                                                      */
+/*************************************************************************/
+
+/* Accept an SDU from the upper layer and queue it at the end of the TX list.
+ * buffer/size: the SDU data and its length in bytes
+ * sdu_id:      identifier given by the upper layer for this SDU
+ * The program exits if the SDU is bigger than SDU_MAX. The SDU is silently
+ * rejected (debug log only) if the TX buffer would exceed its maximum size.
+ */
+void rlc_entity_am_recv_sdu(rlc_entity_t *_entity, char *buffer, int size,
+                            int sdu_id)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+  rlc_sdu_t *new_sdu;
+
+  /* an SDU that big cannot be handled at all */
+  if (size > SDU_MAX) {
+    LOG_E(RLC, "%s:%d:%s: fatal: SDU size too big (%d bytes)\n",
+          __FILE__, __LINE__, __FUNCTION__, size);
+    exit(1);
+  }
+
+  /* no more room in the TX buffer */
+  if (am->tx_size + size > am->tx_maxsize) {
+    LOG_D(RLC, "%s:%d:%s: warning: SDU rejected, SDU buffer full\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return;
+  }
+
+  /* queue the SDU and account for its bytes */
+  new_sdu = rlc_new_sdu(buffer, size, sdu_id);
+  rlc_sdu_list_add(&am->tx_list, &am->tx_end, new_sdu);
+  am->tx_size += size;
+}
+
+/*************************************************************************/
+/* time/timers                                                           */
+/*************************************************************************/
+
+/* Handle the expiry of t_poll_retransmit (36.322 5.2.2.3).
+ * Does nothing if the timer is not running or has not expired yet.
+ * Otherwise the timer is stopped, a poll is forced in the next generated PDU
+ * and, if check_poll_after_pdu_assembly() says so, the PDU with
+ * SN = VT(S)-1 (or, if it is not in the wait list, the PDU at the head of the
+ * wait list) is moved from the wait list to retransmission.
+ */
+static void check_t_poll_retransmit(rlc_entity_am_t *entity)
+{
+  rlc_tx_pdu_segment_t head;
+  rlc_tx_pdu_segment_t *cur;
+  rlc_tx_pdu_segment_t *prev;
+  int sn;
+
+  /* 36.322 5.2.2.3 */
+  /* did t_poll_retransmit expire? */
+  if (entity->t_poll_retransmit_start == 0 ||
+      entity->t_current <= entity->t_poll_retransmit_start +
+                               entity->t_poll_retransmit)
+    return;
+
+  /* stop timer */
+  entity->t_poll_retransmit_start = 0;
+
+  /* 36.322 5.2.2.3 says:
+   *
+   *     - include a poll in a RLC data PDU as described in section 5.2.2.1
+   *
+   * That does not seem to be conditional. So we forcefully will send
+   * a poll as soon as we generate a PDU.
+   * Hopefully this interpretation is correct. In the worst case we generate
+   * more polling than necessary, but it's not a big deal. When
+   * 't_poll_retransmit' expires it means we didn't receive a status report,
+   * meaning a bad radio link, so things are quite bad at this point and
+   * asking again for a poll won't hurt much more.
+   */
+  entity->force_poll = 1;
+
+  LOG_D(RLC, "%s:%d:%s: warning: t_poll_retransmit expired\n",
+        __FILE__, __LINE__, __FUNCTION__);
+
+  /* do we meet conditions of 36.322 5.2.2.3? */
+  if (!check_poll_after_pdu_assembly(entity))
+    return;
+
+  /* search wait list for PDU with SN = VT(S)-1 */
+  sn = (entity->vt_s + 1023) % 1024;
+
+  /* 'head' is a dummy element put in front of the wait list so that removing
+   * the first element is not a special case
+   */
+  head.next = entity->wait_list;
+  cur = entity->wait_list;
+  prev = &head;
+
+  while (cur != NULL) {
+    if (cur->sn == sn)
+      break;
+    prev = cur;
+    cur = cur->next;
+  }
+
+  /* PDU with SN = VT(S)-1 not found?, take the head of wait list */
+  if (cur == NULL) {
+    cur = entity->wait_list;
+    /* wait list is empty: there is no PDU to retransmit (and no SN to
+     * read), the forced poll set above is all we can do
+     */
+    if (cur == NULL)
+      return;
+    prev = &head;
+    sn = cur->sn;
+  }
+
+  /* 36.322 says "PDU", not "PDU segment", so let's retransmit all
+   * PDU segments with this SN
+   */
+  while (cur != NULL && cur->sn == sn) {
+    /* unlink from wait list (head.next follows a change of first element) */
+    prev->next = cur->next;
+    entity->wait_list = head.next;
+    /* put in retransmit list */
+    consider_retransmission(entity, cur);
+    cur = prev->next;
+  }
+}
+
+/* Handle the expiry of t_reordering.
+ * Does nothing if the timer is not running or has not expired yet.
+ * Otherwise the timer is stopped, VR(MS) is advanced, the timer is restarted
+ * if VR(H) is still ahead of VR(MS) and a STATUS report is triggered.
+ */
+static void check_t_reordering(rlc_entity_am_t *entity)
+{
+  int sn;
+
+  /* timer not running (0 means not active)? */
+  if (entity->t_reordering_start == 0)
+    return;
+
+  /* timer running but not expired yet? */
+  if (entity->t_current <= entity->t_reordering_start + entity->t_reordering)
+    return;
+
+  /* timer expired: stop it */
+  entity->t_reordering_start = 0;
+
+  LOG_D(RLC, "%s:%d:%s: t_reordering expired\n", __FILE__, __LINE__, __FUNCTION__);
+
+  /* VR(MS) becomes the first SN >= VR(X) for which not all PDU segments
+   * have been received
+   */
+  for (sn = entity->vr_x; rlc_am_segment_full(entity, sn); sn = (sn + 1) % 1024)
+    /* skip fully received SN */;
+  entity->vr_ms = sn;
+
+  /* VR(H) > VR(MS): restart the timer and remember VR(H) in VR(X) */
+  if (sn_compare_rx(entity, entity->vr_h, entity->vr_ms) > 0) {
+    entity->vr_x = entity->vr_h;
+    entity->t_reordering_start = entity->t_current;
+  }
+
+  /* a STATUS report is now to be sent */
+  entity->status_triggered = 1;
+}
+
+/* Give the current time 'now' to the entity and process the timers that
+ * may have expired (t_poll_retransmit first, then t_reordering).
+ */
+void rlc_entity_am_set_time(rlc_entity_t *_entity, uint64_t now)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  /* record the new time before looking at the timers, they compare to it */
+  am->t_current = now;
+
+  check_t_poll_retransmit(am);
+  check_t_reordering(am);
+
+  /* nothing to do here for t_status_prohibit, generate_status handles it */
+}
+
+/*************************************************************************/
+/* discard/re-establishment/delete                                       */
+/*************************************************************************/
+
+/* Discard the SDU identified by 'sdu_id' (36.322 5.3).
+ * The SDU is removed from the TX list and freed only if it is found and
+ * none of its bytes has been put in a PDU yet; otherwise nothing is done.
+ * The bytes of a discarded SDU are given back to the TX buffer accounting
+ * ('tx_size') so that new SDUs can use the freed room.
+ */
+void rlc_entity_am_discard_sdu(rlc_entity_t *_entity, int sdu_id)
+{
+  /* implements 36.322 5.3 */
+  rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity;
+  rlc_sdu_t head;
+  rlc_sdu_t *cur;
+  rlc_sdu_t *prev;
+
+  /* 'head' is a dummy element put in front of the TX list so that removing
+   * the first element is not a special case
+   */
+  head.next = entity->tx_list;
+  cur = entity->tx_list;
+  prev = &head;
+
+  while (cur != NULL && cur->upper_layer_id != sdu_id) {
+    prev = cur;
+    cur = cur->next;
+  }
+
+  /* if sdu_id not found or some bytes have already been 'PDU-ized'
+   * then do nothing
+   */
+  if (cur == NULL || cur->next_byte != 0)
+    return;
+
+  /* remove SDU from tx_list */
+  prev->next = cur->next;
+  entity->tx_list = head.next;
+  if (entity->tx_end == cur) {
+    if (prev != &head)
+      entity->tx_end = prev;
+    else
+      entity->tx_end = NULL;
+  }
+
+  /* the SDU was accounted for in tx_size when it was accepted, release its
+   * bytes or the TX buffer would look fuller than it is forever
+   */
+  entity->tx_size -= cur->size;
+
+  rlc_free_sdu(cur);
+}
+
+/* Free all the elements of the list of TX PDU segments 'l' (may be NULL). */
+static void free_pdu_segment_list(rlc_tx_pdu_segment_t *l)
+{
+  rlc_tx_pdu_segment_t *next;
+
+  for (; l != NULL; l = next) {
+    /* get the next element before the current one is freed */
+    next = l->next;
+    rlc_tx_free_pdu(l);
+  }
+}
+
+/* Put the entity back to its initial runtime state: state variables,
+ * counters and timers are reset to 0 and all the lists (RX PDU segments,
+ * TX SDUs, wait/retransmit/ack PDU lists) are freed and emptied.
+ * Configuration fields are not touched.
+ */
+static void clear_entity(rlc_entity_am_t *entity)
+{
+  rlc_rx_pdu_segment_t *rx;
+  rlc_rx_pdu_segment_t *rx_next;
+  rlc_sdu_t            *tx;
+  rlc_sdu_t            *tx_next;
+
+  /* RX state */
+  entity->vr_r = entity->vr_x = entity->vr_ms = entity->vr_h = 0;
+  entity->status_triggered = 0;
+
+  /* TX state */
+  entity->vt_a = entity->vt_s = 0;
+  entity->poll_sn = 0;
+  entity->pdu_without_poll = entity->byte_without_poll = 0;
+  entity->force_poll = 0;
+
+  /* time and timers (0 means timer not active) */
+  entity->t_current = 0;
+  entity->t_reordering_start = 0;
+  entity->t_status_prohibit_start = 0;
+  entity->t_poll_retransmit_start = 0;
+
+  /* received PDU segments */
+  for (rx = entity->rx_list; rx != NULL; rx = rx_next) {
+    rx_next = rx->next;
+    rlc_rx_free_pdu_segment(rx);
+  }
+  entity->rx_list = NULL;
+  entity->rx_size = 0;
+
+  /* reassembly in progress */
+  memset(&entity->reassemble, 0, sizeof(rlc_am_reassemble_t));
+
+  /* SDUs waiting for transmission */
+  for (tx = entity->tx_list; tx != NULL; tx = tx_next) {
+    tx_next = tx->next;
+    rlc_free_sdu(tx);
+  }
+  entity->tx_list = NULL;
+  entity->tx_end = NULL;
+  entity->tx_size = 0;
+
+  /* PDU lists */
+  free_pdu_segment_list(entity->wait_list);
+  entity->wait_list = NULL;
+  free_pdu_segment_list(entity->retransmit_list);
+  entity->retransmit_list = NULL;
+  free_pdu_segment_list(entity->ack_list);
+  entity->ack_list = NULL;
+}
+
+/* Re-establish the entity: all the runtime state is simply cleared. */
+void rlc_entity_am_reestablishment(rlc_entity_t *_entity)
+{
+  /* 36.322 5.4 says to deliver SDUs if possible.
+   * Let's not do that, it makes the code simpler.
+   * TODO: change this behavior if wanted/needed.
+   */
+  clear_entity((rlc_entity_am_t *)_entity);
+}
+
+/* Delete the entity: free everything it holds, then the entity itself.
+ * The entity must not be used after this call.
+ */
+void rlc_entity_am_delete(rlc_entity_t *_entity)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  /* release lists and buffers owned by the entity */
+  clear_entity(am);
+
+  /* release the entity structure */
+  free(am);
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_am.h b/openair2/LAYER2/rlc_v2/rlc_entity_am.h
new file mode 100644
index 0000000000000000000000000000000000000000..0437f17ad8e63e97c9a9cca6e92a5c85a73fb604
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity_am.h
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_ENTITY_AM_H_
+#define _RLC_ENTITY_AM_H_
+
+#include <stdint.h>
+
+#include "rlc_entity.h"
+#include "rlc_pdu.h"
+#include "rlc_sdu.h"
+
+/*
+ * Here comes some documentation to understand the reassembly
+ * logic in the code and the fields in the structure rlc_am_reassemble_t.
+ *
+ * Inside RLC, we deal with SDUs, PDUs and PDU segments.
+ * SDUs are packets coming from upper layer.
+ * A PDU is made of a header and a payload.
+ * In the payload there are SDUs.
+ * First SDU and last SDU in a PDU may be incomplete.
+ * PDU segments exist in case of retransmissions when the MAC
+ * layer asks for less data than previously, in which case
+ * only part of the previous PDU is sent.
+ *
+ * This is PDU data (just bytes):
+ * ---------------------------------------------------------
+ * |  PDU data                                             |
+ * ---------------------------------------------------------
+ * It contains SDUs, like:
+ * ---------------------------------------------------------
+ * | SDU 1 | SDU 2     |  [...]                   | SDU n  |
+ * ---------------------------------------------------------
+ * SDU 1 may be only the end of an SDU from which previous bytes were
+ * transmitted in previous PDUs.
+ * SDU n may be only the start of an SDU, that is more bytes from
+ * this SDU may be sent in successive PDUs.
+ *
+ * At front of the PDU data, we have a header:
+ * ---------------  ---------------------------------------------------------
+ * | PDU header  |  | SDU 1 | SDU 2     |  [...]                   | SDU n  |
+ * ---------------  ---------------------------------------------------------
+ * PDU header describes PDU data (most notably lengths).
+ *
+ * A PDU segment is a part of a PDU. For example, from this PDU data:
+ * ---------------------------------------------------------
+ * | SDU 1 | SDU 2     |  [...]                   | SDU n  |
+ * ---------------------------------------------------------
+ * We can extract the following PDU segment (data part only):
+ *                ----------------------
+ *                | PDU segment data   |
+ *                ----------------------
+ * This PDU segment would contain the end of SDU 2 above and some SDUs up to,
+ * let's say SDU x (x is 5 below).
+ *
+ * In front of a transmitted PDU segment, we have a header,
+ * containing the important variable 'so' (segment offset) that gives
+ * the index of the first byte of the segment in the original PDU.
+ * -------------- ----------------------
+ * | seg. header| | PDU segment data   |
+ * -------------- ----------------------
+ *
+ * Let's now explain the data structure rlc_am_reassemble_t.
+ *
+ * In the structure rlc_am_reassemble_t, the fields fi, e, sn and so
+ * are coming from the PDU segment header and the semantics is the
+ * one of the RLC specs.
+ *
+ * The currently processed PDU segment is stored in 'start'.
+ * We have 'start->s->data_offset' and 'start->s->size'.
+ * start->s->data_offset is the index of the start of the data in the
+ * PDU segment. That is if the header is of length 3 bytes
+ * then start->s->data_offset is 3.
+ * start->s->size is the total length of the PDU segment,
+ * including header.
+ * The size of actual data bytes in the PDU segment is thus
+ * start->s->size - start->s->data_offset.
+ *
+ * The field sdu_len is the length of the current SDU being
+ * processed.
+ *
+ * The field sdu_offset is the starting point of the
+ * current SDU being processed (starting from beginning
+ * of PDU segment, including header).
+ *
+ * The field data_pos is the current read pointer. 0 points to
+ * the beginning of the PDU segment (including header).
+ *
+ * The field pdu_byte points to the current byte in the original
+ * PDU (not the PDU segment). It starts at 0 when we start
+ * processing a new PDU (when a new 'sn' is seen) and always
+ * increases after each byte processed. This is the variable
+ * that is used to know if the next PDU segment will be used
+ * or not and if yes, starting from which data byte (see
+ * function rlc_am_reassemble_next_segment).
+ *
+ * 'so' is important and points to the byte in the original PDU
+ * that is the first byte of the PDU segment.
+ *
+ * For example, let's take this PDU segment data from above:
+ *                ----------------------
+ *                | PDU segment data   |
+ *                ----------------------
+ * Let's say it is decomposed as:
+ *                ----------------------
+ *                |222|33|4444|55555555|
+ *                ----------------------
+ * It contains SDUs 2, 3, 4, and 5.
+ * SDU 2 is 3 bytes, SDU 3 is 2 bytes, SDU 4 is 4 bytes, SDU 5 is 8 bytes.
+ *
+ * Let's suppose that the original PDU starts with:
+ * ----------------
+ * |1111111|222222|
+ * ----------------
+ *
+ * (In this example, in the PDU segment, SDU 2 is not full,
+ * we only have its end.)
+ *
+ * Then 'so' is 13 (SDU 1 is 7 bytes, head of SDU 2 is 6 bytes).
+ *
+ * Let's continue with our PDU segment data.
+ * Let's say we are currently processing SDU 4.
+ * Let's say the read pointer (variable 'data_pos') is there:
+ *                ----------------------
+ *                |222|33|4444|55555555|
+ *                ----------------------
+ *                         ^
+ *                      read pointer (data_pos)
+ *
+ * Then:
+ *     - sdu_len is 4
+ *     - sdu_offset is 5 + [PDU segment header length]
+ *       (it points to the beginning of SDU 4, starting
+ *        from the head of the PDU segment, that is
+ *        3 bytes for SDU 2, 2 bytes for SDU 3, and the
+ *        PDU segment header length)
+ *     - start->s->data_offset is [PDU segment header length]
+ *     - pdu_byte is 20
+ *       (13 bytes from beginning of original PDU,
+ *        3 bytes for SDU 2, 2 bytes for SDU 3, then 2 bytes for SDU 4)
+ *     - data_pos = read pointer = 7 + [PDU segment header length]
+ *
+ * To finish this description, in the code, a PDU is simply
+ * seen as a PDU segment with 'so' = 0 (and is_last == 1 (lsf in the specs),
+ * but this variable is not used by the reassembly logic).
+ *
+ * And for [PDU segment header length] we use start->s->data_offset.
+ *
+ * To recap, here is an illustration of the various variables
+ * and what starting point they use. In the figures, the start
+ * of the variable name is aligned to the byte it refers to.
+ * + is used to show the starting point.
+ *
+ * Let's put the PDU segment back into the original PDU.
+ * And let's show the values for when the read pointer
+ * is on the second byte of SDU 4 (as above).
+ *
+ * +++++++++++++++ so
+ * +++++++++++++++++++++++ pdu_byte
+ * ---------------------------------------------------------
+ * | SDU 1| SDU 2..222|33|4444|55555555| [...]   | SDU n   |
+ * ---------------------------------------------------------
+ *
+ * And now the PDU segment with header.
+ *
+ *
+ *                        ++++ sdu_len
+ * ++++++++++++++++++++++ sdu_offset
+ * +++++++++++++++++++++++ data_pos
+ * +++++++++++++++ start->s->data_offset
+ * +++++++++++++++++++++++++++++++++++++ start->s->size
+ * -------------- ----------------------
+ * | seg. header| |222|33|4444|55555555|
+ * -------------- ----------------------
+ *
+ * We see three cases for the starting point:
+ *     - start of original PDU (without any header)
+ *     - start of header of current PDU segment
+ *     - start of current SDU (for sdu_len)
+ */
+
+/* State of the reassembly of SDUs from received PDU segments.
+ * See the long comment above for the meaning of fi, e, sn, so, sdu_len,
+ * sdu_offset, data_pos and pdu_byte.
+ */
+typedef struct {
+  rlc_rx_pdu_segment_t *start;      /* start of list */
+  rlc_rx_pdu_segment_t *end;        /* end of list (last element) */
+  int                  pos;         /* byte to get from current buffer */
+  char                 sdu[SDU_MAX]; /* sdu is reassembled here */
+  int                  sdu_pos;      /* next byte to put in sdu */
+
+  /* decoder of current PDU */
+  rlc_pdu_decoder_t    dec;
+  /* fi, e, sn, so: values from the header of the PDU segment */
+  int fi;
+  int e;
+  int sn;
+  int so;
+  int sdu_len;     /* length of the SDU being processed */
+  int sdu_offset;  /* start of the SDU being processed, counted from the
+                    * beginning of the PDU segment (header included)
+                    */
+  int data_pos;    /* read pointer, 0 is the beginning of the PDU segment
+                    * (header included)
+                    */
+  int pdu_byte;    /* current byte in the original PDU */
+} rlc_am_reassemble_t;
+
+/* An RLC AM entity: configuration, runtime state, timers and the lists of
+ * SDUs/PDUs it manages. 'common' is the first field and functions receive
+ * the entity as a 'rlc_entity_t *' that they cast to 'rlc_entity_am_t *'.
+ */
+typedef struct {
+  rlc_entity_t common;
+
+  /* configuration */
+  int t_reordering;
+  int t_status_prohibit;
+  int t_poll_retransmit;
+  int poll_pdu;              /* -1 means infinity */
+  int poll_byte;             /* -1 means infinity */
+  int max_retx_threshold;
+
+  /* runtime rx */
+  /* NOTE(review): names presumably follow the state variables of 36.322
+   * (vr_ms is VR(MS), vr_x is VR(X), ...) - confirm against the spec
+   */
+  int vr_r;
+  int vr_x;
+  int vr_ms;
+  int vr_h;
+
+  /* set to 1 when a STATUS report has to be sent */
+  int status_triggered;
+
+  /* runtime tx */
+  int vt_a;
+  int vt_s;
+  int poll_sn;
+  int pdu_without_poll;      /* compared to poll_pdu to decide to poll */
+  int byte_without_poll;     /* compared to poll_byte to decide to poll */
+  int force_poll;            /* 1: put a poll in the next generated PDU */
+
+  /* set to the latest known time by the user of the module. Unit: ms */
+  uint64_t t_current;
+
+  /* timers (stores the TTI of activation, 0 means not active) */
+  uint64_t t_reordering_start;
+  uint64_t t_status_prohibit_start;
+  uint64_t t_poll_retransmit_start;
+
+  /* rx management */
+  rlc_rx_pdu_segment_t *rx_list;
+  int                  rx_size;
+  int                  rx_maxsize;
+
+  /* reassembly management */
+  rlc_am_reassemble_t    reassemble;
+
+  /* tx management */
+  /* tx_list/tx_end: first and last SDU waiting for transmission
+   * tx_size: bytes of SDUs accepted from upper layer, new SDUs are rejected
+   * if it would exceed tx_maxsize
+   */
+  rlc_sdu_t *tx_list;
+  rlc_sdu_t *tx_end;
+  int       tx_size;
+  int       tx_maxsize;
+
+  /* wait_list: PDUs put there after generation
+   * retransmit_list: PDUs to retransmit, served before new data
+   */
+  rlc_tx_pdu_segment_t *wait_list;
+  rlc_tx_pdu_segment_t *retransmit_list;
+
+  rlc_tx_pdu_segment_t *ack_list;
+} rlc_entity_am_t;
+
+/* Functions of the AM entity. They all take the entity as a generic
+ * 'rlc_entity_t *' which has to be an 'rlc_entity_am_t'.
+ */
+
+/* give an SDU (from upper layer) to transmit; 'sdu_id' identifies it */
+void rlc_entity_am_recv_sdu(rlc_entity_t *entity, char *buffer, int size,
+                            int sdu_id);
+/* presumably: give a received PDU to the entity (see rlc_entity_am.c) */
+void rlc_entity_am_recv_pdu(rlc_entity_t *entity, char *buffer, int size);
+/* sizes of what could be sent (status, new data, retransmission) if
+ * 'maxsize' bytes were available
+ */
+rlc_entity_buffer_status_t rlc_entity_am_buffer_status(
+    rlc_entity_t *entity, int maxsize);
+/* generate one PDU in 'buffer' of 'size' bytes; 0 is returned when nothing
+ * could be generated by the status/retransmission steps and the result of
+ * new PDU generation is returned last
+ */
+int rlc_entity_am_generate_pdu(rlc_entity_t *entity, char *buffer, int size);
+/* set the current time and process timers */
+void rlc_entity_am_set_time(rlc_entity_t *entity, uint64_t now);
+/* discard a not yet transmitted SDU identified by 'sdu_id' */
+void rlc_entity_am_discard_sdu(rlc_entity_t *entity, int sdu_id);
+/* clear the runtime state of the entity */
+void rlc_entity_am_reestablishment(rlc_entity_t *entity);
+/* clear and free the entity */
+void rlc_entity_am_delete(rlc_entity_t *entity);
+
+#endif /* _RLC_ENTITY_AM_H_ */
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_um.c b/openair2/LAYER2/rlc_v2/rlc_entity_um.c
new file mode 100644
index 0000000000000000000000000000000000000000..54707875a0b25f7c91086131f4515d75dea9f5c9
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity_um.c
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc_entity_um.h"
+#include "rlc_pdu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "LOG/log.h"
+
+/*************************************************************************/
+/* PDU RX functions                                                      */
+/*************************************************************************/
+
+/* Return the value of 'a' relative to the modulus base of the RX side.
+ * As per 36.322 7.1, the modulus base is vr(uh)-window_size and the modulus
+ * is 2^sn_field_length (which is 'sn_modulus' in rlc_entity_um_t).
+ */
+static int modulus_rx(rlc_entity_um_t *entity, int a)
+{
+  int base = entity->vr_uh - entity->window_size;
+  int offset = a - base;
+
+  /* bring a negative offset back into positive values */
+  if (offset < 0)
+    offset += entity->sn_modulus;
+
+  return offset % entity->sn_modulus;
+}
+
+/* Compare the SNs 'a' and 'b' using the modulus base of the RX side.
+ * Returns < 0 if a is before b, 0 if they are equal, > 0 if a is after b.
+ * '_entity' is an 'rlc_entity_um_t *'.
+ */
+static int sn_compare_rx(void *_entity, int a, int b)
+{
+  rlc_entity_um_t *um = _entity;
+  int mod_a = modulus_rx(um, a);
+  int mod_b = modulus_rx(um, b);
+
+  return mod_a - mod_b;
+}
+
+/* Return 1 if 'sn' is in the receive window, 0 otherwise.
+ * '_entity' is an 'rlc_entity_um_t *'.
+ */
+static int sn_in_recv_window(void *_entity, int sn)
+{
+  rlc_entity_um_t *um = _entity;
+
+  /* the test is (VR(UH) - UM_Window_Size) <= SN < VR(UH); as the modulus
+   * base is (VR(UH) - UM_Window_Size) and VR(UH) = base + window_size, it
+   * is enough to compare the SN relative to the base with window_size
+   */
+  return modulus_rx(um, sn) < um->window_size;
+}
+
+/* look into the rx list for a PDU with SN == 'sn'
+ * return 1 if there is one, 0 otherwise
+ */
+static int rlc_um_pdu_received(rlc_entity_um_t *entity, int sn)
+{
+  rlc_rx_pdu_segment_t *pdu;
+
+  for (pdu = entity->rx_list; pdu != NULL; pdu = pdu->next) {
+    if (pdu->sn == sn)
+      return 1;
+  }
+
+  /* whole list scanned, no PDU with this SN */
+  return 0;
+}
+
+/* return 1 if 'sn' is before VR(UR) (in the RX modulus sense), 0 otherwise */
+static int less_than_vr_ur(rlc_entity_um_t *entity, int sn)
+{
+  int diff = sn_compare_rx(entity, sn, entity->vr_ur);
+
+  return diff < 0;
+}
+
+/* return 1 if 'sn' is not in the receive window, 0 if it is */
+static int outside_of_reordering_window(rlc_entity_um_t *entity, int sn)
+{
+  if (sn_in_recv_window(entity, sn))
+    return 0;
+
+  return 1;
+}
+
+/* return 1 if 'sn' is before VR(UH) (in the RX modulus sense), 0 otherwise */
+static int less_than_vr_uh(rlc_entity_um_t *entity, int sn)
+{
+  int diff = sn_compare_rx(entity, sn, entity->vr_uh);
+
+  return diff < 0;
+}
+
+/* Process the data of the received PDU 'pdu': its bytes are appended to the
+ * SDU being reassembled (entity->reassemble) and each SDU that gets complete
+ * is delivered through entity->common.deliver_sdu, unless its head was not
+ * received in which case it is dropped.
+ * The header of the PDU is decoded again here (FI, E, SN and the LIs) to
+ * find the limits of the SDUs in the data.
+ * The program exits if the reassembled SDU would exceed SDU_MAX bytes.
+ */
+static void rlc_um_reassemble_pdu(rlc_entity_um_t *entity,
+    rlc_rx_pdu_segment_t *pdu)
+{
+  rlc_um_reassemble_t *r = &entity->reassemble;
+
+  int fi;
+  int e;
+  int sn;
+  int data_pos;
+  int sdu_len;
+  int sdu_offset;
+
+  /* first SDU (or part of SDU) starts where the data starts */
+  sdu_offset = pdu->data_offset;
+
+  rlc_pdu_decoder_init(&r->dec, pdu->data, pdu->size);
+
+  /* with a 10 bits SN the header starts with 3 bits that are not used */
+  if (entity->sn_field_length == 10)
+    rlc_pdu_decoder_get_bits(&r->dec, 3);
+
+  fi = rlc_pdu_decoder_get_bits(&r->dec, 2);
+  e  = rlc_pdu_decoder_get_bits(&r->dec, 1);
+  sn = rlc_pdu_decoder_get_bits(&r->dec, entity->sn_field_length);
+
+  /* length of first SDU: given by the first LI if there is one, otherwise
+   * all the data of the PDU
+   */
+  if (e) {
+    e       = rlc_pdu_decoder_get_bits(&r->dec, 1);
+    sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11);
+  } else
+    sdu_len = pdu->size - sdu_offset;
+
+  /* discard current SDU being reassembled if bad SN or bad FI */
+  /* (bad SN: not the one following the previous processed PDU;
+   * bad FI: bit 0x02 not set, which is used below as "the PDU does not
+   * start with the continuation of an SDU")
+   */
+  if (sn != (r->sn + 1) % entity->sn_modulus ||
+      !(fi & 0x02)) {
+    if (r->sdu_pos)
+      LOG_D(RLC, "%s:%d:%s: warning: discard partially reassembled SDU\n",
+            __FILE__, __LINE__, __FUNCTION__);
+    r->sdu_pos = 0;
+  }
+
+  /* if the head of the SDU is missing, still process the PDU
+   * but remember to discard the reassembled SDU later on (the
+   * head has not been received).
+   * The head is missing if sdu_pos == 0 and fi says the PDU does not
+   * start an SDU.
+   */
+  if (r->sdu_pos == 0 && (fi & 0x02))
+    r->sdu_head_missing = 1;
+
+  r->sn = sn;
+  data_pos = pdu->data_offset;
+
+  /* copy the data byte by byte, handling the end of each SDU */
+  while (1) {
+    if (r->sdu_pos >= SDU_MAX) {
+      /* TODO: proper error handling (discard PDUs with current sn from
+       * reassembly queue? something else?)
+       */
+      LOG_E(RLC, "%s:%d:%s: bad RLC PDU\n", __FILE__, __LINE__, __FUNCTION__);
+      exit(1);
+    }
+    r->sdu[r->sdu_pos] = pdu->data[data_pos];
+    r->sdu_pos++;
+    data_pos++;
+    if (data_pos == sdu_offset + sdu_len) {
+      /* all bytes of SDU are consumed, check if SDU is fully there.
+       * It is if the data pointer is not at the end of the PDU segment
+       * or if 'fi' & 1 == 0
+       */
+      if (data_pos != pdu->size || (fi & 1) == 0) {
+        /* time to discard the SDU if we didn't receive the head */
+        if (r->sdu_head_missing) {
+          LOG_D(RLC, "%s:%d:%s: warning: discard SDU, head not received\n",
+                __FILE__, __LINE__, __FUNCTION__);
+          r->sdu_head_missing = 0;
+        } else {
+          /* SDU is full - deliver to higher layer */
+          entity->common.deliver_sdu(entity->common.deliver_sdu_data,
+                                     (rlc_entity_t *)entity,
+                                     r->sdu, r->sdu_pos);
+        }
+        /* in both cases restart with an empty SDU */
+        r->sdu_pos = 0;
+      }
+      /* done with PDU? */
+      if (data_pos == pdu->size)
+        break;
+      /* not at the end of PDU, process next SDU */
+      sdu_offset += sdu_len;
+      /* its length is given by the next LI if there is one, otherwise it is
+       * what remains in the PDU
+       */
+      if (e) {
+        e       = rlc_pdu_decoder_get_bits(&r->dec, 1);
+        sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11);
+      } else
+        sdu_len = pdu->size - sdu_offset;
+    }
+  }
+}
+
+/* Reassemble the PDUs at the head of the rx list as long as 'check_sn'
+ * accepts their SN. Each processed PDU is removed from the rx list and
+ * freed, and its size is removed from rx_size.
+ */
+static void rlc_um_reassemble(rlc_entity_um_t *entity,
+    int (*check_sn)(rlc_entity_um_t *entity, int sn))
+{
+  rlc_rx_pdu_segment_t *head;
+
+  for (;;) {
+    head = entity->rx_list;
+
+    /* stop when the list is empty or the SN of the head is not valid
+     * anymore with respect to 'check_sn'
+     */
+    if (head == NULL || !check_sn(entity, head->sn))
+      break;
+
+    rlc_um_reassemble_pdu(entity, head);
+
+    /* the PDU is consumed, drop it from the rx list */
+    entity->rx_size -= head->size;
+    entity->rx_list = head->next;
+    rlc_rx_free_pdu_segment(head);
+  }
+}
+
+/* Actions to do when 'pdu_segment' has been put in the rx list (the caller
+ * refers to 36.322 5.1.2.2.3): update VR(UH) and VR(UR), reassemble what
+ * can be reassembled and manage t_reordering.
+ */
+static void rlc_um_reception_actions(rlc_entity_um_t *entity,
+    rlc_rx_pdu_segment_t *pdu_segment)
+{
+  /* SN outside of the window: the window moves (VR(UH) becomes SN + 1),
+   * PDUs now outside of the window are reassembled and VR(UR) is put at
+   * the start of the window if it is now outside of it
+   */
+  if (!sn_in_recv_window(entity, pdu_segment->sn)) {
+    entity->vr_uh = (pdu_segment->sn + 1) % entity->sn_modulus;
+    rlc_um_reassemble(entity, outside_of_reordering_window);
+    if (!sn_in_recv_window(entity, entity->vr_ur))
+      entity->vr_ur = (entity->vr_uh - entity->window_size
+                         + entity->sn_modulus) % entity->sn_modulus;
+  }
+
+  /* PDU with SN = VR(UR) is there: advance VR(UR) to the first SN not
+   * received and reassemble the PDUs before this SN
+   */
+  if (rlc_um_pdu_received(entity, entity->vr_ur)) {
+    do {
+      entity->vr_ur = (entity->vr_ur + 1) % entity->sn_modulus;
+    } while (rlc_um_pdu_received(entity, entity->vr_ur));
+    rlc_um_reassemble(entity, less_than_vr_ur);
+  }
+
+  /* t_reordering is running: stop it if VR(UX) <= VR(UR) or if VR(UX) is
+   * outside of the window and is not VR(UH)
+   */
+  if (entity->t_reordering_start) {
+    if (sn_compare_rx(entity, entity->vr_ux, entity->vr_ur) <= 0 ||
+        (!sn_in_recv_window(entity, entity->vr_ux) &&
+         entity->vr_ux != entity->vr_uh))
+      entity->t_reordering_start = 0;
+  }
+
+  /* t_reordering is not running (maybe stopped just above): start it if
+   * VR(UH) > VR(UR) and remember VR(UH) in VR(UX)
+   */
+  if (entity->t_reordering_start == 0) {
+    if (sn_compare_rx(entity, entity->vr_uh, entity->vr_ur) > 0) {
+      entity->t_reordering_start = entity->t_current;
+      entity->vr_ux = entity->vr_uh;
+    }
+  }
+}
+
+/* Process a PDU received by the UM entity.
+ * buffer/size: the PDU (header and data) and its length in bytes
+ * The header is decoded and checked. The PDU is discarded (with a debug log)
+ * if it cannot be decoded, if the rx buffer is full, if it has to be
+ * discarded according to 36.322 5.1.2.2.2 or if its LIs are not valid.
+ * Otherwise it is put in the rx list and the reception actions are done.
+ */
+void rlc_entity_um_recv_pdu(rlc_entity_t *_entity, char *buffer, int size)
+{
+/* R(d): jump to the error handling if decoder 'd' is in error */
+#define R(d) do { if (rlc_pdu_decoder_in_error(&d)) goto err; } while (0)
+  rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity;
+  rlc_pdu_decoder_t decoder;
+  rlc_pdu_decoder_t data_decoder;
+
+  int e;
+  int sn;
+
+  int data_e;
+  int data_li;
+
+  int packet_count;
+  int data_size;
+  int data_start;
+  int indicated_data_size;
+
+  rlc_rx_pdu_segment_t *pdu_segment;
+
+  rlc_pdu_decoder_init(&decoder, buffer, size);
+
+  if (entity->sn_field_length == 10) {
+    rlc_pdu_decoder_get_bits(&decoder, 3); R(decoder);       /* R1 */
+  }
+
+  rlc_pdu_decoder_get_bits(&decoder, 2); R(decoder);         /* FI */
+  e  = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder);
+  sn = rlc_pdu_decoder_get_bits(&decoder, entity->sn_field_length); R(decoder);
+
+  /* discard PDU if rx buffer is full */
+  if (entity->rx_size + size > entity->rx_maxsize) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, RX buffer full\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return;
+  }
+
+  /* discard according to 36.322 5.1.2.2.2 */
+  /* that is: VR(UR) < SN < VR(UH) and PDU with this SN already received,
+   * or (VR(UH) - window size) <= SN < VR(UR)
+   */
+  if ((sn_compare_rx(entity, entity->vr_ur, sn) < 0 &&
+       sn_compare_rx(entity, sn, entity->vr_uh) < 0 &&
+       rlc_um_pdu_received(entity, sn)) ||
+      (sn_compare_rx(entity, entity->vr_uh - entity->window_size, sn) <= 0 &&
+       sn_compare_rx(entity, sn, entity->vr_ur) < 0)) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU (sn %d vr(ur) %d vr(uh) %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+          sn, entity->vr_ur, entity->vr_uh);
+    return;
+  }
+
+  packet_count = 1;
+
+  /* go to start of data */
+  /* (a copy of the decoder is used to read all the E/LI pairs; the sum of
+   * the LIs is computed to be checked against the size of the data)
+   */
+  indicated_data_size = 0;
+  data_decoder = decoder;
+  data_e = e;
+  while (data_e) {
+    data_e = rlc_pdu_decoder_get_bits(&data_decoder, 1); R(data_decoder);
+    data_li = rlc_pdu_decoder_get_bits(&data_decoder, 11); R(data_decoder);
+    if (data_li == 0) {
+      LOG_D(RLC, "%s:%d:%s: warning: discard PDU, li == 0\n",
+            __FILE__, __LINE__, __FUNCTION__);
+      return;
+    }
+    indicated_data_size += data_li;
+    packet_count++;
+  }
+  rlc_pdu_decoder_align(&data_decoder);
+
+  /* data is what follows the header */
+  data_start = data_decoder.byte;
+  data_size = size - data_start;
+
+  if (data_size <= 0) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, wrong data size (sum of LI %d data size %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+          indicated_data_size, data_size);
+    return;
+  }
+  /* the LIs do not cover the last SDU of the PDU, so their sum has to be
+   * strictly less than the size of the data
+   */
+  if (indicated_data_size >= data_size) {
+    LOG_D(RLC, "%s:%d:%s: warning: discard PDU, bad LIs (sum of LI %d data size %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+          indicated_data_size, data_size);
+    return;
+  }
+
+  /* put in pdu reception list */
+  entity->rx_size += size;
+  pdu_segment = rlc_rx_new_pdu_segment(sn, 0, size, 1, buffer, data_start);
+  entity->rx_list = rlc_rx_pdu_segment_list_add(sn_compare_rx, entity,
+                                                entity->rx_list, pdu_segment);
+
+  /* do reception actions (36.322 5.1.2.2.3) */
+  rlc_um_reception_actions(entity, pdu_segment);
+
+  return;
+
+err:
+  LOG_D(RLC, "%s:%d:%s: error decoding PDU, discarding\n", __FILE__, __LINE__, __FUNCTION__);
+
+#undef R
+}
+
+/*************************************************************************/
+/* TX functions                                                          */
+/*************************************************************************/
+
+typedef struct {
+  int sdu_count;         /* number of SDUs (whole or partial) put in the PDU */
+  int data_size;         /* total number of SDU bytes put in the PDU */
+  int header_size;       /* size in bytes of the PDU header for sdu_count SDUs */
+  int last_sdu_is_full;  /* 0 if the last SDU was cut to fit in the PDU, 1 otherwise */
+  int first_sdu_length;  /* number of bytes taken from the first SDU */
+} tx_pdu_size_t;
+
+static int header_size(int sn_field_length, int sdu_count)
+{
+  /* fixed part is 16 bits for a 10-bit SN, 8 bits otherwise; each SDU but the last adds 12 bits */
+  int total_bits = ((sn_field_length == 10) ? 16 : 8) + (sdu_count - 1) * 12;
+  return (total_bits + 7) / 8;      /* round up to whole bytes */
+}
+
+static tx_pdu_size_t tx_pdu_size(rlc_entity_um_t *entity, int maxsize)
+{
+  tx_pdu_size_t ret;
+  int sdu_count;
+  int sdu_size;
+  int pdu_data_size;
+  rlc_sdu_t *sdu;
+  /* compute the layout of the PDU that fits in 'maxsize' bytes; the entity is only read */
+  ret.sdu_count = 0;
+  ret.data_size = 0;
+  ret.header_size = 0;
+  ret.last_sdu_is_full = 1;
+  ret.first_sdu_length = 0;
+
+  /* TX PDU - let's make the biggest PDU we can with the SDUs we have */
+  sdu_count = 0;
+  pdu_data_size = 0;
+  sdu = entity->tx_list;
+  while (sdu != NULL) {
+    int new_header_size = header_size(entity->sn_field_length, sdu_count+1);
+    /* if we cannot put new header + at least 1 byte of data then over */
+    if (new_header_size + pdu_data_size >= maxsize)
+      break;
+    sdu_count++;
+    /* only include the bytes of this SDU not included in PDUs already */
+    sdu_size = sdu->size - sdu->next_byte;
+    /* don't feed more than 'maxsize' bytes: cut this SDU to what is left */
+    if (new_header_size + pdu_data_size + sdu_size > maxsize) {
+      sdu_size = maxsize - new_header_size - pdu_data_size;
+      ret.last_sdu_is_full = 0;
+    }
+    if (sdu_count == 1)          /* remember how much of the first SDU is taken */
+      ret.first_sdu_length = sdu_size;
+    pdu_data_size += sdu_size;
+    /* if we put more than 2^11-1 bytes then the LI field cannot be used,
+     * so this is the last SDU we can put
+     */
+    if (sdu_size > 2047)
+      break;
+    sdu = sdu->next;
+  }
+  /* when no SDU fits (or the list is empty) ret keeps the values set above */
+  if (sdu_count) {
+    ret.sdu_count = sdu_count;
+    ret.data_size = pdu_data_size;
+    ret.header_size = header_size(entity->sn_field_length, sdu_count);
+  }
+
+  return ret;
+}
+
+rlc_entity_buffer_status_t rlc_entity_um_buffer_status(
+    rlc_entity_t *_entity, int maxsize)
+{
+  /* Report the size (header + data) of the PDU that would be generated if
+   * 'maxsize' bytes were granted. The status and retransmission sizes are
+   * always reported as 0 by this UM implementation.
+   *
+   * todo: tx_pdu_size() stops after an SDU chunk bigger than 2047 bytes,
+   * so the size computed here will not be accurate in that case. Change
+   * the computation to be more accurate (if needed).
+   */
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+  tx_pdu_size_t next_pdu = tx_pdu_size(um, maxsize);
+  rlc_entity_buffer_status_t status;
+
+  status.status_size = 0;
+  status.retx_size = 0;
+  status.tx_size = next_pdu.data_size + next_pdu.header_size;
+  return status;
+}
+
+int rlc_entity_um_generate_pdu(rlc_entity_t *_entity, char *buffer, int size)
+{
+  rlc_entity_um_t      *entity = (rlc_entity_um_t *)_entity;
+  tx_pdu_size_t        pdu_size;
+  rlc_sdu_t            *sdu;
+  int                  i;
+  int                  cursize;
+  int                  first_sdu_full;
+  int                  last_sdu_full;
+  rlc_pdu_encoder_t    encoder;
+  int                  fi;
+  int                  e;
+  int                  li;
+  char                 *out;
+  int                  outpos;
+  int                  first_sdu_start_byte;
+  int                  sdu_start_byte;
+  /* build one UM PDU of at most 'size' bytes in 'buffer'; returns its length, 0 if nothing can be sent */
+  pdu_size = tx_pdu_size(entity, size);
+  if (pdu_size.sdu_count == 0)
+    return 0;
+
+  sdu = entity->tx_list;
+  /* keep the start offset of the first SDU: next_byte is advanced just below */
+  first_sdu_start_byte = sdu->next_byte;
+
+  /* reserve SDU bytes: advance next_byte of each SDU by what goes in this PDU */
+  cursize = 0;
+  for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
+    int sdu_size = sdu->size - sdu->next_byte;
+    if (cursize + sdu_size > pdu_size.data_size)
+      sdu_size = pdu_size.data_size - cursize;
+    sdu->next_byte += sdu_size;
+    cursize += sdu_size;
+  }
+
+  first_sdu_full = first_sdu_start_byte == 0;
+  last_sdu_full = pdu_size.last_sdu_is_full;
+
+  /* generate header */
+  rlc_pdu_encoder_init(&encoder, buffer, size);
+
+  if (entity->sn_field_length == 10)
+    rlc_pdu_encoder_put_bits(&encoder, 0, 3);                         /* R1 */
+
+  fi = 0;
+  if (!first_sdu_full)
+    fi |= 0x02;                /* PDU does not start at the first byte of an SDU */
+  if (!last_sdu_full)
+    fi |= 0x01;                /* PDU does not end at the last byte of an SDU */
+  rlc_pdu_encoder_put_bits(&encoder, fi, 2);                          /* FI */
+
+  /* see the AM code to understand the logic for Es and LIs */
+  if (pdu_size.sdu_count >= 2)
+    e = 1;
+  else
+    e = 0;
+  rlc_pdu_encoder_put_bits(&encoder, e, 1);                            /* E */
+
+  if (entity->sn_field_length == 10)
+    rlc_pdu_encoder_put_bits(&encoder, entity->vt_us, 10);            /* SN */
+  else
+    rlc_pdu_encoder_put_bits(&encoder, entity->vt_us, 5);             /* SN */
+
+  /* put LIs */
+  sdu = entity->tx_list;
+  /* first SDU */
+  li = pdu_size.first_sdu_length;
+  /* put E+LI only if at least 2 SDUs */
+  if (pdu_size.sdu_count >= 2) {
+    /* E is 1 if at least 3 SDUs */
+    if (pdu_size.sdu_count >= 3)
+      e = 1;
+    else
+      e = 0;
+    rlc_pdu_encoder_put_bits(&encoder, e, 1);                          /* E */
+    rlc_pdu_encoder_put_bits(&encoder, li, 11);                       /* LI */
+  }
+  /* next SDUs, but not the last (no LI for the last) */
+  sdu = sdu->next;
+  for (i = 2; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
+    if (i != pdu_size.sdu_count - 1)
+      e = 1;
+    else
+      e = 0;
+    li = sdu->size;            /* assumes middle SDUs are sent whole - TODO confirm */
+    rlc_pdu_encoder_put_bits(&encoder, e, 1);                          /* E */
+    rlc_pdu_encoder_put_bits(&encoder, li, 11);                       /* LI */
+  }
+
+  rlc_pdu_encoder_align(&encoder);
+
+  /* generate data: copy the SDU bytes right after the header */
+  out = buffer + pdu_size.header_size;
+  sdu = entity->tx_list;
+  sdu_start_byte = first_sdu_start_byte;
+  outpos = 0;
+  for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
+    li = sdu->size - sdu_start_byte;
+    if (outpos + li >= pdu_size.data_size)
+      li = pdu_size.data_size - outpos;
+    memcpy(out+outpos, sdu->data + sdu_start_byte, li);
+    outpos += li;
+    sdu_start_byte = 0;        /* the SDUs after the first are copied from byte 0 */
+  }
+
+  /* cleanup sdu list: free the head SDUs whose bytes are all in PDUs */
+  while (entity->tx_list != NULL &&
+         entity->tx_list->size == entity->tx_list->next_byte) {
+    rlc_sdu_t *c = entity->tx_list;
+    /* release SDU bytes */
+    entity->tx_size -= c->size;
+    entity->tx_list = c->next;
+    rlc_free_sdu(c);
+  }
+  if (entity->tx_list == NULL)
+    entity->tx_end = NULL;
+
+  /* update VT(US) */
+  entity->vt_us = (entity->vt_us + 1) % entity->sn_modulus;
+
+  return pdu_size.header_size + pdu_size.data_size;
+}
+
+/*************************************************************************/
+/* SDU RX functions                                                      */
+/*************************************************************************/
+
+void rlc_entity_um_recv_sdu(rlc_entity_t *_entity, char *buffer, int size,
+                            int sdu_id)
+{
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+
+  /* an SDU larger than SDU_MAX is treated as a fatal error */
+  if (size > SDU_MAX) {
+    LOG_E(RLC, "%s:%d:%s: fatal: SDU size too big (%d bytes)\n",
+          __FILE__, __LINE__, __FUNCTION__, size);
+    exit(1);
+  }
+  /* no more room in the TX buffer: the SDU is dropped (debug log only) */
+  if (um->tx_size + size > um->tx_maxsize) {
+    LOG_D(RLC, "%s:%d:%s: warning: SDU rejected, SDU buffer full\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return;
+  }
+
+  /* account for the bytes, then queue the new SDU at the end of the TX list */
+  um->tx_size += size;
+  rlc_sdu_list_add(&um->tx_list, &um->tx_end,
+                   rlc_new_sdu(buffer, size, sdu_id));
+}
+
+/*************************************************************************/
+/* time/timers                                                           */
+/*************************************************************************/
+
+static void check_t_reordering(rlc_entity_um_t *entity)
+{
+  int first_missing;
+
+  /* nothing to do when the timer is not running (start == 0) */
+  if (entity->t_reordering_start == 0)
+    return;
+  /* ... or when it is running but has not expired yet */
+  if (entity->t_current <= entity->t_reordering_start + entity->t_reordering)
+    return;
+
+  entity->t_reordering_start = 0;          /* expired: stop the timer */
+  LOG_D(RLC, "%s:%d:%s: t_reordering expired\n", __FILE__, __LINE__, __FUNCTION__);
+
+  /* VR(UR) becomes the first SN, starting from VR(UX), of a PDU not received */
+  for (first_missing = entity->vr_ux;
+       rlc_um_pdu_received(entity, first_missing);
+       first_missing = (first_missing + 1) % entity->sn_modulus)
+    ;
+  entity->vr_ur = first_missing;
+
+  rlc_um_reassemble(entity, less_than_vr_ur);
+  /* restart the timer only if VR(UH) compares greater than VR(UR) */
+  if (sn_compare_rx(entity, entity->vr_uh, entity->vr_ur) <= 0)
+    return;
+  entity->t_reordering_start = entity->t_current;
+  entity->vr_ux = entity->vr_uh;
+}
+
+void rlc_entity_um_set_time(rlc_entity_t *_entity, uint64_t now)
+{
+  /* store the new current time, then check if t_reordering has expired */
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+
+  um->t_current = now;
+  check_t_reordering(um);
+}
+
+/*************************************************************************/
+/* discard/re-establishment/delete                                       */
+/*************************************************************************/
+
+void rlc_entity_um_discard_sdu(rlc_entity_t *_entity, int sdu_id)
+{
+  /* implements 36.322 5.3: drop SDU 'sdu_id' from the TX list, but only if
+   * none of its bytes has been put in a PDU yet */
+  rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity;
+  rlc_sdu_t head;
+  rlc_sdu_t *cur;
+  rlc_sdu_t *prev;
+
+  /* 'head' is a dummy first node: removing the first SDU needs no special case */
+  head.next = entity->tx_list;
+  cur = entity->tx_list;
+  prev = &head;
+
+  while (cur != NULL && cur->upper_layer_id != sdu_id) {
+    prev = cur;
+    cur = cur->next;
+  }
+
+  /* if sdu_id not found or some bytes have already been 'PDU-ized'
+   * then do nothing
+   */
+  if (cur == NULL || cur->next_byte != 0)
+    return;
+
+  /* remove SDU from tx_list */
+  prev->next = cur->next;
+  entity->tx_list = head.next;
+  if (entity->tx_end == cur)
+    entity->tx_end = prev != &head ? prev : NULL;
+
+  /* release SDU bytes (added to tx_size by recv_sdu), else the TX buffer shrinks */
+  entity->tx_size -= cur->size;
+  rlc_free_sdu(cur);
+}
+
+static void clear_entity(rlc_entity_um_t *entity)
+{
+  /* Put the entity back to an empty state: state variables, time and timer
+   * are zeroed, every queued RX PDU segment and TX SDU is freed.
+   */
+  rlc_rx_pdu_segment_t *rx_next;
+  rlc_sdu_t            *tx_next;
+
+  /* RX and TX state variables */
+  entity->vr_ur = entity->vr_ux = entity->vr_uh = 0;
+  entity->vt_us = 0;
+
+  /* time and timer */
+  entity->t_current = 0;
+  entity->t_reordering_start = 0;
+
+  /* RX list: fetch 'next' before freeing the current segment */
+  while (entity->rx_list != NULL) {
+    rx_next = entity->rx_list->next;
+    rlc_rx_free_pdu_segment(entity->rx_list);
+    entity->rx_list = rx_next;
+  }
+  entity->rx_size = 0;
+
+  /* wipe the reassembly state */
+  memset(&entity->reassemble, 0, sizeof(entity->reassemble));
+
+  /* TX list: same walk as for the RX list */
+  while (entity->tx_list != NULL) {
+    tx_next = entity->tx_list->next;
+    rlc_free_sdu(entity->tx_list);
+    entity->tx_list = tx_next;
+  }
+  entity->tx_end = NULL;
+  entity->tx_size = 0;
+}
+
+void rlc_entity_um_reestablishment(rlc_entity_t *_entity)
+{
+  /* run reassembly with the less_than_vr_uh filter, then reset the entity */
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+
+  rlc_um_reassemble(um, less_than_vr_uh);
+  clear_entity(um);
+}
+
+void rlc_entity_um_delete(rlc_entity_t *_entity)
+{
+  /* release all that the entity holds, then the entity itself */
+  clear_entity((rlc_entity_um_t *)_entity);
+  free(_entity);
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_um.h b/openair2/LAYER2/rlc_v2/rlc_entity_um.h
new file mode 100644
index 0000000000000000000000000000000000000000..02c5141a7a6613536728e2b81c75ca1b21b1db1f
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_entity_um.h
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_ENTITY_UM_H_
+#define _RLC_ENTITY_UM_H_
+
+#include "rlc_entity.h"
+#include "rlc_pdu.h"
+#include "rlc_sdu.h"
+
+typedef struct {         /* SDU reassembly state of a UM entity */
+  char sdu[SDU_MAX];     /* sdu is reassembled here */
+  int  sdu_pos;          /* next byte to put in sdu */
+
+  /* decoder of current PDU */
+  rlc_pdu_decoder_t    dec;
+  int sn;                /* presumably the SN of the current PDU - TODO confirm */
+
+  int sdu_head_missing;  /* NOTE(review): looks like a flag for an SDU whose start was not received - confirm */
+} rlc_um_reassemble_t;
+
+typedef struct {
+  rlc_entity_t common;   /* first member: rlc_entity_t pointers are cast to rlc_entity_um_t */
+
+  /* configuration */
+  int t_reordering;      /* duration of the reordering timer, same unit as t_current */
+  int sn_field_length;   /* 5 or 10 (bits) */
+
+  int sn_modulus;        /* 1024 for sn_field_length == 10, 32 for 5 */
+  int window_size;       /* 512 for sn_field_length == 10, 16 for 5 */
+
+  /* runtime rx */
+  int vr_ur;
+  int vr_ux;
+  int vr_uh;
+
+  /* runtime tx */
+  int vt_us;             /* SN put in the next generated PDU */
+
+  /* set to the latest known time by the user of the module. Unit: ms */
+  uint64_t t_current;
+
+  /* timers (stores the value of t_current at activation, 0 means not active) */
+  uint64_t t_reordering_start;
+
+  /* rx management */
+  rlc_rx_pdu_segment_t *rx_list;
+  int                  rx_size;      /* bytes of the PDUs stored in rx_list */
+  int                  rx_maxsize;   /* a PDU is discarded if rx_size would exceed this */
+
+  /* reassembly management */
+  rlc_um_reassemble_t reassemble;
+
+  /* tx management */
+  rlc_sdu_t *tx_list;    /* first SDU waiting for transmission */
+  rlc_sdu_t *tx_end;     /* last SDU of tx_list, NULL when the list is empty */
+  int       tx_size;     /* bytes of the SDUs accepted and not yet released */
+  int       tx_maxsize;  /* an SDU is rejected if tx_size would exceed this */
+} rlc_entity_um_t;
+
+void rlc_entity_um_recv_sdu(rlc_entity_t *_entity, char *buffer, int size,
+                            int sdu_id);
+void rlc_entity_um_recv_pdu(rlc_entity_t *entity, char *buffer, int size);
+rlc_entity_buffer_status_t rlc_entity_um_buffer_status(
+    rlc_entity_t *entity, int maxsize);
+int rlc_entity_um_generate_pdu(rlc_entity_t *_entity, char *buffer, int size);
+void rlc_entity_um_set_time(rlc_entity_t *entity, uint64_t now);
+void rlc_entity_um_discard_sdu(rlc_entity_t *entity, int sdu_id);
+void rlc_entity_um_reestablishment(rlc_entity_t *entity);
+void rlc_entity_um_delete(rlc_entity_t *entity);
+
+#endif /* _RLC_ENTITY_UM_H_ */
diff --git a/openair2/LAYER2/rlc_v2/rlc_oai_api.c b/openair2/LAYER2/rlc_v2/rlc_oai_api.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd3eebe6355f487690178f08f31246627b444438
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_oai_api.c
@@ -0,0 +1,1030 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+/* from openair */
+#include "rlc.h"
+#include "pdcp.h"
+
+/* from new rlc module */
+#include "asn1_utils.h"
+#include "rlc_ue_manager.h"
+#include "rlc_entity.h"
+
+#include <stdint.h>
+
+static rlc_ue_manager_t *rlc_ue_manager;   /* created by rlc_module_init() */
+
+/* TODO: handle time a bit more properly */
+static uint64_t rlc_current_time;          /* +1 each time a new (frame, subframe) is seen, see mac_rlc_status_ind() */
+static int      rlc_current_time_last_frame;      /* frame seen at the last time update */
+static int      rlc_current_time_last_subframe;   /* subframe seen at the last time update */
+
+void mac_rlc_data_ind     (
+  const module_id_t         module_idP,
+  const rnti_t              rntiP,
+  const eNB_index_t         eNB_index,
+  const frame_t             frameP,
+  const eNB_flag_t          enb_flagP,
+  const MBMS_flag_t         MBMS_flagP,
+  const logical_chan_id_t   channel_idP,
+  char                     *buffer_pP,
+  const tb_size_t           tb_sizeP,
+  num_tb_t                  num_tbP,
+  crc_t                    *crcs_pP)
+{
+  rlc_ue_t *ue;
+  rlc_entity_t *rb;
+  int rnti;
+  int channel_id;
+  /* pass a PDU received by MAC (tb_sizeP bytes at buffer_pP) to the RLC entity of the logical channel */
+  if (enb_flagP == 1 && module_idP != 0) {
+    LOG_E(RLC, "%s:%d:%s: fatal, module_id must be 0 for eNB\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  if (/*module_idP != 0 ||*/ eNB_index != 0 /*|| enb_flagP != 1 || MBMS_flagP != 0*/) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  if (enb_flagP)
+    T(T_ENB_RLC_MAC_UL, T_INT(module_idP), T_INT(rntiP),
+      T_INT(channel_idP), T_INT(tb_sizeP));
+
+  /* TODO: better handle mbms, maybe we should not change rnti here */
+  if (!enb_flagP && MBMS_flagP) {
+    rnti = 0xfffd;
+    /* TODO: handle channel_id properly */
+    if (channel_idP != 5) {
+      LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+      exit(1);
+    }
+    channel_id = 7;            /* selects drb[4] in the switch below */
+  } else {
+    rnti = rntiP;
+    channel_id = channel_idP;
+  }
+  /* the manager stays locked until the PDU has been processed by the entity */
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rnti);
+  /* channels 1..2 are srb[0..1], channels 3..7 are drb[0..4] */
+  switch (channel_id) {
+  case 1 ... 2: rb = ue->srb[channel_id - 1]; break;
+  case 3 ... 7: rb = ue->drb[channel_id - 3]; break;
+  default:      rb = NULL;                    break;
+  }
+
+  if (rb != NULL) {
+    rb->set_time(rb, rlc_current_time);
+    rb->recv_pdu(rb, buffer_pP, tb_sizeP);
+  } else {
+    LOG_E(RLC, "%s:%d:%s: fatal: no RB found (rnti %d channel ID %d)\n",
+          __FILE__, __LINE__, __FUNCTION__, rnti, channel_id);
+    exit(1);
+  }
+
+  rlc_manager_unlock(rlc_ue_manager);
+}
+
+tbs_size_t mac_rlc_data_req(
+  const module_id_t       module_idP,
+  const rnti_t            rntiP,
+  const eNB_index_t       eNB_index,
+  const frame_t           frameP,
+  const eNB_flag_t        enb_flagP,
+  const MBMS_flag_t       MBMS_flagP,
+  const logical_chan_id_t channel_idP,
+  const tb_size_t         tb_sizeP,
+  char             *buffer_pP,
+  const uint32_t sourceL2Id,
+  const uint32_t destinationL2Id
+   )
+{
+  int ret;
+  rlc_ue_t *ue;
+  rlc_entity_t *rb;
+  int maxsize;
+  /* ask the RLC entity of the logical channel for a PDU of at most tb_sizeP bytes; returns its size */
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rntiP);
+
+  switch (channel_idP) {
+  case 1 ... 2: rb = ue->srb[channel_idP - 1]; break;
+  case 3 ... 7: rb = ue->drb[channel_idP - 3]; break;
+  default:      rb = NULL;                     break;
+  }
+  /* for MBMS the mapping above is replaced: channels 1..5 are drb[0..4] */
+  if (MBMS_flagP == MBMS_FLAG_YES) {
+    if (channel_idP >= 1 && channel_idP <= 5)
+      rb = ue->drb[channel_idP - 1];
+    else
+      rb = NULL;
+  }
+
+
+  if (rb != NULL) {
+    rb->set_time(rb, rlc_current_time);
+    maxsize = tb_sizeP;
+    ret = rb->generate_pdu(rb, buffer_pP, maxsize);
+  } else {
+    LOG_E(RLC, "%s:%d:%s: fatal: data req for unknown RB\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+    ret = 0;                   /* placed after exit(1) */
+  }
+
+  rlc_manager_unlock(rlc_ue_manager);
+
+  if (enb_flagP)
+    T(T_ENB_RLC_MAC_DL, T_INT(module_idP), T_INT(rntiP),
+      T_INT(channel_idP), T_INT(ret));
+
+  return ret;
+}
+
+mac_rlc_status_resp_t mac_rlc_status_ind(
+  const module_id_t       module_idP,
+  const rnti_t            rntiP,
+  const eNB_index_t       eNB_index,
+  const frame_t           frameP,
+  const sub_frame_t       subframeP,
+  const eNB_flag_t        enb_flagP,
+  const MBMS_flag_t       MBMS_flagP,
+  const logical_chan_id_t channel_idP,
+  const uint32_t sourceL2Id,
+  const uint32_t destinationL2Id
+  )
+{
+  /* Report to MAC how many bytes are waiting on a logical channel of a UE.
+   * As a side effect the RLC clock advances by one each time the
+   * (frame, subframe) pair differs from the last one seen.
+   */
+  mac_rlc_status_resp_t resp;
+  rlc_entity_t *entity = NULL;
+  rlc_ue_t *ue;
+
+  /* TODO: handle time a bit more properly */
+  if (frameP != rlc_current_time_last_frame ||
+      subframeP != rlc_current_time_last_subframe) {
+    rlc_current_time_last_subframe = subframeP;
+    rlc_current_time_last_frame = frameP;
+    rlc_current_time++;
+  }
+
+  /* everything but bytes_in_buffer is always reported as 0 */
+  resp.bytes_in_buffer = 0;
+  resp.pdus_in_buffer = 0;
+  /* TODO: creation time may be important (unit: frame, as it seems) */
+  resp.head_sdu_creation_time = 0;
+  resp.head_sdu_remaining_size_to_send = 0;
+  resp.head_sdu_is_segmented = 0;
+
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rntiP);
+
+  /* MBMS: channels 1..5 are drb[0..4]. Otherwise channels 1..2 are the
+   * SRBs and channels 3..7 are drb[0..4]. Anything else has no entity.
+   */
+  if (MBMS_flagP == MBMS_FLAG_YES) {
+    if (channel_idP >= 1 && channel_idP <= 5)
+      entity = ue->drb[channel_idP - 1];
+  } else if (channel_idP >= 1 && channel_idP <= 2) {
+    entity = ue->srb[channel_idP - 1];
+  } else if (channel_idP >= 3 && channel_idP <= 7) {
+    entity = ue->drb[channel_idP - 3];
+  }
+
+  if (entity != NULL) {
+    rlc_entity_buffer_status_t status;
+    entity->set_time(entity, rlc_current_time);
+    /* 36.321 BSR values stop at 3000000 bytes, above that '> 3000000'
+     * is reported (table 6.1.3.1-2), so a limit of 4000000 is plenty
+     */
+    status = entity->buffer_status(entity, 4000000);
+    resp.bytes_in_buffer = status.status_size + status.retx_size + status.tx_size;
+  }
+
+  rlc_manager_unlock(rlc_ue_manager);
+  return resp;
+}
+
+rlc_buffer_occupancy_t mac_rlc_get_buffer_occupancy_ind(
+  const module_id_t       module_idP,
+  const rnti_t            rntiP,
+  const eNB_index_t       eNB_index,
+  const frame_t           frameP,
+  const sub_frame_t       subframeP,
+  const eNB_flag_t        enb_flagP,
+  const logical_chan_id_t channel_idP)
+{
+  /* Return the number of bytes waiting on a logical channel of a UE.
+   * Calling this with enb_flagP set is fatal. The RLC clock advances by
+   * one each time the (frame, subframe) pair differs from the last seen.
+   */
+  rlc_buffer_occupancy_t occupancy = 0;
+  rlc_entity_t *entity = NULL;
+  rlc_ue_t *ue;
+
+  if (enb_flagP) {
+    LOG_E(RLC, "Tx mac_rlc_get_buffer_occupancy_ind function is not implemented for eNB LcId=%u\n", channel_idP);
+    exit(1);
+  }
+
+  /* TODO: handle time a bit more properly */
+  if (frameP != rlc_current_time_last_frame ||
+      subframeP != rlc_current_time_last_subframe) {
+    rlc_current_time_last_subframe = subframeP;
+    rlc_current_time_last_frame = frameP;
+    rlc_current_time++;
+  }
+
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rntiP);
+
+  /* channels 1..2 are the SRBs, channels 3..7 are drb[0..4];
+   * any other channel has no entity and reports 0 */
+  if (channel_idP >= 1 && channel_idP <= 2)
+    entity = ue->srb[channel_idP - 1];
+  else if (channel_idP >= 3 && channel_idP <= 7)
+    entity = ue->drb[channel_idP - 3];
+
+  if (entity != NULL) {
+    rlc_entity_buffer_status_t status;
+    entity->set_time(entity, rlc_current_time);
+    /* 36.321 BSR values stop at 3000000 bytes, above that '> 3000000'
+     * is reported (table 6.1.3.1-2), so a limit of 4000000 is plenty
+     */
+    status = entity->buffer_status(entity, 4000000);
+    occupancy = status.status_size + status.retx_size + status.tx_size;
+  }
+
+  rlc_manager_unlock(rlc_ue_manager);
+
+  return occupancy;
+}
+
+int oai_emulation;
+
+rlc_op_status_t rlc_data_req     (const protocol_ctxt_t *const ctxt_pP,
+                                  const srb_flag_t   srb_flagP,
+                                  const MBMS_flag_t  MBMS_flagP,
+                                  const rb_id_t      rb_idP,
+                                  const mui_t        muiP,
+                                  confirm_t    confirmP,
+                                  sdu_size_t   sdu_sizeP,
+                                  mem_block_t *sdu_pP,
+                                  const uint32_t *const sourceL2Id,
+                                  const uint32_t *const destinationL2Id
+                                 )
+{
+  /* Hand an SDU over to the RLC entity of the targeted radio bearer, then
+   * release the memory block that carried it. Always returns
+   * RLC_OP_STATUS_OK; an SDU for an unknown radio bearer terminates the
+   * process.
+   */
+  rlc_entity_t *entity = NULL;
+  rlc_ue_t *ue;
+  /* MBMS traffic always uses RNTI 0xfffd */
+  int rnti = (MBMS_flagP == MBMS_FLAG_YES) ? 0xfffd : ctxt_pP->rnti;
+
+  LOG_D(RLC, "%s rnti %d srb_flag %d rb_id %d mui %d confirm %d sdu_size %d MBMS_flag %d\n",
+        __FUNCTION__, rnti, srb_flagP, (int)rb_idP, muiP, confirmP, sdu_sizeP,
+        MBMS_flagP);
+
+  if (ctxt_pP->enb_flag)
+    T(T_ENB_RLC_DL, T_INT(ctxt_pP->module_id),
+      T_INT(ctxt_pP->rnti), T_INT(rb_idP), T_INT(sdu_sizeP));
+
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rnti);
+
+  /* rb_idP designates a DRB (1..5) for MBMS or when srb_flagP is not set,
+   * an SRB (1..2) otherwise; out of range IDs leave 'entity' NULL
+   */
+  if (!srb_flagP || MBMS_flagP == MBMS_FLAG_YES) {
+    if (rb_idP >= 1 && rb_idP <= 5)
+      entity = ue->drb[rb_idP - 1];
+  } else {
+    if (rb_idP >= 1 && rb_idP <= 2)
+      entity = ue->srb[rb_idP - 1];
+  }
+
+  /* give the SDU to the entity, or stop if there is none */
+  if (entity != NULL) {
+    entity->set_time(entity, rlc_current_time);
+    entity->recv_sdu(entity, (char *)sdu_pP->data, sdu_sizeP, muiP);
+  } else {
+    LOG_E(RLC, "%s:%d:%s: fatal: SDU sent to unknown RB\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  rlc_manager_unlock(rlc_ue_manager);
+  /* the memory block is released here in all cases */
+  free_mem_block(sdu_pP, __func__);
+
+  return RLC_OP_STATUS_OK;
+}
+
+int rlc_module_init(int enb_flag)
+{
+  /* a second call is a fatal error: 'done' remembers the first one */
+  static int done = 0;
+  static pthread_mutex_t guard = PTHREAD_MUTEX_INITIALIZER;
+
+  if (pthread_mutex_lock(&guard) != 0) abort();
+  if (done != 0) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* first call: create the UE manager, passing it enb_flag */
+  rlc_ue_manager = new_rlc_ue_manager(enb_flag);
+  done = 1;
+
+  if (pthread_mutex_unlock(&guard) != 0)
+    abort();
+  return 0;
+}
+
+void rlc_util_print_hex_octets(comp_name_t componentP, unsigned char *dataP, const signed long sizeP)
+{ /* empty body: this implementation prints nothing */
+}
+
+#include "common/ran_context.h"
+extern RAN_CONTEXT_t RC;
+
+static void deliver_sdu(void *_ue, rlc_entity_t *entity, char *buf, int size)
+{
+  rlc_ue_t *ue = _ue;
+  int is_srb;
+  int rb_id;
+  protocol_ctxt_t ctx;
+  mem_block_t *memblock;
+  int i;
+  int is_enb;
+  int is_mbms;
+  /* deliver an SDU ('size' bytes at 'buf') coming from 'entity': to F1AP for a DU SRB, to PDCP otherwise */
+  /* TODO: be sure it's fine to check rnti for MBMS */
+  is_mbms = ue->rnti == 0xfffd;
+
+  /* is it SRB? */
+  for (i = 0; i < 2; i++) {
+    if (entity == ue->srb[i]) {
+      is_srb = 1;
+      rb_id = i+1;
+      goto rb_found;
+    }
+  }
+
+  /* maybe DRB? */
+  for (i = 0; i < 5; i++) {
+    if (entity == ue->drb[i]) {
+      is_srb = 0;
+      rb_id = i+1;
+      goto rb_found;
+    }
+  }
+
+  LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n",
+        __FILE__, __LINE__, __FUNCTION__, ue->rnti);
+  exit(1);
+
+rb_found:
+  LOG_D(RLC, "%s:%d:%s: delivering SDU (rnti %d is_srb %d rb_id %d) size %d\n",
+        __FILE__, __LINE__, __FUNCTION__, ue->rnti, is_srb, rb_id, size);
+
+  /* unused fields? */
+  ctx.instance = ue->module_id;
+  ctx.frame = 0;
+  ctx.subframe = 0;
+  ctx.eNB_index = 0;
+  ctx.configured = 1;
+  ctx.brOption = 0;
+
+  /* used fields? */
+  ctx.module_id = ue->module_id;
+  ctx.rnti = ue->rnti;
+
+  is_enb = rlc_manager_get_enb_flag(rlc_ue_manager);
+  ctx.enb_flag = is_enb;
+
+  if (is_enb) {
+    T(T_ENB_RLC_UL,
+      T_INT(0 /*ctxt_pP->module_id*/),
+      T_INT(ue->rnti), T_INT(rb_id), T_INT(size));
+
+    const ngran_node_t type = RC.rrc[0 /*ctxt_pP->module_id*/]->node_type;
+    AssertFatal(type != ngran_eNB_CU && type != ngran_ng_eNB_CU && type != ngran_gNB_CU,
+                "Can't be CU, bad node type %d\n", type);
+    /* NOTE(review): 'buf' is given to F1AP by pointer, not copied - confirm it outlives the ITTI message */
+    if (NODE_IS_DU(type) && is_srb == 1) {
+      MessageDef *msg = itti_alloc_new_message(TASK_RLC_ENB, F1AP_UL_RRC_MESSAGE);
+      F1AP_UL_RRC_MESSAGE(msg).rnti = ue->rnti;
+      F1AP_UL_RRC_MESSAGE(msg).srb_id = rb_id;
+      F1AP_UL_RRC_MESSAGE(msg).rrc_container = (unsigned char *)buf;
+      F1AP_UL_RRC_MESSAGE(msg).rrc_container_length = size;
+      itti_send_msg_to_task(TASK_DU_F1, ENB_MODULE_ID_TO_INSTANCE(0 /*ctxt_pP->module_id*/), msg);
+      return;
+    }
+  }
+
+  /* allocate only now: the DU SRB path above returns without using a memblock (it used to leak one) */
+  memblock = get_free_mem_block(size, __func__);
+  if (memblock == NULL) {
+    LOG_E(RLC, "%s:%d:%s: ERROR: get_free_mem_block failed\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  memcpy(memblock->data, buf, size);
+  if (!get_pdcp_data_ind_func()(&ctx, is_srb, is_mbms, rb_id, size, memblock, NULL, NULL)) {
+    LOG_E(RLC, "%s:%d:%s: ERROR: pdcp_data_ind failed (is_srb %d rb_id %d rnti %d)\n",
+          __FILE__, __LINE__, __FUNCTION__,
+          is_srb, rb_id, ue->rnti);
+    /* what to do in case of failure? for the moment: nothing */
+  }
+}
+
+static void successful_delivery(void *_ue, rlc_entity_t *entity, int sdu_id)
+{
+  /* Report the successful delivery of SDU 'sdu_id' sent through 'entity'.
+   * For an SRB on the eNB side, RRC is informed with an RLC_SDU_INDICATION
+   * message; in all the other cases nothing is done besides logging.
+   */
+  rlc_ue_t *ue = _ue;
+  MessageDef *ind;
+  int is_srb = -1;           /* -1: not found yet, 1: SRB, 0: DRB */
+  int rb_id = 0;
+  int k;
+
+  /* look for the entity among the SRBs first... */
+  for (k = 0; k < 2 && is_srb == -1; k++)
+    if (ue->srb[k] == entity) {
+      is_srb = 1;
+      rb_id = k + 1;
+    }
+
+  /* ... then among the DRBs */
+  for (k = 0; k < 5 && is_srb == -1; k++)
+    if (ue->drb[k] == entity) {
+      is_srb = 0;
+      rb_id = k + 1;
+    }
+
+  if (is_srb == -1) {
+    LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n",
+          __FILE__, __LINE__, __FUNCTION__, ue->rnti);
+    exit(1);
+  }
+
+  LOG_D(RLC, "sdu %d was successfully delivered on %s %d\n",
+        sdu_id,
+        is_srb ? "SRB" : "DRB",
+        rb_id);
+
+  /* TODO: do something for DRBs? */
+  /* for now only an SRB on the eNB side is reported to RRC,
+   * everything else stops here */
+  if (is_srb == 0 || !rlc_manager_get_enb_flag(rlc_ue_manager))
+    return;
+
+  /* build the indication for RRC */
+  ind = itti_alloc_new_message(TASK_RLC_ENB, RLC_SDU_INDICATION);
+  RLC_SDU_INDICATION(ind).message_id    = sdu_id;
+  RLC_SDU_INDICATION(ind).srb_id        = rb_id;
+  RLC_SDU_INDICATION(ind).is_successful = 1;
+  RLC_SDU_INDICATION(ind).rnti          = ue->rnti;
+
+  /* TODO: accept more than 1 instance? here we send to instance id 0 */
+  itti_send_msg_to_task(TASK_RRC_ENB, 0, ind);
+}
+
+static void max_retx_reached(void *_ue, rlc_entity_t *entity)
+{
+  /* Report that 'entity' reached its maximum number of retransmissions.
+   * For an SRB on the eNB side, RRC is informed with an RLC_SDU_INDICATION
+   * message (is_successful 0); otherwise nothing is done besides logging.
+   */
+  rlc_ue_t *ue = _ue;
+  MessageDef *ind;
+  int is_srb = -1;           /* -1: not found yet, 1: SRB, 0: DRB */
+  int rb_id = 0;
+  int k;
+
+  /* look for the entity among the SRBs first... */
+  for (k = 0; k < 2 && is_srb == -1; k++)
+    if (ue->srb[k] == entity) {
+      is_srb = 1;
+      rb_id = k + 1;
+    }
+
+  /* ... then among the DRBs */
+  for (k = 0; k < 5 && is_srb == -1; k++)
+    if (ue->drb[k] == entity) {
+      is_srb = 0;
+      rb_id = k + 1;
+    }
+
+  if (is_srb == -1) {
+    LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n",
+          __FILE__, __LINE__, __FUNCTION__, ue->rnti);
+    exit(1);
+  }
+
+  LOG_D(RLC, "max RETX reached on %s %d\n",
+        is_srb ? "SRB" : "DRB",
+        rb_id);
+
+  /* TODO: do something for DRBs? */
+  /* for now only an SRB on the eNB side is reported to RRC,
+   * everything else stops here */
+  if (is_srb == 0 || !rlc_manager_get_enb_flag(rlc_ue_manager))
+    return;
+
+  /* build the indication for RRC: no message id is available here */
+  ind = itti_alloc_new_message(TASK_RLC_ENB, RLC_SDU_INDICATION);
+  RLC_SDU_INDICATION(ind).message_id    = -1;
+  RLC_SDU_INDICATION(ind).srb_id        = rb_id;
+  RLC_SDU_INDICATION(ind).is_successful = 0;
+  RLC_SDU_INDICATION(ind).rnti          = ue->rnti;
+
+  /* TODO: accept more than 1 instance? here we send to instance id 0 */
+  itti_send_msg_to_task(TASK_RRC_ENB, 0, ind);
+}
+
+/*
+ * Create the RLC AM entity of SRB 1 or 2 for the UE 'rnti' from the RRC
+ * configuration 's', unless this SRB already exists for the UE (in which
+ * case nothing is changed). 'module_id' is stored in the UE context.
+ * Any configuration this code does not handle (bad SRB id, missing
+ * configuration element, logical channel group other than 0, RLC mode
+ * other than AM) is fatal: an error is logged and the process exits.
+ */
+static void add_srb(int rnti, int module_id, struct LTE_SRB_ToAddMod *s)
+{
+  rlc_entity_t            *rlc_am;
+  rlc_ue_t                *ue;
+
+  struct LTE_SRB_ToAddMod__rlc_Config *r = s->rlc_Config;
+  struct LTE_SRB_ToAddMod__logicalChannelConfig *l = s->logicalChannelConfig;
+  int srb_id = s->srb_Identity;
+  int logical_channel_group;
+
+  int t_reordering;
+  int t_status_prohibit;
+  int t_poll_retransmit;
+  int poll_pdu;
+  int poll_byte;
+  int max_retx_threshold;
+
+  if (srb_id != 1 && srb_id != 2) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n",
+          __FILE__, __LINE__, __FUNCTION__, srb_id);
+    exit(1);
+  }
+
+  /* both members are pointers; fail with a clear message instead of
+   * dereferencing NULL when one of them is not provided
+   */
+  if (r == NULL || l == NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal, missing rlc_Config or logicalChannelConfig for srb %d\n",
+          __FILE__, __LINE__, __FUNCTION__, srb_id);
+    exit(1);
+  }
+
+  switch (l->present) {
+  case LTE_SRB_ToAddMod__logicalChannelConfig_PR_explicitValue:
+    /* ul_SpecificParameters and logicalChannelGroup are pointers too */
+    if (l->choice.explicitValue.ul_SpecificParameters == NULL ||
+        l->choice.explicitValue.ul_SpecificParameters->logicalChannelGroup == NULL) {
+      LOG_E(RLC, "%s:%d:%s: fatal, no logicalChannelGroup for srb %d\n",
+            __FILE__, __LINE__, __FUNCTION__, srb_id);
+      exit(1);
+    }
+    logical_channel_group = *l->choice.explicitValue.ul_SpecificParameters->logicalChannelGroup;
+    break;
+  case LTE_SRB_ToAddMod__logicalChannelConfig_PR_defaultValue:
+    /* default value from 36.331 9.2.1 */
+    logical_channel_group = 0;
+    break;
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* TODO: accept other values? */
+  if (logical_channel_group != 0) {
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  switch (r->present) {
+  case LTE_SRB_ToAddMod__rlc_Config_PR_explicitValue: {
+    struct LTE_RLC_Config__am *am;
+    if (r->choice.explicitValue.present != LTE_RLC_Config_PR_am) {
+      LOG_E(RLC, "%s:%d:%s: fatal error, must be RLC AM\n",
+            __FILE__, __LINE__, __FUNCTION__);
+      exit(1);
+    }
+    am = &r->choice.explicitValue.choice.am;
+    t_reordering       = decode_t_reordering(am->dl_AM_RLC.t_Reordering);
+    t_status_prohibit  = decode_t_status_prohibit(am->dl_AM_RLC.t_StatusProhibit);
+    t_poll_retransmit  = decode_t_poll_retransmit(am->ul_AM_RLC.t_PollRetransmit);
+    poll_pdu           = decode_poll_pdu(am->ul_AM_RLC.pollPDU);
+    poll_byte          = decode_poll_byte(am->ul_AM_RLC.pollByte);
+    max_retx_threshold = decode_max_retx_threshold(am->ul_AM_RLC.maxRetxThreshold);
+    break;
+  }
+  case LTE_SRB_ToAddMod__rlc_Config_PR_defaultValue:
+    /* default values from 36.331 9.2.1 */
+    t_reordering       = 35;
+    t_status_prohibit  = 0;
+    t_poll_retransmit  = 45;
+    poll_pdu           = -1;
+    poll_byte          = -1;
+    max_retx_threshold = 4;
+    break;
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* the UE context is read and modified with the manager locked */
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rnti);
+  ue->module_id = module_id;
+  if (ue->srb[srb_id-1] != NULL) {
+    LOG_D(RLC, "%s:%d:%s: warning SRB %d already exist for ue %d, do nothing\n",
+          __FILE__, __LINE__, __FUNCTION__, srb_id, rnti);
+  } else {
+    rlc_am = new_rlc_entity_am(100000,
+                               100000,
+                               deliver_sdu, ue,
+                               successful_delivery, ue,
+                               max_retx_reached, ue,
+                               t_reordering, t_status_prohibit,
+                               t_poll_retransmit,
+                               poll_pdu, poll_byte, max_retx_threshold);
+    rlc_ue_add_srb_rlc_entity(ue, srb_id, rlc_am);
+
+    LOG_D(RLC, "%s:%d:%s: added srb %d to ue %d\n",
+          __FILE__, __LINE__, __FUNCTION__, srb_id, rnti);
+  }
+  rlc_manager_unlock(rlc_ue_manager);
+}
+
+/*
+ * Create an RLC AM entity for DRB 1..5 of the UE 'rnti' from the RRC
+ * configuration 's', unless this DRB already exists for the UE (in which
+ * case nothing is changed). 'module_id' is stored in the UE context.
+ * Current limitations, all fatal (error logged, process exits): the logical
+ * channel id must be drb id + 2, the logical channel group must be 1, the
+ * RLC configuration must be AM and all the configuration elements read
+ * here must be present.
+ */
+static void add_drb_am(int rnti, int module_id, struct LTE_DRB_ToAddMod *s)
+{
+  rlc_entity_t            *rlc_am;
+  rlc_ue_t                *ue;
+
+  struct LTE_RLC_Config *r = s->rlc_Config;
+  struct LTE_LogicalChannelConfig *l = s->logicalChannelConfig;
+  int drb_id = s->drb_Identity;
+  int channel_id;
+  int logical_channel_group;
+
+  int t_reordering;
+  int t_status_prohibit;
+  int t_poll_retransmit;
+  int poll_pdu;
+  int poll_byte;
+  int max_retx_threshold;
+
+  if (!(drb_id >= 1 && drb_id <= 5)) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad drb id %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id);
+    exit(1);
+  }
+
+  /* everything dereferenced below is a pointer; fail with a clear message
+   * instead of dereferencing NULL when an element is not provided
+   */
+  if (r == NULL || l == NULL || s->logicalChannelIdentity == NULL ||
+      l->ul_SpecificParameters == NULL ||
+      l->ul_SpecificParameters->logicalChannelGroup == NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal, incomplete configuration for drb %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id);
+    exit(1);
+  }
+
+  channel_id = *s->logicalChannelIdentity;
+
+  if (channel_id != drb_id + 2) {
+    LOG_E(RLC, "%s:%d:%s: todo, remove this limitation\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  logical_channel_group = *l->ul_SpecificParameters->logicalChannelGroup;
+
+  /* TODO: accept other values? */
+  if (logical_channel_group != 1) {
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  switch (r->present) {
+  case LTE_RLC_Config_PR_am: {
+    struct LTE_RLC_Config__am *am;
+    am = &r->choice.am;
+    t_reordering       = decode_t_reordering(am->dl_AM_RLC.t_Reordering);
+    t_status_prohibit  = decode_t_status_prohibit(am->dl_AM_RLC.t_StatusProhibit);
+    t_poll_retransmit  = decode_t_poll_retransmit(am->ul_AM_RLC.t_PollRetransmit);
+    poll_pdu           = decode_poll_pdu(am->ul_AM_RLC.pollPDU);
+    poll_byte          = decode_poll_byte(am->ul_AM_RLC.pollByte);
+    max_retx_threshold = decode_max_retx_threshold(am->ul_AM_RLC.maxRetxThreshold);
+    break;
+  }
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* the UE context is read and modified with the manager locked */
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rnti);
+  ue->module_id = module_id;
+  if (ue->drb[drb_id-1] != NULL) {
+    LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id, rnti);
+  } else {
+    rlc_am = new_rlc_entity_am(1000000,
+                               1000000,
+                               deliver_sdu, ue,
+                               successful_delivery, ue,
+                               max_retx_reached, ue,
+                               t_reordering, t_status_prohibit,
+                               t_poll_retransmit,
+                               poll_pdu, poll_byte, max_retx_threshold);
+    rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_am);
+
+    LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id, rnti);
+  }
+  rlc_manager_unlock(rlc_ue_manager);
+}
+
+/*
+ * Create an RLC UM entity for DRB 1..5 of the UE 'rnti' from the RRC
+ * configuration 's', unless this DRB already exists for the UE (in which
+ * case nothing is changed). 'module_id' is stored in the UE context.
+ * Current limitations, all fatal (error logged, process exits): the logical
+ * channel id must be drb id + 2, the logical channel group must be 1, the
+ * RLC configuration must be bi-directional UM with the same SN field
+ * length in both directions and all the configuration elements read here
+ * must be present.
+ */
+static void add_drb_um(int rnti, int module_id, struct LTE_DRB_ToAddMod *s)
+{
+  rlc_entity_t            *rlc_um;
+  rlc_ue_t                *ue;
+
+  struct LTE_RLC_Config *r = s->rlc_Config;
+  struct LTE_LogicalChannelConfig *l = s->logicalChannelConfig;
+  int drb_id = s->drb_Identity;
+  int channel_id;
+  int logical_channel_group;
+
+  int t_reordering;
+  int sn_field_length;
+
+  if (!(drb_id >= 1 && drb_id <= 5)) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad drb id %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id);
+    exit(1);
+  }
+
+  /* everything dereferenced below is a pointer; fail with a clear message
+   * instead of dereferencing NULL when an element is not provided
+   */
+  if (r == NULL || l == NULL || s->logicalChannelIdentity == NULL ||
+      l->ul_SpecificParameters == NULL ||
+      l->ul_SpecificParameters->logicalChannelGroup == NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal, incomplete configuration for drb %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id);
+    exit(1);
+  }
+
+  channel_id = *s->logicalChannelIdentity;
+
+  if (channel_id != drb_id + 2) {
+    LOG_E(RLC, "%s:%d:%s: todo, remove this limitation\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  logical_channel_group = *l->ul_SpecificParameters->logicalChannelGroup;
+
+  /* TODO: accept other values? */
+  if (logical_channel_group != 1) {
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  switch (r->present) {
+  case LTE_RLC_Config_PR_um_Bi_Directional: {
+    struct LTE_RLC_Config__um_Bi_Directional *um;
+    um = &r->choice.um_Bi_Directional;
+    t_reordering    = decode_t_reordering(um->dl_UM_RLC.t_Reordering);
+    /* a single SN field length is given to the entity, so DL and UL must agree */
+    if (um->dl_UM_RLC.sn_FieldLength != um->ul_UM_RLC.sn_FieldLength) {
+      LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+      exit(1);
+    }
+    sn_field_length = decode_sn_field_length(um->dl_UM_RLC.sn_FieldLength);
+    break;
+  }
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* the UE context is read and modified with the manager locked */
+  rlc_manager_lock(rlc_ue_manager);
+  ue = rlc_manager_get_ue(rlc_ue_manager, rnti);
+  ue->module_id = module_id;
+  if (ue->drb[drb_id-1] != NULL) {
+    LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id, rnti);
+  } else {
+    rlc_um = new_rlc_entity_um(1000000,
+                               1000000,
+                               deliver_sdu, ue,
+                               t_reordering,
+                               sn_field_length);
+    rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_um);
+
+    LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n",
+          __FILE__, __LINE__, __FUNCTION__, drb_id, rnti);
+  }
+  rlc_manager_unlock(rlc_ue_manager);
+}
+
+/*
+ * Add the DRB described by 's' to the UE 'rnti', dispatching on the RLC
+ * mode: AM goes to add_drb_am(), bi-directional UM to add_drb_um().
+ * A missing RLC configuration or any other mode is fatal.
+ */
+static void add_drb(int rnti, int module_id, struct LTE_DRB_ToAddMod *s)
+{
+  /* rlc_Config is a pointer: do not dereference it when it is not provided */
+  if (s->rlc_Config == NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal: no rlc_Config for DRB\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  switch (s->rlc_Config->present) {
+  case LTE_RLC_Config_PR_am:
+    add_drb_am(rnti, module_id, s);
+    break;
+  case LTE_RLC_Config_PR_um_Bi_Directional:
+    add_drb_um(rnti, module_id, s);
+    break;
+  default:
+    LOG_E(RLC, "%s:%d:%s: fatal: unhandled DRB type\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+}
+
+/*
+ * Apply an RRC (ASN.1) radio bearer configuration to the RLC layer.
+ *  - pmch_InfoList_r9_pP: for each MBMS session listed, an RLC UM entity is
+ *    created in the UE context of the fixed rnti 0xfffd (if not there yet)
+ *  - drb2release_listP: not supported, must be NULL (fatal otherwise)
+ *  - srb2add_listP / drb2add_listP: every element is handed to add_srb() /
+ *    add_drb() with the rnti and module_id found in ctxt_pP
+ * sourceL2Id and destinationL2Id are not used by this function.
+ * On the eNB side only module_id 0 and instance 0 are accepted.
+ * Returns RLC_OP_STATUS_OK; unsupported cases end the process with exit(1).
+ */
+rlc_op_status_t rrc_rlc_config_asn1_req (const protocol_ctxt_t   * const ctxt_pP,
+    const LTE_SRB_ToAddModList_t   * const srb2add_listP,
+    const LTE_DRB_ToAddModList_t   * const drb2add_listP,
+    const LTE_DRB_ToReleaseList_t  * const drb2release_listP,
+    const LTE_PMCH_InfoList_r9_t * const pmch_InfoList_r9_pP,
+    const uint32_t sourceL2Id,
+    const uint32_t destinationL2Id
+                                        )
+{
+  int rnti = ctxt_pP->rnti;
+  int module_id = ctxt_pP->module_id;
+  int i;
+  int j;
+
+  if (ctxt_pP->enb_flag == 1 &&
+      (ctxt_pP->module_id != 0 || ctxt_pP->instance != 0)) {
+    LOG_E(RLC, "%s: module_id != 0 or instance != 0 not handled for eNB\n",
+          __FUNCTION__);
+    exit(1);
+  }
+
+  /* disabled check: the condition is the constant 0, the body never runs */
+  if (0 /*||
+      ctxt_pP->instance != 0 || ctxt_pP->eNB_index != 0 ||
+      ctxt_pP->configured != 1 || ctxt_pP->brOption != 0 */) {
+    LOG_E(RLC, "%s: ctxt_pP not handled (%d %d %d %d %d %d)\n", __FUNCTION__,
+          ctxt_pP->enb_flag , ctxt_pP->module_id, ctxt_pP->instance,
+          ctxt_pP->eNB_index, ctxt_pP->configured, ctxt_pP->brOption);
+    exit(1);
+  }
+
+  /* MBMS: one UM entity per session of each PMCH, all attached to mbms_rnti */
+  if (pmch_InfoList_r9_pP != NULL) {
+    int                           mbms_rnti = 0xfffd;
+    LTE_MBMS_SessionInfoList_r9_t *mbms_SessionInfoList_r9_p = NULL;
+    LTE_MBMS_SessionInfo_r9_t     *MBMS_SessionInfo_p        = NULL;
+    mbms_session_id_t             mbms_session_id;
+    mbms_service_id_t             mbms_service_id;
+    rb_id_t                       drb_id = 0;
+    logical_chan_id_t             lc_id  = 0;
+    //LTE_DRB_Identity_t     drb_id          = 0;
+    //LTE_DRB_Identity_t*    pdrb_id         = NULL;
+
+    for (i=0; i<pmch_InfoList_r9_pP->list.count; i++) {
+      mbms_SessionInfoList_r9_p = &(pmch_InfoList_r9_pP->list.array[i]->mbms_SessionInfoList_r9);
+
+      for (j=0; j<mbms_SessionInfoList_r9_p->list.count; j++) {
+        MBMS_SessionInfo_p = mbms_SessionInfoList_r9_p->list.array[j];
+        /* the sessionId_r9 branch is disabled (constant 0): the session id
+         * is always taken from logicalChannelIdentity_r9
+         */
+        if (0/*MBMS_SessionInfo_p->sessionId_r9*/)
+          mbms_session_id  = MBMS_SessionInfo_p->sessionId_r9->buf[0];
+        else
+          mbms_session_id  = MBMS_SessionInfo_p->logicalChannelIdentity_r9;
+        lc_id              = mbms_session_id;
+        mbms_service_id    = MBMS_SessionInfo_p->tmgi_r9.serviceId_r9.buf[2]; //serviceId is 3-octet string
+//        mbms_service_id    = j;
+
+#if 0
+        /* TODO: check if this code should stay there
+         * as it is both enb and ue cases do the same thing
+         */
+        // can set the mch_id = i
+        if (ctxt_pP->enb_flag) {
+          drb_id =  (mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id;//+ (LTE_maxDRB + 3) * MAX_MOBILES_PER_ENB; // 1
+        } else {
+          drb_id =  (mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id; // + (LTE_maxDRB + 3); // 15
+        }
+#endif
+
+        /* NOTE(review): this value is used as index (drb_id-1) into ue->drb[]
+         * below, while the other functions of this file only use DRB ids
+         * 1..5; it looks like it can be 0 or larger than 5 here - confirm
+         * against the size of drb[] in rlc_ue_t
+         */
+        drb_id =  (mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id;
+
+        LOG_I(RLC, PROTOCOL_CTXT_FMT" CONFIG REQ MBMS ASN1 LC ID %u RB ID %u SESSION ID %u SERVICE ID %u, mbms_rnti %x\n",
+              PROTOCOL_CTXT_ARGS(ctxt_pP),
+              lc_id,
+              (int)drb_id,
+              mbms_session_id,
+              mbms_service_id,
+              mbms_rnti
+             );
+
+        rlc_entity_t            *rlc_um;
+        rlc_ue_t                *ue;
+
+        //drb_id = rb_id;
+
+        /* the UE context is read and modified with the manager locked */
+        rlc_manager_lock(rlc_ue_manager);
+        ue = rlc_manager_get_ue(rlc_ue_manager, mbms_rnti);
+        if (ue->drb[drb_id-1] != NULL) {
+          LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n",
+              __FILE__, __LINE__, __FUNCTION__, (int)drb_id, mbms_rnti);
+        } else {
+          /* the last two arguments are hardcoded (see the names in the
+           * trailing comments) instead of being decoded from a configuration
+           */
+          rlc_um = new_rlc_entity_um(1000000,
+                                     1000000,
+                                     deliver_sdu, ue,
+                                     0,//LTE_T_Reordering_ms0,//t_reordering,
+                                     5//LTE_SN_FieldLength_size5//sn_field_length
+                                    );
+          rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_um);
+
+          LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n",
+                __FILE__, __LINE__, __FUNCTION__, (int)drb_id, mbms_rnti);
+        }
+        rlc_manager_unlock(rlc_ue_manager);
+
+      }
+    }
+
+  }
+
+  /* releasing DRBs through this function is not implemented */
+  if (drb2release_listP != NULL) {
+    LOG_E(RLC, "%s:%d:%s: TODO\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  if (srb2add_listP != NULL) {
+    for (i = 0; i < srb2add_listP->list.count; i++) {
+      add_srb(rnti, module_id, srb2add_listP->list.array[i]);
+    }
+  }
+
+  if (drb2add_listP != NULL) {
+    for (i = 0; i < drb2add_listP->list.count; i++) {
+      add_drb(rnti, module_id, drb2add_listP->list.array[i]);
+    }
+  }
+
+  return RLC_OP_STATUS_OK;
+}
+
+/*
+ * Remove the radio bearer 'rb_idP' (an SRB if 'srb_flagP' is set, a DRB
+ * otherwise) of the UE ctxt_pP->rnti. When the UE has no SRB and no DRB
+ * left afterwards, the UE itself is removed from the UE manager.
+ * Only CONFIG_ACTION_REMOVE, without MBMS and for module_id 0 is handled;
+ * anything else, or a bearer id out of range (1..2 for SRB, 1..5 for DRB),
+ * is fatal. 'rlc_infoP' is not used. Returns RLC_OP_STATUS_OK.
+ */
+rlc_op_status_t rrc_rlc_config_req   (
+  const protocol_ctxt_t* const ctxt_pP,
+  const srb_flag_t      srb_flagP,
+  const MBMS_flag_t     mbms_flagP,
+  const config_action_t actionP,
+  const rb_id_t         rb_idP,
+  const rlc_info_t      rlc_infoP)
+{
+  const int max_rb_id = srb_flagP ? 2 : 5;
+  rlc_ue_t  *ue;
+  int       rb_left = 0;
+  int       k;
+
+  if (mbms_flagP) {
+    LOG_E(RLC, "%s:%d:%s: todo (mbms not supported)\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  if (actionP != CONFIG_ACTION_REMOVE) {
+    LOG_E(RLC, "%s:%d:%s: todo (only CONFIG_ACTION_REMOVE supported)\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  if (ctxt_pP->module_id) {
+    LOG_E(RLC, "%s:%d:%s: todo (only module_id 0 supported)\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  if (rb_idP < 1 || rb_idP > max_rb_id) {
+    LOG_E(RLC, "%s:%d:%s: bad rb_id (%d) (is_srb %d)\n", __FILE__, __LINE__, __FUNCTION__, (int)rb_idP, srb_flagP);
+    exit(1);
+  }
+
+  rlc_manager_lock(rlc_ue_manager);
+  LOG_D(RLC, "%s:%d:%s: remove rb %d (is_srb %d) for UE %d\n", __FILE__, __LINE__, __FUNCTION__, (int)rb_idP, srb_flagP, ctxt_pP->rnti);
+  ue = rlc_manager_get_ue(rlc_ue_manager, ctxt_pP->rnti);
+
+  /* delete the entity, if any, and free its slot */
+  if (srb_flagP) {
+    if (ue->srb[rb_idP-1] == NULL) {
+      LOG_W(RLC, "removing non allocated SRB %d, do nothing\n", (int)rb_idP);
+    } else {
+      ue->srb[rb_idP-1]->delete(ue->srb[rb_idP-1]);
+      ue->srb[rb_idP-1] = NULL;
+    }
+  } else {
+    if (ue->drb[rb_idP-1] == NULL) {
+      LOG_W(RLC, "removing non allocated DRB %d, do nothing\n", (int)rb_idP);
+    } else {
+      ue->drb[rb_idP-1]->delete(ue->drb[rb_idP-1]);
+      ue->drb[rb_idP-1] = NULL;
+    }
+  }
+
+  /* remove UE if it has no more RB configured */
+  for (k = 0; k < 2 && !rb_left; k++)
+    rb_left = ue->srb[k] != NULL;
+  for (k = 0; k < 5 && !rb_left; k++)
+    rb_left = ue->drb[k] != NULL;
+  if (!rb_left)
+    rlc_manager_remove_ue(rlc_ue_manager, ctxt_pP->rnti);
+
+  rlc_manager_unlock(rlc_ue_manager);
+  return RLC_OP_STATUS_OK;
+}
+
+/* Does nothing: both callbacks are ignored by this function. */
+void rrc_rlc_register_rrc (rrc_data_ind_cb_t rrc_data_indP, rrc_data_conf_cb_t rrc_data_confP)
+{
+  (void)rrc_data_indP;
+  (void)rrc_data_confP;
+}
+
+/*
+ * Remove the UE identified by x->rnti from the UE manager (done with the
+ * manager locked). Always returns RLC_OP_STATUS_OK.
+ */
+rlc_op_status_t rrc_rlc_remove_ue (const protocol_ctxt_t* const x)
+{
+  const int rnti = x->rnti;
+
+  LOG_D(RLC, "%s:%d:%s: remove UE %d\n", __FILE__, __LINE__, __FUNCTION__, rnti);
+
+  rlc_manager_lock(rlc_ue_manager);
+  rlc_manager_remove_ue(rlc_ue_manager, rnti);
+  rlc_manager_unlock(rlc_ue_manager);
+
+  return RLC_OP_STATUS_OK;
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_pdu.c b/openair2/LAYER2/rlc_v2/rlc_pdu.c
new file mode 100644
index 0000000000000000000000000000000000000000..c55e2d9c3c54bcd6b7415146f9688f8cc500699c
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_pdu.c
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc_pdu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "LOG/log.h"
+
+/**************************************************************************/
+/* RX PDU segment and segment list                                        */
+/**************************************************************************/
+
+/*
+ * Allocate an RX PDU segment and fill it with the arguments. The 'size'
+ * bytes found at 'data' are copied into a buffer owned by the segment;
+ * 'next' is set to NULL. Release with rlc_rx_free_pdu_segment().
+ * Running out of memory is fatal (error logged, exit(1)).
+ */
+rlc_rx_pdu_segment_t *rlc_rx_new_pdu_segment(int sn, int so, int size,
+    int is_last, char *data, int data_offset)
+{
+  rlc_rx_pdu_segment_t *seg;
+  char                 *copy;
+
+  seg = malloc(sizeof(*seg));
+  if (seg == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  copy = malloc(size);
+  if (copy == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  memcpy(copy, data, size);
+
+  seg->sn          = sn;
+  seg->so          = so;
+  seg->size        = size;
+  seg->is_last     = is_last;
+  seg->data        = copy;
+  seg->data_offset = data_offset;
+  seg->next        = NULL;
+
+  return seg;
+}
+
+/*
+ * Release a segment created by rlc_rx_new_pdu_segment() together with its
+ * data buffer. Passing NULL is allowed and does nothing, as with free().
+ */
+void rlc_rx_free_pdu_segment(rlc_rx_pdu_segment_t *pdu_segment)
+{
+  if (pdu_segment == NULL)
+    return;
+
+  free(pdu_segment->data);
+  free(pdu_segment);
+}
+
+/*
+ * Insert 'pdu_segment' into the ordered list 'list' and return the new
+ * head of the list. Order is by 'sn' (as decided by 'sn_compare', called
+ * with 'sn_compare_data' as first argument); for equal 'sn' order is by
+ * 'so'. The segment is put before the first element found to come after it.
+ */
+rlc_rx_pdu_segment_t *rlc_rx_pdu_segment_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_rx_pdu_segment_t *list, rlc_rx_pdu_segment_t *pdu_segment)
+{
+  /* 'link' is the pointer to update to insert before 'node' */
+  rlc_rx_pdu_segment_t **link = &list;
+  rlc_rx_pdu_segment_t *node;
+
+  for (node = *link; node != NULL; node = *link) {
+    /* stop at the first element that must come after 'pdu_segment' */
+    if (sn_compare(sn_compare_data, node->sn, pdu_segment->sn) > 0)
+      break;
+    if (node->sn == pdu_segment->sn && node->so > pdu_segment->so)
+      break;
+    link = &node->next;
+  }
+
+  *link = pdu_segment;
+  pdu_segment->next = node;
+
+  return list;
+}
+
+/**************************************************************************/
+/* TX PDU management                                                      */
+/**************************************************************************/
+
+/*
+ * Allocate a TX PDU with all fields set to zero except 'retx_count' which
+ * starts at -1. Running out of memory is fatal (error logged, exit(1)).
+ */
+rlc_tx_pdu_segment_t *rlc_tx_new_pdu(void)
+{
+  rlc_tx_pdu_segment_t *pdu;
+
+  pdu = calloc(1, sizeof(*pdu));
+  if (pdu != NULL) {
+    pdu->retx_count = -1;
+    return pdu;
+  }
+
+  LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+  exit(1);
+}
+
+/* Release the memory of a TX PDU; nothing else is freed. */
+void rlc_tx_free_pdu(rlc_tx_pdu_segment_t *pdu)
+{
+  free(pdu);
+}
+
+/*
+ * Put 'pdu' after the last element of 'list' and return the head of the
+ * list (which is 'pdu' itself when 'list' is NULL). 'pdu->next' is left
+ * as it is.
+ */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_append(rlc_tx_pdu_segment_t *list,
+    rlc_tx_pdu_segment_t *pdu)
+{
+  /* walk to the 'next' pointer that terminates the list */
+  rlc_tx_pdu_segment_t **tail = &list;
+
+  while (*tail != NULL)
+    tail = &(*tail)->next;
+
+  *tail = pdu;
+
+  return list;
+}
+
+/*
+ * Insert 'pdu_segment' into the ordered list 'list' and return the new
+ * head of the list. Order is by 'sn' (as decided by 'sn_compare', called
+ * with 'sn_compare_data' as first argument); for equal 'sn' order is by
+ * 'so'. The segment is put before the first element found to come after it.
+ */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_tx_pdu_segment_t *list, rlc_tx_pdu_segment_t *pdu_segment)
+{
+  /* 'link' is the pointer to update to insert before 'node' */
+  rlc_tx_pdu_segment_t **link = &list;
+  rlc_tx_pdu_segment_t *node;
+
+  for (node = *link; node != NULL; node = *link) {
+    /* stop at the first element that must come after 'pdu_segment' */
+    if (sn_compare(sn_compare_data, node->sn, pdu_segment->sn) > 0)
+      break;
+    if (node->sn == pdu_segment->sn && node->so > pdu_segment->so)
+      break;
+    link = &node->next;
+  }
+
+  *link = pdu_segment;
+  pdu_segment->next = node;
+
+  return list;
+}
+
+/**************************************************************************/
+/* PDU decoder                                                            */
+/**************************************************************************/
+
+/*
+ * Prepare 'decoder' to read the 'size' bytes of 'buffer' from the first
+ * bit of the first byte, with the error flag cleared.
+ */
+void rlc_pdu_decoder_init(rlc_pdu_decoder_t *decoder, char *buffer, int size)
+{
+  /* data to decode */
+  decoder->buffer = buffer;
+  decoder->size   = size;
+
+  /* read position and status */
+  decoder->byte  = 0;
+  decoder->bit   = 0;
+  decoder->error = 0;
+}
+
+/*
+ * Return the next bit (0 or 1) of the buffer, most significant bit of each
+ * byte first, and advance the read position. When all the bytes have been
+ * consumed the error flag of the decoder is set and 0 is returned.
+ */
+static int get_bit(rlc_pdu_decoder_t *decoder)
+{
+  const int pos = decoder->byte;
+  int       shift;
+  int       value;
+
+  if (pos >= decoder->size) {
+    decoder->error = 1;
+    return 0;
+  }
+
+  shift = 7 - decoder->bit;
+  value = (decoder->buffer[pos] >> shift) & 1;
+
+  /* move to the next bit, wrapping to the next byte after the 8th one */
+  if (decoder->bit == 7) {
+    decoder->bit = 0;
+    decoder->byte++;
+  } else {
+    decoder->bit++;
+  }
+
+  return value;
+}
+
+/*
+ * Read 'count' bits (31 at most, more is fatal) and return them as an
+ * integer, the first bit read being the most significant one.
+ * Returns -1 as soon as the decoder is in error.
+ */
+int rlc_pdu_decoder_get_bits(rlc_pdu_decoder_t *decoder, int count)
+{
+  int value = 0;
+  int remaining;
+
+  if (count > 31) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  for (remaining = count; remaining > 0; remaining--) {
+    value = (value << 1) | get_bit(decoder);
+    if (decoder->error)
+      return -1;
+  }
+
+  return value;
+}
+
+/*
+ * Skip the remaining bits of a partially read byte so that the next read
+ * starts at the beginning of the following byte. Does nothing when the
+ * read position is already at the start of a byte.
+ */
+void rlc_pdu_decoder_align(rlc_pdu_decoder_t *decoder)
+{
+  if (decoder->bit == 0)
+    return;
+
+  decoder->byte++;
+  decoder->bit = 0;
+}
+
+/**************************************************************************/
+/* PDU encoder                                                            */
+/**************************************************************************/
+
+/*
+ * Prepare 'encoder' to write into the 'size' bytes of 'buffer', starting
+ * at the first bit of the first byte. The buffer content is not modified.
+ */
+void rlc_pdu_encoder_init(rlc_pdu_encoder_t *encoder, char *buffer, int size)
+{
+  /* destination */
+  encoder->buffer = buffer;
+  encoder->size   = size;
+
+  /* write position */
+  encoder->byte = 0;
+  encoder->bit  = 0;
+}
+
+/*
+ * Append one bit (1 if 'bit' is not zero, 0 otherwise) to the buffer and
+ * advance the write position. The bit is shifted into the current byte
+ * from the right, so that after 8 calls the first bit written is the most
+ * significant one and whatever the byte contained before is gone.
+ * Writing when the buffer is full is fatal (error logged, exit(1)).
+ */
+static void put_bit(rlc_pdu_encoder_t *encoder, int bit)
+{
+  unsigned char cur;
+
+  /* '>=' rather than '==': never write past the end of the buffer */
+  if (encoder->byte >= encoder->size) {
+    LOG_E(RLC, "%s:%d:%s: fatal, buffer full\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* shift as unsigned char: left-shifting a negative value (possible when
+   * plain char is signed and the top bit of the byte is set) is undefined
+   */
+  cur = (unsigned char)encoder->buffer[encoder->byte];
+  cur = (unsigned char)((cur << 1) | (bit ? 1 : 0));
+  encoder->buffer[encoder->byte] = (char)cur;
+
+  encoder->bit++;
+  if (encoder->bit == 8) {
+    encoder->bit = 0;
+    encoder->byte++;
+  }
+}
+
+/*
+ * Append the 'count' least significant bits of 'value' to the buffer, most
+ * significant of them first. 'count' must be 31 at most (more is fatal);
+ * nothing is written when 'count' is zero or negative.
+ */
+void rlc_pdu_encoder_put_bits(rlc_pdu_encoder_t *encoder, int value, int count)
+{
+  unsigned int mask;
+  int i;
+
+  if (count > 31) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* nothing to write; also avoids the undefined shift by (count - 1) < 0 */
+  if (count <= 0)
+    return;
+
+  mask = 1u << (count - 1);
+  for (i = 0; i < count; i++, mask >>= 1)
+    put_bit(encoder, (value & mask) != 0);
+}
+
+/*
+ * Complete the byte being written with 0 bits so that the next write
+ * starts at the beginning of a byte. Does nothing when the write position
+ * is already at the start of a byte.
+ */
+void rlc_pdu_encoder_align(rlc_pdu_encoder_t *encoder)
+{
+  while (encoder->bit != 0) {
+    put_bit(encoder, 0);
+  }
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_pdu.h b/openair2/LAYER2/rlc_v2/rlc_pdu.h
new file mode 100644
index 0000000000000000000000000000000000000000..dbffe9f3cbff92fe706985af5bfcc5156b2f52b9
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_pdu.h
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_PDU_H_
+#define _RLC_PDU_H_
+
+/**************************************************************************/
+/* RX PDU segment and segment list                                        */
+/**************************************************************************/
+
+/* A received PDU segment, element of a list ordered by 'sn' then 'so'
+ * (see rlc_rx_pdu_segment_list_add()).
+ */
+typedef struct rlc_rx_pdu_segment_t {
+  int sn;            /* first ordering key of the list */
+  int so;            /* second ordering key; presumably the segment offset - confirm */
+  int size;          /* number of bytes stored in 'data' */
+  int is_last;
+  char *data;        /* private copy of the bytes, freed with the segment */
+  int data_offset;
+  struct rlc_rx_pdu_segment_t *next;   /* next element of the list or NULL */
+} rlc_rx_pdu_segment_t;
+
+rlc_rx_pdu_segment_t *rlc_rx_new_pdu_segment(int sn, int so, int size,
+    int is_last, char *data, int data_offset);
+
+void rlc_rx_free_pdu_segment(rlc_rx_pdu_segment_t *pdu_segment);
+
+rlc_rx_pdu_segment_t *rlc_rx_pdu_segment_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_rx_pdu_segment_t *list, rlc_rx_pdu_segment_t *pdu_segment);
+
+/**************************************************************************/
+/* TX PDU management                                                      */
+/**************************************************************************/
+
+/* Description of a transmitted PDU (or PDU segment), element of a list.
+ * No data is stored here, only a reference to the SDU where it starts.
+ */
+typedef struct rlc_tx_pdu_segment_t {
+  int       sn;                /* first ordering key in rlc_tx_pdu_list_add() */
+  void      *start_sdu;        /* real type is rlc_sdu_t * */
+  int       sdu_start_byte;    /* starting byte in 'start_sdu' */
+  int       so;                /* starting byte of the segment in full PDU */
+  int       data_size;         /* number of data bytes (exclude header) */
+  int       is_segment;
+  int       is_last;
+  int       retx_count;        /* set to -1 by rlc_tx_new_pdu() */
+  struct rlc_tx_pdu_segment_t *next;   /* next element of the list or NULL */
+} rlc_tx_pdu_segment_t;
+
+rlc_tx_pdu_segment_t *rlc_tx_new_pdu(void);
+void rlc_tx_free_pdu(rlc_tx_pdu_segment_t *pdu);
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_append(rlc_tx_pdu_segment_t *list,
+    rlc_tx_pdu_segment_t *pdu);
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_tx_pdu_segment_t *list, rlc_tx_pdu_segment_t *pdu_segment);
+
+/**************************************************************************/
+/* PDU decoder                                                            */
+/**************************************************************************/
+
+/* State of a bit-level reader over a byte buffer
+ * (set up by rlc_pdu_decoder_init()).
+ */
+typedef struct {
+  int error;          /* set to 1 when reading past the end of 'buffer' */
+  int byte;           /* next byte to decode */
+  int bit;            /* next bit in next byte to decode */
+  char *buffer;       /* data to decode, not copied */
+  int size;           /* number of bytes in 'buffer' */
+} rlc_pdu_decoder_t;
+
+void rlc_pdu_decoder_init(rlc_pdu_decoder_t *decoder, char *buffer, int size);
+
+#define rlc_pdu_decoder_in_error(d) ((d)->error == 1)
+
+int rlc_pdu_decoder_get_bits(rlc_pdu_decoder_t *decoder, int count);
+
+void rlc_pdu_decoder_align(rlc_pdu_decoder_t *decoder);
+
+/**************************************************************************/
+/* PDU encoder                                                            */
+/**************************************************************************/
+
+/* State of a bit-level writer over a byte buffer
+ * (set up by rlc_pdu_encoder_init()).
+ */
+typedef struct {
+  int byte;           /* next byte to encode */
+  int bit;            /* next bit in next byte to encode */
+  char *buffer;       /* where the encoded bytes are written */
+  int size;           /* number of bytes available in 'buffer' */
+} rlc_pdu_encoder_t;
+
+void rlc_pdu_encoder_init(rlc_pdu_encoder_t *encoder, char *buffer, int size);
+
+void rlc_pdu_encoder_put_bits(rlc_pdu_encoder_t *encoder, int value, int count);
+
+void rlc_pdu_encoder_align(rlc_pdu_encoder_t *encoder);
+
+#endif /* _RLC_PDU_H_ */
diff --git a/openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c b/openair2/LAYER2/rlc_v2/rlc_sdu.c
similarity index 56%
rename from openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c
rename to openair2/LAYER2/rlc_v2/rlc_sdu.c
index d26a52f157c0e051cbda07c296135c18b9e38247..16465a9ff13bede7314c4ee0c9eef757242944c5 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c
+++ b/openair2/LAYER2/rlc_v2/rlc_sdu.c
@@ -19,44 +19,50 @@
  *      contact@openairinterface.org
  */
 
-/*! \file PHY/NR_TRANSPORT/nr_dci_tools_common.c
- * \brief
- * \author
- * \date 2018
- * \version 0.1
- * \company Eurecom
- * \email:
- * \note
- * \warning
- */
+#include "rlc_sdu.h"
+
+#include <stdlib.h>
+#include <string.h>
 
-#include "nr_dci.h"
+#include "LOG/log.h"
 
-//#define DEBUG_FILL_DCI
+/*
+ * Allocate an SDU holding a private copy of the 'size' bytes of 'buffer'
+ * and remembering 'upper_layer_id'. All the other fields are zero.
+ * Release with rlc_free_sdu(). Running out of memory is fatal.
+ */
+rlc_sdu_t *rlc_new_sdu(char *buffer, int size, int upper_layer_id)
+{
+  rlc_sdu_t *ret = calloc(1, sizeof(rlc_sdu_t));
+  if (ret == NULL)
+    goto oom;
 
-#include "nr_dlsch.h"
+  ret->upper_layer_id = upper_layer_id;
 
+  ret->data = malloc(size);
+  if (ret->data == NULL)
+    goto oom;
 
-void get_coreset_rballoc(uint8_t *FreqDomainResource,int *n_rb,int *rb_offset) {
+  memcpy(ret->data, buffer, size);
 
-  uint8_t count=0, start=0, start_set=0;
+  ret->size = size;
 
-  uint64_t bitmap = (((uint64_t)FreqDomainResource[0])<<37)|
-    (((uint64_t)FreqDomainResource[1])<<29)|
-    (((uint64_t)FreqDomainResource[2])<<21)|
-    (((uint64_t)FreqDomainResource[3])<<13)|
-    (((uint64_t)FreqDomainResource[4])<<5)|
-    (((uint64_t)FreqDomainResource[5])>>3);
-  
-  for (int i=0; i<45; i++)
-    if ((bitmap>>(44-i))&1) {
-      count++;
-      if (!start_set) {
-        start = i;
-        start_set = 1;
-      }
-    }
-  *rb_offset = 6*start;
-  *n_rb = 6*count;
+  return ret;
+
+  /* common exit for both allocation failures; nothing is freed because
+   * the process ends here
+   */
+oom:
+  LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__,  __FUNCTION__);
+  exit(1);
 }
 
+/*
+ * Release an SDU created by rlc_new_sdu() together with its data buffer.
+ * Passing NULL is allowed and does nothing, as with free().
+ */
+void rlc_free_sdu(rlc_sdu_t *sdu)
+{
+  if (sdu == NULL)
+    return;
+
+  free(sdu->data);
+  free(sdu);
+}
+
+/*
+ * Append 'sdu' to the list whose first element is '*list' and whose last
+ * element is '*end'; both are updated as needed ('*list' only when the
+ * list was empty). 'sdu->next' is left as it is.
+ */
+void rlc_sdu_list_add(rlc_sdu_t **list, rlc_sdu_t **end, rlc_sdu_t *sdu)
+{
+  if (*list != NULL) {
+    /* non-empty list: link after the current last element */
+    (*end)->next = sdu;
+  } else {
+    /* empty list: 'sdu' becomes the first element */
+    *list = sdu;
+  }
+
+  /* in both cases 'sdu' is the new last element */
+  *end = sdu;
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_sdu.h b/openair2/LAYER2/rlc_v2/rlc_sdu.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c678956ee47a1286db4a2838a1ac96cc1129e72
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_sdu.h
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_SDU_H_
+#define _RLC_SDU_H_
+
+/* An SDU to transmit, element of a singly linked list
+ * (see rlc_sdu_list_add()).
+ */
+typedef struct rlc_sdu_t {
+  int upper_layer_id;       /* value given to rlc_new_sdu(), stored as is */
+  char *data;               /* private copy of the SDU bytes, freed by rlc_free_sdu() */
+  int size;                 /* number of bytes in 'data' */
+  /* next_byte indicates the starting byte to use to construct a new PDU */
+  int next_byte;
+  int acked_bytes;          /* presumably the number of bytes acknowledged so far - confirm */
+  struct rlc_sdu_t *next;   /* next SDU of the list */
+} rlc_sdu_t;
+
+rlc_sdu_t *rlc_new_sdu(char *buffer, int size, int upper_layer_id);
+void rlc_free_sdu(rlc_sdu_t *sdu);
+void rlc_sdu_list_add(rlc_sdu_t **list, rlc_sdu_t **end, rlc_sdu_t *sdu);
+
+#endif /* _RLC_SDU_H_ */
diff --git a/openair2/LAYER2/rlc_v2/rlc_ue_manager.c b/openair2/LAYER2/rlc_v2/rlc_ue_manager.c
new file mode 100644
index 0000000000000000000000000000000000000000..1fd366fda1523a73c35aa91d9127ea66e51c9ffd
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_ue_manager.c
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#include "rlc_ue_manager.h"
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "LOG/log.h"
+
+/* Private layout behind the opaque rlc_ue_manager_t handle. */
+typedef struct {
+  /* taken/released by rlc_manager_lock() / rlc_manager_unlock() */
+  pthread_mutex_t lock;
+  /* array of 'ue_count' UE pointers, resized with realloc() when a UE is
+   * added or removed; NULL when there is no UE
+   */
+  rlc_ue_t        **ue_list;
+  int             ue_count;
+  /* value given to new_rlc_ue_manager(), returned as is by
+   * rlc_manager_get_enb_flag()
+   */
+  int             enb_flag;
+} rlc_ue_manager_internal_t;
+
+/* Allocate and initialize an empty UE manager.
+ * 'enb_flag' is stored as is and can be read back with
+ * rlc_manager_get_enb_flag().
+ * Never returns NULL: exits on allocation failure, aborts if the mutex
+ * cannot be initialized.
+ */
+rlc_ue_manager_t *new_rlc_ue_manager(int enb_flag)
+{
+  rlc_ue_manager_internal_t *manager = calloc(1, sizeof(*manager));
+
+  if (!manager) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  manager->enb_flag = enb_flag;
+
+  if (pthread_mutex_init(&manager->lock, NULL) != 0)
+    abort();
+
+  return manager;
+}
+
+/* Return the 'enb_flag' the manager was created with. */
+int rlc_manager_get_enb_flag(rlc_ue_manager_t *_m)
+{
+  return ((rlc_ue_manager_internal_t *)_m)->enb_flag;
+}
+
+void rlc_manager_lock(rlc_ue_manager_t *_m)
+{
+  rlc_ue_manager_internal_t *m = _m;
+  if (pthread_mutex_lock(&m->lock)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+}
+
+void rlc_manager_unlock(rlc_ue_manager_t *_m)
+{
+  rlc_ue_manager_internal_t *m = _m;
+  if (pthread_mutex_unlock(&m->lock)) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+}
+
+/* must be called with lock acquired */
+/* Return the UE whose RNTI is 'rnti'. If the manager does not know this
+ * RNTI yet, a zero-initialized UE is created, appended to the list and
+ * returned. Never returns NULL (exits when out of memory).
+ */
+rlc_ue_t *rlc_manager_get_ue(rlc_ue_manager_t *_m, int rnti)
+{
+  /* TODO: optimize (linear search) */
+  rlc_ue_manager_internal_t *manager = _m;
+  rlc_ue_t *new_ue;
+  int idx = 0;
+
+  /* look for an existing UE first */
+  while (idx < manager->ue_count) {
+    if (manager->ue_list[idx]->rnti == rnti)
+      return manager->ue_list[idx];
+    idx++;
+  }
+
+  LOG_D(RLC, "%s:%d:%s: new UE %d\n", __FILE__, __LINE__, __FUNCTION__, rnti);
+
+  /* not found: grow the list by one slot */
+  manager->ue_count++;
+  manager->ue_list = realloc(manager->ue_list,
+                             sizeof(rlc_ue_t *) * manager->ue_count);
+  if (manager->ue_list == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  new_ue = calloc(1, sizeof(rlc_ue_t));
+  if (new_ue == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  new_ue->rnti = rnti;
+  manager->ue_list[manager->ue_count - 1] = new_ue;
+
+  return new_ue;
+}
+
+/* must be called with lock acquired */
+/* Remove the UE whose RNTI is 'rnti' from the manager.
+ * Every SRB/DRB entity still attached to the UE is deleted through its
+ * 'delete' callback, then the UE itself is freed and its slot is removed
+ * from the list. If no UE has this RNTI, a message is logged and nothing
+ * else happens.
+ */
+void rlc_manager_remove_ue(rlc_ue_manager_t *_m, int rnti)
+{
+  rlc_ue_manager_internal_t *m = _m;
+  rlc_ue_t *ue;
+  rlc_ue_t **shrunk;
+  int i;
+  int j;
+
+  for (i = 0; i < m->ue_count; i++)
+    if (m->ue_list[i]->rnti == rnti)
+      break;
+
+  if (i == m->ue_count) {
+    LOG_D(RLC, "%s:%d:%s: warning: ue %d not found\n",
+          __FILE__, __LINE__, __FUNCTION__,
+          rnti);
+    return;
+  }
+
+  ue = m->ue_list[i];
+
+  /* loop bounds come from the arrays themselves so that they cannot get
+   * out of sync with the definition of rlc_ue_t
+   */
+  for (j = 0; j < (int)(sizeof(ue->srb) / sizeof(ue->srb[0])); j++)
+    if (ue->srb[j] != NULL)
+      ue->srb[j]->delete(ue->srb[j]);
+
+  for (j = 0; j < (int)(sizeof(ue->drb) / sizeof(ue->drb[0])); j++)
+    if (ue->drb[j] != NULL)
+      ue->drb[j]->delete(ue->drb[j]);
+
+  free(ue);
+
+  m->ue_count--;
+  if (m->ue_count == 0) {
+    free(m->ue_list);
+    m->ue_list = NULL;
+    return;
+  }
+
+  /* close the gap left by the removed UE */
+  memmove(&m->ue_list[i], &m->ue_list[i+1],
+          (m->ue_count - i) * sizeof(rlc_ue_t *));
+
+  /* Shrink the array. When realloc() fails the original block is left
+   * untouched and still holds the 'ue_count' valid entries, so we simply
+   * keep using it instead of losing the pointer and killing the process.
+   */
+  shrunk = realloc(m->ue_list, m->ue_count * sizeof(rlc_ue_t *));
+  if (shrunk == NULL) {
+    LOG_W(RLC, "%s:%d:%s: could not shrink UE list, keeping old buffer\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    return;
+  }
+  m->ue_list = shrunk;
+}
+
+/* must be called with lock acquired */
+/* Attach 'entity' to 'ue' as SRB number 'srb_id' (1 or 2).
+ * A bad ID or an already occupied SRB is fatal (exit).
+ */
+void rlc_ue_add_srb_rlc_entity(rlc_ue_t *ue, int srb_id, rlc_entity_t *entity)
+{
+  rlc_entity_t **slot;
+
+  if (!(srb_id >= 1 && srb_id <= 2)) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad srb id\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* IDs start at 1, array indices at 0 */
+  slot = &ue->srb[srb_id - 1];
+
+  if (*slot != NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal, srb already present\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  *slot = entity;
+}
+
+/* must be called with lock acquired */
+/* Attach 'entity' to 'ue' as DRB number 'drb_id' (1 to 5).
+ * A bad ID or an already occupied DRB is fatal (exit).
+ */
+void rlc_ue_add_drb_rlc_entity(rlc_ue_t *ue, int drb_id, rlc_entity_t *entity)
+{
+  rlc_entity_t **slot;
+
+  if (!(drb_id >= 1 && drb_id <= 5)) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad drb id\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* IDs start at 1, array indices at 0 */
+  slot = &ue->drb[drb_id - 1];
+
+  if (*slot != NULL) {
+    LOG_E(RLC, "%s:%d:%s: fatal, drb already present\n",
+          __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  *slot = entity;
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_ue_manager.h b/openair2/LAYER2/rlc_v2/rlc_ue_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..81bb0d0fa1ef767a5d2721872e945738fcf2ff40
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_ue_manager.h
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1  (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ *      contact@openairinterface.org
+ */
+
+#ifndef _RLC_UE_MANAGER_H_
+#define _RLC_UE_MANAGER_H_
+
+#include "rlc_entity.h"
+
+/* opaque handle: the actual structure is private to rlc_ue_manager.c */
+typedef void rlc_ue_manager_t;
+
+/* RLC context of one UE, identified by its RNTI. */
+typedef struct rlc_ue_t {
+  int rnti;
+  int module_id;   /* necessary for the L2 simulator - not clean, to revise */
+  /* RLC entities of SRB 1..2, stored at index 'srb_id - 1'; NULL if unset */
+  rlc_entity_t *srb[2];
+  /* RLC entities of DRB 1..5, stored at index 'drb_id - 1'; NULL if unset */
+  rlc_entity_t *drb[5];
+} rlc_ue_t;
+
+/***********************************************************************/
+/* manager functions                                                   */
+/***********************************************************************/
+
+/* create an empty manager; exits the program on allocation failure */
+rlc_ue_manager_t *new_rlc_ue_manager(int enb_flag);
+
+/* return the 'enb_flag' given at creation */
+int rlc_manager_get_enb_flag(rlc_ue_manager_t *m);
+
+/* acquire/release the manager's mutex; errors are fatal */
+void rlc_manager_lock(rlc_ue_manager_t *m);
+void rlc_manager_unlock(rlc_ue_manager_t *m);
+
+/* both must be called with the lock acquired;
+ * rlc_manager_get_ue creates the UE if 'rnti' is not known yet,
+ * rlc_manager_remove_ue deletes the UE's RLC entities and frees the UE
+ */
+rlc_ue_t *rlc_manager_get_ue(rlc_ue_manager_t *m, int rnti);
+void rlc_manager_remove_ue(rlc_ue_manager_t *m, int rnti);
+
+/***********************************************************************/
+/* ue functions                                                        */
+/***********************************************************************/
+
+/* both must be called with the lock acquired;
+ * srb_id is 1..2, drb_id is 1..5; a bad ID or an already used ID is fatal
+ */
+void rlc_ue_add_srb_rlc_entity(rlc_ue_t *ue, int srb_id, rlc_entity_t *entity);
+void rlc_ue_add_drb_rlc_entity(rlc_ue_t *ue, int drb_id, rlc_entity_t *entity);
+
+#endif /* _RLC_UE_MANAGER_H_ */
diff --git a/openair2/LAYER2/rlc_v2/tests/LOG/log.h b/openair2/LAYER2/rlc_v2/tests/LOG/log.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c9fcd643cfca036cc81eca221f4a5e818aee685
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/LOG/log.h
@@ -0,0 +1,10 @@
+#ifndef _LOG_H_
+#define _LOG_H_
+
+#include <stdio.h>
+
+/* Minimal logging macros for the tests: the first argument (the
+ * component, eg. RLC) is dropped, the remaining arguments are passed
+ * as is to printf. All levels are printed the same way.
+ */
+#define LOG_E(x, ...) printf(__VA_ARGS__)
+#define LOG_D(x, ...) printf(__VA_ARGS__)
+#define LOG_W(x, ...) printf(__VA_ARGS__)
+
+#endif /* _LOG_H_ */
diff --git a/openair2/LAYER2/rlc_v2/tests/Makefile b/openair2/LAYER2/rlc_v2/tests/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..14bb186d4c2cf78d1405f1afa9ab218e7461b6e3
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/Makefile
@@ -0,0 +1,32 @@
+# Makefile of the RLC v2 tests.
+#   make                   run all the tests (through run_tests.sh)
+#   make all TEST=testXX   build and run the single test testXX
+#   make clean             remove all generated files
+
+CC=gcc
+# -I. is presumably there so that "LOG/log.h" resolves to the stub in ./LOG
+# (TODO confirm)
+CFLAGS=-Wall -g --coverage -I.
+
+# RLC objects under test, compiled from the sources in the parent directory
+LIB=rlc_entity.o rlc_entity_am.o rlc_entity_um.o rlc_pdu.o rlc_sdu.o
+
+# default target
+tests:
+	@./run_tests.sh
+
+# TEST must be set on the command line (eg. TEST=test12)
+all: clean_run $(TEST).run
+
+# run the test, keep only the lines starting with TEST and compare them
+# with the expected output stored compressed in $(TEST).txt.gz
+%.run: $(TEST).bin
+	#valgrind ./$(TEST).bin > $(TEST).run_pre 2> $(TEST).valgrind
+	./$(TEST).bin > $(TEST).run_pre
+	grep ^TEST $(TEST).run_pre > $(TEST).run
+	gunzip -c $(TEST).txt.gz > $(TEST).txt
+	diff -q $(TEST).txt $(TEST).run
+
+$(TEST).bin: $(TEST).o $(LIB)
+	$(CC) $(CFLAGS) -o $@ $^
+
+%.o: ../%.c
+	$(CC) $(CFLAGS) -I.. -c -o $@ $<
+
+# test.c includes the test definition through the TEST macro
+$(TEST).o: test.c
+	$(CC) $(CFLAGS) -c -o $@ $< -DTEST='"$(TEST).h"'
+
+# force the test to be rebuilt and rerun
+clean_run:
+	rm -f $(TEST).run $(TEST).bin $(TEST).o
+
+clean:
+	rm -f *.o *.bin *.run *.run_pre *.gcov *.gcda *.gcno test*.txt a.out \
+		*.valgrind
diff --git a/openair2/LAYER2/rlc_v2/tests/README b/openair2/LAYER2/rlc_v2/tests/README
new file mode 100644
index 0000000000000000000000000000000000000000..db69cd4fa716be83bafe0422c601c7037268f2b4
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/README
@@ -0,0 +1,38 @@
+To run tests, simply type: make
+
+Each test is made of:
+  testXX.h         definition of the test
+  testXX.txt.gz    compressed expected output of the test
+
+At runtime, we generate:
+  testXX.run    actual output of the test
+
+test.c is the test driving program.
+
+Only the output lines of the test program starting with "TEST" are
+stored into testXX.txt and testXX.run.
+
+A test is considered a success if testXX.txt and testXX.run are equal.
+
+Only failed tests are reported.
+
+How to define a new test?
+
+1 - get the ID
+
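+    Look in the file run_tests.sh: the variable test_count gives you
+    the number of existing tests. The ID of your test has to be
+    test_count + 1. Once your test is ready, set test_count to this
+    new ID in run_tests.sh, otherwise your test will not be run.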
+
+2 - create files
+
+    Create the file test<ID>.h containing the test, then generate test<ID>.run
+    by running 'make all TEST=test<ID>' and copy test<ID>.run to test<ID>.txt.
+    Then compress this file (gzip -9 test<ID>.txt). Be sure that the output
+    is correct, of course.
+
+    For the file names, replace <ID> by the actual number of the test.
+    For example, if your test ID is 47, then name the files test47.h and
+    test47.txt. And run 'make all TEST=test47' to generate test47.run.
+
+The available instructions for a test are described at the top of test.c.
diff --git a/openair2/LAYER2/rlc_v2/tests/make_pdu.c b/openair2/LAYER2/rlc_v2/tests/make_pdu.c
new file mode 100644
index 0000000000000000000000000000000000000000..057cc3e36db2e06958969d9d79dc474ea9a9b7bf
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/make_pdu.c
@@ -0,0 +1,29 @@
+/* gcc -Wall make_pdu.c -I.. ../rlc_pdu.c */
+
+#include "rlc_pdu.h"
+#include <stdio.h>
+
+/* Encode a hand-made PDU field by field with the RLC PDU encoder and
+ * print the resulting bytes in hexadecimal on one line.
+ */
+int main(void)
+{
+  enum { OUT_SIZE = 100 };
+  char out[OUT_SIZE];
+  rlc_pdu_encoder_t encoder;
+  int pos = 0;
+
+  rlc_pdu_encoder_init(&encoder, out, OUT_SIZE);
+
+  /* fields, in order: (value, number of bits) */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 1);    /* D/C */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 3);    /* CPT */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 10);   /* ack_sn */
+  rlc_pdu_encoder_put_bits(&encoder, 1, 1);    /* e1 */
+  rlc_pdu_encoder_put_bits(&encoder, 10, 10);  /* nack_sn */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 1);    /* e1 */
+  rlc_pdu_encoder_put_bits(&encoder, 0, 1);    /* e2 */
+
+  rlc_pdu_encoder_align(&encoder);
+
+  while (pos < encoder.byte) {
+    printf(" %2.2x", (unsigned char)encoder.buffer[pos]);
+    pos++;
+  }
+
+  printf("\n");
+
+  return 0;
+}
diff --git a/openair2/LAYER2/rlc_v2/tests/run_tests.sh b/openair2/LAYER2/rlc_v2/tests/run_tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..72feff00363bf3e917a112b2cbbe76bd2b38dec9
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+test_count=45
+
+for i in `seq $test_count`
+do
+  make all TEST=test$i >/dev/null 2>/dev/null
+  if [ $? != 0 ]
+  then
+    echo TEST $i FAILURE
+  fi
+done
diff --git a/openair2/LAYER2/rlc_v2/tests/test.c b/openair2/LAYER2/rlc_v2/tests/test.c
new file mode 100644
index 0000000000000000000000000000000000000000..734e85f1f56cc38abe6226d6e6865aadf0522d03
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test.c
@@ -0,0 +1,433 @@
+#include "../rlc_entity.h"
+#include "../rlc_entity_am.h"
+#include "../rlc_entity_um.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * ENB_AM <rx_maxsize> <tx_maxsize> <t_reordering> <t_status_prohibit>
+ *       <t_poll_retransmit> <poll_pdu> <poll_byte> <max_retx_threshold>
+ *       create the eNB RLC AM entity with given parameters
+ *
+ * UE_AM <rx_maxsize> <tx_maxsize> <t_reordering> <t_status_prohibit>
+ *      <t_poll_retransmit> <poll_pdu> <poll_byte> <max_retx_threshold>
+ *       create the UE RLC AM entity with given parameters
+ *
+ * ENB_UM <rx_maxsize> <tx_maxsize> <t_reordering> <sn_field_length>
+ *     create the eNB RLC UM entity with given parameters
+ *
+ * UE_UM <rx_maxsize> <tx_maxsize> <t_reordering> <sn_field_length>
+ *     create the UE RLC UM entity with given parameters
+ *
+ * TIME <time>
+ *     following actions to be performed at time <time>
+ *     <time> starts at 1
+ *     You must end your test definition with a line 'TIME, -1'.
+ *
+ * ENB_SDU <id> <size>
+ *     send an SDU to eNB with id <id> and size <size>
+ *     the SDU is [00 01 ... ff 00 01 ...]
+ *     (ie. the first SDU starts with byte 00, then we increment for each
+ *     byte, wrapping after ff; the counter is not reset between SDUs)
+ *
+ * UE_SDU <id> <size>
+ *     same as ENB_SDU but the SDU is sent to the UE
+ *
+ * ENB_PDU <size> <'size' bytes>
+ *     send a custom PDU from eNB to UE (eNB does not see this PDU at all)
+ *
+ * UE_PDU <size> <'size' bytes>
+ *     send a custom PDU from UE to eNB (UE does not see this PDU at all)
+ *
+ * ENB_PDU_SIZE <size>
+ *     set 'enb_pdu_size'
+ *
+ * UE_PDU_SIZE <size>
+ *     set 'ue_pdu_size'
+ *
+ * ENB_RECV_FAILS <fails>
+ *     set the 'enb_recv_fails' flag to <fails>
+ *     (1: recv will fail, 0: recv will succeed)
+ *
+ * UE_RECV_FAILS <fails>
+ *     same as ENB_RECV_FAILS but for 'ue_recv_fails'
+ *
+ * MUST_FAIL
+ *     to be used as first command after the first TIME to indicate
+ *     that the test must fail (ie. exit with non zero, crash not allowed)
+ *
+ * ENB_BUFFER_STATUS
+ *     call buffer_status for eNB and print result
+ *
+ * UE_BUFFER_STATUS
+ *     call buffer_status for UE and print result
+ *
+ * ENB_DISCARD_SDU <sdu ID>
+ *     discards given SDU
+ *
+ * UE_DISCARD_SDU <sdu ID>
+ *     discards given SDU
+ *
+ * RE_ESTABLISH
+ *     re-establish both eNB and UE
+ */
+
+/* Instructions usable in a test definition (see the description of each
+ * one in the comment above). A test is a flat list of ints: an action
+ * followed by its arguments, then the next action, and so on.
+ */
+enum action {
+  ENB_AM, UE_AM,
+  ENB_UM, UE_UM,
+  TIME, ENB_SDU, UE_SDU, ENB_PDU, UE_PDU,
+  ENB_PDU_SIZE, UE_PDU_SIZE,
+  ENB_RECV_FAILS, UE_RECV_FAILS,
+  MUST_FAIL,
+  ENB_BUFFER_STATUS, UE_BUFFER_STATUS,
+  ENB_DISCARD_SDU, UE_DISCARD_SDU,
+  RE_ESTABLISH
+};
+
+/* The test to run: content of the file named by the TEST macro
+ * (set by the Makefile with -DTEST='"testXX.h"').
+ */
+int test[] = {
+/* TEST is defined at compilation time */
+#include TEST
+};
+
+/* deliver_sdu callback of the eNB AM entity: print the SDU (current time
+ * of the entity, size, bytes in hexadecimal) on a "TEST: ENB:" line
+ */
+void deliver_sdu_enb_am(void *deliver_sdu_data, struct rlc_entity_t *_entity,
+                        char *buf, int size)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+  const unsigned char *bytes = (const unsigned char *)buf;
+  int k;
+
+  printf("TEST: ENB: %"PRIu64": deliver SDU size %d [", am->t_current, size);
+  for (k = 0; k < size; k++)
+    printf(" %2.2x", bytes[k]);
+  printf("]\n");
+}
+
+/* deliver_sdu callback of the eNB UM entity: print the SDU (current time
+ * of the entity, size, bytes in hexadecimal) on a "TEST: ENB:" line
+ */
+void deliver_sdu_enb_um(void *deliver_sdu_data, struct rlc_entity_t *_entity,
+                        char *buf, int size)
+{
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+  const unsigned char *bytes = (const unsigned char *)buf;
+  int k;
+
+  printf("TEST: ENB: %"PRIu64": deliver SDU size %d [", um->t_current, size);
+  for (k = 0; k < size; k++)
+    printf(" %2.2x", bytes[k]);
+  printf("]\n");
+}
+
+/* successful_delivery callback of the eNB AM entity: report the SDU ID */
+void successful_delivery_enb(void *successful_delivery_data,
+                             rlc_entity_t *_entity, int sdu_id)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  printf("TEST: ENB: %"PRIu64": SDU %d was successfully delivered.\n",
+         am->t_current,
+         sdu_id);
+}
+
+/* max_retx_reached callback of the eNB AM entity: report the radio link
+ * failure and terminate the test with exit status 1
+ */
+void max_retx_reached_enb(void *max_retx_reached_data, rlc_entity_t *_entity)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  printf("TEST: ENB: %"PRIu64": max RETX reached! radio link failure!\n",
+         am->t_current);
+
+  exit(1);
+}
+
+/* deliver_sdu callback of the UE AM entity: print the SDU (current time
+ * of the entity, size, bytes in hexadecimal) on a "TEST: UE:" line
+ */
+void deliver_sdu_ue_am(void *deliver_sdu_data, struct rlc_entity_t *_entity,
+                       char *buf, int size)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+  const unsigned char *bytes = (const unsigned char *)buf;
+  int k;
+
+  printf("TEST: UE: %"PRIu64": deliver SDU size %d [", am->t_current, size);
+  for (k = 0; k < size; k++)
+    printf(" %2.2x", bytes[k]);
+  printf("]\n");
+}
+
+/* deliver_sdu callback of the UE UM entity: print the SDU (current time
+ * of the entity, size, bytes in hexadecimal) on a "TEST: UE:" line
+ */
+void deliver_sdu_ue_um(void *deliver_sdu_data, struct rlc_entity_t *_entity,
+                       char *buf, int size)
+{
+  rlc_entity_um_t *um = (rlc_entity_um_t *)_entity;
+  const unsigned char *bytes = (const unsigned char *)buf;
+  int k;
+
+  printf("TEST: UE: %"PRIu64": deliver SDU size %d [", um->t_current, size);
+  for (k = 0; k < size; k++)
+    printf(" %2.2x", bytes[k]);
+  printf("]\n");
+}
+
+/* successful_delivery callback of the UE AM entity: report the SDU ID */
+void successful_delivery_ue(void *successful_delivery_data,
+                            rlc_entity_t *_entity, int sdu_id)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  printf("TEST: UE: %"PRIu64": SDU %d was successfully delivered.\n",
+         am->t_current,
+         sdu_id);
+}
+
+/* max_retx_reached callback of the UE AM entity: report the radio link
+ * failure and terminate the test with exit status 1
+ * (the output format is compared with the stored expected outputs,
+ * do not change it)
+ */
+void max_retx_reached_ue(void *max_retx_reached_data, rlc_entity_t *_entity)
+{
+  rlc_entity_am_t *am = (rlc_entity_am_t *)_entity;
+
+  printf("TEST: UE: %"PRIu64", max RETX reached! radio link failure!\n",
+         am->t_current);
+
+  exit(1);
+}
+
+/* Run the test stored in 'test[]'.
+ *
+ * The simulated time goes from 1 to 999. At each tick:
+ *   - if the current 'TIME' entry of the test matches the tick, all the
+ *     actions up to the next 'TIME' entry are executed
+ *   - the time of both entities is updated
+ *   - eNB generates a PDU (of at most enb_pdu_size bytes), given to the UE
+ *     unless ue_recv_fails is set
+ *   - UE generates a PDU (of at most ue_pdu_size bytes), given to the eNB
+ *     unless enb_recv_fails is set
+ * Everything of interest is printed on lines starting with "TEST".
+ *
+ * Returns 0 at the end of the run. The process exits with 1 on malformed
+ * test, out of memory, or from the max_retx_reached callbacks.
+ *
+ * NOTE(review): 'enb' and 'ue' are used at every tick without NULL check,
+ * so a test definition has to create both entities at TIME 1.
+ */
+int test_main(void)
+{
+  rlc_entity_t *enb = NULL;
+  rlc_entity_t *ue = NULL;
+  int i;
+  int k;
+  char *sdu;
+  char *pdu;
+  rlc_entity_buffer_status_t buffer_status;
+  int enb_do_buffer_status = 0;
+  int ue_do_buffer_status = 0;
+  int size;
+  int pos;
+  /* next byte value to put in an SDU; not reset between SDUs */
+  int next_byte_enb = 0;
+  int next_byte_ue = 0;
+  int enb_recv_fails = 0;
+  int ue_recv_fails = 0;
+  int enb_pdu_size = 1000;
+  int ue_pdu_size = 1000;
+
+  sdu = malloc(16001);
+  pdu = malloc(3000);
+  if (sdu == NULL || pdu == NULL) {
+    printf("out of memory\n");
+    exit(1);
+  }
+
+  for (i = 0; i < 16001; i++)
+    sdu[i] = i & 255;
+
+  /* a test definition has to start with a TIME entry */
+  pos = 0;
+  if (test[pos] != TIME) {
+    printf("%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  for (i = 1; i < 1000; i++) {
+    /* test[pos] is a TIME entry, test[pos+1] is its value
+     * (the final 'TIME, -1' never matches)
+     */
+    if (i == test[pos+1]) {
+      pos += 2;
+      /* execute all the actions up to the next TIME entry; each case
+       * advances 'pos' by the size of the action (1 + number of arguments)
+       */
+      while (test[pos] != TIME)
+        switch (test[pos]) {
+        default: printf("fatal: unknown action\n"); exit(1);
+        case ENB_AM:
+          enb = new_rlc_entity_am(test[pos+1], test[pos+2],
+                                  deliver_sdu_enb_am, NULL,
+                                  successful_delivery_enb, NULL,
+                                  max_retx_reached_enb, NULL,
+                                  test[pos+3], test[pos+4], test[pos+5],
+                                  test[pos+6], test[pos+7], test[pos+8]);
+          pos += 9;
+          break;
+        case UE_AM:
+          ue = new_rlc_entity_am(test[pos+1], test[pos+2],
+                                 deliver_sdu_ue_am, NULL,
+                                 successful_delivery_ue, NULL,
+                                 max_retx_reached_ue, NULL,
+                                 test[pos+3], test[pos+4], test[pos+5],
+                                 test[pos+6], test[pos+7], test[pos+8]);
+          pos += 9;
+          break;
+        case ENB_UM:
+          enb = new_rlc_entity_um(test[pos+1], test[pos+2],
+                                  deliver_sdu_enb_um, NULL,
+                                  test[pos+3], test[pos+4]);
+          pos += 5;
+          break;
+        case UE_UM:
+          ue = new_rlc_entity_um(test[pos+1], test[pos+2],
+                                 deliver_sdu_ue_um, NULL,
+                                 test[pos+3], test[pos+4]);
+          pos += 5;
+          break;
+        case ENB_SDU:
+          /* arguments: <id> <size> */
+          for (k = 0; k < test[pos+2]; k++, next_byte_enb++)
+            sdu[k] = next_byte_enb;
+          printf("TEST: ENB: %d: recv_sdu (id %d): size %d: [",
+                 i, test[pos+1], test[pos+2]);
+          for (k = 0; k < test[pos+2]; k++)
+            printf(" %2.2x", (unsigned char)sdu[k]);
+          printf("]\n");
+          enb->recv_sdu(enb, sdu, test[pos+2], test[pos+1]);
+          pos += 3;
+          break;
+        case UE_SDU:
+          /* arguments: <id> <size> */
+          for (k = 0; k < test[pos+2]; k++, next_byte_ue++)
+            sdu[k] = next_byte_ue;
+          printf("TEST: UE: %d: recv_sdu (id %d): size %d: [",
+                 i, test[pos+1], test[pos+2]);
+          for (k = 0; k < test[pos+2]; k++)
+            printf(" %2.2x", (unsigned char)sdu[k]);
+          printf("]\n");
+          ue->recv_sdu(ue, sdu, test[pos+2], test[pos+1]);
+          pos += 3;
+          break;
+        case ENB_PDU:
+          /* arguments: <size> followed by 'size' bytes; the PDU goes
+           * directly to the UE
+           */
+          for (k = 0; k < test[pos+1]; k++)
+            pdu[k] = test[pos+2+k];
+          printf("TEST: ENB: %d: custom PDU: size %d: [", i, test[pos+1]);
+          for (k = 0; k < test[pos+1]; k++) printf(" %2.2x", (unsigned char)pdu[k]);
+          printf("]\n");
+          if (!ue_recv_fails)
+            ue->recv_pdu(ue, pdu, test[pos+1]);
+          pos += 2 + test[pos+1];
+          break;
+        case UE_PDU:
+          /* arguments: <size> followed by 'size' bytes; the PDU goes
+           * directly to the eNB
+           */
+          for (k = 0; k < test[pos+1]; k++)
+            pdu[k] = test[pos+2+k];
+          printf("TEST: UE: %d: custom PDU: size %d: [", i, test[pos+1]);
+          for (k = 0; k < test[pos+1]; k++) printf(" %2.2x", (unsigned char)pdu[k]);
+          printf("]\n");
+          if (!enb_recv_fails)
+            enb->recv_pdu(enb, pdu, test[pos+1]);
+          pos += 2 + test[pos+1];
+          break;
+        case ENB_PDU_SIZE:
+          enb_pdu_size = test[pos+1];
+          pos += 2;
+          break;
+        case UE_PDU_SIZE:
+          ue_pdu_size = test[pos+1];
+          pos += 2;
+          break;
+        case ENB_RECV_FAILS:
+          enb_recv_fails = test[pos+1];
+          pos += 2;
+          break;
+        case UE_RECV_FAILS:
+          ue_recv_fails = test[pos+1];
+          pos += 2;
+          break;
+        case MUST_FAIL:
+          /* do nothing, only used by caller */
+          pos++;
+          break;
+        case ENB_BUFFER_STATUS:
+          /* the buffer status is printed below, after the time update */
+          enb_do_buffer_status = 1;
+          pos++;
+          break;
+        case UE_BUFFER_STATUS:
+          /* the buffer status is printed below, after the time update */
+          ue_do_buffer_status = 1;
+          pos++;
+          break;
+        case ENB_DISCARD_SDU:
+          printf("TEST: ENB: %d: discard SDU %d\n", i, test[pos+1]);
+          enb->discard_sdu(enb, test[pos+1]);
+          pos += 2;
+          break;
+        case UE_DISCARD_SDU:
+          printf("TEST: UE: %d: discard SDU %d\n", i, test[pos+1]);
+          ue->discard_sdu(ue, test[pos+1]);
+          pos += 2;
+          break;
+        case RE_ESTABLISH:
+          printf("TEST: %d: re-establish eNB and UE\n", i);
+          enb->reestablishment(enb);
+          ue->reestablishment(ue);
+          pos++;
+          break;
+        }
+    }
+
+    enb->set_time(enb, i);
+    ue->set_time(ue, i);
+
+    if (enb_do_buffer_status) {
+      enb_do_buffer_status = 0;
+      buffer_status = enb->buffer_status(enb, enb_pdu_size);
+      printf("TEST: ENB: %d: buffer_status: status_size %d tx_size %d retx_size %d\n",
+             i,
+             buffer_status.status_size,
+             buffer_status.tx_size,
+             buffer_status.retx_size);
+    }
+
+    /* eNB -> UE; a returned size of 0 is treated as "nothing to send" */
+    size = enb->generate_pdu(enb, pdu, enb_pdu_size);
+    if (size) {
+      printf("TEST: ENB: %d: generate_pdu: size %d: [", i, size);
+      for (k = 0; k < size; k++) printf(" %2.2x", (unsigned char)pdu[k]);
+      printf("]\n");
+      if (!ue_recv_fails)
+        ue->recv_pdu(ue, pdu, size);
+    }
+
+    if (ue_do_buffer_status) {
+      ue_do_buffer_status = 0;
+      buffer_status = ue->buffer_status(ue, ue_pdu_size);
+      printf("TEST: UE: %d: buffer_status: status_size %d tx_size %d retx_size %d\n",
+             i,
+             buffer_status.status_size,
+             buffer_status.tx_size,
+             buffer_status.retx_size);
+    }
+
+    /* UE -> eNB; a returned size of 0 is treated as "nothing to send" */
+    size = ue->generate_pdu(ue, pdu, ue_pdu_size);
+    if (size) {
+      printf("TEST: UE: %d: generate_pdu: size %d: [", i, size);
+      for (k = 0; k < size; k++) printf(" %2.2x", (unsigned char)pdu[k]);
+      printf("]\n");
+      if (!enb_recv_fails)
+        enb->recv_pdu(enb, pdu, size);
+    }
+  }
+
+  enb->delete(enb);
+  ue->delete(ue);
+
+  free(sdu);
+  free(pdu);
+
+  return 0;
+}
+
+/* print the command line options and exit (with status 0) */
+void usage(void)
+{
+  printf("options:\n"
+         "    -no-fork\n"
+         "        don't fork (to ease debugging with gdb)\n");
+  exit(0);
+}
+
+/* Test driver.
+ *
+ * By default the test runs in a child process, so that the parent can
+ * check how it ended: the child has to exit by itself (no crash) and its
+ * exit status has to be non-zero if and only if the test definition
+ * starts with MUST_FAIL. Returns 0 on success, 1 otherwise.
+ *
+ * With -no-fork the test runs in the current process and its return
+ * value is returned as is (MUST_FAIL is not checked).
+ */
+int main(int n, char **v)
+{
+  int expect_failure;
+  int child_failed;
+  int child_status;
+  int child;
+  int use_fork = 1;
+  int arg;
+
+  for (arg = 1; arg < n; arg++) {
+    if (strcmp(v[arg], "-no-fork") == 0)
+      use_fork = 0;
+    else
+      usage();
+  }
+
+  /* test[0] and test[1] are the first 'TIME, <time>' entry */
+  expect_failure = test[2] == MUST_FAIL;
+
+  if (!use_fork)
+    return test_main();
+
+  child = fork();
+  switch (child) {
+  case -1:
+    perror("fork");
+    return 1;
+  case 0:
+    /* in the child: run the test */
+    return test_main();
+  default:
+    break;
+  }
+
+  if (wait(&child_status) == -1) {
+    perror("wait");
+    return 1;
+  }
+
+  /* child must quit properly */
+  if (!WIFEXITED(child_status))
+    return 1;
+
+  /* child must fail if and only if it is expected to */
+  child_failed = WEXITSTATUS(child_status) != 0;
+
+  return child_failed == expect_failure ? 0 : 1;
+}
diff --git a/openair2/LAYER2/rlc_v2/tests/test1.h b/openair2/LAYER2/rlc_v2/tests/test1.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7744da55c28ed7012cadc71d698777351843b7f
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test1.h
@@ -0,0 +1,14 @@
+/*
+ * basic am test:
+ * at time 1, eNB receives an SDU of 10 bytes
+ * at time 10, UE receives an SDU of 5 bytes
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    UE_BUFFER_STATUS,
+TIME, 10,
+    UE_SDU, 0, 5,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test1.txt.gz b/openair2/LAYER2/rlc_v2/tests/test1.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..1c6661e9ea1c43c854ecf24cdac718215bbd1f22
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test1.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test10.h b/openair2/LAYER2/rlc_v2/tests/test10.h
new file mode 100644
index 0000000000000000000000000000000000000000..c7aca15eb058d7371963f8f29f68e398fa7e1d0b
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test10.h
@@ -0,0 +1,23 @@
+/*
+ * rlc am test resegmentation of PDU segment with several SDUs
+ *   eNB sends 3 SDUs [1..10] [11..20] [21..30], not received
+ *   eNB retx with smaller PDUs, not received
+ *   eNB retx with still smaller PDUs, not received
+ *   then reception on, all passes
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+TIME, 2,
+    ENB_PDU_SIZE, 25,
+TIME, 48,
+    ENB_PDU_SIZE, 15,
+TIME, 100,
+    UE_RECV_FAILS, 0,
+    ENB_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test10.txt.gz b/openair2/LAYER2/rlc_v2/tests/test10.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..68fd3fa0ba7ec7fad990101439087dcdf55693b4
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test10.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test11.h b/openair2/LAYER2/rlc_v2/tests/test11.h
new file mode 100644
index 0000000000000000000000000000000000000000..5801689aea498b2b350967df97de389eaa3481c8
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test11.h
@@ -0,0 +1,37 @@
+/*
+ * rlc am test function rlc_am_reassemble_next_segment
+ *        in r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset)
+ *                                + r->sdu_len
+ *        when case 'if (r->e)' is false
+ *   eNB sends 3 SDUs [1..10] [11..20] [21..30], not received
+ *   eNB retx with smaller PDUs, not received
+ *   eNB retx with still smaller PDUs, not received
+ *   then UE reception on
+ *   then custom PDUs, first a small part of head of original PDU
+ *                     then a bigger part, covering the first part
+ *                     so that the beginning of this part triggers the 'while'
+ *   then eNB reception on, all passes
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+TIME, 2,
+    ENB_PDU_SIZE, 25,
+TIME, 48,
+    ENB_PDU_SIZE, 15,
+TIME, 95,
+    ENB_BUFFER_STATUS,
+TIME, 99,
+    UE_RECV_FAILS, 0,
+    ENB_PDU, 14, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+    ENB_PDU, 25, 0xec, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
+TIME, 100,
+    ENB_RECV_FAILS, 0,
+TIME, 134,
+    UE_BUFFER_STATUS,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test11.txt.gz b/openair2/LAYER2/rlc_v2/tests/test11.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ea435a666025ab5d90ca2992b91dd94e1551a654
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test11.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test12.h b/openair2/LAYER2/rlc_v2/tests/test12.h
new file mode 100644
index 0000000000000000000000000000000000000000..0387f0aa7f380b65224a4f71f97de78548bb5c59
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test12.h
@@ -0,0 +1,34 @@
+/*
+ * rlc am test function rlc_am_reassemble_next_segment
+ *        in r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset)
+ *                                + r->sdu_len
+ *        when case 'if (r->e)' is true
+ *   eNB sends 4 SDUs [1..5] [6..10] [11..20] [21..30], not received
+ *   eNB retx with smaller PDUs, not received
+ *   eNB retx with still smaller PDUs, not received
+ *   then UE reception on
+ *   then custom PDUs, first a small part of head of original PDU
+ *                     then a bigger part, covering the first part
+ *                     so that the beginning of this part triggers the 'while'
+ *   then eNB reception on, all passes
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 5,
+    ENB_SDU, 1, 5,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+TIME, 2,
+    ENB_PDU_SIZE, 25,
+TIME, 48,
+    ENB_PDU_SIZE, 15,
+TIME, 99,
+    UE_RECV_FAILS, 0,
+    ENB_PDU, 15, 0xec, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    ENB_PDU, 25, 0xec, 0x00, 0x00, 0x00, 0x80, 0x50, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+TIME, 100,
+    ENB_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test12.txt.gz b/openair2/LAYER2/rlc_v2/tests/test12.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..988d7ae644c008cef2f34adce448d00a3a7ce4e1
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test12.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test13.h b/openair2/LAYER2/rlc_v2/tests/test13.h
new file mode 100644
index 0000000000000000000000000000000000000000..a57bd43a946e3c5fdd3d38a482ba6bdbcee85318
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test13.h
@@ -0,0 +1,30 @@
+/*
+ * rlc am test function process_received_ack with something in
+ *             the retransmit_list to put in the ack_list
+ * eNB sends 4 PDUs, not received
+ * eNB retransmits 4th PDU, received, ACKed with NACKs for PDU 1, 2, 3
+ * UE receives custom PDU for 1, 2, 3, 4 (they are not sent by eNB)
+ * (4 resent to have the P bit set)
+ * UE sends ACK for all, eNB moves PDUs from retransmit_list to ack_list
+ *
+ * Maybe not very realistic (custom PDUs).
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_PDU_SIZE, 12,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+TIME, 10,
+    UE_RECV_FAILS, 0,
+    ENB_RECV_FAILS, 0,
+TIME, 87,
+    ENB_PDU, 12, 0x80, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+    ENB_PDU, 12, 0x80, 0x01, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
+    ENB_PDU, 12, 0x80, 0x02, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+    ENB_PDU, 12, 0xa0, 0x03, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test13.txt.gz b/openair2/LAYER2/rlc_v2/tests/test13.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..55a26712db1f9d1efb68b8a66a275d20b4867beb
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test13.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test14.h b/openair2/LAYER2/rlc_v2/tests/test14.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a3a50179614faf31f9e6c3fc1e473fd75204c05
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test14.h
@@ -0,0 +1,12 @@
+/*
+ * rlc am test max_retx_reached
+ * eNB sends PDU, never received
+ */
+TIME, 1,
+    MUST_FAIL,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test14.txt.gz b/openair2/LAYER2/rlc_v2/tests/test14.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..93aa5de81ea42ae69511552066c75ccf11febbea
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test14.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test15.h b/openair2/LAYER2/rlc_v2/tests/test15.h
new file mode 100644
index 0000000000000000000000000000000000000000..4adf93f81c045c26e8e6d3fab5013f7fc5071514
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test15.h
@@ -0,0 +1,42 @@
+/*
+ * rlc am test so_overlap
+ * eNB sends PDU, not received
+ * then PDU is segmented in 3 parts, part 1 & 3 not received,
+ * then we generate a fake control PDU from UE to eNB that
+ * contains NACK with so_start/so_end being inside part 2.
+ *
+ * code to generate fake control PDU:
+ *  rlc_pdu_encoder_init(&e, out, 100);
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // D/C
+ *  rlc_pdu_encoder_put_bits(&e, 0, 3);    // CPT
+ *  rlc_pdu_encoder_put_bits(&e, 2, 10);   // ack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 1, 10);   // nack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // e2
+ *  rlc_pdu_encoder_put_bits(&e, 14, 15);  // so_start
+ *  rlc_pdu_encoder_put_bits(&e, 16, 15);  // so_end
+ *  rlc_pdu_encoder_align(&e);
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 8,
+    ENB_RECV_FAILS, 1,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+    ENB_SDU, 1, 30,
+TIME, 20,
+    ENB_PDU_SIZE, 14,
+TIME, 48,
+    UE_RECV_FAILS, 0,
+TIME, 49,
+    UE_RECV_FAILS, 1,
+TIME, 50,
+    UE_RECV_FAILS, 0,
+TIME, 60,
+    ENB_RECV_FAILS, 0,
+    UE_PDU, 8, 0x00, 0x0a, 0x00, 0xa0, 0x03, 0x80, 0x08, 0x00,
+TIME, 70,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test15.txt.gz b/openair2/LAYER2/rlc_v2/tests/test15.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f6f25dac9857198c69c7a5c05bd468b9458d65f9
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test15.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test16.h b/openair2/LAYER2/rlc_v2/tests/test16.h
new file mode 100644
index 0000000000000000000000000000000000000000..862cecf344bdcea3978fd055b4c62242702c7bb6
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test16.h
@@ -0,0 +1,48 @@
+/*
+ * rlc am test process_received_nack
+ * Same events as for test15 except the fake control PDU
+ * does not ACK anything (ack_sn = 0) so that PDUs in the
+ * wait_list are not transferred into the ack_list and
+ * we cover the case:
+ *    } else {
+ *      prev = cur;
+ *      cur = cur->next;
+ *    }
+ * for the wait_list case.
+ *
+ *  code to generate fake control PDU:
+ *  rlc_pdu_encoder_init(&e, out, 100);
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // D/C
+ *  rlc_pdu_encoder_put_bits(&e, 0, 3);    // CPT
+ *  rlc_pdu_encoder_put_bits(&e, 0, 10);   // ack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 1, 10);   // nack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // e2
+ *  rlc_pdu_encoder_put_bits(&e, 14, 15);  // so_start
+ *  rlc_pdu_encoder_put_bits(&e, 16, 15);  // so_end
+ *  rlc_pdu_encoder_align(&e);
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 8,
+    ENB_RECV_FAILS, 1,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+    ENB_SDU, 1, 30,
+TIME, 20,
+    ENB_PDU_SIZE, 14,
+TIME, 48,
+    UE_RECV_FAILS, 0,
+TIME, 49,
+    UE_RECV_FAILS, 1,
+TIME, 50,
+    UE_RECV_FAILS, 0,
+TIME, 60,
+    ENB_RECV_FAILS, 0,
+    UE_PDU, 8, 0x00, 0x02, 0x00, 0xa0, 0x03, 0x80, 0x08, 0x00,
+TIME, 70,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test16.txt.gz b/openair2/LAYER2/rlc_v2/tests/test16.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..61f36c292ec8ec46edaa3de7d77b235d78322a06
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test16.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test17.h b/openair2/LAYER2/rlc_v2/tests/test17.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2e6c237de9b8302744bb022ad22aa81025a2639
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test17.h
@@ -0,0 +1,30 @@
+/*
+ * rlc am test function process_received_nack
+ *             case 'check that VT(A) <= sn < VT(S)'
+ * eNB sends PDU, not received, resends segmented
+ * we generate a fake control PDU containing nack_sn == 10,
+ * to fail the 'check ...' and cover the return.
+ *
+ *  code to generate fake control PDU:
+ *  rlc_pdu_encoder_init(&e, out, 100);
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // D/C
+ *  rlc_pdu_encoder_put_bits(&e, 0, 3);    // CPT
+ *  rlc_pdu_encoder_put_bits(&e, 0, 10);   // ack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 10, 10);  // nack_sn
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // e1
+ *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // e2
+ *  rlc_pdu_encoder_align(&e);
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 30,
+    ENB_RECV_FAILS, 1,
+TIME, 20,
+    ENB_PDU_SIZE, 14,
+TIME, 60,
+    ENB_RECV_FAILS, 0,
+    UE_PDU, 4, 0x00, 0x02, 0x05, 0x00,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test17.txt.gz b/openair2/LAYER2/rlc_v2/tests/test17.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..a35b5cecd18759c7c683afd4e83df2fc7ba38293
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test17.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test18.h b/openair2/LAYER2/rlc_v2/tests/test18.h
new file mode 100644
index 0000000000000000000000000000000000000000..0ac25d5c915ad4db43c8bf93fcbaeafae9619f0e
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test18.h
@@ -0,0 +1,10 @@
+/*
+ * test rlc am simulate rx pdu buffer full
+ * eNB sends too big PDU to UE, rejected because buffer full
+ */
+TIME, 1,
+    MUST_FAIL,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 10, 10, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test18.txt.gz b/openair2/LAYER2/rlc_v2/tests/test18.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..e119c2b018fcece7c4504135b4bf09c23d902590
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test18.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test19.h b/openair2/LAYER2/rlc_v2/tests/test19.h
new file mode 100644
index 0000000000000000000000000000000000000000..f28e7609f451a9becdb7f5c4737681c4a69d501a
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test19.h
@@ -0,0 +1,54 @@
+/*
+ * test rlc am bad PDU
+ * eNB sends custom PDUs to UE, all of them are wrong for one reason or another
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    /* data PDU, LI == 0
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // D/C
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // RF
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // P
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 10);   // SN
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 11);   // LI
+     */
+    ENB_PDU, 4, 0x84, 0x00, 0x00, 0x00,
+    /* data PDU, no data
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // D/C
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // RF
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // P
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 10);   // SN
+     */
+    ENB_PDU, 2, 0x80, 0x00,
+    /* data PDU, LI == 2 > data size == 1
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // D/C
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // RF
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // P
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 10);   // SN
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 2, 11);   // LI
+     *  rlc_pdu_encoder_align(&e);
+     *  rlc_pdu_encoder_put_bits(&e, 0, 8);    // 1 byte of data
+     */
+    ENB_PDU, 5, 0x84, 0x00, 0x00, 0x20, 0x00,
+    /* control PDU, CPT != 0
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // D/C
+     *  rlc_pdu_encoder_put_bits(&e, 2, 3);    // CPT
+     */
+    ENB_PDU, 1, 0x20,
+    /* data PDU, but only 1 byte
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // D/C
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // RF
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // P
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     */
+    ENB_PDU, 1, 0x84,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test19.txt.gz b/openair2/LAYER2/rlc_v2/tests/test19.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..a3c034e7298d5298cd54493622afdfca7c51b9be
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test19.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test2.h b/openair2/LAYER2/rlc_v2/tests/test2.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba00920778b2821b5807cb9ecf4e5424df892df7
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test2.h
@@ -0,0 +1,10 @@
+/*
+ * basic am test:
+ * at time 1, eNB receives an SDU of 16000 bytes
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 16000,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test2.txt.gz b/openair2/LAYER2/rlc_v2/tests/test2.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9961ff3a1020fe5ecf83b49b11ede590b229de6d
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test2.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test20.h b/openair2/LAYER2/rlc_v2/tests/test20.h
new file mode 100644
index 0000000000000000000000000000000000000000..54f4bec720ab5c6b28123d372f541ddfbc88772d
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test20.h
@@ -0,0 +1,28 @@
+/*
+ * rlc am test full tx window
+ * for that eNB sends a lot of small PDUs
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 513,
+    ENB_PDU_SIZE, 3,
+    ENB_RECV_FAILS, 1,
+    ENB_BUFFER_STATUS,
+TIME, 511,
+    UE_BUFFER_STATUS,
+TIME, 512,
+    UE_BUFFER_STATUS,
+TIME, 513,
+    UE_BUFFER_STATUS,
+TIME, 557,
+    ENB_BUFFER_STATUS,
+TIME, 558,
+    ENB_BUFFER_STATUS,
+TIME, 559,
+    ENB_BUFFER_STATUS,
+TIME, 600,
+    ENB_BUFFER_STATUS,
+    ENB_RECV_FAILS, 0,
+TIME, -1
+
diff --git a/openair2/LAYER2/rlc_v2/tests/test20.txt.gz b/openair2/LAYER2/rlc_v2/tests/test20.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5fedad91a452500def6a850e2da72d99f68346d0
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test20.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test21.h b/openair2/LAYER2/rlc_v2/tests/test21.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba2a2088e683df7682e62b8273ed044d5cfc1e31
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test21.h
@@ -0,0 +1,18 @@
+/*
+ * rlc am test big SDU (size > 2047)
+ * first generate SDU with exactly 2047 bytes
+ * later on generate SDU with exactly 2048 bytes
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 20,
+    ENB_SDU, 1, 2047,
+    ENB_SDU, 2, 20,
+    ENB_PDU_SIZE, 2200,
+TIME, 10,
+    ENB_SDU, 3, 20,
+    ENB_SDU, 4, 2048,
+    ENB_SDU, 5, 20,
+TIME, -1
+
diff --git a/openair2/LAYER2/rlc_v2/tests/test21.txt.gz b/openair2/LAYER2/rlc_v2/tests/test21.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7fc8cbacdef75cc7a77684509989bc7d414e37d6
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test21.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test22.h b/openair2/LAYER2/rlc_v2/tests/test22.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e2e8cd410acd6e122fb39047d6438a2e33dfe85
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test22.h
@@ -0,0 +1,25 @@
+/*
+ * am test: ask for retx with TX buffer too small
+ * then ask for status with buffer too small
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 100,
+    UE_RECV_FAILS, 1,
+TIME, 47,
+    ENB_PDU_SIZE, 4,
+    ENB_BUFFER_STATUS,
+    UE_BUFFER_STATUS,
+TIME, 48,
+    ENB_PDU_SIZE, 1000,
+    UE_PDU_SIZE, 1,
+    UE_BUFFER_STATUS,
+    UE_RECV_FAILS, 0,
+TIME, 49,
+    UE_BUFFER_STATUS,
+TIME, 50,
+    UE_PDU_SIZE, 1000,
+    UE_BUFFER_STATUS,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test22.txt.gz b/openair2/LAYER2/rlc_v2/tests/test22.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..cdc7f51a162aae7cff631abeb0324a088ab48907
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test22.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test23.h b/openair2/LAYER2/rlc_v2/tests/test23.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ad2d25b7defac794d7cfe9c71e3c440c2dd1070
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test23.h
@@ -0,0 +1,9 @@
+/*
+ * am test: basic test with poll_byte == 1
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, 1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, 1, 4,
+    ENB_SDU, 0, 30,
+    ENB_PDU_SIZE, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test23.txt.gz b/openair2/LAYER2/rlc_v2/tests/test23.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3d66e6afa45fde0e6ebc6f9907c15b970f3a13d7
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test23.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test24.h b/openair2/LAYER2/rlc_v2/tests/test24.h
new file mode 100644
index 0000000000000000000000000000000000000000..2393f7a95a8249b0c33e93dc36ca65b5996a342d
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test24.h
@@ -0,0 +1,9 @@
+/*
+ * am test: basic test with poll_pdu == 2
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, 2, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, 2, -1, 4,
+    ENB_SDU, 0, 50,
+    ENB_PDU_SIZE, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test24.txt.gz b/openair2/LAYER2/rlc_v2/tests/test24.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6c457987dcf297d3beb6a93f27aeddbac5fd58af
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test24.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test25.h b/openair2/LAYER2/rlc_v2/tests/test25.h
new file mode 100644
index 0000000000000000000000000000000000000000..ddb584cdf64af9a5eb359512a4fd8927e2e235a3
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test25.h
@@ -0,0 +1,8 @@
+/*
+ * am test: reject SDU because not enough room in rx buffer
+ */
+TIME, 1,
+    ENB_AM, 10, 10, 35, 0, 45, -1, -1, 4,
+    UE_AM, 10, 10, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 50,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test25.txt.gz b/openair2/LAYER2/rlc_v2/tests/test25.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7ad895aaccc095103430cffdf324d3976077f0f2
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test25.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test26.h b/openair2/LAYER2/rlc_v2/tests/test26.h
new file mode 100644
index 0000000000000000000000000000000000000000..95d8367247a6a10ae5e880a8d34e0973d9936a11
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test26.h
@@ -0,0 +1,25 @@
+/*
+ * am test: test function check_t_poll_retransmit
+ *               case 'PDU with SN = VT(S)-1 not found?'
+ * eNB sends some PDUs, UE receives none
+ * then UE receives the first retransmitted PDU and nothing more
+ * until poll retransmit occurs again in the eNB to trigger the case
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    UE_RECV_FAILS, 1,
+TIME, 2,
+    ENB_SDU, 1, 10,
+TIME, 3,
+    ENB_SDU, 2, 10,
+TIME, 4,
+    ENB_SDU, 3, 10,
+TIME, 50,
+    UE_RECV_FAILS, 0,
+TIME, 51,
+    UE_RECV_FAILS, 1,
+TIME, 100,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test26.txt.gz b/openair2/LAYER2/rlc_v2/tests/test26.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..85f1af55f691179defd6ab24bf8d4c0960d986dc
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test26.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test27.h b/openair2/LAYER2/rlc_v2/tests/test27.h
new file mode 100644
index 0000000000000000000000000000000000000000..224fd1218592c8cd5834ad7dccb736d295553e4e
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test27.h
@@ -0,0 +1,17 @@
+/*
+ * am test: test function check_t_poll_retransmit
+ *               case 'do we meet conditions of 36.322 5.2.2.3?'
+ * eNB sends one PDU, UE does not receive
+ * just before calling check_t_poll_retransmit, eNB receives a new SDU
+ * for the function 'check_poll_after_pdu_assembly' to fail
+ * then UE receives everything that eNB sends
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    UE_RECV_FAILS, 1,
+TIME, 47,
+    ENB_SDU, 1, 10,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test27.txt.gz b/openair2/LAYER2/rlc_v2/tests/test27.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..15fc41defe11b0d13f2b044a3e3b02eab4c133ad
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test27.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test28.h b/openair2/LAYER2/rlc_v2/tests/test28.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac768f36523f6afa52f7459146ec981c58aea2e1
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test28.h
@@ -0,0 +1,18 @@
+/*
+ * am test: test function check_t_reordering,
+ *               case 'update VR(MS) to first SN >= VR(X) for which not
+ *                     all PDU segments have been received'
+ * eNB sends 3 PDUs, first not received, two others received
+ * later on, everything is received
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    UE_RECV_FAILS, 1,
+TIME, 2,
+    UE_RECV_FAILS, 0,
+    ENB_SDU, 1, 10,
+TIME, 3,
+    ENB_SDU, 2, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test28.txt.gz b/openair2/LAYER2/rlc_v2/tests/test28.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..760d1f2b84f0aa4849d987f157f258ab7d22b90b
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test28.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test29.h b/openair2/LAYER2/rlc_v2/tests/test29.h
new file mode 100644
index 0000000000000000000000000000000000000000..61bb183641d1251b26afa17055a6bc9b8fd611a3
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test29.h
@@ -0,0 +1,21 @@
+/*
+ * am test: test function check_t_reordering,
+ *               case 'VR(H) > VR(MS)'
+ * eNB sends 4 PDUs, only 1st and 3rd are received
+ * later on, everything is received
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    UE_RECV_FAILS, 1,
+TIME, 2,
+    UE_RECV_FAILS, 0,
+    ENB_SDU, 1, 10,
+TIME, 3,
+    UE_RECV_FAILS, 1,
+    ENB_SDU, 2, 10,
+TIME, 4,
+    UE_RECV_FAILS, 0,
+    ENB_SDU, 3, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test29.txt.gz b/openair2/LAYER2/rlc_v2/tests/test29.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..265735edbceb54e2a54c0f2d7c171080262c95de
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test29.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test3.h b/openair2/LAYER2/rlc_v2/tests/test3.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a469d82e24a872af68c8e11c83414797acebc87
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test3.h
@@ -0,0 +1,11 @@
+/*
+ * basic am test:
+ * at time 1, eNB receives an SDU of 16001 bytes
+ */
+
+TIME, 1,
+    MUST_FAIL,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 16001,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test3.txt.gz b/openair2/LAYER2/rlc_v2/tests/test3.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..30a96e22781c5f1d3b4711f48fc337cef23a574e
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test3.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test30.h b/openair2/LAYER2/rlc_v2/tests/test30.h
new file mode 100644
index 0000000000000000000000000000000000000000..feeee977fd371854098c482e867a4912f5bf3576
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test30.h
@@ -0,0 +1,16 @@
+/*
+ * am test: test function generate_status
+ *               enter the while loop 'go to highest full sn+1 for ACK'
+ * eNB sends several PDUs, only the last is received
+ * UE sends status PDU of a chosen size that lets the code enter the while loop
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 70,
+    ENB_PDU_SIZE, 12,
+    UE_RECV_FAILS, 1,
+TIME, 7,
+    UE_RECV_FAILS, 0,
+    UE_PDU_SIZE, 12,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test30.txt.gz b/openair2/LAYER2/rlc_v2/tests/test30.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..eeb856c3414ce973ac95ab4e3258f6e9aacd3ff1
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test30.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test31.h b/openair2/LAYER2/rlc_v2/tests/test31.h
new file mode 100644
index 0000000000000000000000000000000000000000..a978c69b39a056233c332724f155a24912709015
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test31.h
@@ -0,0 +1,10 @@
+/*
+ * um test: several SDUs in a PDU (field length 5 bits)
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 20,
+    ENB_SDU, 2, 30,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test31.txt.gz b/openair2/LAYER2/rlc_v2/tests/test31.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2c5e6fcc3415544b1ca81a258e81eef6d190311b
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test31.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test32.h b/openair2/LAYER2/rlc_v2/tests/test32.h
new file mode 100644
index 0000000000000000000000000000000000000000..69d068cc836cd33d8541d76864d015bc9207ff99
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test32.h
@@ -0,0 +1,10 @@
+/*
+ * um test: several SDUs in a PDU (field length 10 bits)
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 10,
+    UE_UM, 100000, 100000, 35, 10,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 20,
+    ENB_SDU, 2, 30,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test32.txt.gz b/openair2/LAYER2/rlc_v2/tests/test32.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0b4633045337017eb15e520e995f9754478fa423
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test32.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test33.h b/openair2/LAYER2/rlc_v2/tests/test33.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e907db577f80fe0f565a7d7ed86cef11b8c4638
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test33.h
@@ -0,0 +1,18 @@
+/*
+ * um test: test function rlc_um_reassemble_pdu, discard SDU
+ *               case '!(fi & 0x02)'
+ * eNB sends 33 PDUs covering 1 SDU, only PDU 0 received (with SN=0 and FI=1)
+ * then eNB sends 1 PDU covering 1 SDU (so SN=1 and FI=0 for this one)
+ * received by UE
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_SDU, 0, 33,
+    ENB_PDU_SIZE, 2,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+TIME, 34,
+    UE_RECV_FAILS, 0,
+    ENB_SDU, 1, 1,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test33.txt.gz b/openair2/LAYER2/rlc_v2/tests/test33.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..08cb366be415251d3c552da7b4f22615fa8f6138
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test33.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test34.h b/openair2/LAYER2/rlc_v2/tests/test34.h
new file mode 100644
index 0000000000000000000000000000000000000000..da119a6047fa5fc03e274b62cb330cf7ce21e925
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test34.h
@@ -0,0 +1,15 @@
+/*
+ * um test: trigger some cases in rlc_um_reception_actions
+ * eNB sends several PDUs, only the beginning PDUs and ending PDUs are
+ * received. Middle PDUs are not.
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_SDU, 0, 40,
+    ENB_PDU_SIZE, 2,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+TIME, 8,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test34.txt.gz b/openair2/LAYER2/rlc_v2/tests/test34.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..aabbe570e56ea236be3853ee4f8f445dbde899fd
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test34.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test35.h b/openair2/LAYER2/rlc_v2/tests/test35.h
new file mode 100644
index 0000000000000000000000000000000000000000..35ccec1a42a4b0e7fbcf05a6d6742ffb44efa02f
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test35.h
@@ -0,0 +1,9 @@
+/*
+ * um: discard PDU because rx buffer full
+ * eNB sends a PDU too big
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 10, 10, 35, 5,
+    ENB_SDU, 0, 40,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test35.txt.gz b/openair2/LAYER2/rlc_v2/tests/test35.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6581c390c73f05a34696e1effc3a7194e5f97f3c
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test35.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test36.h b/openair2/LAYER2/rlc_v2/tests/test36.h
new file mode 100644
index 0000000000000000000000000000000000000000..0a49527a923350ae87bab862bcb8d094818c0f15
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test36.h
@@ -0,0 +1,14 @@
+/*
+ * um: discard according to 36.322 5.1.2.2.2
+ * eNB sends many PDUs. The 1st is received, the next ones are not, then reception works again.
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_SDU, 0, 33,
+    ENB_PDU_SIZE, 2,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+TIME, 22,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test36.txt.gz b/openair2/LAYER2/rlc_v2/tests/test36.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6ad38454f9ba8cbe02dbb137842d91cab52bbcca
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test36.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test37.h b/openair2/LAYER2/rlc_v2/tests/test37.h
new file mode 100644
index 0000000000000000000000000000000000000000..b418e2c7151ac82ddae3c623b74feae3360e6502
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test37.h
@@ -0,0 +1,37 @@
+/*
+ * um: some wrong PDUs
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    /* LI == 0
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 5);    // SN
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 11);   // LI
+     */
+    ENB_PDU, 3, 0x20, 0x00, 0x00,
+    /* no data
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 5);    // SN
+     */
+    ENB_PDU, 1, 0x00,
+    /* LI == 2 >= data_size == 1
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 5);    // SN
+     *  rlc_pdu_encoder_put_bits(&e, 0, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 2, 11);   // LI
+     *  rlc_pdu_encoder_align(&e);
+     *  rlc_pdu_encoder_put_bits(&e, 0, 8);    // 1 byte of data
+     */
+    ENB_PDU, 4, 0x20, 0x00, 0x20, 0x00,
+    /* PDU with E == 1 but has size 1 byte only (truncated PDU)
+     *  rlc_pdu_encoder_put_bits(&e, 0, 2);    // FI
+     *  rlc_pdu_encoder_put_bits(&e, 1, 1);    // E
+     *  rlc_pdu_encoder_put_bits(&e, 0, 5);    // SN
+     */
+    ENB_PDU, 1, 0x20,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test37.txt.gz b/openair2/LAYER2/rlc_v2/tests/test37.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2a1a837bf0329b2859fa9e71ac9a4bc460c55075
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test37.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test38.h b/openair2/LAYER2/rlc_v2/tests/test38.h
new file mode 100644
index 0000000000000000000000000000000000000000..66a37207e0274ddf93abfd7064908ec4a4c4b32e
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test38.h
@@ -0,0 +1,22 @@
+/*
+ * um: test some cases of functions tx_pdu_size and rlc_entity_um_generate_pdu
+ * eNB has too much data to fit in one PDU
+ * then later eNB wants to send an SDU of size > 2047
+ * then later eNB sends several SDUs in one PDU
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_PDU_SIZE, 2050,
+    ENB_SDU, 0, 1500,
+    ENB_SDU, 1, 1500,
+    ENB_SDU, 2, 10,
+TIME, 10,
+    ENB_SDU, 3, 2048,
+    ENB_SDU, 4, 10,
+TIME, 20,
+    ENB_SDU, 5, 10,
+    ENB_SDU, 6, 10,
+    ENB_SDU, 7, 10,
+    ENB_SDU, 8, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test38.txt.gz b/openair2/LAYER2/rlc_v2/tests/test38.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..57e4ed270acc00bd861eeb824e24cc0a154c3a60
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test38.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test39.h b/openair2/LAYER2/rlc_v2/tests/test39.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c926b3745ff69d70dd75f0d421e763c3451283a
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test39.h
@@ -0,0 +1,9 @@
+/*
+ * um: SDU too big
+ */
+TIME, 1,
+    MUST_FAIL,
+    ENB_UM, 10, 10, 35, 5,
+    UE_UM, 100, 100, 35, 5,
+    ENB_SDU, 0, 16001,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test39.txt.gz b/openair2/LAYER2/rlc_v2/tests/test39.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c4f6501d596f474dd77abbce265bd7ab4e7c9cd4
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test39.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test4.h b/openair2/LAYER2/rlc_v2/tests/test4.h
new file mode 100644
index 0000000000000000000000000000000000000000..8801096de117e51e9a0a07ccc6bd4c22114ef905
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test4.h
@@ -0,0 +1,13 @@
+/*
+ * basic um test: SN field length 5 bits
+ * at time 1, eNB receives an SDU of 10 bytes
+ * at time 10, UE receives an SDU of 5 bytes
+ */
+
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    ENB_SDU, 0, 10,
+TIME, 10,
+    UE_SDU, 0, 5,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test4.txt.gz b/openair2/LAYER2/rlc_v2/tests/test4.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..4339005cd60ae367d0f4bd3bf919253bcad82241
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test4.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test40.h b/openair2/LAYER2/rlc_v2/tests/test40.h
new file mode 100644
index 0000000000000000000000000000000000000000..478fe1af06536d88afe7c9c72abb4e91742119f7
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test40.h
@@ -0,0 +1,9 @@
+/*
+ * um: not enough room in SDU list
+ */
+TIME, 1,
+    ENB_UM, 10, 10, 35, 5,
+    UE_UM, 100, 100, 35, 5,
+    ENB_SDU, 0, 20,
+    ENB_BUFFER_STATUS,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test40.txt.gz b/openair2/LAYER2/rlc_v2/tests/test40.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..38d4b31cdaa43d1bac222a9d92ea9a615201cae6
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test40.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test41.h b/openair2/LAYER2/rlc_v2/tests/test41.h
new file mode 100644
index 0000000000000000000000000000000000000000..076d3e0d8c041f3310779967507157b91cea6eee
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test41.h
@@ -0,0 +1,45 @@
+/*
+ * um: test function check_t_reordering
+ * eNB sends PDUs, UE receives some and some not
+ */
+TIME, 1,
+    ENB_UM, 10000, 10000, 35, 5,
+    UE_UM, 10000, 10000, 35, 5,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+    ENB_SDU, 4, 10,
+    ENB_SDU, 5, 10,
+    ENB_SDU, 6, 10,
+    ENB_SDU, 7, 10,
+    ENB_SDU, 8, 10,
+    ENB_SDU, 9, 10,
+    ENB_SDU, 10, 10,
+    ENB_SDU, 11, 10,
+    ENB_SDU, 12, 10,
+    ENB_SDU, 13, 10,
+    ENB_SDU, 14, 10,
+    ENB_SDU, 15, 10,
+    ENB_SDU, 16, 10,
+    ENB_SDU, 17, 10,
+    ENB_SDU, 18, 10,
+    ENB_SDU, 19, 10,
+    ENB_SDU, 20, 10,
+    ENB_SDU, 21, 10,
+    ENB_SDU, 22, 10,
+    ENB_SDU, 23, 10,
+    ENB_SDU, 24, 10,
+    ENB_SDU, 25, 10,
+    ENB_PDU_SIZE, 40,
+TIME, 2,
+    UE_RECV_FAILS, 1,
+TIME, 3,
+    UE_RECV_FAILS, 0,
+TIME, 6,
+    UE_RECV_FAILS, 1,
+TIME, 7,
+    UE_RECV_FAILS, 0,
+TIME, 8,
+    UE_RECV_FAILS, 1,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test41.txt.gz b/openair2/LAYER2/rlc_v2/tests/test41.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8b799ac084ca15f8c1914c93d71d2c25d6271e7d
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test41.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test42.h b/openair2/LAYER2/rlc_v2/tests/test42.h
new file mode 100644
index 0000000000000000000000000000000000000000..66f27b9dac46468006efea07f7e691db53a45ee1
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test42.h
@@ -0,0 +1,39 @@
+/*
+ * am test: test rlc_entity_am_discard_sdu
+ * eNB and UE get some SDU, later on some are discarded
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+    ENB_PDU_SIZE, 23,
+TIME, 2,
+    ENB_DISCARD_SDU, 0,
+    ENB_DISCARD_SDU, 2,
+    ENB_DISCARD_SDU, 3,
+    ENB_DISCARD_SDU, 1,
+TIME, 10,
+    UE_SDU, 0, 5,
+    UE_SDU, 1, 5,
+    UE_SDU, 2, 5,
+    UE_SDU, 3, 5,
+    UE_SDU, 4, 5,
+    UE_SDU, 5, 5,
+    UE_PDU_SIZE, 13,
+TIME, 12,
+    UE_DISCARD_SDU, 3,
+    UE_DISCARD_SDU, 1,
+    UE_DISCARD_SDU, 0,
+    UE_DISCARD_SDU, 5,
+    UE_DISCARD_SDU, 4,
+    UE_DISCARD_SDU, 2,
+TIME, 30,
+    UE_SDU, 6, 5,
+    UE_DISCARD_SDU, 6,
+TIME, 31,
+    UE_SDU, 7, 8,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test42.txt.gz b/openair2/LAYER2/rlc_v2/tests/test42.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..cf9f45c88268e0a986a41c335480dded4f33abbd
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test42.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test43.h b/openair2/LAYER2/rlc_v2/tests/test43.h
new file mode 100644
index 0000000000000000000000000000000000000000..e594437ae8869c8d4f08d975ccaf7ccf5591ee85
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test43.h
@@ -0,0 +1,39 @@
+/*
+ * um test: test rlc_entity_um_discard_sdu
+ * eNB and UE get some SDU, later on some are discarded
+ */
+
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 10,
+    UE_UM, 100000, 100000, 35, 10,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+    ENB_PDU_SIZE, 23,
+TIME, 2,
+    ENB_DISCARD_SDU, 0,
+    ENB_DISCARD_SDU, 2,
+    ENB_DISCARD_SDU, 3,
+    ENB_DISCARD_SDU, 1,
+TIME, 10,
+    UE_SDU, 0, 5,
+    UE_SDU, 1, 5,
+    UE_SDU, 2, 5,
+    UE_SDU, 3, 5,
+    UE_SDU, 4, 5,
+    UE_SDU, 5, 5,
+    UE_PDU_SIZE, 13,
+TIME, 12,
+    UE_DISCARD_SDU, 3,
+    UE_DISCARD_SDU, 1,
+    UE_DISCARD_SDU, 0,
+    UE_DISCARD_SDU, 5,
+    UE_DISCARD_SDU, 4,
+    UE_DISCARD_SDU, 2,
+TIME, 30,
+    UE_SDU, 6, 5,
+    UE_DISCARD_SDU, 6,
+TIME, 31,
+    UE_SDU, 7, 8,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test43.txt.gz b/openair2/LAYER2/rlc_v2/tests/test43.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3387b6530e11728fbd180554a216f20c2b4ef2f8
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test43.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test44.h b/openair2/LAYER2/rlc_v2/tests/test44.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc9873ac34b40f7c030a7bf16ea68860bd3bf808
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test44.h
@@ -0,0 +1,20 @@
+/*
+ * am: test function rlc_entity_am_reestablishment
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    RE_ESTABLISH,
+TIME, 2,
+    ENB_SDU, 0, 10,
+    RE_ESTABLISH,
+TIME, 3,
+    ENB_SDU, 0, 40,
+    ENB_PDU_SIZE, 14,
+    UE_RECV_FAILS, 1,
+TIME, 4,
+    UE_RECV_FAILS, 0,
+TIME, 10,
+    RE_ESTABLISH,
+TIME, -1
+
diff --git a/openair2/LAYER2/rlc_v2/tests/test44.txt.gz b/openair2/LAYER2/rlc_v2/tests/test44.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..bdad9e3fbc5ce1eb162b82c6ea82b0c8cf6fef86
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test44.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test45.h b/openair2/LAYER2/rlc_v2/tests/test45.h
new file mode 100644
index 0000000000000000000000000000000000000000..c27fd8e2f0641bef9abda06882ba332a85bce506
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test45.h
@@ -0,0 +1,30 @@
+/*
+ * um: test function rlc_entity_um_reestablishment
+ *     and also the function clear_entity, case 'while (cur_rx != NULL)'
+ */
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 5,
+    UE_UM, 100000, 100000, 35, 5,
+    RE_ESTABLISH,
+TIME, 2,
+    ENB_SDU, 0, 10,
+    RE_ESTABLISH,
+TIME, 3,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 0, 10,
+    ENB_PDU_SIZE, 14,
+TIME, 5,
+    UE_RECV_FAILS, 1,
+TIME, 6,
+    UE_RECV_FAILS, 0,
+TIME, 10,
+    RE_ESTABLISH,
+TIME, 998,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 0, 10,
+    UE_RECV_FAILS, 1,
+TIME, 999,
+    UE_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test45.txt.gz b/openair2/LAYER2/rlc_v2/tests/test45.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c5e3e71d46e7f4a547b59fd5403557ac623e7d4f
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test45.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test5.h b/openair2/LAYER2/rlc_v2/tests/test5.h
new file mode 100644
index 0000000000000000000000000000000000000000..3224817c264296f8491a877f309ea62074064615
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test5.h
@@ -0,0 +1,13 @@
+/*
+ * basic um test: SN field length 10 bits
+ * at time 1, eNB receives an SDU of 10 bytes
+ * at time 10, UE receives an SDU of 5 bytes
+ */
+
+TIME, 1,
+    ENB_UM, 100000, 100000, 35, 10,
+    UE_UM, 100000, 100000, 35, 10,
+    ENB_SDU, 0, 10,
+TIME, 10,
+    UE_SDU, 0, 5,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test5.txt.gz b/openair2/LAYER2/rlc_v2/tests/test5.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5a27d5260641878ac7e26cda1d77b8eeb7442154
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test5.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test6.h b/openair2/LAYER2/rlc_v2/tests/test6.h
new file mode 100644
index 0000000000000000000000000000000000000000..2115c8a328af4f490353e978be4e77961bf93035
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test6.h
@@ -0,0 +1,27 @@
+/*
+ * rlc am test function segment_already_received
+ *   eNB sends SDU [1..900], not received
+ *   eNB retx with smaller PDUs [1..600] [601..900]
+ *   [1..600] is received but ACK/NACK not
+ *   eNB retx with still smaller PDUs [1..400] [401..600] [601..900]
+ *   all is received, ACKs/NACKs go through
+ *
+ * this test will fail if NACK mechanism uses SOstart/SOend
+ * (not implemented for the moment)
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 900,
+TIME, 2,
+    ENB_PDU_SIZE, 600,
+    UE_RECV_FAILS, 0,
+TIME, 48,
+    UE_RECV_FAILS, 1,
+    ENB_PDU_SIZE, 400,
+TIME, 90,
+    UE_RECV_FAILS, 0,
+    ENB_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test6.txt.gz b/openair2/LAYER2/rlc_v2/tests/test6.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..54870821a619725938e1ea529ff533d748d9c7db
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test6.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test7.h b/openair2/LAYER2/rlc_v2/tests/test7.h
new file mode 100644
index 0000000000000000000000000000000000000000..081227a400dcfebf40bfc63b85cb244e17de1d81
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test7.h
@@ -0,0 +1,26 @@
+/*
+ * rlc am test function rlc_am_segment_full
+ *   eNB sends SDU [1..900], not received
+ *   eNB retx with smaller PDUs [1..600] [601..900]
+ *   nothing received
+ *   eNB retx with still smaller PDUs [1..400] [401..600] [601..900]
+ *   [401..600] received, ACK goes through
+ *   link clean, all goes through
+ *
+ * this test will fail if NACK mechanism uses SOstart/SOend
+ * (not implemented for the moment)
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 900,
+TIME, 2,
+    ENB_PDU_SIZE, 600,
+TIME, 48,
+    ENB_PDU_SIZE, 400,
+TIME, 95,
+    UE_RECV_FAILS, 0,
+    ENB_RECV_FAILS, 0,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test7.txt.gz b/openair2/LAYER2/rlc_v2/tests/test7.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9976a6050779805882bbefb2dab98fa27fd789bc
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test7.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test8.h b/openair2/LAYER2/rlc_v2/tests/test8.h
new file mode 100644
index 0000000000000000000000000000000000000000..aa7f5bed5be78d0979df6a12f245da7e8e38bdfb
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test8.h
@@ -0,0 +1,19 @@
+/*
+ * basic am test:
+ * at time 1, eNB receives 10 SDUs of 10 bytes
+ */
+
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    ENB_SDU, 0, 10,
+    ENB_SDU, 1, 10,
+    ENB_SDU, 2, 10,
+    ENB_SDU, 3, 10,
+    ENB_SDU, 4, 10,
+    ENB_SDU, 5, 10,
+    ENB_SDU, 6, 10,
+    ENB_SDU, 7, 10,
+    ENB_SDU, 8, 10,
+    ENB_SDU, 9, 10,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test8.txt.gz b/openair2/LAYER2/rlc_v2/tests/test8.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c8016878635a3f971d3c63298ac49ddefe6835b7
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test8.txt.gz differ
diff --git a/openair2/LAYER2/rlc_v2/tests/test9.h b/openair2/LAYER2/rlc_v2/tests/test9.h
new file mode 100644
index 0000000000000000000000000000000000000000..88e23d94e95891923a49a486445c119f4590b85f
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/tests/test9.h
@@ -0,0 +1,34 @@
+/*
+ * rlc am test function rlc_am_reassemble_next_segment
+ *        case 'if pdu_byte is not in [so .. so+len-1]'
+ *   eNB sends SDU [1..30], not received
+ *   eNB retx with smaller PDUs [1..21] [22..30], not received
+ *   eNB retx with still smaller PDUs [1..11] [12..21] [22..30], not received
+ *   custom PDU [12..21] sent to UE, received
+ *   custom PDU [1..21] sent to UE, received
+ *
+ * Not sure if in a real setup [12..21] is sent and then [1..21] is sent.
+ * In the current RLC implementation, this is impossible. If we send [12..21]
+ * it means [1..21] has been split and so we won't send it later on.
+ * Maybe with HARQ retransmissions in PHY/MAC in bad radio conditions?
+ *
+ * this test will fail if NACK mechanism uses SOstart/SOend
+ * (not implemented for the moment)
+ */
+TIME, 1,
+    ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4,
+    UE_RECV_FAILS, 1,
+    ENB_RECV_FAILS, 1,
+    ENB_SDU, 0, 30,
+TIME, 2,
+    ENB_PDU_SIZE, 25,
+TIME, 48,
+    ENB_PDU_SIZE, 15,
+TIME, 100,
+    UE_RECV_FAILS, 0,
+    ENB_RECV_FAILS, 0,
+    ENB_PDU, 14, 0xd8, 0x00, 0x00, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+TIME, 101,
+    ENB_PDU, 25, 0xe8, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+TIME, -1
diff --git a/openair2/LAYER2/rlc_v2/tests/test9.txt.gz b/openair2/LAYER2/rlc_v2/tests/test9.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..cc6d934e708e1ddad7286e7c3eec33cd23c91f94
Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test9.txt.gz differ