#/*
# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The OpenAirInterface Software Alliance licenses this file to You under
# * the OAI Public License, Version 1.1 (the "License"); you may not use this file
# * except in compliance with the License.
# * You may obtain a copy of the License at
# *
# * http://www.openairinterface.org/?page_id=698
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# *-------------------------------------------------------------------------------
# * For more information about the OpenAirInterface (OAI) Software Alliance:
# * contact@openairinterface.org
# */
#---------------------------------------------------------------------
# Python for CI of OAI-eNB + COTS-UE
#
# Required Python Version
# Python 3.x
#
# Required Python Package
# pexpect
#---------------------------------------------------------------------

#-----------------------------------------------------------
# Version
#-----------------------------------------------------------
Version = '0.2'

#-----------------------------------------------------------
# Constants
#-----------------------------------------------------------

# Process status codes. Positive values are the per-component "OK" codes,
# negative values are failure codes, and zero is the overall success code.
ALL_PROCESSES_OK = 0

# eNB
ENB_PROCESS_OK = 1
ENB_PROCESS_FAILED = -1
ENB_PROCESS_SEG_FAULT = -11
ENB_PROCESS_ASSERTION = -12
ENB_PROCESS_REALTIME_ISSUE = -13
ENB_PROCESS_NOLOGFILE_TO_ANALYZE = -14
ENB_PROCESS_SLAVE_RRU_NOT_SYNCED = -15

# EPC components (HSS / MME / SPGW)
HSS_PROCESS_OK = 2
HSS_PROCESS_FAILED = -2
MME_PROCESS_OK = 3
MME_PROCESS_FAILED = -3
SPGW_PROCESS_OK = 4
SPGW_PROCESS_FAILED = -4

# UE connectivity
UE_IP_ADDRESS_ISSUE = -5

# OAI UE
OAI_UE_PROCESS_OK = 6
OAI_UE_PROCESS_NOLOGFILE_TO_ANALYZE = -20
OAI_UE_PROCESS_COULD_NOT_SYNC = -21
OAI_UE_PROCESS_ASSERTION = -22
OAI_UE_PROCESS_FAILED = -23
OAI_UE_PROCESS_NO_TUNNEL_INTERFACE = -24
OAI_UE_PROCESS_SEG_FAULT = -25

# UE attachment states.
UE_STATUS_DETACHED = 0
UE_STATUS_DETACHING = 1
UE_STATUS_ATTACHING = 2
UE_STATUS_ATTACHED = 3

# X2 handover request states.
X2_HO_REQ_STATE__IDLE = 0
X2_HO_REQ_STATE__TARGET_RECEIVES_REQ = 1
X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE = 2
X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ = 3
X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK = 10
#/*
# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The OpenAirInterface Software Alliance licenses this file to You under
# * the OAI Public License, Version 1.1 (the "License"); you may not use this file
# * except in compliance with the License.
# * You may obtain a copy of the License at
# *
# * http://www.openairinterface.org/?page_id=698
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# *-------------------------------------------------------------------------------
# * For more information about the OpenAirInterface (OAI) Software Alliance:
# * contact@openairinterface.org
# */
#---------------------------------------------------------------------
# Python for CI of OAI-eNB + COTS-UE
#
# Required Python Version
# Python 3.x
#
# Required Python Package
# pexpect
#---------------------------------------------------------------------

#-----------------------------------------------------------
# Import
#-----------------------------------------------------------
import sys # arg
import re # reg
import logging
import os
import time
import signal

from multiprocessing import Process, Lock, SimpleQueue

#-----------------------------------------------------------
# OAI Testing modules
#-----------------------------------------------------------
import sshconnection as SSH
import helpreadme as HELP
import constants as CONST
import html

#-----------------------------------------------------------
# Class Declaration
#-----------------------------------------------------------
class EPCManagement():
    """Start, check, stop and collect logs of the EPC components over SSH.

    The behaviour of every operation depends on ``Type``, matched
    case-insensitively as a prefix in this order: ``OAI-Rel14-Docker``,
    ``OAI-Rel14-CUPS``, ``OAI`` and ``ltebox``. The order matters because
    ``OAI`` is a prefix of both ``OAI-Rel14-*`` names.

    NOTE(review): this class was reconstructed from a source whose
    indentation was lost; statements marked NOTE(review) below have a
    nesting level that was inferred from the logic and should be confirmed.
    """

    # Pattern the SSH helper waits for after a command: a literal '$' prompt.
    _PROMPT = r'\$'
    # Pattern spotting a legacy wrapper script ("/bin/bash ./run_xxx") in ps output.
    _RUN_SCRIPT = r'\/bin\/bash .\/run_'

    def __init__(self):
        self.IPAddress = ''
        self.UserName = ''
        self.Password = ''
        self.SourceCodePath = ''
        self.Type = ''
        self.PcapFileName = ''
        self.htmlObj = None
        self.testCase_id = ''
        self.MmeIPAddress = ''
        self.containerPrefix = 'prod'

#-----------------------------------------------------------
# Setter and Getters on Public Members
#-----------------------------------------------------------

    def SetIPAddress(self, ipaddress):
        self.IPAddress = ipaddress
    def GetIPAddress(self):
        return self.IPAddress
    def SetUserName(self, username):
        self.UserName = username
    def GetUserName(self):
        return self.UserName
    def SetPassword(self, password):
        self.Password = password
    def GetPassword(self):
        return self.Password
    def SetSourceCodePath(self, sourcecodepath):
        self.SourceCodePath = sourcecodepath
    def GetSourceCodePath(self):
        return self.SourceCodePath
    def SetType(self, kind):
        self.Type = kind
    def GetType(self):
        return self.Type
    def SetHtmlObj(self, obj):
        self.htmlObj = obj
    def SetTestCase_id(self, idx):
        self.testCase_id = idx
    def GetMmeIPAddress(self):
        return self.MmeIPAddress
    def SetContainerPrefix(self, prefix):
        self.containerPrefix = prefix

#-----------------------------------------------------------
# Private helpers
#-----------------------------------------------------------

    def _type_is(self, kind):
        """Return True when ``Type`` starts with *kind* (case-insensitive)."""
        return re.match(kind, self.Type, re.IGNORECASE) is not None

    def _missing_parameters(self):
        """Return True when any mandatory EPC connection parameter is empty."""
        return '' in (self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type)

    def _exit_on_missing_parameters(self):
        """Print the usage help and exit when a mandatory parameter is empty."""
        if self._missing_parameters():
            HELP.GenericHelp(CONST.Version)
            HELP.EPCSrvHelp(self.IPAddress, self.UserName, self.Password, self.SourceCodePath, self.Type)
            sys.exit('Insufficient EPC Parameters')

    def _open_ssh(self):
        """Open and return an SSH session to the EPC host."""
        session = SSH.SSHConnection()
        session.open(self.IPAddress, self.UserName, self.Password)
        return session

    def _scripts_path(self):
        """Return the ``scripts`` directory below the EPC source tree."""
        return self.SourceCodePath + '/scripts'

    def _sudo(self, command):
        """Return *command* run through sudo, with the password piped on stdin."""
        return 'echo ' + self.Password + ' | sudo -S ' + command

    def _docker_exec(self, flags, container, command):
        """Return a ``docker exec`` line running *command* in bash inside a container."""
        return 'docker exec ' + flags + ' ' + self.containerPrefix + container + ' /bin/bash -c "' + command + '"'

    def _docker_cp(self, container_path):
        """Return a ``docker cp`` line copying *container_path* to the current directory."""
        return 'docker cp ' + self.containerPrefix + container_path + ' .'

    def _daemon(self, name):
        """Return the ``daemon`` line launching ``./my-<name>.sh`` with its log file."""
        scripts = self._scripts_path()
        return ('sudo daemon --unsafe --name=' + name + '_daemon --chdir=' + scripts
                + ' -o ' + scripts + '/' + name + '_' + self.testCase_id + '.log ./my-' + name + '.sh')

    def _recreate_scripts_dir(self, session):
        """Wipe and re-create the ``scripts`` directory on the EPC host."""
        scripts = self._scripts_path()
        session.command('if [ -d ' + scripts + ' ]; then ' + self._sudo('rm -Rf ' + scripts) + ' ; fi', self._PROMPT, 5)
        session.command('mkdir -p ' + scripts, self._PROMPT, 5)

    def _enter_legacy_scripts(self, session):
        """Source the legacy OAI environment and move into its ``scripts`` directory."""
        session.command('cd ' + self.SourceCodePath, self._PROMPT, 5)
        session.command('source oaienv', self._PROMPT, 5)
        session.command('cd scripts', self._PROMPT, 5)

    def _report(self, label):
        """Add an 'OK' row to the HTML report when a report object is attached."""
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow(label, 'OK', CONST.ALL_PROCESSES_OK)

    def _push_status(self, status_queue, found, label, ok_code, failed_code):
        """Put *ok_code* on the queue when *found* is a match, else log and put *failed_code*."""
        if found is None:
            logging.debug('\u001B[1;37;41m ' + label + ' Process Not Found! \u001B[0m')
            status_queue.put(failed_code)
        else:
            status_queue.put(ok_code)

#-----------------------------------------------------------
# EPC management functions
#-----------------------------------------------------------

    def InitializeHSS(self):
        """Start the HSS matching ``Type`` and add an 'OK' row to the HTML report.

        Exits the interpreter (``sys.exit``) when a mandatory parameter is empty.
        """
        self._exit_on_missing_parameters()
        mySSH = self._open_ssh()
        scripts = self._scripts_path()
        if self._type_is('OAI-Rel14-Docker'):
            logging.debug('Using the OAI EPC Release 14 Cassandra-based HSS in Docker')
            self._recreate_scripts_dir(mySSH)
            mySSH.command(self._docker_exec('-d', '-oai-hss', 'nohup tshark -i eth0 -i eth1 -w /tmp/hss_check_run.pcap 2>&1 > /dev/null'), self._PROMPT, 5)
            time.sleep(5)
            mySSH.command(self._docker_exec('-d', '-oai-hss', 'nohup ./bin/oai_hss -j ./etc/hss_rel14.json --reloadkey true > hss_check_run.log 2>&1'), self._PROMPT, 5)
        elif self._type_is('OAI-Rel14-CUPS'):
            logging.debug('Using the OAI EPC Release 14 Cassandra-based HSS')
            mySSH.command('cd ' + scripts, self._PROMPT, 5)
            logging.debug('\u001B[1m Launching tshark on all interfaces \u001B[0m')
            self.PcapFileName = 'epc_' + self.testCase_id + '.pcap'
            mySSH.command(self._sudo('rm -f ' + self.PcapFileName), self._PROMPT, 5)
            # The command echoes $USER first, so the user name is the text to wait for.
            mySSH.command('echo $USER; nohup sudo tshark -f "tcp port not 22 and port not 53" -i any -w ' + scripts + '/' + self.PcapFileName + ' > /tmp/tshark.log 2>&1 &', self.UserName, 5)
            mySSH.command(self._sudo('mkdir -p logs'), self._PROMPT, 5)
            mySSH.command(self._sudo('rm -f hss_' + self.testCase_id + '.log logs/hss*.*'), self._PROMPT, 5)
            mySSH.command('echo "oai_hss -j /usr/local/etc/oai/hss_rel14.json" > ./my-hss.sh', self._PROMPT, 5)
            mySSH.command('chmod 755 ./my-hss.sh', self._PROMPT, 5)
            mySSH.command(self._daemon('hss'), self._PROMPT, 5)
        elif self._type_is('OAI'):
            logging.debug('Using the OAI EPC HSS')
            self._enter_legacy_scripts(mySSH)
            mySSH.command(self._sudo('./run_hss 2>&1 | stdbuf -o0 awk \'{ print strftime("[%Y/%m/%d %H:%M:%S] ",systime()) $0 }\' | stdbuf -o0 tee -a hss_' + self.testCase_id + '.log &'), 'Core state: 2 -> 3', 35)
        elif self._type_is('ltebox'):
            logging.debug('Using the ltebox simulated HSS')
            self._recreate_scripts_dir(mySSH)
            mySSH.command('cd /opt/hss_sim0609', self._PROMPT, 5)
            mySSH.command(self._sudo('rm -f hss.log'), self._PROMPT, 5)
            mySSH.command(self._sudo('echo "Starting sudo session" && sudo su -c "screen -dm -S simulated_hss ./starthss"'), self._PROMPT, 5)
        else:
            logging.error('This option should not occur!')
        mySSH.close()
        self._report(self.Type)

    def InitializeMME(self):
        """Start the MME matching ``Type`` and add an 'OK' row to the HTML report.

        Exits the interpreter when a mandatory parameter is empty or, for the
        legacy ``OAI`` type, when the host name cannot be read back.
        """
        self._exit_on_missing_parameters()
        mySSH = self._open_ssh()
        if self._type_is('OAI-Rel14-Docker'):
            logging.debug('Using the OAI EPC Release 14 MME in Docker')
            mySSH.command(self._docker_exec('-d', '-oai-mme', 'nohup tshark -i eth0 -i lo:s10 -w /tmp/mme_check_run.pcap 2>&1 > /dev/null'), self._PROMPT, 5)
            time.sleep(5)
            mySSH.command(self._docker_exec('-d', '-oai-mme', 'nohup ./bin/oai_mme -c ./etc/mme.conf > mme_check_run.log 2>&1'), self._PROMPT, 5)
        elif self._type_is('OAI-Rel14-CUPS'):
            logging.debug('Using the OAI EPC Release 14 MME')
            mySSH.command('cd ' + self._scripts_path(), self._PROMPT, 5)
            mySSH.command(self._sudo('rm -f mme_' + self.testCase_id + '.log'), self._PROMPT, 5)
            mySSH.command('echo "./run_mme --config-file /usr/local/etc/oai/mme.conf --set-virt-if" > ./my-mme.sh', self._PROMPT, 5)
            mySSH.command('chmod 755 ./my-mme.sh', self._PROMPT, 5)
            mySSH.command(self._daemon('mme'), self._PROMPT, 5)
        elif self._type_is('OAI'):
            self._enter_legacy_scripts(mySSH)
            mySSH.command('stdbuf -o0 hostname', self._PROMPT, 5)
            # The pattern looks for the two-character sequences backslash-r and
            # backslash-n, not for real control characters.
            # NOTE(review): presumably getBefore() returns an escaped dump of
            # the session output -- confirm against the SSH helper.
            result = re.search(r'hostname\\r\\n(?P<host_name>[a-zA-Z0-9\-\_]+)\\r\\n', mySSH.getBefore())
            if result is None:
                logging.debug('\u001B[1;37;41m Hostname Not Found! \u001B[0m')
                sys.exit(1)
            mySSH.command(self._sudo('./run_mme 2>&1 | stdbuf -o0 tee -a mme_' + self.testCase_id + '.log &'), 'MME app initialization complete', 100)
        elif self._type_is('ltebox'):
            mySSH.command('cd /opt/ltebox/tools', self._PROMPT, 5)
            mySSH.command(self._sudo('./start_mme'), self._PROMPT, 5)
        else:
            logging.error('This option should not occur!')
        mySSH.close()
        self._report(self.Type)

    def SetMmeIPAddress(self):
        """Work out the MME IP address and store it in ``MmeIPAddress``.

        Does nothing when the EPC parameters are incomplete or the IP address
        is 'none'. For Docker deployments the address is read from
        ``docker inspect``; otherwise it is the EPC host address.
        """
        # Not an error if we don't need an EPC
        if self._missing_parameters():
            return
        if self.IPAddress == 'none':
            return
        # Only in case of Docker containers, MME IP address is not the EPC HOST IP address
        if self._type_is('OAI-Rel14-Docker'):
            mySSH = self._open_ssh()
            mySSH.command('docker inspect --format="MME_IP_ADDR = {{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}" ' + self.containerPrefix + '-oai-mme', self._PROMPT, 5)
            result = re.search(r'MME_IP_ADDR = (?P<mme_ip_addr>[0-9\.]+)', mySSH.getBefore())
            if result is not None:
                self.MmeIPAddress = result.group('mme_ip_addr')
                logging.debug('MME IP Address is ' + self.MmeIPAddress)
            else:
                # Previously silent: the address simply kept its old value.
                logging.error('Could not retrieve the MME IP address from docker inspect')
            mySSH.close()
        else:
            self.MmeIPAddress = self.IPAddress

    def InitializeSPGW(self):
        """Start the SPGW matching ``Type`` and add an 'OK' row to the HTML report.

        Exits the interpreter (``sys.exit``) when a mandatory parameter is empty.
        """
        self._exit_on_missing_parameters()
        mySSH = self._open_ssh()
        if self._type_is('OAI-Rel14-Docker'):
            logging.debug('Using the OAI EPC Release 14 SPGW-CUPS in Docker')
            mySSH.command(self._docker_exec('-d', '-oai-spgwc', 'nohup tshark -i eth0 -i lo:p5c -i lo:s5c -w /tmp/spgwc_check_run.pcap 2>&1 > /dev/null'), self._PROMPT, 5)
            mySSH.command(self._docker_exec('-d', '-oai-spgwu-tiny', 'nohup tshark -i eth0 -w /tmp/spgwu_check_run.pcap 2>&1 > /dev/null'), self._PROMPT, 5)
            time.sleep(5)
            mySSH.command(self._docker_exec('-d', '-oai-spgwc', 'nohup ./bin/oai_spgwc -o -c ./etc/spgw_c.conf > spgwc_check_run.log 2>&1'), self._PROMPT, 5)
            time.sleep(5)
            mySSH.command(self._docker_exec('-d', '-oai-spgwu-tiny', 'nohup ./bin/oai_spgwu -o -c ./etc/spgw_u.conf > spgwu_check_run.log 2>&1'), self._PROMPT, 5)
        elif self._type_is('OAI-Rel14-CUPS'):
            logging.debug('Using the OAI EPC Release 14 SPGW-CUPS')
            mySSH.command('cd ' + self._scripts_path(), self._PROMPT, 5)
            mySSH.command(self._sudo('rm -f spgwc_' + self.testCase_id + '.log spgwu_' + self.testCase_id + '.log'), self._PROMPT, 5)
            mySSH.command('echo "spgwc -c /usr/local/etc/oai/spgw_c.conf" > ./my-spgwc.sh', self._PROMPT, 5)
            mySSH.command('chmod 755 ./my-spgwc.sh', self._PROMPT, 5)
            mySSH.command(self._daemon('spgwc'), self._PROMPT, 5)
            time.sleep(5)
            mySSH.command('echo "spgwu -c /usr/local/etc/oai/spgw_u.conf" > ./my-spgwu.sh', self._PROMPT, 5)
            mySSH.command('chmod 755 ./my-spgwu.sh', self._PROMPT, 5)
            mySSH.command(self._daemon('spgwu'), self._PROMPT, 5)
        elif self._type_is('OAI'):
            self._enter_legacy_scripts(mySSH)
            mySSH.command(self._sudo('./run_spgw 2>&1 | stdbuf -o0 tee -a spgw_' + self.testCase_id + '.log &'), 'Initializing SPGW-APP task interface: DONE', 30)
        elif self._type_is('ltebox'):
            mySSH.command('cd /opt/ltebox/tools', self._PROMPT, 5)
            mySSH.command(self._sudo('./start_xGw'), self._PROMPT, 5)
        else:
            logging.error('This option should not occur!')
        mySSH.close()
        self._report(self.Type)

    def CheckHSSProcess(self, status_queue):
        """Put HSS_PROCESS_OK or HSS_PROCESS_FAILED on *status_queue*.

        An unrecognised ``Type`` is logged and reported as failed. Any error
        while checking sends SIGUSR1 to the parent process.
        """
        try:
            mySSH = self._open_ssh()
            if self._type_is('OAI-Rel14-Docker'):
                mySSH.command(self._docker_exec('-it', '-oai-hss', 'ps aux | grep oai_hss'), self._PROMPT, 5)
            else:
                mySSH.command('stdbuf -o0 ps -aux | grep --color=never hss | grep -v grep', self._PROMPT, 5)
            result = None
            if self._type_is('OAI-Rel14-CUPS') or self._type_is('OAI-Rel14-Docker'):
                result = re.search('oai_hss -j', mySSH.getBefore())
            elif self._type_is('OAI'):
                result = re.search(self._RUN_SCRIPT, mySSH.getBefore())
            elif self._type_is('ltebox'):
                result = re.search('hss_sim s6as diam_hss', mySSH.getBefore())
            else:
                logging.error('This should not happen!')
            self._push_status(status_queue, result, 'HSS', CONST.HSS_PROCESS_OK, CONST.HSS_PROCESS_FAILED)
            mySSH.close()
        except BaseException:
            # Same reach as the bare "except:" this replaces.
            # NOTE(review): presumably also meant to catch a sys.exit() raised
            # by the SSH helper -- confirm before narrowing.
            os.kill(os.getppid(),signal.SIGUSR1)

    def CheckMMEProcess(self, status_queue):
        """Put MME_PROCESS_OK or MME_PROCESS_FAILED on *status_queue*.

        An unrecognised ``Type`` is logged and reported as failed. Any error
        while checking sends SIGUSR1 to the parent process.
        """
        try:
            mySSH = self._open_ssh()
            if self._type_is('OAI-Rel14-Docker'):
                mySSH.command(self._docker_exec('-it', '-oai-mme', 'ps aux | grep oai_mme'), self._PROMPT, 5)
            else:
                mySSH.command('stdbuf -o0 ps -aux | grep --color=never mme | grep -v grep', self._PROMPT, 5)
            result = None
            if self._type_is('OAI-Rel14-Docker'):
                result = re.search('oai_mme -c ', mySSH.getBefore())
            elif self._type_is('OAI-Rel14-CUPS'):
                result = re.search('mme -c', mySSH.getBefore())
            elif self._type_is('OAI'):
                result = re.search(self._RUN_SCRIPT, mySSH.getBefore())
            elif self._type_is('ltebox'):
                result = re.search('mme', mySSH.getBefore())
            else:
                logging.error('This should not happen!')
            self._push_status(status_queue, result, 'MME', CONST.MME_PROCESS_OK, CONST.MME_PROCESS_FAILED)
            mySSH.close()
        except BaseException:
            # Same reach as the bare "except:" this replaces.
            # NOTE(review): presumably also meant to catch a sys.exit() raised
            # by the SSH helper -- confirm before narrowing.
            os.kill(os.getppid(),signal.SIGUSR1)

    def CheckSPGWProcess(self, status_queue):
        """Put SPGW_PROCESS_OK or SPGW_PROCESS_FAILED on *status_queue*.

        For Docker deployments both the SPGW-C and SPGW-U processes must be
        found. An unrecognised ``Type`` is logged and reported as failed. Any
        error while checking sends SIGUSR1 to the parent process.
        """
        try:
            mySSH = self._open_ssh()
            result = None
            if self._type_is('OAI-Rel14-Docker'):
                mySSH.command(self._docker_exec('-it', '-oai-spgwc', 'ps aux | grep oai_spgwc'), self._PROMPT, 5)
                result = re.search('oai_spgwc -o -c ', mySSH.getBefore())
                if result is not None:
                    # The control plane is up: the user plane decides the outcome.
                    mySSH.command(self._docker_exec('-it', '-oai-spgwu-tiny', 'ps aux | grep oai_spgwu'), self._PROMPT, 5)
                    result = re.search('oai_spgwu -o -c ', mySSH.getBefore())
            elif self._type_is('OAI-Rel14-CUPS'):
                mySSH.command('stdbuf -o0 ps -aux | grep --color=never spgw | grep -v grep', self._PROMPT, 5)
                result = re.search('spgwu -c ', mySSH.getBefore())
            elif self._type_is('OAI'):
                mySSH.command('stdbuf -o0 ps -aux | grep --color=never spgw | grep -v grep', self._PROMPT, 5)
                result = re.search(self._RUN_SCRIPT, mySSH.getBefore())
            elif self._type_is('ltebox'):
                mySSH.command('stdbuf -o0 ps -aux | grep --color=never xGw | grep -v grep', self._PROMPT, 5)
                result = re.search('xGw', mySSH.getBefore())
            else:
                logging.error('This should not happen!')
            self._push_status(status_queue, result, 'SPGW', CONST.SPGW_PROCESS_OK, CONST.SPGW_PROCESS_FAILED)
            mySSH.close()
        except BaseException:
            # Same reach as the bare "except:" this replaces.
            # NOTE(review): presumably also meant to catch a sys.exit() raised
            # by the SSH helper -- confirm before narrowing.
            os.kill(os.getppid(),signal.SIGUSR1)

    def TerminateHSS(self):
        """Stop the HSS (SIGINT first, SIGKILL if still listed) and add a report row."""
        mySSH = self._open_ssh()
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_exec('-it', '-oai-hss', 'killall --signal SIGINT oai_hss tshark'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command(self._docker_exec('-it', '-oai-hss', 'ps aux | grep oai_hss'), self._PROMPT, 5)
            result = re.search('oai_hss -j ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._docker_exec('-it', '-oai-hss', 'killall --signal SIGKILL oai_hss'), self._PROMPT, 5)
        elif self._type_is('OAI-Rel14-CUPS'):
            mySSH.command(self._sudo('killall --signal SIGINT oai_hss || true'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command('stdbuf -o0 ps -aux | grep hss | grep -v grep', self._PROMPT, 5)
            result = re.search('oai_hss -j', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGKILL oai_hss || true'), self._PROMPT, 5)
            # NOTE(review): nesting inferred -- launcher script removed unconditionally.
            mySSH.command('rm -f ' + self._scripts_path() + '/my-hss.sh', self._PROMPT, 5)
        elif self._type_is('OAI'):
            mySSH.command(self._sudo('killall --signal SIGINT run_hss oai_hss || true'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command('stdbuf -o0 ps -aux | grep hss | grep -v grep', self._PROMPT, 5)
            result = re.search(self._RUN_SCRIPT, mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGKILL run_hss oai_hss || true'), self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cd ' + self.SourceCodePath, self._PROMPT, 5)
            mySSH.command('cd scripts', self._PROMPT, 5)
            time.sleep(1)
            mySSH.command(self._sudo('killall --signal SIGKILL hss_sim'), self._PROMPT, 5)
        else:
            logging.error('This should not happen!')
        mySSH.close()
        self._report('N/A')

    def TerminateMME(self):
        """Stop the MME (SIGINT first, SIGKILL if still listed) and add a report row."""
        mySSH = self._open_ssh()
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_exec('-it', '-oai-mme', 'killall --signal SIGINT oai_mme tshark'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command(self._docker_exec('-it', '-oai-mme', 'ps aux | grep oai_mme'), self._PROMPT, 5)
            result = re.search('oai_mme -c ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._docker_exec('-it', '-oai-mme', 'killall --signal SIGKILL oai_mme'), self._PROMPT, 5)
        elif self._type_is('OAI'):
            # The 'OAI' prefix also covers 'OAI-Rel14-CUPS', which shares this branch.
            mySSH.command(self._sudo('killall --signal SIGINT run_mme mme || true'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command('stdbuf -o0 ps -aux | grep mme | grep -v grep', self._PROMPT, 5)
            result = re.search('mme -c', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGKILL run_mme mme || true'), self._PROMPT, 5)
            # NOTE(review): nesting inferred -- launcher script removed unconditionally.
            mySSH.command('rm -f ' + self._scripts_path() + '/my-mme.sh', self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cd /opt/ltebox/tools', self._PROMPT, 5)
            mySSH.command(self._sudo('./stop_mme'), self._PROMPT, 5)
        else:
            logging.error('This should not happen!')
        mySSH.close()
        self._report('N/A')

    def TerminateSPGW(self):
        """Stop the SPGW (SIGINT first, SIGKILL if still listed) and add a report row.

        For ``OAI-Rel14-CUPS`` this also stops the host-level tshark capture.
        """
        mySSH = self._open_ssh()
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_exec('-it', '-oai-spgwc', 'killall --signal SIGINT oai_spgwc tshark'), self._PROMPT, 5)
            mySSH.command(self._docker_exec('-it', '-oai-spgwu-tiny', 'killall --signal SIGINT oai_spgwu tshark'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command(self._docker_exec('-it', '-oai-spgwc', 'ps aux | grep oai_spgwc'), self._PROMPT, 5)
            result = re.search('oai_spgwc -o -c ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._docker_exec('-it', '-oai-spgwc', 'killall --signal SIGKILL oai_spgwc'), self._PROMPT, 5)
            mySSH.command(self._docker_exec('-it', '-oai-spgwu-tiny', 'ps aux | grep oai_spgwu'), self._PROMPT, 5)
            result = re.search('oai_spgwu -o -c ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._docker_exec('-it', '-oai-spgwu-tiny', 'killall --signal SIGKILL oai_spgwu'), self._PROMPT, 5)
        elif self._type_is('OAI-Rel14-CUPS'):
            mySSH.command(self._sudo('killall --signal SIGINT spgwc spgwu || true'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command('stdbuf -o0 ps -aux | grep spgw | grep -v grep', self._PROMPT, 5)
            result = re.search('spgwc -c |spgwu -c ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGKILL spgwc spgwu || true'), self._PROMPT, 5)
            # NOTE(review): nesting inferred -- launcher scripts removed unconditionally.
            mySSH.command('rm -f ' + self._scripts_path() + '/my-spgw*.sh', self._PROMPT, 5)
            mySSH.command('stdbuf -o0 ps -aux | grep tshark | grep -v grep', self._PROMPT, 5)
            result = re.search('-w ', mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGINT tshark || true'), self._PROMPT, 5)
                # NOTE(review): nesting inferred -- chmod kept with the tshark
                # shutdown; confirm it should not run unconditionally.
                mySSH.command(self._sudo('chmod 666 ' + self._scripts_path() + '/*.pcap'), self._PROMPT, 5)
        elif self._type_is('OAI'):
            mySSH.command(self._sudo('killall --signal SIGINT run_spgw spgw || true'), self._PROMPT, 5)
            time.sleep(2)
            mySSH.command('stdbuf -o0 ps -aux | grep spgw | grep -v grep', self._PROMPT, 5)
            result = re.search(self._RUN_SCRIPT, mySSH.getBefore())
            if result is not None:
                mySSH.command(self._sudo('killall --signal SIGKILL run_spgw spgw || true'), self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cd /opt/ltebox/tools', self._PROMPT, 5)
            mySSH.command(self._sudo('./stop_xGw'), self._PROMPT, 5)
        else:
            logging.error('This should not happen!')
        mySSH.close()
        self._report('N/A')

    def LogCollectHSS(self):
        """Bundle the HSS logs into ``hss.log.zip`` in the ``scripts`` directory."""
        mySSH = self._open_ssh()
        mySSH.command('cd ' + self._scripts_path(), self._PROMPT, 5)
        mySSH.command('rm -f hss.log.zip', self._PROMPT, 5)
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_cp('-oai-hss:/openair-hss/hss_check_run.log'), self._PROMPT, 60)
            mySSH.command(self._docker_cp('-oai-hss:/tmp/hss_check_run.pcap'), self._PROMPT, 60)
            mySSH.command('zip hss.log.zip hss_check_run.*', self._PROMPT, 60)
        elif self._type_is('OAI'):
            # The 'OAI' prefix also covers 'OAI-Rel14-CUPS', which adds extra files below.
            mySSH.command('zip hss.log.zip hss*.log', self._PROMPT, 60)
            mySSH.command(self._sudo('rm hss*.log'), self._PROMPT, 5)
            if self._type_is('OAI-Rel14-CUPS'):
                mySSH.command('zip hss.log.zip logs/hss*.* *.pcap', self._PROMPT, 60)
                mySSH.command(self._sudo('rm -f logs/hss*.* *.pcap'), self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cp /opt/hss_sim0609/hss.log .', self._PROMPT, 60)
            mySSH.command('zip hss.log.zip hss.log', self._PROMPT, 60)
        else:
            logging.error('This option should not occur!')
        mySSH.close()

    def LogCollectMME(self):
        """Bundle the MME logs into ``mme.log.zip`` in the ``scripts`` directory."""
        mySSH = self._open_ssh()
        mySSH.command('cd ' + self._scripts_path(), self._PROMPT, 5)
        mySSH.command('rm -f mme.log.zip', self._PROMPT, 5)
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_cp('-oai-mme:/openair-mme/mme_check_run.log'), self._PROMPT, 60)
            mySSH.command(self._docker_cp('-oai-mme:/tmp/mme_check_run.pcap'), self._PROMPT, 60)
            mySSH.command('zip mme.log.zip mme_check_run.*', self._PROMPT, 60)
        elif self._type_is('OAI'):
            # The 'OAI' prefix also covers 'OAI-Rel14-CUPS'.
            mySSH.command('zip mme.log.zip mme*.log', self._PROMPT, 60)
            mySSH.command(self._sudo('rm mme*.log'), self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cp /opt/ltebox/var/log/*Log.0 .', self._PROMPT, 5)
            mySSH.command('zip mme.log.zip mmeLog.0 s1apcLog.0 s1apsLog.0 s11cLog.0 libLog.0 s1apCodecLog.0', self._PROMPT, 60)
        else:
            logging.error('This option should not occur!')
        mySSH.close()

    def LogCollectSPGW(self):
        """Bundle the SPGW logs into ``spgw.log.zip`` in the ``scripts`` directory."""
        mySSH = self._open_ssh()
        mySSH.command('cd ' + self._scripts_path(), self._PROMPT, 5)
        mySSH.command('rm -f spgw.log.zip', self._PROMPT, 5)
        if self._type_is('OAI-Rel14-Docker'):
            mySSH.command(self._docker_cp('-oai-spgwc:/openair-spgwc/spgwc_check_run.log'), self._PROMPT, 60)
            mySSH.command(self._docker_cp('-oai-spgwu-tiny:/openair-spgwu-tiny/spgwu_check_run.log'), self._PROMPT, 60)
            mySSH.command(self._docker_cp('-oai-spgwc:/tmp/spgwc_check_run.pcap'), self._PROMPT, 60)
            mySSH.command(self._docker_cp('-oai-spgwu-tiny:/tmp/spgwu_check_run.pcap'), self._PROMPT, 60)
            mySSH.command('zip spgw.log.zip spgw*_check_run.*', self._PROMPT, 60)
        elif self._type_is('OAI'):
            # The 'OAI' prefix also covers 'OAI-Rel14-CUPS'.
            mySSH.command('zip spgw.log.zip spgw*.log', self._PROMPT, 60)
            mySSH.command(self._sudo('rm spgw*.log'), self._PROMPT, 5)
        elif self._type_is('ltebox'):
            mySSH.command('cp /opt/ltebox/var/log/xGwLog.0 .', self._PROMPT, 5)
            mySSH.command('zip spgw.log.zip xGwLog.0', self._PROMPT, 60)
        else:
            logging.error('This option should not occur!')
        mySSH.close()
# *-------------------------------------------------------------------------------
# * For more information about the OpenAirInterface (OAI) Software Alliance:
# * contact@openairinterface.org
# */
#---------------------------------------------------------------------
# Python for CI of OAI-eNB + COTS-UE
#
# Required Python Version
# Python 3.x
#
# Required Python Package
# pexpect
#---------------------------------------------------------------------

#-----------------------------------------------------------
# Functions Declaration
#-----------------------------------------------------------

def _print_option(description, value):
    """Print one option line followed by the value currently supplied for it.

    The value goes through str() so that non-string settings (for instance a
    boolean merge flag) are displayed instead of raising TypeError.
    """
    print(description + ' -- ' + str(value))

def GenericHelp(vers):
    """Print the usage banner and the list of supported ``--mode`` values."""
    separator = '----------------------------------------------------------------------------------------------------------------------'
    print(separator)
    print('main.py Ver: ' + str(vers))
    print(separator)
    print('python main.py [options]')
    print(' --help Show this help.')
    print(' --mode=[Mode]')
    print(' TesteNB')
    print(' InitiateHtml, FinalizeHtml')
    print(' TerminateeNB, TerminateUE, TerminateHSS, TerminateMME, TerminateSPGW')
    print(' LogCollectBuild, LogCollecteNB, LogCollectHSS, LogCollectMME, LogCollectSPGW, LogCollectPing, LogCollectIperf')

def GitSrvHelp(repository,branch,commit,mergeallow,targetbranch):
    """Print the RAN repository options with the values currently supplied."""
    _print_option(' --ranRepository=[OAI RAN Repository URL]', repository)
    _print_option(' --ranBranch=[OAI RAN Repository Branch]', branch)
    _print_option(' --ranCommitID=[OAI RAN Repository Commit SHA-1]', commit)
    _print_option(' --ranAllowMerge=[Allow Merge Request (with target branch) (true or false)]', mergeallow)
    _print_option(' --ranTargetBranch=[Target Branch in case of a Merge Request]', targetbranch)

def eNBSrvHelp(ipaddr, username, password, sourcepath):
    """Print the eNB server options with the values currently supplied."""
    _print_option(" --eNBIPAddress=[eNB's IP Address]", ipaddr)
    _print_option(" --eNBUserName=[eNB's Login User Name]", username)
    _print_option(" --eNBPassword=[eNB's Login Password]", password)
    _print_option(" --eNBSourceCodePath=[eNB's Source Code Path]", sourcepath)

def OAIUESrvHelp(ipaddr, username, password, sourcepath):
    """Print the OAI UE server options with the values currently supplied."""
    _print_option(" --UEIPAddress=[UE's IP Address]", ipaddr)
    _print_option(" --UEUserName=[UE's Login User Name]", username)
    _print_option(" --UEPassword=[UE's Login Password]", password)
    _print_option(" --UESourceCodePath=[UE's Source Code Path]", sourcepath)

def EPCSrvHelp(ipaddr, username, password, sourcepath, epctype):
    """Print the EPC server options with the values currently supplied."""
    _print_option(" --EPCIPAddress=[EPC's IP Address]", ipaddr)
    _print_option(" --EPCUserName=[EPC's Login User Name]", username)
    _print_option(" --EPCPassword=[EPC's Login Password]", password)
    _print_option(" --EPCSourceCodePath=[EPC's Source Code Path]", sourcepath)
    # OAI-Rel14-Docker added: the EPC management code accepts it as a type.
    _print_option(" --EPCType=[EPC's Type: OAI or ltebox or OAI-Rel14-CUPS or OAI-Rel14-Docker]", epctype)

def ADBSrvHelp(ipaddr, username, password):
    """Print the ADB server options with the values currently supplied."""
    _print_option(" --ADBIPAddress=[ADB's IP Address]", ipaddr)
    _print_option(" --ADBUserName=[ADB's Login User Name]", username)
    _print_option(" --ADBPassword=[ADB's Login Password]", password)

def XmlHelp(filename):
    """Print the XML test file option with the value currently supplied."""
    _print_option(' --XMLTestFile=[XML Test File to be run]', filename)
    # The note used to mention a non-existent --XMLFile option.
    print(' Note: multiple xml files can be specified (--XMLTestFile=File1 ... --XMLTestFile=FileN) when HTML headers are created ("InitiateHtml" mode)')
# * You may obtain a copy of the License at
# *
# * http://www.openairinterface.org/?page_id=698
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# *-------------------------------------------------------------------------------
# * For more information about the OpenAirInterface (OAI) Software Alliance:
# * contact@openairinterface.org
# */
#---------------------------------------------------------------------
# Python for CI of OAI-eNB + COTS-UE
#
# Required Python Version
# Python 3.x
#
# Required Python Package
# pexpect
#---------------------------------------------------------------------

#-----------------------------------------------------------
# Import
#-----------------------------------------------------------
import sys # arg
import re # reg
import logging
import os
import time
import subprocess
from multiprocessing import Process, Lock, SimpleQueue

# The project module "constants" is imported inside HTMLManagement._StatusCell,
# the only place that needs the process status codes.

#-----------------------------------------------------------
# Class Declaration
#-----------------------------------------------------------
class HTMLManagement():
    """Writes the CI report ``test_results.html`` piece by piece.

    Each ``CreateHtml*`` method opens the report in the current working
    directory, writes its fragment and closes the file again. The file object
    of the last write is kept in ``self.htmlFile`` (closed once the method
    returns).
    """

    def __init__(self):

        self.htmlFile = ''
        self.htmlHeaderCreated = False
        self.htmlFooterCreated = False

        self.ranRepository = ''
        self.ranBranch = ''
        self.ranCommitID = ''
        self.ranAllowMerge = False
        self.ranTargetBranch = ''

        self.nbTestXMLfiles = 0
        self.htmlTabRefs = []
        self.htmlTabNames = []
        self.htmlTabIcons = []
        self.testXMLfiles = []

        self.htmleNBFailureMsg = ''
        self.htmlUEFailureMsg = ''

        self.startTime = int(round(time.time() * 1000))
        self.testCase_id = ''
        self.desc = ''

        # Index 0 is the eNB machine, index 1 the UE machine (see CreateHtmlFooter).
        self.OsVersion = ['', '']
        self.KernelVersion = ['', '']
        self.UhdVersion = ['', '']
        self.UsrpBoard = ['', '']
        self.CpuNb = ['', '']
        self.CpuModel = ['', '']
        self.CpuMHz = ['', '']

        # These counters used to exist only after their setters had been called,
        # so the report writers raised AttributeError otherwise. 1 matches the
        # minimum enforced by SethtmlUEConnected.
        self.htmlUEConnected = 1
        self.htmlNb_Smartphones = 0
        self.htmlNb_CATM_Modules = 0

#-----------------------------------------------------------
# Setters and Getters
#-----------------------------------------------------------
    def SethtmlUEFailureMsg(self,huefa):
        self.htmlUEFailureMsg = huefa
    def GethtmlUEFailureMsg(self):
        return self.htmlUEFailureMsg
    def SetHmleNBFailureMsg(self, msg):
        self.htmleNBFailureMsg = msg

    def Setdesc(self, dsc):
        self.desc = dsc

    def SetstartTime(self, sttime):
        self.startTime = sttime

    def SettestCase_id(self, tcid):
        self.testCase_id = tcid
    def GettestCase_id(self):
        return self.testCase_id

    def SetranRepository(self, repository):
        self.ranRepository = repository
    def SetranAllowMerge(self, merge):
        self.ranAllowMerge = merge
    def SetranBranch(self, branch):
        self.ranBranch = branch
    def SetranCommitID(self, commitid):
        self.ranCommitID = commitid
    def SetranTargetBranch(self, tbranch):
        self.ranTargetBranch = tbranch

    def SethtmlUEConnected(self, nbUEs):
        # At least one UE column is always reported.
        self.htmlUEConnected = nbUEs if nbUEs > 0 else 1
    def SethtmlNb_Smartphones(self, nbUEs):
        self.htmlNb_Smartphones = nbUEs
    def SethtmlNb_CATM_Modules(self, nbUEs):
        self.htmlNb_CATM_Modules = nbUEs

    def SetnbTestXMLfiles(self, nb):
        self.nbTestXMLfiles = nb
    def GetnbTestXMLfiles(self):
        return self.nbTestXMLfiles

    # The four setters below append to their list rather than replace it.
    def SettestXMLfiles(self, xmlFile):
        self.testXMLfiles.append(xmlFile)
    def SethtmlTabRefs(self, tabRef):
        self.htmlTabRefs.append(tabRef)
    def SethtmlTabNames(self, tabName):
        self.htmlTabNames.append(tabName)
    def SethtmlTabIcons(self, tabIcon):
        self.htmlTabIcons.append(tabIcon)

    def SetOsVersion(self, version, idx):
        self.OsVersion[idx] = version
    def SetKernelVersion(self, version, idx):
        self.KernelVersion[idx] = version
    def SetUhdVersion(self, version, idx):
        self.UhdVersion[idx] = version
    def SetUsrpBoard(self, version, idx):
        self.UsrpBoard[idx] = version
    def SetCpuNb(self, nb, idx):
        self.CpuNb[idx] = nb
    def SetCpuModel(self, model, idx):
        self.CpuModel[idx] = model
    def SetCpuMHz(self, freq, idx):
        self.CpuMHz[idx] = freq

#-----------------------------------------------------------
# Private helpers
#-----------------------------------------------------------

    def _CommitMessage(self):
        """Return the subject line git reports for ``ranCommitID``.

        git is started from an argument list, so the commit identifier is never
        interpreted by a shell. With an empty identifier no revision is passed
        and git falls back to its default. If git is missing or fails, a warning
        is logged and '' is returned so that the report can still be written.
        """
        cmd = ['git', 'log', '-n1', '--pretty=format:%s']
        if self.ranCommitID != '':
            cmd.append(self.ranCommitID)
        try:
            message = subprocess.check_output(cmd, universal_newlines=True)
        except (OSError, subprocess.CalledProcessError) as err:
            logging.warning('Could not retrieve the commit message: ' + str(err))
            return ''
        return message.strip()

    def _StatusCell(self, status, processesStatus, machine):
        """Return the "Test Status" table cell for CreateHtmlTestRow."""
        text = str(status)
        if text == 'OK':
            return ' <td bgcolor = "lightgreen" >' + text + '</td>\n'
        if text != 'KO':
            return ' <td bgcolor = "orange" >' + text + '</td>\n'
        coral = ' <td bgcolor = "lightcoral" >'
        if processesStatus == 0:
            return coral + text + '</td>\n'
        # Local import: only this helper needs the CI status codes.
        import constants as CONST
        if processesStatus == CONST.ENB_PROCESS_FAILED:
            detail = 'KO - eNB process not found'
        elif processesStatus == CONST.OAI_UE_PROCESS_FAILED:
            detail = 'KO - OAI UE process not found'
        elif processesStatus in (CONST.ENB_PROCESS_SEG_FAULT, CONST.OAI_UE_PROCESS_SEG_FAULT):
            detail = 'KO - ' + machine + ' process ended in Segmentation Fault'
        elif processesStatus in (CONST.ENB_PROCESS_ASSERTION, CONST.OAI_UE_PROCESS_ASSERTION):
            detail = 'KO - ' + machine + ' process ended in Assertion'
        elif processesStatus == CONST.ENB_PROCESS_REALTIME_ISSUE:
            detail = 'KO - ' + machine + ' process faced Real Time issue(s)'
        elif processesStatus in (CONST.ENB_PROCESS_NOLOGFILE_TO_ANALYZE, CONST.OAI_UE_PROCESS_NOLOGFILE_TO_ANALYZE):
            # No log file to analyze: reported as an uncertain "OK?" in orange.
            return ' <td bgcolor = "orange" >OK?</td>\n'
        elif processesStatus == CONST.ENB_PROCESS_SLAVE_RRU_NOT_SYNCED:
            detail = 'KO - ' + machine + ' Slave RRU could not synch'
        elif processesStatus == CONST.OAI_UE_PROCESS_COULD_NOT_SYNC:
            detail = 'KO - UE could not sync'
        elif processesStatus == CONST.HSS_PROCESS_FAILED:
            detail = 'KO - HSS process not found'
        elif processesStatus == CONST.MME_PROCESS_FAILED:
            detail = 'KO - MME process not found'
        elif processesStatus == CONST.SPGW_PROCESS_FAILED:
            detail = 'KO - SPGW process not found'
        elif processesStatus == CONST.UE_IP_ADDRESS_ISSUE:
            detail = 'KO - Could not retrieve UE IP address'
        else:
            detail = text
        return coral + detail + '</td>\n'

    def _FailureCell(self, message, warningPattern):
        """Return the cell, spanning all UE columns, that shows ``message``.

        The background is red when the message matches the fatal pattern,
        orange when it matches ``warningPattern`` and white otherwise.
        """
        if re.search('ended with|faced real time issues', message) is not None:
            color = 'red'
        elif re.search(warningPattern, message) is not None:
            color = 'orange'
        else:
            color = 'white'
        return ' <td bgcolor = "' + color + '" colspan=' + str(self.htmlUEConnected) + '><pre style="background-color:' + color + '">' + message + '</pre></td>\n'

#-----------------------------------------------------------
# HTML structure creation functions
#-----------------------------------------------------------

    def CreateHtmlHeader(self, ADBIPAddress):
        """Create ``test_results.html`` with the page header and the tab pills.

        Does nothing once ``htmlHeaderCreated`` is set. ``ADBIPAddress`` equal
        to 'none' selects the single OAI UE summary line and forces
        ``htmlUEConnected`` to 1; any other value reports the smartphone and
        CAT-M module counters.
        """
        if self.htmlHeaderCreated:
            return
        logging.debug('\u001B[1m----------------------------------------\u001B[0m')
        logging.debug('\u001B[1m Creating HTML header \u001B[0m')
        logging.debug('\u001B[1m----------------------------------------\u001B[0m')
        cyan = ' <td bgcolor = "lightcyan" > '
        with open('test_results.html', 'w') as self.htmlFile:
            write = self.htmlFile.write
            write('<!DOCTYPE html>\n')
            write('<html class="no-js" lang="en-US">\n')
            write('<head>\n')
            write(' <meta name="viewport" content="width=device-width, initial-scale=1">\n')
            write(' <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">\n')
            write(' <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>\n')
            write(' <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script>\n')
            write(' <title>Test Results for TEMPLATE_JOB_NAME job build #TEMPLATE_BUILD_ID</title>\n')
            write('</head>\n')
            write('<body><div class="container">\n')
            write(' <br>\n')
            write(' <table style="border-collapse: collapse; border: none;">\n')
            write(' <tr style="border-collapse: collapse; border: none;">\n')
            write(' <td style="border-collapse: collapse; border: none;">\n')
            write(' <a href="http://www.openairinterface.org/">\n')
            write(' <img src="http://www.openairinterface.org/wp-content/uploads/2016/03/cropped-oai_final_logo2.png" alt="" border="none" height=50 width=150>\n')
            write(' </img>\n')
            write(' </a>\n')
            write(' </td>\n')
            write(' <td style="border-collapse: collapse; border: none; vertical-align: center;">\n')
            write(' <b><font size = "6">Job Summary -- Job: TEMPLATE_JOB_NAME -- Build-ID: TEMPLATE_BUILD_ID</font></b>\n')
            write(' </td>\n')
            write(' </tr>\n')
            write(' </table>\n')
            write(' <br>\n')
            write(' <div class="alert alert-info"><strong> <span class="glyphicon glyphicon-dashboard"></span> TEMPLATE_STAGE_NAME</strong></div>\n')
            write(' <table border = "1">\n')
            write(' <tr>\n')
            write(cyan + '<span class="glyphicon glyphicon-time"></span> Build Start Time (UTC) </td>\n')
            write(' <td>TEMPLATE_BUILD_TIME</td>\n')
            write(' </tr>\n')
            write(' <tr>\n')
            write(cyan + '<span class="glyphicon glyphicon-cloud-upload"></span> GIT Repository </td>\n')
            write(' <td><a href="' + self.ranRepository + '">' + self.ranRepository + '</a></td>\n')
            write(' </tr>\n')
            write(' <tr>\n')
            write(cyan + '<span class="glyphicon glyphicon-wrench"></span> Job Trigger </td>\n')
            if self.ranAllowMerge:
                write(' <td>Merge-Request</td>\n')
            else:
                write(' <td>Push to Branch</td>\n')
            write(' </tr>\n')
            write(' <tr>\n')
            if self.ranAllowMerge:
                write(cyan + '<span class="glyphicon glyphicon-log-out"></span> Source Branch </td>\n')
            else:
                write(cyan + '<span class="glyphicon glyphicon-tree-deciduous"></span> Branch</td>\n')
            write(' <td>' + self.ranBranch + '</td>\n')
            write(' </tr>\n')
            write(' <tr>\n')
            if self.ranAllowMerge:
                write(cyan + '<span class="glyphicon glyphicon-tag"></span> Source Commit ID </td>\n')
            else:
                write(cyan + '<span class="glyphicon glyphicon-tag"></span> Commit ID </td>\n')
            write(' <td>' + self.ranCommitID + '</td>\n')
            write(' </tr>\n')
            # NOTE(review): ranAllowMerge defaults to a bool, so this guard only
            # skips the row if the flag was set to '' -- confirm the intent.
            if self.ranAllowMerge != '':
                commit_message = self._CommitMessage()
                write(' <tr>\n')
                if self.ranAllowMerge:
                    write(cyan + '<span class="glyphicon glyphicon-comment"></span> Source Commit Message </td>\n')
                else:
                    write(cyan + '<span class="glyphicon glyphicon-comment"></span> Commit Message </td>\n')
                write(' <td>' + commit_message + '</td>\n')
                write(' </tr>\n')
            if self.ranAllowMerge:
                write(' <tr>\n')
                write(cyan + '<span class="glyphicon glyphicon-log-in"></span> Target Branch </td>\n')
                if self.ranTargetBranch == '':
                    write(' <td>develop</td>\n')
                else:
                    write(' <td>' + self.ranTargetBranch + '</td>\n')
                write(' </tr>\n')
            write(' </table>\n')

            phone = ' <h2><span class="glyphicon glyphicon-phone"></span> <span class="glyphicon glyphicon-menu-right"></span> '
            if ADBIPAddress != 'none':
                write(phone + str(self.htmlNb_Smartphones) + ' UE(s) is(are) connected to ADB bench server</h2>\n')
                write(phone + str(self.htmlNb_CATM_Modules) + ' CAT-M UE(s) is(are) connected to bench server</h2>\n')
            else:
                self.htmlUEConnected = 1
                write(phone + '1 OAI UE(s) is(are) connected to CI bench</h2>\n')
            write(' <br>\n')
            write(' <ul class="nav nav-pills">\n')
            # One pill per XML file. The __STATE_<name>__ marker is resolved
            # later by CreateHtmlTabFooter.
            for idx in range(self.nbTestXMLfiles):
                tabName = self.htmlTabNames[idx]
                pillMsg = ' <li><a data-toggle="pill" href="#' + self.htmlTabRefs[idx] + '">'
                pillMsg += '__STATE_' + tabName + '__' + tabName
                pillMsg += ' <span class="glyphicon glyphicon-' + self.htmlTabIcons[idx] + '"></span></a></li>\n'
                write(pillMsg)
            write(' </ul>\n')
            write(' <div class="tab-content">\n')

    def CreateHtmlTabHeader(self):
        """Open the tab pane and write the heading row of the result table.

        Creates the page header first when ``test_results.html`` does not exist.
        Sets ``htmlHeaderCreated`` so that test rows can be written afterwards.
        """
        if not self.htmlHeaderCreated:
            if not os.path.isfile('test_results.html'):
                self.CreateHtmlHeader('none')
            with open('test_results.html', 'a') as self.htmlFile:
                write = self.htmlFile.write
                if self.nbTestXMLfiles == 1:
                    write(' <div id="' + self.htmlTabRefs[0] + '" class="tab-pane fade">\n')
                    write(' <h3>Test Summary for <span class="glyphicon glyphicon-file"></span> ' + self.testXMLfiles[0] + '</h3>\n')
                else:
                    write(' <div id="build-tab" class="tab-pane fade">\n')
                write(' <table class="table" border = "1">\n')
                write(' <tr bgcolor = "#33CCFF" >\n')
                write(' <th>Relative Time (ms)</th>\n')
                write(' <th>Test Id</th>\n')
                write(' <th>Test Desc</th>\n')
                write(' <th>Test Options</th>\n')
                write(' <th>Test Status</th>\n')

                for ueIdx in range(self.htmlUEConnected):
                    write(' <th>UE' + str(ueIdx) + ' Status</th>\n')
                write(' </tr>\n')
        self.htmlHeaderCreated = True

    def CreateHtmlTabFooter(self, passStatus):
        """Close the result table of the tab and resolve its state marker.

        The ``__STATE_<first tab name>__`` marker written by CreateHtmlHeader is
        removed on success and replaced by a "remove" glyph on failure. The
        report is rewritten in-process (first occurrence per line, as the former
        ``sed`` substitution did), so no external command or delay is needed.
        """
        if (not self.htmlFooterCreated) and self.htmlHeaderCreated:
            with open('test_results.html', 'a') as self.htmlFile:
                write = self.htmlFile.write
                write(' <tr>\n')
                write(' <th bgcolor = "#33CCFF" colspan=3>Final Tab Status</th>\n')
                span = str(2 + self.htmlUEConnected)
                if passStatus:
                    write(' <th bgcolor = "green" colspan=' + span + '><font color="white">PASS <span class="glyphicon glyphicon-ok"></span> </font></th>\n')
                else:
                    write(' <th bgcolor = "red" colspan=' + span + '><font color="white">FAIL <span class="glyphicon glyphicon-remove"></span> </font></th>\n')
                write(' </tr>\n')
                write(' </table>\n')
                write(' </div>\n')
            marker = '__STATE_' + self.htmlTabNames[0] + '__'
            if passStatus:
                substitute = ''
            else:
                substitute = '<span class="glyphicon glyphicon-remove"></span>'
            with open('test_results.html', 'r') as report:
                reportLines = report.readlines()
            with open('test_results.html', 'w') as report:
                report.writelines(line.replace(marker, substitute, 1) for line in reportLines)
        # NOTE(review): the flag is cleared here and nothing in this class ever
        # sets it to True -- confirm whether True was intended.
        self.htmlFooterCreated = False

    def CreateHtmlFooter(self, passStatus):
        """Append the server characteristics, the final status and the page end.

        Does nothing when ``test_results.html`` does not exist. A machine is
        listed only if its OS version has been set.
        """
        if not os.path.isfile('test_results.html'):
            return
        label = ' <td><span class="label label-default">'
        with open('test_results.html', 'a') as self.htmlFile:
            write = self.htmlFile.write
            write('</div>\n')
            write(' <p></p>\n')
            write(' <table class="table table-condensed">\n')

            for idx, machine in enumerate(['eNB', 'UE']):
                if self.OsVersion[idx] == '':
                    continue

                write(' <tr>\n')
                # The title used to be hard-coded to 'eNB' for both machines.
                write(' <th colspan=8>' + str(machine) + ' Server Characteristics</th>\n')
                write(' </tr>\n')
                write(' <tr>\n')
                write(' <td>OS Version</td>\n')
                write(label + self.OsVersion[idx] + '</span></td>\n')
                write(' <td>Kernel Version</td>\n')
                write(label + self.KernelVersion[idx] + '</span></td>\n')
                write(' <td>UHD Version</td>\n')
                write(label + self.UhdVersion[idx] + '</span></td>\n')
                write(' <td>USRP Board</td>\n')
                write(label + self.UsrpBoard[idx] + '</span></td>\n')
                write(' </tr>\n')
                write(' <tr>\n')
                write(' <td>Nb CPUs</td>\n')
                write(label + self.CpuNb[idx] + '</span></td>\n')
                write(' <td>CPU Model Name</td>\n')
                write(label + self.CpuModel[idx] + '</span></td>\n')
                write(' <td>CPU Frequency</td>\n')
                write(label + self.CpuMHz[idx] + '</span></td>\n')
                write(' <td></td>\n')
                write(' <td></td>\n')
                write(' </tr>\n')

            write(' <tr>\n')
            write(' <th colspan=5 bgcolor = "#33CCFF">Final Status</th>\n')
            if passStatus:
                write(' <th colspan=3 bgcolor="green"><font color="white">PASS <span class="glyphicon glyphicon-ok"></span></font></th>\n')
            else:
                write(' <th colspan=3 bgcolor="red"><font color="white">FAIL <span class="glyphicon glyphicon-remove"></span> </font></th>\n')
            write(' </tr>\n')
            write(' </table>\n')
            write(' <p></p>\n')
            write(' <div class="well well-lg">End of Test Report -- Copyright <span class="glyphicon glyphicon-copyright-mark"></span> 2018 <a href="http://www.openairinterface.org/">OpenAirInterface</a>. All Rights Reserved.</div>\n')
            write('</div></body>\n')
            write('</html>\n')

    def CreateHtmlRetrySeparator(self, cntnumfails):
        """Append a full-width "Try Run #n" row between two attempts."""
        if (not self.htmlFooterCreated) and self.htmlHeaderCreated:
            with open('test_results.html', 'a') as self.htmlFile:
                write = self.htmlFile.write
                write(' <tr bgcolor = "#F0F0F0" >\n')
                write(' <td colspan=' + str(5+self.htmlUEConnected) + '><b> ---- Try Run #' + str(cntnumfails) + ' ---- </b></td>\n')
                write(' </tr>\n')

    def CreateHtmlTestRow(self, options, status, processesStatus, machine='eNB'):
        """Append one result row for the current test case.

        options: text shown in the "Test Options" column.
        status: 'OK', 'KO' or any other text (shown in orange).
        processesStatus: status code refining a 'KO' (0 means no detail).
        machine: machine name used in the process failure messages.

        A pending eNB failure message, else a pending UE failure message, fills
        the UE columns and is cleared afterwards; otherwise each UE column gets
        a '-'. Nothing is written before the tab header exists.
        """
        if self.htmlFooterCreated or (not self.htmlHeaderCreated):
            return
        with open('test_results.html', 'a') as self.htmlFile:
            write = self.htmlFile.write
            currentTime = int(round(time.time() * 1000)) - self.startTime
            write(' <tr>\n')
            write(' <td bgcolor = "lightcyan" >' + format(currentTime / 1000, '.1f') + '</td>\n')
            write(' <td bgcolor = "lightcyan" >' + self.testCase_id + '</td>\n')
            write(' <td>' + self.desc + '</td>\n')
            write(' <td>' + str(options) + '</td>\n')
            write(self._StatusCell(status, processesStatus, machine))
            if len(str(self.htmleNBFailureMsg)) > 2:
                write(self._FailureCell(self.htmleNBFailureMsg, 'showed|Reestablishment|Could not copy eNB logfile'))
                self.htmleNBFailureMsg = ''
            elif len(str(self.htmlUEFailureMsg)) > 2:
                write(self._FailureCell(self.htmlUEFailureMsg, 'showed|Could not copy UE logfile|oaitun_ue1 interface is either NOT mounted or NOT configured'))
                self.htmlUEFailureMsg = ''
            else:
                for _ in range(self.htmlUEConnected):
                    write(' <td>-</td>\n')
            write(' </tr>\n')

    def CreateHtmlTestRowQueue(self, options, status, ue_status, ue_queue):
        """Append one result row whose UE cells are read from ``ue_queue``.

        The first ``ue_status`` UE columns each take one item from the queue
        while it is not empty; the remaining columns show '-'. When ``status``
        is neither 'OK' nor 'KO', the row is rendered in orange.
        """
        if self.htmlFooterCreated or (not self.htmlHeaderCreated):
            return
        with open('test_results.html', 'a') as self.htmlFile:
            write = self.htmlFile.write
            currentTime = int(round(time.time() * 1000)) - self.startTime
            addOrangeBK = False
            write(' <tr>\n')
            write(' <td bgcolor = "lightcyan" >' + format(currentTime / 1000, '.1f') + '</td>\n')
            write(' <td bgcolor = "lightcyan" >' + self.testCase_id + '</td>\n')
            write(' <td>' + self.desc + '</td>\n')
            write(' <td>' + str(options) + '</td>\n')
            if str(status) == 'OK':
                write(' <td bgcolor = "lightgreen" >' + str(status) + '</td>\n')
            elif str(status) == 'KO':
                write(' <td bgcolor = "lightcoral" >' + str(status) + '</td>\n')
            else:
                addOrangeBK = True
                write(' <td bgcolor = "orange" >' + str(status) + '</td>\n')
            for ueIdx in range(self.htmlUEConnected):
                if ueIdx < ue_status and not ue_queue.empty():
                    if addOrangeBK:
                        write(' <td bgcolor = "orange" >' + str(ue_queue.get()).replace('white', 'orange') + '</td>\n')
                    else:
                        write(' <td>' + str(ue_queue.get()) + '</td>\n')
                else:
                    write(' <td>-</td>\n')
            write(' </tr>\n')

# ----- patch boundary: header of the next file in the diff, kept as comments -----
# diff --git a/ci-scripts/ran.py b/ci-scripts/ran.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..5b5add0adb5b968f8197e8bc91a283d7c32f9a7a
# --- /dev/null
# +++ b/ci-scripts/ran.py
# @@ -0,0 +1,1059 @@
#/*
# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The OpenAirInterface Software Alliance licenses this file to You under
# * the OAI Public License, Version 1.1 (the "License"); you may not use this file
# * except in compliance with the License.
# * You may obtain a copy of the License at
# *
# * http://www.openairinterface.org/?page_id=698
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
+# *------------------------------------------------------------------------------- +# * For more information about the OpenAirInterface (OAI) Software Alliance: +# * contact@openairinterface.org +# */ +#--------------------------------------------------------------------- +# Python for CI of OAI-eNB + COTS-UE +# +# Required Python Version +# Python 3.x +# +# Required Python Package +# pexpect +#--------------------------------------------------------------------- + +#----------------------------------------------------------- +# Import +#----------------------------------------------------------- +import sys # arg +import re # reg +import logging +import os +import time +from multiprocessing import Process, Lock, SimpleQueue + +#----------------------------------------------------------- +# OAI Testing modules +#----------------------------------------------------------- +import sshconnection as SSH +import epc +import helpreadme as HELP +import constants as CONST +import html + +#----------------------------------------------------------- +# Class Declaration +#----------------------------------------------------------- +class RANManagement(): + + def __init__(self): + + self.prematureExit = False + self.ranRepository = '' + self.ranBranch = '' + self.ranAllowMerge = False + self.ranCommitID = '' + self.ranTargetBranch = '' + self.eNBIPAddress = '' + self.eNBUserName = '' + self.eNBPassword = '' + self.eNBSourceCodePath = '' + self.eNB1IPAddress = '' + self.eNB1UserName = '' + self.eNB1Password = '' + self.eNB1SourceCodePath = '' + self.eNB2IPAddress = '' + self.eNB2UserName = '' + self.eNB2Password = '' + self.eNB2SourceCodePath = '' + self.Build_eNB_args = '' + self.backgroundBuild = False + self.backgroundBuildTestId = ['', '', ''] + self.Build_eNB_forced_workspace_cleanup = False + self.Initialize_eNB_args = '' + self.air_interface = 'lte' + self.eNB_instance = '' + self.eNB_serverId = '' + self.eNBLogFiles = ['', '', ''] + self.eNBOptions = ['', '', ''] + 
self.eNBmbmsEnables = [False, False, False] + self.eNBstatuses = [-1, -1, -1] + self.flexranCtrlInstalled = False + self.flexranCtrlStarted = False + self.testCase_id = '' + self.epcPcapFile = '' + self.htmlObj = None + self.epcObj = None + +#----------------------------------------------------------- +# Setters and Getters on Public members +#----------------------------------------------------------- + + def SetHtmlObj(self, obj): + self.htmlObj = obj + def SetEpcObj(self, obj): + self.epcObj = obj + + def SetflexranCtrlInstalled(self,fxrctin): + self.flexranCtrlInstalled = fxrctin + def GetflexranCtrlInstalled(self): + return self.flexranCtrlInstalled + def SetflexranCtrlStarted(self,fxrctst): + self.flexranCtrlStarted = fxrctst + def GetflexranCtrlStarted(self): + return self.flexranCtrlStarted + def SetpStatus(self, pSt): + self.pStatus = pSt + def SetranRepository(self, repository): + self.ranRepository = repository + def GetranRepository(self): + return self.ranRepository + def SetranBranch(self, branch): + self.ranBranch = branch + def GetranBranch(self): + return self.ranBranch + def SetranCommitID(self, commitid): + self.ranCommitID = commitid + def GetranCommitID(self): + return self.ranCommitID + def SeteNB_serverId(self, enbsrvid): + self.eNB_serverId = enbsrvid + def GeteNB_serverId(self): + return self.eNB_serverId + def SeteNBIPAddress(self, enbip): + self.eNBIPAddress = enbip + def GeteNBIPAddress(self): + return self.eNBIPAddress + def SeteNBUserName(self, enbusr): + self.eNBUserName = enbusr + def GeteNBUserName(self): + return self.eNBUserName + def SeteNBPassword(self, enbpw): + self.eNBPassword = enbpw + def GeteNBPassword(self): + return self.eNBPassword + def SeteNBSourceCodePath(self, enbcodepath): + self.eNBSourceCodePath = enbcodepath + def GeteNBSourceCodePath(self): + return self.eNBSourceCodePath + def SetranAllowMerge(self, merge): + self.ranAllowMerge = merge + def GetranAllowMerge(self): + return self.ranAllowMerge + def 
SetranTargetBranch(self, tbranch): + self.ranTargetBranch = tbranch + def GetranTargetBranch(self): + return self.ranTargetBranch + def SetBuild_eNB_args(self, enbbuildarg): + self.Build_eNB_args = enbbuildarg + def GetBuild_eNB_args(self): + return self.Build_eNB_args + def SetInitialize_eNB_args(self, initenbarg): + self.Initialize_eNB_args = initenbarg + def GetInitialize_eNB_args(self): + return self.Initialize_eNB_args + def SetbackgroundBuild(self, bkbuild): + self.backgroundBuild = bkbuild + def GetbackgroundBuild(self): + return self.backgroundBuild + def SetbackgroundBuildTestId(self, bkbuildid): + self.backgroundBuildTestId = bkbuildid + def GetbackgroundBuildTestId(self): + return self.backgroundBuildTestId + def SetBuild_eNB_forced_workspace_cleanup(self, fcdwspclean): + self.Build_eNB_forced_workspace_cleanup = fcdwspclean + def GetBuild_eNB_forced_workspace_cleanup(self): + return self.Build_eNB_forced_workspace_cleanup + def Setair_interface(self, airif): + self.air_interface = airif + def Getair_interface(self): + return self.air_interface + def SeteNB_instance(self, enbinst): + self.eNB_instance = enbinst + def GeteNB_instance(self): + return self.eNB_instance + + def SeteNBLogFile(self, enblog, idx): + self.eNBLogFiles[idx] = enblog + def GeteNBLogFile(self, idx): + return self.eNBLogFiles[idx] + + def GeteNBmbmsEnable(self, idx): + return self.eNBmbmsEnables[idx] + + def SeteNB1IPAddress(self,enb1ip): + self.eNB1IPAddress = enb1ip + def GeteNB1IPAddress(self): + return self.eNB1IPAddress + def SeteNB1UserName(self, enb1usr): + self.eNB1UserName = enb1usr + def GeteNB1UserName(self): + return self.eNB1UserName + def SeteNB1Password(self, enb1pw): + self.eNB1Password = enb1pw + def GeteNB1Password(self): + return self.eNB1Password + def SeteNB1SourceCodePath(self, enb1codepath): + self.eNB1SourceCodePath = enb1codepath + def GeteNB1SourceCodePath(self): + return self.eNB1SourceCodePath + + def SeteNB2IPAddress(self, enb2ip): + self.eNB2IPAddress = 
def GeteNB2IPAddress(self):
    """Return the IP address stored for eNB server '2'."""
    return self.eNB2IPAddress

def SeteNB2UserName(self, enb2usr):
    """Store the login user name for eNB server '2'."""
    self.eNB2UserName = enb2usr

def GeteNB2UserName(self):
    """Return the login user name stored for eNB server '2'."""
    return self.eNB2UserName

def SeteNB2Password(self, enb2pw):
    """Store the login password for eNB server '2'."""
    self.eNB2Password = enb2pw

def GeteNB2Password(self):
    """Return the login password stored for eNB server '2'."""
    return self.eNB2Password

def SeteNB2SourceCodePath(self, enb2codepath):
    """Store the remote workspace path for eNB server '2'."""
    self.eNB2SourceCodePath = enb2codepath

def GeteNB2SourceCodePath(self):
    """Return the remote workspace path stored for eNB server '2'."""
    return self.eNB2SourceCodePath

def SetprematureExit(self, premex):
    """Store the premature-exit flag."""
    self.prematureExit = premex

def GetprematureExit(self):
    """Return the premature-exit flag."""
    return self.prematureExit

#-----------------------------------------------------------
# RAN management functions
#-----------------------------------------------------------

def BuildeNB(self):
    """Check out the requested commit on the selected server and build the softmodem.

    The target server is chosen by ``self.eNB_serverId`` ('0', '1' or '2').
    The method clones or fetches ``self.ranRepository`` into the server's
    source path, optionally merges the target branch, and runs ``build_oai``
    with ``self.Build_eNB_args``.

    When ``self.backgroundBuild`` is false and ``LAST_BUILD_INFO.txt`` matches
    the requested commit / merge settings, the build is skipped and reported
    as OK. When ``self.backgroundBuild`` is true the build is started through
    ``daemon`` and the method returns without waiting for it.

    Exits the interpreter with 'Insufficient Parameter' when the repository,
    branch, commit, or any server credential is missing, or when
    ``self.eNB_serverId`` is not one of the known ids.
    """
    # Regex handed to every mySSH.command() call below (presumably the shell
    # prompt the SSH helper waits for -- TODO confirm against SSH class).
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'

    if self.ranRepository == '' or self.ranBranch == '' or self.ranCommitID == '':
        HELP.GenericHelp(CONST.Version)
        sys.exit('Insufficient Parameter')
    if self.eNB_serverId == '0':
        lIpAddr = self.eNBIPAddress
        lUserName = self.eNBUserName
        lPassWord = self.eNBPassword
        lSourcePath = self.eNBSourceCodePath
    elif self.eNB_serverId == '1':
        lIpAddr = self.eNB1IPAddress
        lUserName = self.eNB1UserName
        lPassWord = self.eNB1Password
        lSourcePath = self.eNB1SourceCodePath
    elif self.eNB_serverId == '2':
        lIpAddr = self.eNB2IPAddress
        lUserName = self.eNB2UserName
        lPassWord = self.eNB2Password
        lSourcePath = self.eNB2SourceCodePath
    else:
        # Unknown server id: fall through to the parameter check below
        # instead of failing later with an UnboundLocalError.
        lIpAddr = lUserName = lPassWord = lSourcePath = ''
    if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
        HELP.GenericHelp(CONST.Version)
        sys.exit('Insufficient Parameter')

    mySSH = SSH.SSHConnection()
    mySSH.open(lIpAddr, lUserName, lPassWord)
    # Check if we build a 5G-NR gNB or an LTE eNB
    if re.search('--gNB', self.Build_eNB_args) is not None:
        self.air_interface = 'nr'
    else:
        self.air_interface = 'lte'
    # Workaround for some servers: we need to erase the workspace completely
    if self.Build_eNB_forced_workspace_cleanup:
        mySSH.command('echo ' + lPassWord + ' | sudo -S rm -Rf ' + lSourcePath, shellPrompt, 15)
    if self.htmlObj is not None:
        self.testCase_id = self.htmlObj.GettestCase_id()
    else:
        self.testCase_id = '000000'
    # On RedHat/CentOS the .git extension is mandatory
    if re.search(r'([a-zA-Z0-9\:\-\.\/])+\.git', self.ranRepository) is not None:
        full_ran_repo_name = self.ranRepository
    else:
        full_ran_repo_name = self.ranRepository + '.git'
    mySSH.command('mkdir -p ' + lSourcePath, shellPrompt, 5)
    mySSH.command('cd ' + lSourcePath, shellPrompt, 5)
    mySSH.command('if [ ! -e .git ]; then stdbuf -o0 git clone ' + full_ran_repo_name + ' .; else stdbuf -o0 git fetch --prune; fi', shellPrompt, 600)
    # TODO(Raphael): add a check that git clone / git fetch went smoothly
    mySSH.command('git config user.email "jenkins@openairinterface.org"', shellPrompt, 5)
    mySSH.command('git config user.name "OAI Jenkins"', shellPrompt, 5)

    # Checking the BUILD INFO file: skip the build when the workspace already
    # holds a build of the very same commit with the same merge settings.
    if not self.backgroundBuild:
        mySSH.command('ls *.txt', shellPrompt, 5)
        if re.search('LAST_BUILD_INFO', mySSH.getBefore()) is not None:
            mismatch = False
            mySSH.command('grep SRC_COMMIT LAST_BUILD_INFO.txt', shellPrompt, 2)
            if re.search(self.ranCommitID, mySSH.getBefore()) is None:
                mismatch = True
            mySSH.command('grep MERGED_W_TGT_BRANCH LAST_BUILD_INFO.txt', shellPrompt, 2)
            if self.ranAllowMerge:
                if re.search('YES', mySSH.getBefore()) is None:
                    mismatch = True
                mySSH.command('grep TGT_BRANCH LAST_BUILD_INFO.txt', shellPrompt, 2)
                if self.ranTargetBranch == '':
                    result = re.search('develop', mySSH.getBefore())
                else:
                    result = re.search(self.ranTargetBranch, mySSH.getBefore())
                if result is None:
                    mismatch = True
            else:
                if re.search('NO', mySSH.getBefore()) is None:
                    mismatch = True
            if not mismatch:
                mySSH.close()
                if self.htmlObj is not None:
                    self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
                return

    mySSH.command('echo ' + lPassWord + ' | sudo -S git clean -x -d -ff', shellPrompt, 30)
    # If the commit ID is provided, use it to point to it
    if self.ranCommitID != '':
        mySSH.command('git checkout -f ' + self.ranCommitID, shellPrompt, 5)
    # If the branch is not develop, then it is a merge request and we need to do
    # the potential merge. Merge conflicts should already have been checked earlier.
    if self.ranAllowMerge:
        if self.ranTargetBranch == '':
            if (self.ranBranch != 'develop') and (self.ranBranch != 'origin/develop'):
                mySSH.command('git merge --ff origin/develop -m "Temporary merge for CI"', shellPrompt, 5)
        else:
            logging.debug('Merging with the target branch: ' + self.ranTargetBranch)
            mySSH.command('git merge --ff origin/' + self.ranTargetBranch + ' -m "Temporary merge for CI"', shellPrompt, 5)
    mySSH.command('source oaienv', shellPrompt, 5)
    mySSH.command('cd cmake_targets', shellPrompt, 5)
    mySSH.command('mkdir -p log', shellPrompt, 5)
    mySSH.command('chmod 777 log', shellPrompt, 5)
    # No need to clean the log directory (git clean did the trick)
    if self.backgroundBuild:
        mySSH.command('echo "./build_oai ' + self.Build_eNB_args + '" > ./my-lte-softmodem-build.sh', shellPrompt, 5)
        mySSH.command('chmod 775 ./my-lte-softmodem-build.sh', shellPrompt, 5)
        mySSH.command('echo ' + lPassWord + ' | sudo -S -E daemon --inherit --unsafe --name=build_enb_daemon --chdir=' + lSourcePath + '/cmake_targets -o ' + lSourcePath + '/cmake_targets/compile_oai_enb.log ./my-lte-softmodem-build.sh', shellPrompt, 5)
        mySSH.close()
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
        # Remembered so WaitBuildeNBisFinished can report under the same test id
        self.backgroundBuildTestId[int(self.eNB_instance)] = self.testCase_id
        return
    mySSH.command('stdbuf -o0 ./build_oai ' + self.Build_eNB_args + ' 2>&1 | stdbuf -o0 tee compile_oai_enb.log', 'Bypassing the Tests|build have failed', 1500)
    mySSH.close()
    self.checkBuildeNB(lIpAddr, lUserName, lPassWord, lSourcePath, self.testCase_id)
def WaitBuildeNBisFinished(self):
    """Wait for a background ``build_oai`` to end, then check its result.

    Polls the server selected by ``self.eNB_serverId`` with ``ps`` up to 40
    times, sleeping 30 seconds between polls, until no ``build_oai`` process
    is listed. It then calls ``checkBuildeNB`` with the test-case id that
    ``BuildeNB`` stored in ``self.backgroundBuildTestId``.

    Exits the interpreter with 'Insufficient Parameter' when a credential is
    missing or ``self.eNB_serverId`` is not one of '0', '1', '2'.
    """
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'

    if self.eNB_serverId == '0':
        lIpAddr = self.eNBIPAddress
        lUserName = self.eNBUserName
        lPassWord = self.eNBPassword
        lSourcePath = self.eNBSourceCodePath
    elif self.eNB_serverId == '1':
        lIpAddr = self.eNB1IPAddress
        lUserName = self.eNB1UserName
        lPassWord = self.eNB1Password
        lSourcePath = self.eNB1SourceCodePath
    elif self.eNB_serverId == '2':
        lIpAddr = self.eNB2IPAddress
        lUserName = self.eNB2UserName
        lPassWord = self.eNB2Password
        lSourcePath = self.eNB2SourceCodePath
    else:
        # Unknown server id: report it as a parameter error below rather
        # than crashing with an UnboundLocalError.
        lIpAddr = lUserName = lPassWord = lSourcePath = ''
    if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
        HELP.GenericHelp(CONST.Version)
        sys.exit('Insufficient Parameter')

    mySSH = SSH.SSHConnection()
    mySSH.open(lIpAddr, lUserName, lPassWord)
    remainingPolls = 40
    buildStillRunning = True
    while (remainingPolls > 0) and buildStillRunning:
        mySSH.command('ps aux | grep --color=never build_ | grep -v grep', shellPrompt, 3)
        if re.search('build_oai', mySSH.getBefore()) is None:
            buildStillRunning = False
        else:
            remainingPolls -= 1
            time.sleep(30)
    mySSH.close()
    self.checkBuildeNB(lIpAddr, lUserName, lPassWord, lSourcePath, self.backgroundBuildTestId[int(self.eNB_instance)])
def checkBuildeNB(self, lIpAddr, lUserName, lPassWord, lSourcePath, testcaseId):
    """Verify that the softmodem binary was built and archive the build logs.

    Args:
        lIpAddr, lUserName, lPassWord: credentials of the server that built.
        lSourcePath: remote workspace path on that server.
        testcaseId: id used to name the ``build_log_<id>`` directory and to
            tag the HTML report row.

    The build is considered successful when ``ls ran_build/build`` lists
    ``<air_interface>-softmodem``. On success ``../LAST_BUILD_INFO.txt`` is
    rewritten with the branch / commit / merge information. Build logs are
    moved to ``build_log_<id>``; when the build ran on a server other than
    '0', they are zipped and copied over to server '0'.

    Exits the interpreter with status 1 when the build failed.
    """
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'
    logDir = 'build_log_' + testcaseId
    zipName = 'tmp_build' + testcaseId + '.zip'

    if self.htmlObj is not None:
        self.htmlObj.SettestCase_id(testcaseId)
    mySSH = SSH.SSHConnection()
    mySSH.open(lIpAddr, lUserName, lPassWord)
    mySSH.command('cd ' + lSourcePath + '/cmake_targets', shellPrompt, 3)
    # NOTE(review): the listing is issued twice in the original code;
    # presumably to make sure getBefore() holds the full output -- confirm
    # before removing the duplicate.
    mySSH.command('ls ran_build/build', shellPrompt, 3)
    mySSH.command('ls ran_build/build', shellPrompt, 3)
    if self.air_interface == 'nr':
        nodeB_prefix = 'g'
    else:
        nodeB_prefix = 'e'
    buildStatus = re.search(self.air_interface + '-softmodem', mySSH.getBefore()) is not None
    if buildStatus:
        # Generating a BUILD INFO file
        mySSH.command('echo "SRC_BRANCH: ' + self.ranBranch + '" > ../LAST_BUILD_INFO.txt', shellPrompt, 2)
        mySSH.command('echo "SRC_COMMIT: ' + self.ranCommitID + '" >> ../LAST_BUILD_INFO.txt', shellPrompt, 2)
        if self.ranAllowMerge:
            mySSH.command('echo "MERGED_W_TGT_BRANCH: YES" >> ../LAST_BUILD_INFO.txt', shellPrompt, 2)
            if self.ranTargetBranch == '':
                mySSH.command('echo "TGT_BRANCH: develop" >> ../LAST_BUILD_INFO.txt', shellPrompt, 2)
            else:
                mySSH.command('echo "TGT_BRANCH: ' + self.ranTargetBranch + '" >> ../LAST_BUILD_INFO.txt', shellPrompt, 2)
        else:
            mySSH.command('echo "MERGED_W_TGT_BRANCH: NO" >> ../LAST_BUILD_INFO.txt', shellPrompt, 2)
    mySSH.command('mkdir -p ' + logDir, shellPrompt, 5)
    mySSH.command('mv log/* ' + logDir, shellPrompt, 5)
    mySSH.command('mv compile_oai_enb.log ' + logDir, shellPrompt, 5)
    if self.eNB_serverId != '0':
        # Logs are gathered on server '0': zip them here, pull the archive
        # to the local machine, then push and unpack it on server '0'.
        mySSH.command('cd cmake_targets', shellPrompt, 5)
        mySSH.command('if [ -e ' + zipName + ' ]; then rm -f ' + zipName + '; fi', shellPrompt, 5)
        mySSH.command('zip -r -qq ' + zipName + ' ' + logDir, shellPrompt, 5)
        mySSH.close()
        if os.path.isfile('./' + zipName):
            os.remove('./' + zipName)
        mySSH.copyin(lIpAddr, lUserName, lPassWord, lSourcePath + '/cmake_targets/' + zipName, '.')
        if os.path.isfile('./' + zipName):
            mySSH.copyout(self.eNBIPAddress, self.eNBUserName, self.eNBPassword, './' + zipName, self.eNBSourceCodePath + '/cmake_targets/.')
            os.remove('./' + zipName)
            mySSH.open(self.eNBIPAddress, self.eNBUserName, self.eNBPassword)
            mySSH.command('cd ' + self.eNBSourceCodePath + '/cmake_targets', shellPrompt, 5)
            mySSH.command('unzip -qq -DD ' + zipName, shellPrompt, 5)
            mySSH.command('rm -f ' + zipName, shellPrompt, 5)
            mySSH.close()
    else:
        mySSH.close()

    if buildStatus:
        logging.info('\u001B[1m Building OAI ' + nodeB_prefix + 'NB Pass\u001B[0m')
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'OK', CONST.ALL_PROCESSES_OK)
    else:
        logging.error('\u001B[1m Building OAI ' + nodeB_prefix + 'NB Failed\u001B[0m')
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow(self.Build_eNB_args, 'KO', CONST.ALL_PROCESSES_OK)
            self.htmlObj.CreateHtmlTabFooter(False)
        sys.exit(1)
def InitializeeNB(self):
    """Configure and launch the softmodem on the selected server, then wait for sync.

    Steps, in order:
      1. Select the server from ``self.eNB_serverId`` and validate credentials.
      2. When ``T_stdout`` is in ``self.Initialize_eNB_args`` and an EPC object
         is set, start ``tshark`` on the EPC host.
      3. Split ``self.Initialize_eNB_args`` into the ``.conf`` file and extra
         options; optionally build and start the T tracer ``record`` tool.
      4. Reset a B2xx USRP when the config file name calls for it.
      5. Copy the config file to ``ci-<name>`` and substitute the CI_* IP
         placeholders and the FLEXRAN_ENABLED flag.
      6. Start the softmodem (``nohup`` on CentOS 7, ``daemon`` elsewhere).
      7. Poll the log up to 20 times for the sync message. On timeout the
         method reports KO, stops the tracers, sets ``self.prematureExit``
         and returns.

    Exits the interpreter when credentials are missing, when
    ``self.pStatus`` is negative, or when no ``.conf`` file is found in the
    arguments.
    """
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'

    if self.eNB_serverId == '0':
        lIpAddr = self.eNBIPAddress
        lUserName = self.eNBUserName
        lPassWord = self.eNBPassword
        lSourcePath = self.eNBSourceCodePath
    elif self.eNB_serverId == '1':
        lIpAddr = self.eNB1IPAddress
        lUserName = self.eNB1UserName
        lPassWord = self.eNB1Password
        lSourcePath = self.eNB1SourceCodePath
    elif self.eNB_serverId == '2':
        lIpAddr = self.eNB2IPAddress
        lUserName = self.eNB2UserName
        lPassWord = self.eNB2Password
        lSourcePath = self.eNB2SourceCodePath
    else:
        # Unknown server id: report it as a parameter error below rather
        # than crashing with an UnboundLocalError.
        lIpAddr = lUserName = lPassWord = lSourcePath = ''
    if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
        HELP.GenericHelp(CONST.Version)
        sys.exit('Insufficient Parameter')

    if self.htmlObj is not None:
        self.testCase_id = self.htmlObj.GettestCase_id()
    else:
        self.testCase_id = '000000'
    mySSH = SSH.SSHConnection()

    if self.pStatus < 0:
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow(self.Initialize_eNB_args, 'KO', self.pStatus)
            self.htmlObj.CreateHtmlTabFooter(False)
        sys.exit(1)

    instance = int(self.eNB_instance)
    tracerRequested = re.search('T_stdout', str(self.Initialize_eNB_args)) is not None

    # If tracer option is on, run tshark on EPC side to capture traffic
    # between EPC and eNB
    if tracerRequested and (self.epcObj is not None):
        localEpcIpAddr = self.epcObj.GetIPAddress()
        localEpcUserName = self.epcObj.GetUserName()
        localEpcPassword = self.epcObj.GetPassword()
        mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
        mySSH.command('ip addr show | awk -f /tmp/active_net_interfaces.awk | egrep -v "lo|tun"', shellPrompt, 5)
        result = re.search(r'interfaceToUse=(?P<eth_interface>[a-zA-Z0-9\-\_]+)done', mySSH.getBefore())
        if result is not None:
            eth_interface = result.group('eth_interface')
            logging.debug('\u001B[1m Launching tshark on interface ' + eth_interface + '\u001B[0m')
            self.epcPcapFile = 'enb_' + self.testCase_id + '_s1log.pcap'
            mySSH.command('echo ' + localEpcPassword + ' | sudo -S rm -f /tmp/' + self.epcPcapFile, shellPrompt, 5)
            mySSH.command('echo $USER; nohup sudo tshark -f "host ' + lIpAddr + '" -i ' + eth_interface + ' -w /tmp/' + self.epcPcapFile + ' > /tmp/tshark.log 2>&1 &', localEpcUserName, 5)
        mySSH.close()

    mySSH.open(lIpAddr, lUserName, lPassWord)
    mySSH.command('cd ' + lSourcePath, shellPrompt, 5)
    # Initialize_eNB_args usually starts with -O followed by the location in repository
    full_config_file = self.Initialize_eNB_args.replace('-O ', '')
    extra_options = ''
    extIdx = full_config_file.find('.conf')
    if extIdx > 0:
        # Everything after ".conf" is passed verbatim to the softmodem
        extra_options = full_config_file[extIdx + 5:]
        # If tracer option is on, compile and run the T Tracer
        if re.search('T_stdout', str(extra_options)) is not None:
            logging.debug('\u001B[1m Compiling and launching T Tracer\u001B[0m')
            mySSH.command('cd common/utils/T/tracer', shellPrompt, 5)
            mySSH.command('make', shellPrompt, 10)
            mySSH.command('echo $USER; nohup ./record -d ../T_messages.txt -o ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '_record.raw -ON -off VCD -off HEAVY -off LEGACY_GROUP_TRACE -off LEGACY_GROUP_DEBUG > ' + lSourcePath + '/cmake_targets/enb_' + self.testCase_id + '_record.log 2>&1 &', lUserName, 5)
            mySSH.command('cd ' + lSourcePath, shellPrompt, 5)
        full_config_file = full_config_file[:extIdx + 5]
        config_path, config_file = os.path.split(full_config_file)
    else:
        sys.exit('Insufficient Parameter')
    ci_full_config_file = config_path + '/ci-' + config_file

    # RRU/RCC/DU configurations are considered up on "wait RUs" instead of "got sync"
    rruCheck = re.search('^rru|^rcc|^du.band', str(config_file)) is not None
    # Do not reset the board twice in IF4.5 case
    if re.search('^rru|^enb|^du.band', str(config_file)) is not None:
        mySSH.command('echo ' + lPassWord + ' | sudo -S uhd_find_devices', shellPrompt, 60)
        if re.search('type: b200', mySSH.getBefore()) is not None:
            logging.debug('Found a B2xx device --> resetting it')
            mySSH.command('echo ' + lPassWord + ' | sudo -S b2xx_fx3_utils --reset-device', shellPrompt, 10)
            # Reloading FPGA bin firmware
            mySSH.command('echo ' + lPassWord + ' | sudo -S uhd_find_devices', shellPrompt, 60)

    # Make a copy and adapt to EPC / eNB IP addresses
    mySSH.command('cp ' + full_config_file + ' ' + ci_full_config_file, shellPrompt, 5)
    substitutions = []
    if self.epcObj is not None:
        substitutions.append(('CI_MME_IP_ADDR', self.epcObj.GetMmeIPAddress()))
    substitutions.append(('CI_ENB_IP_ADDR', lIpAddr))
    substitutions.append(('CI_RCC_IP_ADDR', self.eNBIPAddress))
    substitutions.append(('CI_RRU1_IP_ADDR', self.eNB1IPAddress))
    substitutions.append(('CI_RRU2_IP_ADDR', self.eNB2IPAddress))
    for placeholder, value in substitutions:
        mySSH.command("sed -i -e 's/" + placeholder + "/" + value + "/' " + ci_full_config_file, shellPrompt, 2)
    if self.flexranCtrlInstalled and self.flexranCtrlStarted:
        flexranFlag = 'yes'
    else:
        flexranFlag = 'no'
    mySSH.command('sed -i -e \'s/FLEXRAN_ENABLED.*;/FLEXRAN_ENABLED = "' + flexranFlag + '";/\' ' + ci_full_config_file, shellPrompt, 2)

    self.eNBmbmsEnables[instance] = False
    mySSH.command('grep enable_enb_m2 ' + ci_full_config_file, shellPrompt, 2)
    if re.search('yes', mySSH.getBefore()) is not None:
        self.eNBmbmsEnables[instance] = True
        logging.debug('\u001B[1m MBMS is enabled on this eNB\u001B[0m')
    eNBinNoS1 = False
    if re.search('noS1', str(self.Initialize_eNB_args)) is not None:
        eNBinNoS1 = True
        logging.debug('\u001B[1m eNB is in noS1 configuration \u001B[0m')

    # Launch eNB with the modified config file
    runScript = './my-lte-softmodem-run' + str(self.eNB_instance) + '.sh'
    logFile = 'enb_' + self.testCase_id + '.log'
    mySSH.command('source oaienv', shellPrompt, 5)
    mySSH.command('cd cmake_targets', shellPrompt, 5)
    mySSH.command('echo "ulimit -c unlimited && ./ran_build/build/' + self.air_interface + '-softmodem -O ' + lSourcePath + '/' + ci_full_config_file + extra_options + '" > ' + runScript, shellPrompt, 5)
    mySSH.command('chmod 775 ' + runScript, shellPrompt, 5)
    mySSH.command('echo ' + lPassWord + ' | sudo -S rm -Rf ' + logFile, shellPrompt, 5)
    mySSH.command('hostnamectl', shellPrompt, 5)
    if re.search('CentOS Linux 7', mySSH.getBefore()) is not None:
        mySSH.command('echo $USER; nohup sudo ' + runScript + ' > ' + lSourcePath + '/cmake_targets/' + logFile + ' 2>&1 &', lUserName, 10)
    else:
        mySSH.command('echo ' + lPassWord + ' | sudo -S -E daemon --inherit --unsafe --name=enb' + str(self.eNB_instance) + '_daemon --chdir=' + lSourcePath + '/cmake_targets -o ' + lSourcePath + '/cmake_targets/' + logFile + ' ' + runScript, shellPrompt, 5)
    self.eNBLogFiles[instance] = logFile
    if extra_options != '':
        self.eNBOptions[instance] = extra_options

    time.sleep(6)
    doLoop = True
    loopCounter = 20
    enbDidSync = False
    while doLoop:
        loopCounter = loopCounter - 1
        if loopCounter == 0:
            # In case of T tracer recording, we may need to kill it
            if tracerRequested:
                mySSH.command('killall --signal SIGKILL record', shellPrompt, 5)
            mySSH.close()
            doLoop = False
            logging.error('\u001B[1;37;41m eNB logging system did not show got sync! \u001B[0m')
            if self.htmlObj is not None:
                self.htmlObj.CreateHtmlTestRow('-O ' + config_file + extra_options, 'KO', CONST.ALL_PROCESSES_OK)
            # In case of T tracer recording, we need to kill tshark on EPC side
            if tracerRequested and (self.epcObj is not None):
                localEpcIpAddr = self.epcObj.GetIPAddress()
                localEpcUserName = self.epcObj.GetUserName()
                localEpcPassword = self.epcObj.GetPassword()
                mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
                logging.debug('\u001B[1m Stopping tshark \u001B[0m')
                mySSH.command('echo ' + localEpcPassword + ' | sudo -S killall --signal SIGKILL tshark', shellPrompt, 5)
                if self.epcPcapFile != '':
                    time.sleep(0.5)
                    mySSH.command('echo ' + localEpcPassword + ' | sudo -S chmod 666 /tmp/' + self.epcPcapFile, shellPrompt, 5)
                mySSH.close()
                time.sleep(1)
                if self.epcPcapFile != '':
                    copyin_res = mySSH.copyin(localEpcIpAddr, localEpcUserName, localEpcPassword, '/tmp/' + self.epcPcapFile, '.')
                    if copyin_res == 0:
                        mySSH.copyout(lIpAddr, lUserName, lPassWord, self.epcPcapFile, lSourcePath + '/cmake_targets/.')
            self.prematureExit = True
            return
        else:
            mySSH.command('stdbuf -o0 cat ' + logFile + ' | egrep --text --color=never -i "wait|sync|Starting"', shellPrompt, 4)
            if rruCheck:
                result = re.search('wait RUs', mySSH.getBefore())
            else:
                result = re.search('got sync|Starting F1AP at CU', mySSH.getBefore())
            if result is None:
                time.sleep(6)
            else:
                doLoop = False
                enbDidSync = True
                time.sleep(10)

    if enbDidSync and eNBinNoS1:
        # NOTE(review): ifconfig is issued twice in the original code;
        # presumably so getBefore() holds the full output -- confirm.
        mySSH.command('ifconfig oaitun_enb1', shellPrompt, 4)
        mySSH.command('ifconfig oaitun_enb1', shellPrompt, 4)
        if re.search('inet addr:1|inet 1', mySSH.getBefore()) is not None:
            logging.debug('\u001B[1m oaitun_enb1 interface is mounted and configured\u001B[0m')
        else:
            logging.error('\u001B[1m oaitun_enb1 interface is either NOT mounted or NOT configured\u001B[0m')
        if self.eNBmbmsEnables[instance]:
            mySSH.command('ifconfig oaitun_enm1', shellPrompt, 4)
            if re.search('inet addr', mySSH.getBefore()) is not None:
                logging.debug('\u001B[1m oaitun_enm1 interface is mounted and configured\u001B[0m')
            else:
                logging.error('\u001B[1m oaitun_enm1 interface is either NOT mounted or NOT configured\u001B[0m')
    if enbDidSync:
        # Remember on which server this instance is running
        self.eNBstatuses[instance] = int(self.eNB_serverId)

    mySSH.close()
    if self.htmlObj is not None:
        self.htmlObj.CreateHtmlTestRow('-O ' + config_file + extra_options, 'OK', CONST.ALL_PROCESSES_OK)
    logging.debug('\u001B[1m Initialize eNB Completed\u001B[0m')
def CheckeNBProcess(self, status_queue):
    """Report on ``status_queue`` whether the softmodem process is running.

    The server is chosen from ``self.eNBstatuses[0]`` (1 -> eNB1, 2 -> eNB2,
    anything else -> the default eNB server). ``CONST.ENB_PROCESS_OK`` or
    ``CONST.ENB_PROCESS_FAILED`` is put on the queue depending on whether
    ``ps`` lists ``<air_interface>-softmodem``.

    Any failure while checking sends SIGUSR1 to the parent process instead
    of propagating.
    """
    try:
        # At least the instance 0 SHALL be on!
        if self.eNBstatuses[0] == 1:
            lIpAddr = self.eNB1IPAddress
            lUserName = self.eNB1UserName
            lPassWord = self.eNB1Password
        elif self.eNBstatuses[0] == 2:
            lIpAddr = self.eNB2IPAddress
            lUserName = self.eNB2UserName
            lPassWord = self.eNB2Password
        else:
            # Status 0 and any unknown status both use the default server
            lIpAddr = self.eNBIPAddress
            lUserName = self.eNBUserName
            lPassWord = self.eNBPassword
        processName = self.air_interface + '-softmodem'
        mySSH = SSH.SSHConnection()
        mySSH.open(lIpAddr, lUserName, lPassWord)
        # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
        mySSH.command('stdbuf -o0 ps -aux | grep --color=never ' + processName + ' | grep -v grep', r'\$', 5)
        if re.search(processName, mySSH.getBefore()) is None:
            logging.debug('\u001B[1;37;41m eNB Process Not Found! \u001B[0m')
            status_queue.put(CONST.ENB_PROCESS_FAILED)
        else:
            status_queue.put(CONST.ENB_PROCESS_OK)
        mySSH.close()
    except BaseException:
        # Deliberately as broad as the original bare "except:": the SSH
        # helper may presumably abort through sys.exit(), and the parent
        # must be notified in that case too -- TODO confirm against SSH class.
        os.kill(os.getppid(), signal.SIGUSR1)
def TerminateeNB(self):
    """Stop the softmodem on the selected server and analyze its log file.

    The softmodem is stopped through ``daemon --stop`` and SIGINT, then
    SIGKILL if it is still listed after 10 seconds. Afterwards:

    * With ``T_stdout`` in ``self.Initialize_eNB_args`` and an EPC object set,
      tshark is stopped on the EPC host, the pcap is copied to the eNB
      server, the T tracer record is replayed, and the extracted log is
      analyzed.
    * Otherwise the log file recorded for this instance (if any) is copied
      locally and analyzed. A negative analysis status reports KO and sets
      ``self.prematureExit``.

    Exits the interpreter with 'Insufficient Parameter' when a credential is
    missing or ``self.eNB_serverId`` is not one of '0', '1', '2'.
    """
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'

    if self.eNB_serverId == '0':
        lIpAddr = self.eNBIPAddress
        lUserName = self.eNBUserName
        lPassWord = self.eNBPassword
        lSourcePath = self.eNBSourceCodePath
    elif self.eNB_serverId == '1':
        lIpAddr = self.eNB1IPAddress
        lUserName = self.eNB1UserName
        lPassWord = self.eNB1Password
        lSourcePath = self.eNB1SourceCodePath
    elif self.eNB_serverId == '2':
        lIpAddr = self.eNB2IPAddress
        lUserName = self.eNB2UserName
        lPassWord = self.eNB2Password
        lSourcePath = self.eNB2SourceCodePath
    else:
        # Unknown server id: report it as a parameter error below rather
        # than crashing with an UnboundLocalError.
        lIpAddr = lUserName = lPassWord = lSourcePath = ''
    if lIpAddr == '' or lUserName == '' or lPassWord == '' or lSourcePath == '':
        HELP.GenericHelp(CONST.Version)
        sys.exit('Insufficient Parameter')

    instance = int(self.eNB_instance)
    mySSH = SSH.SSHConnection()
    mySSH.open(lIpAddr, lUserName, lPassWord)
    mySSH.command('cd ' + lSourcePath + '/cmake_targets', shellPrompt, 5)
    if self.air_interface == 'lte':
        nodeB_prefix = 'e'
    else:
        nodeB_prefix = 'g'
    mySSH.command('stdbuf -o0 ps -aux | grep --color=never softmodem | grep -v grep', shellPrompt, 5)
    if re.search('-softmodem', mySSH.getBefore()) is not None:
        # Graceful stop first ...
        mySSH.command('echo ' + lPassWord + ' | sudo -S daemon --name=enb' + str(self.eNB_instance) + '_daemon --stop', shellPrompt, 5)
        mySSH.command('echo ' + lPassWord + ' | sudo -S killall --signal SIGINT -r .*-softmodem || true', shellPrompt, 5)
        time.sleep(10)
        mySSH.command('stdbuf -o0 ps -aux | grep --color=never softmodem | grep -v grep', shellPrompt, 5)
        if re.search('-softmodem', mySSH.getBefore()) is not None:
            # ... then force-kill whatever is still alive
            mySSH.command('echo ' + lPassWord + ' | sudo -S killall --signal SIGKILL -r .*-softmodem || true', shellPrompt, 5)
            time.sleep(5)
    mySSH.command('rm -f my-lte-softmodem-run' + str(self.eNB_instance) + '.sh', shellPrompt, 5)
    mySSH.close()

    # If tracer option is on, stop tshark on EPC side
    tracerRequested = re.search('T_stdout', str(self.Initialize_eNB_args)) is not None
    if tracerRequested and (self.epcObj is not None):
        localEpcIpAddr = self.epcObj.GetIPAddress()
        localEpcUserName = self.epcObj.GetUserName()
        localEpcPassword = self.epcObj.GetPassword()
        mySSH.open(localEpcIpAddr, localEpcUserName, localEpcPassword)
        logging.debug('\u001B[1m Stopping tshark \u001B[0m')
        mySSH.command('echo ' + localEpcPassword + ' | sudo -S killall --signal SIGKILL tshark', shellPrompt, 5)
        time.sleep(1)
        if self.epcPcapFile != '':
            mySSH.command('echo ' + localEpcPassword + ' | sudo -S chmod 666 /tmp/' + self.epcPcapFile, shellPrompt, 5)
            mySSH.copyin(localEpcIpAddr, localEpcUserName, localEpcPassword, '/tmp/' + self.epcPcapFile, '.')
            mySSH.copyout(lIpAddr, lUserName, lPassWord, self.epcPcapFile, lSourcePath + '/cmake_targets/.')
        mySSH.close()

        logging.debug('\u001B[1m Replaying RAW record file\u001B[0m')
        mySSH.open(lIpAddr, lUserName, lPassWord)
        mySSH.command('cd ' + lSourcePath + '/common/utils/T/tracer/', shellPrompt, 5)
        enbLogFile = self.eNBLogFiles[instance]
        raw_record_file = enbLogFile.replace('.log', '_record.raw')
        replay_log_file = enbLogFile.replace('.log', '_replay.log')
        extracted_txt_file = enbLogFile.replace('.log', '_extracted_messages.txt')
        extracted_log_file = enbLogFile.replace('.log', '_extracted_messages.log')
        targetsDir = lSourcePath + '/cmake_targets/'
        mySSH.command('./extract_config -i ' + targetsDir + raw_record_file + ' > ' + targetsDir + extracted_txt_file, shellPrompt, 5)
        mySSH.command('echo $USER; nohup ./replay -i ' + targetsDir + raw_record_file + ' > ' + targetsDir + replay_log_file + ' 2>&1 &', lUserName, 5)
        mySSH.command('./textlog -d ' + targetsDir + extracted_txt_file + ' -no-gui -ON -full > ' + targetsDir + extracted_log_file, shellPrompt, 5)
        mySSH.close()
        mySSH.copyin(lIpAddr, lUserName, lPassWord, targetsDir + extracted_log_file, '.')
        logging.debug('\u001B[1m Analyzing eNB replay logfile \u001B[0m')
        # The analysis status is not used on this path: the row is always OK
        self.AnalyzeLogFile_eNB(extracted_log_file)
        if self.htmlObj is not None:
            self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
        self.eNBLogFiles[instance] = ''
    else:
        fileToAnalyze = self.eNBLogFiles[instance]
        self.eNBLogFiles[instance] = ''
        if fileToAnalyze != '':
            copyin_res = mySSH.copyin(lIpAddr, lUserName, lPassWord, lSourcePath + '/cmake_targets/' + fileToAnalyze, '.')
            if copyin_res == -1:
                logging.debug('\u001B[1;37;41m Could not copy ' + nodeB_prefix + 'NB logfile to analyze it! \u001B[0m')
                if self.htmlObj is not None:
                    self.htmlObj.SetHmleNBFailureMsg('Could not copy ' + nodeB_prefix + 'NB logfile to analyze it!')
                    self.htmlObj.CreateHtmlTestRow('N/A', 'KO', CONST.ENB_PROCESS_NOLOGFILE_TO_ANALYZE)
                self.eNBmbmsEnables[instance] = False
                return
            if self.eNB_serverId != '0':
                # Logs are gathered on server '0'
                mySSH.copyout(self.eNBIPAddress, self.eNBUserName, self.eNBPassword, './' + fileToAnalyze, self.eNBSourceCodePath + '/cmake_targets/')
            logging.debug('\u001B[1m Analyzing ' + nodeB_prefix + 'NB logfile \u001B[0m ' + fileToAnalyze)
            logStatus = self.AnalyzeLogFile_eNB(fileToAnalyze)
            if logStatus < 0:
                if self.htmlObj is not None:
                    self.htmlObj.CreateHtmlTestRow('N/A', 'KO', logStatus)
                # Fixed: the original assigned to the misspelled attribute
                # "preamtureExit", so the flag read by GetprematureExit()
                # was never raised on a failed log analysis.
                self.prematureExit = True
                self.eNBmbmsEnables[instance] = False
                return
            else:
                if self.htmlObj is not None:
                    self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
        else:
            if self.htmlObj is not None:
                self.htmlObj.CreateHtmlTestRow('N/A', 'OK', CONST.ALL_PROCESSES_OK)
    self.eNBmbmsEnables[instance] = False
    self.eNBstatuses[instance] = -1
def LogCollecteNB(self):
    """Zip the eNB logs, core dumps, tracer records and captures on server '0'.

    Runs in ``<eNBSourceCodePath>/cmake_targets``: removes any previous
    ``enb.log.zip``, creates a new one from ``enb*.log core*
    enb_*record.raw enb_*.pcap enb_*txt``, then deletes those source files.
    """
    # Raw string: '\$' is an invalid str escape and warns on Python >= 3.12.
    shellPrompt = r'\$'
    collectedFiles = 'enb*.log core* enb_*record.raw enb_*.pcap enb_*txt'
    sudoPrefix = 'echo ' + self.eNBPassword + ' | sudo -S '

    mySSH = SSH.SSHConnection()
    mySSH.open(self.eNBIPAddress, self.eNBUserName, self.eNBPassword)
    mySSH.command('cd ' + self.eNBSourceCodePath, shellPrompt, 5)
    mySSH.command('cd cmake_targets', shellPrompt, 5)
    mySSH.command(sudoPrefix + 'rm -f enb.log.zip', shellPrompt, 5)
    mySSH.command(sudoPrefix + 'zip enb.log.zip ' + collectedFiles, shellPrompt, 60)
    mySSH.command(sudoPrefix + 'rm ' + collectedFiles, shellPrompt, 5)
    mySSH.close()
mySSH.command('cd ' + self.eNBSourceCodePath, '\$', 5) + mySSH.command('cd cmake_targets', '\$', 5) + mySSH.command('echo ' + self.eNBPassword + ' | sudo -S rm -f enb.log.zip', '\$', 5) + mySSH.command('echo ' + self.eNBPassword + ' | sudo -S zip enb.log.zip enb*.log core* enb_*record.raw enb_*.pcap enb_*txt', '\$', 60) + mySSH.command('echo ' + self.eNBPassword + ' | sudo -S rm enb*.log core* enb_*record.raw enb_*.pcap enb_*txt', '\$', 5) + mySSH.close() + + def AnalyzeLogFile_eNB(self, eNBlogFile): + if (not os.path.isfile('./' + eNBlogFile)): + return -1 + enb_log_file = open('./' + eNBlogFile, 'r') + exitSignalReceived = False + foundAssertion = False + msgAssertion = '' + msgLine = 0 + foundSegFault = False + foundRealTimeIssue = False + rrcSetupComplete = 0 + rrcReleaseRequest = 0 + rrcReconfigRequest = 0 + rrcReconfigComplete = 0 + rrcReestablishRequest = 0 + rrcReestablishComplete = 0 + rrcReestablishReject = 0 + rlcDiscardBuffer = 0 + rachCanceledProcedure = 0 + uciStatMsgCount = 0 + pdcpFailure = 0 + ulschFailure = 0 + ulschReceiveOK = 0 + gnbRxTxWakeUpFailure = 0 + cdrxActivationMessageCount = 0 + dropNotEnoughRBs = 0 + mbmsRequestMsg = 0 + htmleNBFailureMsg = '' + isRRU = False + isSlave = False + slaveReceivesFrameResyncCmd = False + X2HO_state = CONST.X2_HO_REQ_STATE__IDLE + X2HO_inNbProcedures = 0 + X2HO_outNbProcedures = 0 + for line in enb_log_file.readlines(): + if X2HO_state == CONST.X2_HO_REQ_STATE__IDLE: + result = re.search('target eNB Receives X2 HO Req X2AP_HANDOVER_REQ', str(line)) + if result is not None: + X2HO_state = CONST.X2_HO_REQ_STATE__TARGET_RECEIVES_REQ + result = re.search('source eNB receives the X2 HO ACK X2AP_HANDOVER_REQ_ACK', str(line)) + if result is not None: + X2HO_state = CONST.X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK + if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_RECEIVES_REQ: + result = re.search('Received LTE_RRCConnectionReconfigurationComplete from UE', str(line)) + if result is not None: + X2HO_state = 
CONST.X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE + if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_RRC_RECFG_COMPLETE: + result = re.search('issue rrc_eNB_send_PATH_SWITCH_REQ', str(line)) + if result is not None: + X2HO_state = CONST.X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ + if X2HO_state == CONST.X2_HO_REQ_STATE__TARGET_SENDS_SWITCH_REQ: + result = re.search('received path switch ack S1AP_PATH_SWITCH_REQ_ACK', str(line)) + if result is not None: + X2HO_state = CONST.X2_HO_REQ_STATE__IDLE + X2HO_inNbProcedures += 1 + if X2HO_state == CONST.X2_HO_REQ_STATE__SOURCE_RECEIVES_REQ_ACK: + result = re.search('source eNB receives the X2 UE CONTEXT RELEASE X2AP_UE_CONTEXT_RELEASE', str(line)) + if result is not None: + X2HO_state = CONST.X2_HO_REQ_STATE__IDLE + X2HO_outNbProcedures += 1 + + if self.eNBOptions[int(self.eNB_instance)] != '': + res1 = re.search('max_rxgain (?P<requested_option>[0-9]+)', self.eNBOptions[int(self.eNB_instance)]) + res2 = re.search('max_rxgain (?P<applied_option>[0-9]+)', str(line)) + if res1 is not None and res2 is not None: + requested_option = int(res1.group('requested_option')) + applied_option = int(res2.group('applied_option')) + if requested_option == applied_option: + htmleNBFailureMsg += '<span class="glyphicon glyphicon-ok-circle"></span> Command line option(s) correctly applied <span class="glyphicon glyphicon-arrow-right"></span> ' + self.eNBOptions[int(self.eNB_instance)] + '\n\n' + else: + htmleNBFailureMsg += '<span class="glyphicon glyphicon-ban-circle"></span> Command line option(s) NOT applied <span class="glyphicon glyphicon-arrow-right"></span> ' + self.eNBOptions[int(self.eNB_instance)] + '\n\n' + result = re.search('Exiting OAI softmodem', str(line)) + if result is not None: + exitSignalReceived = True + result = re.search('[Ss]egmentation [Ff]ault', str(line)) + if result is not None and not exitSignalReceived: + foundSegFault = True + result = re.search('[Cc]ore [dD]ump', str(line)) + if result is not None and not 
exitSignalReceived: + foundSegFault = True + result = re.search('./ran_build/build/lte-softmodem', str(line)) + if result is not None and not exitSignalReceived: + foundSegFault = True + result = re.search('[Aa]ssertion', str(line)) + if result is not None and not exitSignalReceived: + foundAssertion = True + result = re.search('LLL', str(line)) + if result is not None and not exitSignalReceived: + foundRealTimeIssue = True + if foundAssertion and (msgLine < 3): + msgLine += 1 + msgAssertion += str(line) + result = re.search('Setting function for RU', str(line)) + if result is not None: + isRRU = True + if isRRU: + result = re.search('RU 0 is_slave=yes', str(line)) + if result is not None: + isSlave = True + if isSlave: + result = re.search('Received RRU_frame_resynch command', str(line)) + if result is not None: + slaveReceivesFrameResyncCmd = True + result = re.search('LTE_RRCConnectionSetupComplete from UE', str(line)) + if result is not None: + rrcSetupComplete += 1 + result = re.search('Generate LTE_RRCConnectionRelease|Generate RRCConnectionRelease', str(line)) + if result is not None: + rrcReleaseRequest += 1 + result = re.search('Generate LTE_RRCConnectionReconfiguration', str(line)) + if result is not None: + rrcReconfigRequest += 1 + result = re.search('LTE_RRCConnectionReconfigurationComplete from UE rnti', str(line)) + if result is not None: + rrcReconfigComplete += 1 + result = re.search('LTE_RRCConnectionReestablishmentRequest', str(line)) + if result is not None: + rrcReestablishRequest += 1 + result = re.search('LTE_RRCConnectionReestablishmentComplete', str(line)) + if result is not None: + rrcReestablishComplete += 1 + result = re.search('LTE_RRCConnectionReestablishmentReject', str(line)) + if result is not None: + rrcReestablishReject += 1 + result = re.search('CDRX configuration activated after RRC Connection', str(line)) + if result is not None: + cdrxActivationMessageCount += 1 + result = re.search('uci->stat', str(line)) + if result is not 
None: + uciStatMsgCount += 1 + result = re.search('PDCP.*Out of Resources.*reason', str(line)) + if result is not None: + pdcpFailure += 1 + result = re.search('could not wakeup gNB rxtx process', str(line)) + if result is not None: + gnbRxTxWakeUpFailure += 1 + result = re.search('ULSCH in error in round|ULSCH 0 in error', str(line)) + if result is not None: + ulschFailure += 1 + result = re.search('ULSCH received ok', str(line)) + if result is not None: + ulschReceiveOK += 1 + result = re.search('BAD all_segments_received', str(line)) + if result is not None: + rlcDiscardBuffer += 1 + result = re.search('Canceled RA procedure for UE rnti', str(line)) + if result is not None: + rachCanceledProcedure += 1 + result = re.search('dropping, not enough RBs', str(line)) + if result is not None: + dropNotEnoughRBs += 1 + if self.eNBmbmsEnables[int(self.eNB_instance)]: + result = re.search('MBMS USER-PLANE.*Requesting.*bytes from RLC', str(line)) + if result is not None: + mbmsRequestMsg += 1 + enb_log_file.close() + logging.debug(' File analysis completed') + if self.air_interface == 'lte': + nodeB_prefix = 'e' + else: + nodeB_prefix = 'g' + if self.air_interface == 'nr': + if ulschReceiveOK > 0: + statMsg = nodeB_prefix + 'NB showed ' + str(ulschReceiveOK) + ' "ULSCH received ok" message(s)' + logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if gnbRxTxWakeUpFailure > 0: + statMsg = nodeB_prefix + 'NB showed ' + str(gnbRxTxWakeUpFailure) + ' "could not wakeup gNB rxtx process" message(s)' + logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if uciStatMsgCount > 0: + statMsg = nodeB_prefix + 'NB showed ' + str(uciStatMsgCount) + ' "uci->stat" message(s)' + logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if pdcpFailure > 0: + statMsg = nodeB_prefix + 'NB showed ' + str(pdcpFailure) + ' "PDCP Out of Resources" message(s)' + 
logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if ulschFailure > 0: + statMsg = nodeB_prefix + 'NB showed ' + str(ulschFailure) + ' "ULSCH in error in round" message(s)' + logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if dropNotEnoughRBs > 0: + statMsg = 'eNB showed ' + str(dropNotEnoughRBs) + ' "dropping, not enough RBs" message(s)' + logging.debug('\u001B[1;30;43m ' + statMsg + ' \u001B[0m') + htmleNBFailureMsg += statMsg + '\n' + if rrcSetupComplete > 0: + rrcMsg = nodeB_prefix + 'NB completed ' + str(rrcSetupComplete) + ' RRC Connection Setup(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + rrcMsg = ' -- ' + str(rrcSetupComplete) + ' were completed' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if rrcReleaseRequest > 0: + rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReleaseRequest) + ' RRC Connection Release(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if rrcReconfigRequest > 0 or rrcReconfigComplete > 0: + rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReconfigRequest) + ' RRC Connection Reconfiguration(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + rrcMsg = ' -- ' + str(rrcReconfigComplete) + ' were completed' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if rrcReestablishRequest > 0 or rrcReestablishComplete > 0 or rrcReestablishReject > 0: + rrcMsg = nodeB_prefix + 'NB requested ' + str(rrcReestablishRequest) + ' RRC Connection Reestablishment(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + rrcMsg = ' -- ' + str(rrcReestablishComplete) + ' were completed' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + 
htmleNBFailureMsg += rrcMsg + '\n' + rrcMsg = ' -- ' + str(rrcReestablishReject) + ' were rejected' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if self.eNBmbmsEnables[int(self.eNB_instance)]: + if mbmsRequestMsg > 0: + rrcMsg = 'eNB requested ' + str(mbmsRequestMsg) + ' times the RLC for MBMS USER-PLANE' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if X2HO_inNbProcedures > 0: + rrcMsg = 'eNB completed ' + str(X2HO_inNbProcedures) + ' X2 Handover Connection procedure(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if X2HO_outNbProcedures > 0: + rrcMsg = 'eNB completed ' + str(X2HO_outNbProcedures) + ' X2 Handover Release procedure(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if self.eNBOptions[int(self.eNB_instance)] != '': + res1 = re.search('drx_Config_present prSetup', self.eNBOptions[int(self.eNB_instance)]) + if res1 is not None: + if cdrxActivationMessageCount > 0: + rrcMsg = 'eNB activated the CDRX Configuration for ' + str(cdrxActivationMessageCount) + ' time(s)' + logging.debug('\u001B[1;30;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + else: + rrcMsg = 'eNB did NOT ACTIVATE the CDRX Configuration' + logging.debug('\u001B[1;37;43m ' + rrcMsg + ' \u001B[0m') + htmleNBFailureMsg += rrcMsg + '\n' + if rachCanceledProcedure > 0: + rachMsg = nodeB_prefix + 'NB cancelled ' + str(rachCanceledProcedure) + ' RA procedure(s)' + logging.debug('\u001B[1;30;43m ' + rachMsg + ' \u001B[0m') + htmleNBFailureMsg += rachMsg + '\n' + if isRRU: + if isSlave: + if slaveReceivesFrameResyncCmd: + rruMsg = 'Slave RRU received the RRU_frame_resynch command from RAU' + logging.debug('\u001B[1;30;43m ' + rruMsg + ' \u001B[0m') + htmleNBFailureMsg += rruMsg + '\n' + else: + rruMsg = 'Slave RRU DID NOT receive the RRU_frame_resynch command from 
RAU' + logging.debug('\u001B[1;37;41m ' + rruMsg + ' \u001B[0m') + htmleNBFailureMsg += rruMsg + '\n' + self.prematureExit(True) + return CONST.ENB_PROCESS_SLAVE_RRU_NOT_SYNCED + if foundSegFault: + logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB ended with a Segmentation Fault! \u001B[0m') + if self.htmlObj is not None: + self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg) + return CONST.ENB_PROCESS_SEG_FAULT + if foundAssertion: + logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB ended with an assertion! \u001B[0m') + htmleNBFailureMsg += msgAssertion + if self.htmlObj is not None: + self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg) + return CONST.ENB_PROCESS_ASSERTION + if foundRealTimeIssue: + logging.debug('\u001B[1;37;41m ' + nodeB_prefix + 'NB faced real time issues! \u001B[0m') + htmleNBFailureMsg += nodeB_prefix + 'NB faced real time issues!\n' + #return CONST.ENB_PROCESS_REALTIME_ISSUE + if rlcDiscardBuffer > 0: + rlcMsg = nodeB_prefix + 'NB RLC discarded ' + str(rlcDiscardBuffer) + ' buffer(s)' + logging.debug('\u001B[1;37;41m ' + rlcMsg + ' \u001B[0m') + htmleNBFailureMsg += rlcMsg + '\n' + if self.htmlObj is not None: + self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg) + return CONST.ENB_PROCESS_REALTIME_ISSUE + if self.htmlObj is not None: + self.htmlObj.SetHmleNBFailureMsg(htmleNBFailureMsg) + return 0 diff --git a/ci-scripts/sshconnection.py b/ci-scripts/sshconnection.py new file mode 100644 index 0000000000000000000000000000000000000000..ba0f900f2940482589e3e9711c942031af88cd9a --- /dev/null +++ b/ci-scripts/sshconnection.py @@ -0,0 +1,221 @@ +#/* +# * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. 
class SSHConnection():
    """Password-based SSH/SCP helper built on top of pexpect.

    One instance drives a single interactive ``ssh`` session (``open`` /
    ``command`` / ``close``) and can additionally run one-shot ``scp``
    transfers (``copyin`` / ``copyout``).

    Attributes set by the methods below:
      ssh             -- the pexpect child of the interactive session ('' until open())
      sshresponse     -- index returned by the most recent ``expect`` on ``ssh``
      picocom_closure -- when True, a timeout while closing is not logged
    """

    # Regex handed to pexpect for the host-key confirmation question. The
    # trailing "(yes/no)?" is an *optional group* in regex terms, so the
    # pattern matches on the sentence itself whatever follows it.
    _HOST_KEY_PROMPT = 'Are you sure you want to continue connecting (yes/no)?'

    def __init__(self):
        self.ssh = ''
        self.picocom_closure = False

    def disablePicocomClosure(self):
        """Log again a timeout that happens while closing the session."""
        self.picocom_closure = False

    def enablePicocomClosure(self):
        """Silence the timeout log emitted by close()."""
        self.picocom_closure = True

    @staticmethod
    def _ssh_arguments(ipaddress, username):
        """Return the argv list (without the program name) for ``ssh``.

        Every option is its own argv element and precedes the destination.
        The previous code appended the options to the ``user@host`` string,
        which handed ssh a single bogus destination containing spaces.
        """
        arguments = []
        if ipaddress == '192.168.18.197':
            # This host is reached without recording / checking its host key.
            arguments += ['-o', 'UserKnownHostsFile=/dev/null', '-o', 'StrictHostKeyChecking=no']
        arguments.append(username + '@' + ipaddress)
        return arguments

    def _send_password(self, password):
        """Answer a password prompt on ``self.ssh``; return True once a shell prompt shows up."""
        self.ssh.sendline(password)
        self.sshresponse = self.ssh.expect([r'\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
        if self.sshresponse == 0:
            return True
        logging.debug('self.sshresponse = ' + str(self.sshresponse))
        return False

    def open(self, ipaddress, username, password):
        """Open the interactive SSH session, retrying up to 4 times.

        Terminates the program through ``sys.exit('SSH Connection Failed')``
        when no attempt succeeds.
        """
        connect_status = False
        ssh_arguments = self._ssh_arguments(ipaddress, username)
        for _ in range(4):
            self.ssh = pexpect.spawn('ssh', ssh_arguments, timeout = 5)
            self.sshresponse = self.ssh.expect([self._HOST_KEY_PROMPT, 'password:', 'Last login', pexpect.EOF, pexpect.TIMEOUT])
            if self.sshresponse == 0:
                # Unknown host key: accept it, then authenticate if asked to.
                self.ssh.sendline('yes')
                self.sshresponse = self.ssh.expect(['password:', username + '@'])
                if self.sshresponse == 0:
                    connect_status = self._send_password(password)
            elif self.sshresponse == 1:
                connect_status = self._send_password(password)
            elif self.sshresponse == 2:
                # Checking if we are really on the remote client defined by its IP address
                self.command('stdbuf -o0 ifconfig | egrep --color=never "inet addr:|inet "', r'\$', 5)
                result = re.search(str(ipaddress), str(self.ssh.before))
                if result is None:
                    self.close()
                else:
                    connect_status = True
            else:
                # debug output
                logging.debug(str(self.ssh.before))
                logging.debug('self.sshresponse = ' + str(self.sshresponse))
            if connect_status:
                break
            # adding a tempo when failure
            time.sleep(1)
        if not connect_status:
            sys.exit('SSH Connection Failed')

    def command(self, commandline, expectedline, timeout):
        """Send ``commandline`` and wait up to ``timeout`` seconds for ``expectedline``.

        ``expectedline`` is used as a pexpect (regex) pattern.

        Returns 0 when the pattern was seen, -1 when a ping / iperf / picocom
        command timed out. Any other EOF or timeout ends the program through
        ``sys.exit`` with the ``expect`` index as exit status.
        """
        logging.debug(commandline)
        self.ssh.timeout = timeout
        self.ssh.sendline(commandline)
        self.sshresponse = self.ssh.expect([expectedline, pexpect.EOF, pexpect.TIMEOUT])
        if self.sshresponse == 0:
            return 0
        elif self.sshresponse == 1:
            logging.debug('\u001B[1;37;41m Unexpected EOF \u001B[0m')
            logging.debug('Expected Line : ' + expectedline)
            logging.debug(str(self.ssh.before))
            sys.exit(self.sshresponse)
        elif self.sshresponse == 2:
            logging.debug('\u001B[1;37;41m Unexpected TIMEOUT \u001B[0m')
            logging.debug('Expected Line : ' + expectedline)
            # A timeout is tolerated (reported as -1) for these commands only.
            result = re.search('ping |iperf |picocom', str(commandline))
            if result is None:
                logging.debug(str(self.ssh.before))
                sys.exit(self.sshresponse)
            else:
                return -1
        else:
            logging.debug('\u001B[1;37;41m Unexpected Others \u001B[0m')
            logging.debug('Expected Line : ' + expectedline)
            sys.exit(self.sshresponse)

    def close(self):
        """Send ``exit`` and wait up to 5 seconds for the session to end."""
        self.ssh.timeout = 5
        self.ssh.sendline('exit')
        # Only two patterns are given, so expect() returns 0 (EOF) or 1 (TIMEOUT).
        self.sshresponse = self.ssh.expect([pexpect.EOF, pexpect.TIMEOUT])
        if self.sshresponse == 1 and not self.picocom_closure:
            logging.debug('\u001B[1;37;41m Unexpected TIMEOUT during closing\u001B[0m')

    def _scp_authenticate(self, scp_spawn, password, step):
        """Send the password to a running scp; True when it then reaches a prompt or EOF.

        ``step`` is only the prefix of the debug trace. scp normally just
        terminates after the transfer, hence EOF (index 3) counts as done.
        """
        scp_spawn.sendline(password)
        scp_response = scp_spawn.expect([r'\$', 'Permission denied', 'password:', pexpect.EOF, pexpect.TIMEOUT])
        if scp_response == 0 or scp_response == 3:
            return True
        logging.debug(step + ' - scp_response = ' + str(scp_response))
        return False

    def _scp(self, scp_command, password, max_attempts):
        """Run ``scp_command`` up to ``max_attempts`` times; return True on success."""
        logging.debug(scp_command)
        for _ in range(max_attempts):
            copy_status = False
            scp_spawn = pexpect.spawn(scp_command, timeout = 100)
            try:
                scp_response = scp_spawn.expect([self._HOST_KEY_PROMPT, 'password:', pexpect.EOF, pexpect.TIMEOUT])
                if scp_response == 0:
                    scp_spawn.sendline('yes')
                    scp_spawn.expect('password:')
                    copy_status = self._scp_authenticate(scp_spawn, password, '1')
                elif scp_response == 1:
                    copy_status = self._scp_authenticate(scp_spawn, password, '2')
                elif scp_response == 2:
                    # scp ended without asking for anything.
                    copy_status = True
                else:
                    logging.debug('3 - scp_response = ' + str(scp_response))
            finally:
                # Release the pty (and stop a stuck scp) before any retry.
                scp_spawn.close()
            if copy_status:
                return True
            # adding a tempo when failure
            time.sleep(1)
        return False

    def copyin(self, ipaddress, username, password, source, destination):
        """Copy remote ``source`` to local ``destination`` (up to 10 attempts).

        Returns 0 on success and -1 on failure.
        """
        scp_command = 'scp '+ username + '@' + ipaddress + ':' + source + ' ' + destination
        if self._scp(scp_command, password, 10):
            return 0
        return -1

    def copyout(self, ipaddress, username, password, source, destination):
        """Copy local ``source`` to remote ``destination`` (up to 4 attempts).

        Terminates the program through ``sys.exit('SCP failed')`` on failure.
        """
        scp_command = 'scp ' + source + ' ' + username + '@' + ipaddress + ':' + destination
        if not self._scp(scp_command, password, 4):
            sys.exit('SCP failed')

    def getBefore(self):
        """Return, as ``str``, what the session printed before the last matched pattern."""
        return str(self.ssh.before)
b/cmake_targets/CMakeLists.txt @@ -2825,6 +2825,7 @@ target_link_libraries(smallblocktest m pthread ${ATLAS_LIBRARIES} dl ) +<<<<<<< HEAD ################################################### # For CUDA library ################################################### @@ -2833,10 +2834,15 @@ CUDA_ADD_LIBRARY(LDPC_CU ) CUDA_ADD_CUFFT_TO_TARGET(LDPC_CU) cuda_add_executable(ldpctest +======= +add_executable(ldpctest + ${PHY_NR_CODINGIF} +>>>>>>> origin/develop ${OPENAIR1_DIR}/PHY/CODING/TESTBENCH/ldpctest.c ${T_SOURCE} ${SHLIB_LOADER_SOURCES} ) +<<<<<<< HEAD target_link_libraries(ldpctest -ldl -Wl,--start-group @@ -2851,6 +2857,8 @@ target_link_libraries(ldpctest -ldl # ${T_SOURCE} # ${SHLIB_LOADER_SOURCES} # ) +======= +>>>>>>> origin/develop add_dependencies( ldpctest ldpc_orig ldpc_optim ldpc_optim8seg ldpc ) target_link_libraries(ldpctest diff --git a/common/utils/T/tracer/hacks/pilot_timeplot.sh b/common/utils/T/tracer/hacks/pilot_timeplot.sh new file mode 100755 index 0000000000000000000000000000000000000000..0d9c4694a627e97bd4d9570de6b21627893971a4 --- /dev/null +++ b/common/utils/T/tracer/hacks/pilot_timeplot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# use UP and DOWN arrow keys to scroll the view displayed by timeplot + +while read -n 1 key +do + case "$key" in + 'B' ) + kill -SIGUSR1 `ps aux|grep timeplot|grep -v grep|grep -v sh|tr -s ' ' :|cut -f 2 -d :` + ;; + 'A' ) + kill -SIGUSR2 `ps aux|grep timeplot|grep -v grep|grep -v sh|tr -s ' ' :|cut -f 2 -d :` + ;; + esac +done diff --git a/doc/L2NFAPI_NOS1.md b/doc/L2NFAPI_NOS1.md new file mode 100644 index 0000000000000000000000000000000000000000..bb4c32ec084bf2258657bfa0bb96d01253825807 --- /dev/null +++ b/doc/L2NFAPI_NOS1.md @@ -0,0 +1,349 @@ +<table style="border-collapse: collapse; border: none;"> + <tr style="border-collapse: collapse; border: none;"> + <td style="border-collapse: collapse; border: none;"> + <a href="http://www.openairinterface.org/"> + <img src="./images/oai_final_logo.png" alt="" border=3 height=50 
width=150> + </img> + </a> + </td> + <td style="border-collapse: collapse; border: none; vertical-align: center;"> + <b><font size = "5">L2 nFAPI Simulator (no S1 Mode / 2-host deployment)</font></b> + </td> + </tr> +</table> + +## Table of Contents ## + +1. [Environment](#1-environment) +2. [Retrieve the OAI eNB-UE source code](#2-retrieve-the-oai-enb-ue-source-code) +3. [Setup of the USIM information in UE folder](#3-setup-of-the-usim-information-in-ue-folder) +4. [Setup of the Configuration files](#4-setup-of-the-configuration-files) + 1. [The eNB Configuration file](#41-the-enb-configuration-file) + 2. [The UE Configuration file](#42-the-ue-configuration-file) +5. [Build OAI UE and eNodeB](#5-build-oai-ue-and-enodeb) +6. [Start the eNB](#6-start-the-enb) +7. [Start the UE](#7-start-the-ue) +8. [Test with ping](#8-test-with-ping) +9. [Limitations](#9-limitations) + +# 1. Environment # + +You may not have access to an EPC or you don't want to hassle to deploy one. + +2 servers are used in this deployment. You can use Virtual Machines instead of each server; like it is done in the CI process. + +* Machine B contains the OAI eNB executable (`lte-softmodem`) +* Machine C contains the OAI UE(s) executable (`lte-uesoftmodem`) + +Example of L2 nFAPI Simulator testing environment: + +<img src="./images/L2-sim-noS1-2-host-deployment.png" alt="" border=3> + +Note that the IP addresses are indicative and need to be adapted to your environment. + +# 2. Retrieve the OAI eNB-UE source code # + +At the time of writing, the tag used in the `develop` branch to do this documentation was `2020.w16`. + +The tutorial should be valid for the `master` branch tags such as `v1.2.0` or `v1.2.1`. But you may face issues that could be fixed in newer `develop` tags. + +Please try to use the same commit ID on both eNB/UE hosts. 
+ +```bash +$ ssh sudousername@machineB +git clone https://gitlab.eurecom.fr/oai/openairinterface5g.git enb_folder +cd enb_folder +git checkout develop +``` + +```bash +$ ssh sudousername@machineC +git clone https://gitlab.eurecom.fr/oai/openairinterface5g.git ue_folder +cd ue_folder +git checkout develop +``` + +# 3. Setup of the USIM information in UE folder # + +```bash +$ ssh sudousername@machineC +cd ue_folder +# Edit openair3/NAS/TOOLS/ue_eurecom_test_sfr.conf with your preferred editor +``` + +Edit the USIM information within this file in order to match the HSS database. They **HAVE TO** match: + +* PLMN+MSIN and IMSI of users table of HSS database **SHALL** be the same. +* OPC of this file and OPC of users table of HSS database **SHALL** be the same. +* USIM_API_K of this file and the key of users table of HSS database **SHALL** be the same. + +When testing multiple UEs, it is necessary to add other UEs information like described below for 2 Users. Only UE0 (first UE) information is written in the original file. + +``` +UE0: +{ + USER: { + IMEI="356113022094149"; + MANUFACTURER="EURECOM"; + MODEL="LTE Android PC"; + PIN="0000"; + }; + + SIM: { + MSIN="0000000001"; // <-- Modify here + USIM_API_K="8baf473f2f8fd09487cccbd7097c6862"; + OPC="e734f8734007d6c5ce7a0508809e7e9c"; + MSISDN="33611123456"; + }; +... +}; +// Copy the UE0 and edit +UE1: // <- Edit here +{ + USER: { + IMEI="356113022094149"; + MANUFACTURER="EURECOM"; + MODEL="LTE Android PC"; + PIN="0000"; + }; + + SIM: { + MSIN="0000000002"; // <-- Modify here + USIM_API_K="8baf473f2f8fd09487cccbd7097c6862"; + OPC="e734f8734007d6c5ce7a0508809e7e9c"; + MSISDN="33611123456"; + }; +... +}; +``` + +You can repeat the operation for as many users you want to test with. + +# 4. Setup of the Configuration files # + +**CAUTION: both proposed configuration files resides in the ci-scripts realm. 
You can copy them but you CANNOT push any modification on these 2 files as part of an MR without informing the CI team.** + +## 4.1. The eNB Configuration file ## + +```bash +$ ssh sudousername@machineB +cd enb_folder +# Edit ci-scripts/conf_files/rcc.band7.tm1.nfapi.conf with your preferred editor +``` + +First verify the nFAPI interface setup on the physical ethernet interface of machineB and put the proper IP addresses for both hosts. + +``` +MACRLCs = ( + { + num_cc = 1; + local_s_if_name = "ens3"; // <-- HERE + remote_s_address = "192.168.122.169"; // <-- HERE + local_s_address = "192.168.122.31"; // <-- HERE + local_s_portc = 50001; + remote_s_portc = 50000; + local_s_portd = 50011; + remote_s_portd = 50010; + tr_s_preference = "nfapi"; + tr_n_preference = "local_RRC"; + } +); +``` + +If you are testing more than 16 UEs, a proper setting on the RUs is necessary. **Note that this part is NOT present in the original configuration file**. + +``` +RUs = ( + { + local_rf = "yes" + nb_tx = 1 + nb_rx = 1 + att_tx = 20 + att_rx = 0; + bands = [38]; + max_pdschReferenceSignalPower = -23; + max_rxgain = 116; + eNB_instances = [0]; + } +); +``` + +Last, the S1 interface shall be properly set. + +``` + ////////// MME parameters: + mme_ip_address = ( { ipv4 = "CI_MME_IP_ADDR"; // replace with 192.168.122.195 + ipv6 = "192:168:30::17"; + active = "yes"; + preference = "ipv4"; + } + ); + + NETWORK_INTERFACES : + { + ENB_INTERFACE_NAME_FOR_S1_MME = "ens3"; // replace with the proper interface name + ENB_IPV4_ADDRESS_FOR_S1_MME = "CI_ENB_IP_ADDR"; // replace with 192.168.122.31 + ENB_INTERFACE_NAME_FOR_S1U = "ens3"; // replace with the proper interface name + ENB_IPV4_ADDRESS_FOR_S1U = "CI_ENB_IP_ADDR"; // replace with 192.168.122.31 + ENB_PORT_FOR_S1U = 2152; # Spec 2152 + ENB_IPV4_ADDRESS_FOR_X2C = "CI_ENB_IP_ADDR"; // replace with 192.168.122.31 + ENB_PORT_FOR_X2C = 36422; # Spec 36422 + + }; +``` + +## 4.2. 
The UE Configuration file ## + +```bash +$ ssh sudousername@machineB +cd ue_folder +# Edit ci-scripts/conf_files/ue.nfapi.conf with your preferred editor +``` + +Verify the nFAPI interface setup on the loopback interface. + +``` +L1s = ( + { + num_cc = 1; + tr_n_preference = "nfapi"; + local_n_if_name = "ens3"; // <- HERE + remote_n_address = "192.168.122.31"; // <- HERE + local_n_address = "192.168.122.169"; // <- HERE + local_n_portc = 50000; + remote_n_portc = 50001; + local_n_portd = 50010; + remote_n_portd = 50011; + } +); +``` + +# 5. Build OAI UE and eNodeB # + +See [Build documentation](./BUILD.md). + +# 6. Start the eNB # + +In the first terminal (the one you used to build the eNB): + +```bash +$ ssh sudousername@machineB +cd enb_folder/cmake_targets +sudo -E ./ran_build/build/lte-softmodem -O ../ci-scripts/conf_files/rcc.band7.tm1.nfapi.conf --noS1 > enb.log 2>&1 +sleep 10 +ifconfig +ens3 Link encap:Ethernet HWaddr XX:XX:XX:XX:XX:XX + inet addr:192.168.122.31 Bcast:192.168.122.255 Mask:255.255.255.0 +.... +oaitun_enb1 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.1.1 P-t-P:192.172.0.2 Mask:255.255.255.0 +.... +``` + +If you don't use redirection, you can test but many logs are printed on the console and this may affect performance of the L2-nFAPI simulator. + +We do recommend the redirection in steady mode once your setup is correct. + +# 7. 
Start the UE # + +In the second terminal (the one you used to build the UE): + +```bash +$ ssh sudousername@machineC +cd ue_folder/cmake_targets +# Test 64 UEs, 1 thread in FDD mode +sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 64 --nums_ue_thread 1 --nokrnmod 1 > ue.log 2>&1 +# Test 64 UEs, 1 thread in TDD mode +sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 64 --nums_ue_thread 1 --nokrnmod 1 -T 1 > ue.log 2>&1 +# The "-T 1" option means TDD config +``` + +- The number of UEs can set by using `--num-ues` option and the maximum UE number is 255 (with the `--mu*` options, otherwise 16). +- The number of threads can set with the `--nums-ue-thread`. This number **SHALL NOT** be greater than the number of UEs. + * At the time of writing, it seems to be enough to run on a single thread. +- The `--nokrnmod 1` option makes use of the preferred and supported tunnel interface. +- How many UE that can be tested depends on hardware (server , PC, etc) performance in your environment. + +For example, running with 4 UEs: + +```bash +$ ssh sudousername@machineC +cd ue_folder/cmake_targets +sudo -E ./ran_build/build/lte-uesoftmodem -O ../ci-scripts/conf_files/ue.nfapi.conf --noS1 --L2-emul 3 --num-ues 64 --nums_ue_thread 1 --nokrnmod 1 > ue.log 2>&1 +sleep 10 +ifconfig +ens3 Link encap:Ethernet HWaddr XX:XX:XX:XX:XX:XX + inet addr:192.168.122.169 Bcast:192.168.122.255 Mask:255.255.255.0 +.... +oaitun_ue1 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.1.2 P-t-P:192.172.0.2 Mask:255.255.255.0 +.... +oaitun_ue2 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.1.3 P-t-P:192.172.0.3 Mask:255.255.255.0 +.... +oaitun_ue3 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.1.4 P-t-P:192.172.0.4 Mask:255.255.255.0 +.... 
+oaitun_ue4 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.1.5 P-t-P:192.172.0.5 Mask:255.255.255.0 +.... +oaitun_uem1 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 + inet addr:10.0.2.2 P-t-P:10.0.2.2 Mask:255.255.255.0 +.... +.... +``` + +Having the 4 oaitun_ue tunnel interfaces up and with an allocated address means the connection with EPC went alright. + +# 8. Test with ping # + +In a third terminal, after around 10 seconds, the UE(s) shall be connected to the eNB: Check with ifconfig + +```bash +$ ssh sudousername@machineB +# Ping UE1 IP address based on the EPC pool used: in this example: +ping -I oaitun_enb1 -c 20 10.0.1.2 +# Ping UE4 IP address based on the EPC pool used: in this example: +ping -I oaitun_enb1 -c 20 10.0.1.5 +``` + +Ping from the UE side: + +```bash +$ ssh sudousername@machineC +ping -I oaitun_ue1 -c 20 10.0.1.1 +ping -I oaitun_ue3 -c 20 10.0.1.1 +``` + +iperf operations can also be performed. + +DL traffic: + +```bash +$ ssh sudousername@machineC +iperf -B 10.0.1.2 -u -s -i 1 -fm -p 5002 +$ ssh sudousername@machineB +iperf -c 10.0.1.2 -u -t 30 -b 3M -i 1 -fm -B 10.0.1.1 -p 5002 +``` + +UL traffic: + +```bash +$ ssh sudousername@machineB +iperf -B 10.0.1.1 -u -s -i 1 -fm -p 5002 +$ ssh sudousername@machineC +iperf -c 10.0.1.1 -u -t 30 -b 2M -i 1 -fm -B 10.0.1.2 -p 5002 +``` + +# 9. 
Limitations # + + +---- + +[oai wiki home](https://gitlab.eurecom.fr/oai/openairinterface5g/wikis/home) + +[oai softmodem features](FEATURE_SET.md) + +[oai softmodem build procedure](BUILD.md) + +[L2 nfapi simulator](L2NFAPI.md) diff --git a/doc/SystemX-tutorial-design.md b/doc/SystemX-tutorial-design.md new file mode 100644 index 0000000000000000000000000000000000000000..3e4496f7de6c22811d99984777fde9008bc8e282 --- /dev/null +++ b/doc/SystemX-tutorial-design.md @@ -0,0 +1,336 @@ +# OpenAirInterface for SystemX + +# Terminology + +****This document use the 5G terminology**** + +**Central Unit (CU):** It is a logical node that includes the gNB +functions like Transfer of user data, Mobility control, Radio access +network sharing, Positioning, Session Management etc., except those +functions allocated exclusively to the DU. CU controls the operation of +DUs over front-haul (Fs) interface. A central unit (CU) may also be +known as BBU/REC/RCC/C-RAN/V-RAN/VNF + +**Distributed Unit (DU):** This logical node includes a subset of the +gNB functions, depending on the functional split option. Its operation +is controlled by the CU. Distributed Unit (DU) also known with other +names like RRH/RRU/RE/RU/PNF. + +In OpenAir code, the terminology is often RU and BBU. + +# OpenAirUsage + +## EPC and general environment + +### OAI EPC + +Use the stable OAI EPC, that can run in one machine (VM or standalone) + +Draft description: +<https://open-cells.com/index.php/2017/08/22/all-in-one-openairinterface-august-22nd/> + +## Standalone 4G + +EPC+eNB on one machine, the UE can be commercial or OAI UE. + +### USRP B210 + +Main current issue: traffic is good only on coaxial link between UE and +eNB (probably power management issue). + +### Simulated RF + +Running eNB+UE both OAI can be done over a virtual RF link. + +The UE current status is that threads synchronization is implicit in +some cases. 
As the RF simulator is very quick, a “sleep()” is required +in the UE main loop + +(line 1744, targets/RT/USER/lte-ue.c). + +Running the UE on the same machine is also possible with simulated RF. + +Running on the same machine is simpler and offers nearly infinite speed for +virtual RF samples transmission. + +A specific configuration is required because the EPC Sgi interface has +the same IP tunnel end point as the UE. + +So, we have to create a network namespace for the UE and to route data +in/out of the namespace. + +```bash +ip netns delete aNameSpace 2> /dev/null + +ip link delete v-eth1 2> /dev/null + +ip netns add aNameSpace + +ip link add v-eth1 type veth peer name v-peer1 + +ip link set v-peer1 netns aNameSpace + +ip addr add 10.200.1.1/24 dev v-eth1 + +ip link set v-eth1 up + +iptables -t nat -A POSTROUTING -s 10.200.1.0/255.255.255.0 -o enp0s31f6 \ +-j MASQUERADE + +iptables -A FORWARD -i enp0s31f6 -o v-eth1 -j ACCEPT + +iptables -A FORWARD -o enp0s31f6 -i v-eth1 -j ACCEPT + +ip netns exec aNameSpace ip link set dev lo up + +ip netns exec aNameSpace ip addr add 10.200.1.2/24 dev v-peer1 + +ip netns exec aNameSpace ip link set v-peer1 up + +ip netns exec aNameSpace bash +``` + +After the last command, the Linux shell is in the new namespace, ready +to run the UE. + +To generate user plane traffic, the traffic generator has to run in the same +namespace: + +```bash +ip netns exec aNameSpace bash +``` + +The traffic generator has to specify the interface: + +```bash +route add default oaitun_ue1 +``` + +or specify the outgoing route in the traffic generator (like option “-I” +in ping command). + +## Split 6 DL 4G + +The contract describes reusing the existing uplink if4p5 and developing +in this work the downlink “functional split 6”. + +After signature, the customer also required the development of the uplink +functional split 6. This is accepted, as long as the whole work is +research with no delivery completeness warranty. 
+ +### Simulation + +To be able to verify the new features and to help in all future +developments, Open Cells added and improved the RF board simulator +during this contract. + +We added the channel modeling simulation, which can simulate various +3GPP defined channels. + +### Main loop + +The main loop used by the RF simulator is in + + `targets/RT/USER/lte-ru.c and targets/RT/USER/lte-enb.c` + +As this piece of SW is very complex and doesn't meet our goals +(functional split 6), a cleaned version replaces these 2 files in +executables/ocp-main.c (openair1/SCHED/prach\_procedures.c is also +replaced by this new file as it only launches the RACH actual work in a +way not compatible with our FS6). + +The main loop cadences the I/Q samples reception, signal processing and +I/Q samples sending. + +The main loop uses extensively function pointers to call the right +processing function depending on the split case. + +A lot of redundant OAI global variables contain the same semantic data: time, frame, subframe. +The reworked main loop takes care of a unique variable that comes directly from hardware: RF board sampling number. + +To use OAI, we need to set all OAI variables that derive from this timestamp value. The function setAllfromTS() implements this. + +### Splitted main level + +When FS6 is activated, a main loop for the DU (du_fs6()) and a main loop for the CU case replace the unique eNB main loop. + +Each of these main loops calls initialization of OAI LTE data and the FS6 transport layer initialization. + +Then, it runs an infinite loop on: set time, call UL and DL. The time comes from the RF board, so the DU sends the time to the CU. + +This is enough for RF board dialog, but the FS6 is higher in SW layers, +we need to cut higher functions inside downlink and uplink procedures. + +As much as possible, the FS6 code is in the directory OPENAIR_DIR/executables. When a given OAI piece of code is small or needs complex changes, it is reworked in the file fs6-main.c. 
Function naming keeps the OAI function name, adding the suffix _fromsplit() or _tosplit(). + +When this organization would lead to large code duplication, it is better to insert modifications in OAI code. This is done in two files: + +- openair1/SCHED/phy_procedures_lte_eNb.c: to send signaling channels computation results + - the function sendFs6Ulharq() centralizes all signaling channels forwarding to CU +- openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c: to deal with FS6 user plane split + - sendFs6Ul() is used once to forward user plane to CU + + +### DownLink + +The main procedure is phy\_procedures\_eNB\_TX() + +This builds the common channels (beacon, multi-UE signaling). + +The FS6 split breaks this function into pieces: + +* The multi-UE signals, built by common\_signal\_procedures(), + subframe2harq\_pid(), generate\_dci\_top(), subframe2harq\_pid() + * These functions run in the DU, nevertheless all context has to be sent + (it is also needed partially for UL splitting) + * Run in the DU also to meet the requirement of pushing + in DU the data encoded with large redundancy (>3 redundancy) + +* the per UE data: pdsch\_procedures() needs further splitting: + + * dlsch\_encoding\_all() that makes the encoding: turbo code + and lte\_rate\_matching\_turbo() that will be in the DU (some + unlikely cases can reach redundancy up to x3, when MCS is very + low (negative SINR cases)). + + * dlsch\_encoding() output needs to be transmitted between the + DU and the CU for functional split 6. + * dlsch\_scrambling() that will go in the DU + * dlsch\_modulation() that will go in the DU + + The DU user plane data is made of expanded bits in OAI at the FS6 split level. A pair of functions compacts these bits back into 8 bits/byte before sending data and expands them again on DU data reception (functions: fs6Dl(un)pack()). + +### Uplink + +The uplink requires configuration that is part of the DL transmission.
+ +It interprets the signalling to extract the RACH and the per UE data +channels. + +ocp-main.c:rxtx() directly calls the entry procedure +phy\_procedures\_eNB\_uespec\_RX(), which calls: + +* rx\_ulsch() that demodulates and extracts soft bits per UE. + + * This function runs in the DU + * the output data will be processed in the CU, so it needs to be + transmitted to the CU +* ulsch\_decoding() that does lte\_rate\_matching\_turbo\_rx() + sub\_block\_deinterleaving\_turbo() + then turbo decode that is in the CU +* fill\_ulsch\_cqi\_indication() fill\_crc\_indication() , fill\_rx\_indication() + * DU performs the signal processing of each channel data, prepares and sends to the CU the computed result + +* Random access channel detection runs in the DU + * the DU reports to the CU only the detected temporary identifier for RACH response + + +### signaling data in each direction (UL and DL) + + +* each LTE channel needs to be propagated between CU and DU + * the simplest are the almost static data such as PSS/SSS, that need only static eNB parameters and primary information (frame numbering) + * all the other channels require data transmission CU to DU and DU to CU + * the general design pushes all the low level processing for these channels in the DU + * the CU interface transports only signal processing results (UL) or configuration to create the RF signal (DL case) +* HARQ is detected in the DU, then only the ACK or NACK is reported to CU + +* the CU has to control the power and MCS (modulation and coding scheme) + * the DU performs the signal processing and reports only the decoded data like the CQI + * as the DU performs the modulation, scrambling and puncturing, each data packet is associated with the LTE parameters required for these features + * in DL, the CU associates the control parameters and the user plane data + * in UL, the CU sends upfront the scheduled UL data to the DU. So, the DU has the required knowledge to decode the next subframes in time.
+ +### UDP transport layer + +A general UDP transport layer is in executables/transport\_split.c + +Linux offers a UDP socket builtin timeout, that we use. + +In and out buffers are memory zones that contain compacted +(concatenated) UDP chunks. + +For output, sendSubFrame() sends each UDP chunk + +For input, receiveSubFrame() collects all UDP chunks for a group (a +subframe in OAI LTE case). It returns in the following cases: + +- all chunks are received +- a timeout expired +- a chunk from the next subframe already arrived + +### Functional split 6 usage + +The ocp cleaned main has to be used: run ocp-softmodem instead of +lte-softmodem. + +The functionality and parameters are the same, enhanced with FS6 mode. + +The end line option “--split73” enables the fs6 (also called split 7.3) mode and selects whether to run as cu or du. + +Example: + +```bash +./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim --log_config.phy_log_level debug --split73 cu:127.0.0.1 +``` + +Runs the CU part of the split 6 eNB, which will call the du on address 127.0.0.1 + +```bash +./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim --log_config.phy_log_level debug --split73 du:127.0.0.1 +``` + +will run the du, calling the cu on 127.0.0.1 + +If the CU and the DU are not on the same machine, the remote address of each side needs to be specified as per this example + +```bash +./ocp-softmodem -O $OPENAIR_DIR/enb.fs6.example.conf --rfsim --log_config.phy_log_level debug --split73 du:192.168.1.55 +``` + +runs the functional split 6 DU + +```bash +./lte-uesoftmodem -C 2685000000 -r 50 --rfsim --rfsimulator.serveraddr 192.168.1.1 -d +``` + +Runs the UE (to have the UE signal scope, compile it with make uescope) + +CU+DU+UE can run with option `--noS1` to avoid using an EPC and/or with `--rfsim` to simulate the RF board + + +## 5G and F1 + +Today, the 5G achievement is limited to the physical layer.
+ +The available modulation bandwidth is 40MHz, which requires one X310 or N300 for the +gNB and an X310 or N300 for the nrUE. + +### Usage with X310 + +Linux configuration: +<https://files.ettus.com/manual/page_usrp_x3x0_config.html> + +We included most of this configuration in the OAI source code. + +It remains to set the NIC (network interface card) MTU to 9000 (jumbo +frames). + +### Running 5G + +Usage with RFsimulator: + +**gNB** + +```bash +sudo RFSIMULATOR=server ./nr-softmodem -O \ +../../../targets/PROJECTS/GENERIC-LTE-EPC/CONF/gnb.band78.tm1.106PRB.usrpn300.conf \ +--parallel-config PARALLEL\_SINGLE\_THREAD +``` + +**nrUE** + +```bash +sudo RFSIMULATOR=127.0.0.1 ./nr-uesoftmodem --numerology 1 -r 106 -C \ +3510000000 -d +``` diff --git a/doc/images/L2-sim-S1-3-host-deployment.png b/doc/images/L2-sim-S1-3-host-deployment.png new file mode 100644 index 0000000000000000000000000000000000000000..4049ebd7a8ace60df576a988f32442526af7f13e Binary files /dev/null and b/doc/images/L2-sim-S1-3-host-deployment.png differ diff --git a/doc/images/L2-sim-noS1-2-host-deployment.png b/doc/images/L2-sim-noS1-2-host-deployment.png new file mode 100644 index 0000000000000000000000000000000000000000..02d29b7b3cdfb05dc912548e001fb11aa1d96fc8 Binary files /dev/null and b/doc/images/L2-sim-noS1-2-host-deployment.png differ diff --git a/executables/main-fs6.c b/executables/main-fs6.c new file mode 100644 index 0000000000000000000000000000000000000000..ffa44bd978758cc13923499970ff8743e938543f --- /dev/null +++ b/executables/main-fs6.c @@ -0,0 +1,1608 @@ +/* +* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The OpenAirInterface Software Alliance licenses this file to You under +* the OAI Public License, Version 1.1 (the "License"); you may not use this file +* except in compliance with the License.
+* You may obtain a copy of the License at +* +* http://www.openairinterface.org/?page_id=698 +* +* Author and copyright: Laurent Thomas, open-cells.com +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*------------------------------------------------------------------------------- +* For more information about the OpenAirInterface (OAI) Software Alliance: +* contact@openairinterface.org +*/ + +#include <stdint.h> +#include <common/utils/LOG/log.h> +#include <common/utils/system.h> +#include <common/config/config_userapi.h> +#include <targets/RT/USER/lte-softmodem.h> +#include <openair1/PHY/defs_eNB.h> +#include <openair1/PHY/phy_extern.h> +#include <nfapi/oai_integration/vendor_ext.h> +#include <openair1/SCHED/fapi_l1.h> +#include <openair1/PHY/INIT/phy_init.h> +#include <openair2/LAYER2/MAC/mac_extern.h> +#include <openair1/PHY/LTE_REFSIG/lte_refsig.h> +#include <nfapi/oai_integration/nfapi_pnf.h> +#include <executables/split_headers.h> +#include <nfapi/oai_integration/vendor_ext.h> +#include <openair1/PHY/INIT/lte_init.c> +#include <openair1/PHY/LTE_ESTIMATION/lte_estimation.h> +#include <executables/split_headers.h> +#include <openair1/PHY/CODING/coding_extern.h> +#include <threadPool/thread-pool.h> +#include <emmintrin.h> + /* Parenthesized so the value expands safely inside larger expressions (e.g. x % FS6_BUF_SIZE). */ +#define FS6_BUF_SIZE (1000*1000) +static UDPsock_t sockFS6; + /* Return the arithmetic sum of the s bytes starting at b; returns 0 when s <= 0. */ +int sum(uint8_t *b, int s) { + int sum=0; + + for (int i=0; i < s; i++) + sum+=b[i]; + + return sum; +} + /* qsort() comparator giving descending order on uint64_t values. The operands are compared explicitly: casting the 64-bit difference to int truncates it and can report the wrong sign. */ +static inline int cmpintRev(const void *a, const void *b) { + const uint64_t aa=*(const uint64_t *)a; + const uint64_t bb=*(const uint64_t *)b; + return (bb>aa)-(bb<aa); +} + +static inline void printMeas2(char *txt, Meas *M, int period, bool MaxMin) { + if (M->iterations%period == 0 ) { + char txt2[512]; + sprintf(txt2,"%s
avg=%" PRIu64 " iterations=%" PRIu64 " %s=%" + PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 "\n", + txt, + M->sum/M->iterations, + M->iterations, + MaxMin?"max":"min", + M->maxArray[1],M->maxArray[2], M->maxArray[3],M->maxArray[4], M->maxArray[5], + M->maxArray[6],M->maxArray[7], M->maxArray[8],M->maxArray[9],M->maxArray[10]); +#if T_TRACER + LOG_W(PHY,"%s",txt2); +#else + printf("%s",txt2); +#endif + } +} + /* When start is non-zero: compute (rdtsc()-start)/(cpuf*1000), store it in M->maxArray[0], add it to M->sum, count the iteration, re-sort the 11-entry array (cmpint when MaxMin, cmpintRev otherwise) and call printMeas2(). Every `period` iterations M is zeroed; in min mode maxArray is then refilled with INT_MAX. NOTE(review): the result is presumably in microseconds, which depends on the unit of cpuf - confirm. */ +static inline void updateTimesReset(uint64_t start, Meas *M, int period, bool MaxMin, char *txt) { + if (start!=0) { + uint64_t end=rdtsc(); + long long diff=(end-start)/(cpuf*1000); + M->maxArray[0]=diff; + M->sum+=diff; + M->iterations++; + + if ( MaxMin) + qsort(M->maxArray, 11, sizeof(uint64_t), cmpint); + else + qsort(M->maxArray, 11, sizeof(uint64_t), cmpintRev); + + printMeas2(txt,M,period, MaxMin); + + if (M->iterations%period == 0 ) { + bzero(M,sizeof(*M)); + + if (!MaxMin) + for (int i=0; i<11; i++) + M->maxArray[i]=INT_MAX; + } + } +} + /* Same bookkeeping as updateTimesReset(), but the sample is the time elapsed since DuSend minus CuMicroSec. NOTE(review): diff is signed and is stored into maxArray[0] and added to M->sum; a negative value would presumably wrap if those fields are unsigned - confirm. */ +static inline void measTransportTime(uint64_t DuSend, uint64_t CuMicroSec, Meas *M, int period, bool MaxMin, char *txt) { + if (DuSend!=0) { + uint64_t end=rdtsc(); + long long diff=(end-DuSend)/(cpuf*1000)-CuMicroSec; + M->maxArray[0]=diff; + M->sum+=diff; + M->iterations++; + + if ( MaxMin) + qsort(M->maxArray, 11, sizeof(uint64_t), cmpint); + else + qsort(M->maxArray, 11, sizeof(uint64_t), cmpintRev); + + printMeas2(txt,M,period, MaxMin); + + if (M->iterations%period == 0 ) { + bzero(M,sizeof(*M)); + + if (!MaxMin) + for (int i=0; i<11; i++) + M->maxArray[i]=INT_MAX; + } + } +} + /* Round a up to a multiple of 16, then divide by 8. The argument is parenthesized so that expressions such as ceil16_bytes(x+y) or ceil16_bytes(c ? x : y) expand correctly. */ +#define ceil16_bytes(a) (((((a)+15)/16)*16)/8) + /* Expand packed bits from `in` into `out`: each input byte becomes 8 output bytes holding 0 or 1, most significant bit first, through a 256-entry table built on the first call. NOTE(review): the lazy initialisation of the static table is not synchronized. */ +static void fs6Dlunpack(void *out, void *in, int szUnpacked) { + static uint64_t *lut=NULL; + + if (!lut) { + lut=(uint64_t *) malloc(sizeof(*lut)*256); + AssertFatal(lut!=NULL,"fs6Dlunpack: cannot allocate the unpack table\n"); + for (int i=0; i <256; i++) + for (int j=0; j<8; j++) + ((uint8_t *)(lut+i))[7-j]=(i>>j)&1; + } + + int64_t *out_64 = (int64_t *)out; + int
sz=ceil16_bytes(szUnpacked); + + for (int i=0; i<sz; i++) + out_64[i]=lut[((uint8_t *)in)[i]]; + + return; +} + + /* Pack expanded bits from `in` into a bitmap in `out`: for each group of 16 input bytes, a byte strictly greater than zero (signed compare) yields a 1 bit. The shuffle reverses the byte order inside each 8-byte half so that _mm_movemask_epi8() emits the first input byte as the most significant bit of each output byte, mirroring the table used by fs6Dlunpack(). One uint16_t is written per 16 input bytes. NOTE(review): `in` is read through an __m128i pointer, which assumes 16-byte alignment - confirm with callers. */ +static void fs6Dlpack(void *out, void *in, int szUnpacked) { + __m128i zeros=_mm_set1_epi8(0); + __m128i shuffle=_mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); + const int loop=ceil16_bytes(szUnpacked)/sizeof(uint16_t); + __m128i *iter=(__m128i *)in; + + for (int i=0; i < loop; i++) { + __m128i tmp=_mm_shuffle_epi8(_mm_cmpgt_epi8(*iter++,zeros),shuffle); + ((uint16_t *)out)[i]=(uint16_t)_mm_movemask_epi8(tmp); + } +} + /* PRACH processing before the split: returns immediately when is_prach_subframe() is <= 0; otherwise points eNB->prach_vars.rxsigF and prach_vars_br.rxsigF at the RU buffers and calls ocp_rx_prach(), passing the arrays of the fs6_ul_t header located at commonUDPdata(bufferZone). bufSize is not used in this function. NOTE(review): aa restarts at 0 for every RU, so with several RUs the same rxsigF entries are overwritten - confirm this is intended. */ +void prach_eNB_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) { + fs6_ul_t *header=(fs6_ul_t *) commonUDPdata(bufferZone); + + if (is_prach_subframe(&eNB->frame_parms, proc->frame_prach,proc->subframe_prach)<=0) + return; + + RU_t *ru; + int aa=0; + int ru_aa; + + for (int i=0; i<eNB->num_RU; i++) { + ru=eNB->RU_list[i]; + + for (ru_aa=0,aa=0; ru_aa<ru->nb_rx; ru_aa++,aa++) { + eNB->prach_vars.rxsigF[0][aa] = eNB->RU_list[i]->prach_rxsigF[ru_aa]; + int ce_level; + + for (ce_level=0; ce_level<4; ce_level++) + eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[i]->prach_rxsigF_br[ce_level][ru_aa]; + } + } + + ocp_rx_prach(eNB, + proc, + eNB->RU_list[0], + header->max_preamble, + header->max_preamble_energy, + header->max_preamble_delay, + header->avg_preamble_energy, + proc->frame_prach, + 0, + false + ); + // run PRACH detection for CE-level 0 only for now when br_flag is set + /* fixme: seems not operational and may overwrite regular LTE prach detection + * OAI code can call is sequence + rx_prach(eNB, + eNB->RU_list[0], + header->max_preamble, + header->max_preamble_energy, + header->max_preamble_delay, + header->avg_preamble_energy, + frame, + 0, + true + ); + */ + LOG_D(PHY,"RACH detection index 0: max preamble: %u, energy: %u, delay: %u, avg energy: %u\n", + header->max_preamble[0], + header->max_preamble_energy[0], + header->max_preamble_delay[0], + header->avg_preamble_energy[0] + ); +
return; +} + +void prach_eNB_fromsplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) { + fs6_ul_t *header=(fs6_ul_t *) commonUDPdata(bufferZone); + uint16_t *max_preamble=header->max_preamble; + uint16_t *max_preamble_energy=header->max_preamble_energy; + uint16_t *max_preamble_delay=header->max_preamble_delay; + uint16_t *avg_preamble_energy=header->avg_preamble_energy; + int subframe=proc->subframe_prach; + int frame=proc->frame_prach; + // Fixme: not clear why we call twice with "br" and without + int br_flag=0; + + if (br_flag==1) { + int prach_mask; + prach_mask = is_prach_subframe (&eNB->frame_parms, proc->frame_prach_br, proc->subframe_prach_br); + eNB->UL_INFO.rach_ind_br.rach_indication_body.preamble_list = eNB->preamble_list_br; + int ind = 0; + int ce_level = 0; + /* Save for later, it doesn't work + for (int ind=0,ce_level=0;ce_level<4;ce_level++) { + + if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1)&& + (prach_mask&(1<<(1+ce_level)) > 0) && // prach is active and CE level has finished its repetitions + (eNB->prach_vars_br.repetition_number[ce_level]== + eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) { + + */ + + if (eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[0] == 1) { + if ((eNB->prach_energy_counter == 100) && (max_preamble_energy[0] > eNB->measurements.prach_I0 + eNB->prach_DTX_threshold_emtc[0])) { + eNB->UL_INFO.rach_ind_br.rach_indication_body.number_of_preambles++; + eNB->preamble_list_br[ind].preamble_rel8.timing_advance = max_preamble_delay[ind]; // + eNB->preamble_list_br[ind].preamble_rel8.preamble = max_preamble[ind]; + // note: fid is implicitly 0 here, this is the rule for eMTC RA-RNTI from 36.321, Section 5.1.4 + eNB->preamble_list_br[ind].preamble_rel8.rnti = 1 + subframe + (60*(eNB->prach_vars_br.first_frame[ce_level] % 40)); + 
eNB->preamble_list_br[ind].instance_length = 0; //don't know exactly what this is + eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type = 1 + ce_level; // CE Level + LOG_I (PHY, "Filling NFAPI indication for RACH %d CELevel %d (mask %x) : TA %d, Preamble %d, rnti %x, rach_resource_type %d\n", + ind, + ce_level, + prach_mask, + eNB->preamble_list_br[ind].preamble_rel8.timing_advance, + eNB->preamble_list_br[ind].preamble_rel8.preamble, eNB->preamble_list_br[ind].preamble_rel8.rnti, eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type); + } + } + + /* + ind++; + } + } */// ce_level + } else if ((eNB->prach_energy_counter == 100) && + (max_preamble_energy[0] > eNB->measurements.prach_I0+eNB->prach_DTX_threshold)) { + LOG_I(PHY,"[eNB %d/%d][RAPROC] Frame %d, subframe %d Initiating RA procedure with preamble %d, energy %d.%d dB, delay %d\n", + eNB->Mod_id, + eNB->CC_id, + frame, + subframe, + max_preamble[0], + max_preamble_energy[0]/10, + max_preamble_energy[0]%10, + max_preamble_delay[0]); + pthread_mutex_lock(&eNB->UL_INFO_mutex); + eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 1; + eNB->UL_INFO.rach_ind.rach_indication_body.preamble_list = &eNB->preamble_list[0]; + eNB->UL_INFO.rach_ind.rach_indication_body.tl.tag = NFAPI_RACH_INDICATION_BODY_TAG; + eNB->UL_INFO.rach_ind.header.message_id = NFAPI_RACH_INDICATION; + eNB->UL_INFO.rach_ind.sfn_sf = frame<<4 | subframe; + eNB->preamble_list[0].preamble_rel8.tl.tag = NFAPI_PREAMBLE_REL8_TAG; + eNB->preamble_list[0].preamble_rel8.timing_advance = max_preamble_delay[0]; + eNB->preamble_list[0].preamble_rel8.preamble = max_preamble[0]; + eNB->preamble_list[0].preamble_rel8.rnti = 1+subframe; // note: fid is implicitly 0 here + eNB->preamble_list[0].preamble_rel13.rach_resource_type = 0; + eNB->preamble_list[0].instance_length = 0; //don't know exactly what this is + + if (NFAPI_MODE==NFAPI_MODE_PNF) { // If NFAPI PNF then we need to send the message to the VNF + LOG_D(PHY,"Filling NFAPI 
indication for RACH : SFN_SF:%d TA %d, Preamble %d, rnti %x, rach_resource_type %d\n", + NFAPI_SFNSF2DEC(eNB->UL_INFO.rach_ind.sfn_sf), + eNB->preamble_list[0].preamble_rel8.timing_advance, + eNB->preamble_list[0].preamble_rel8.preamble, + eNB->preamble_list[0].preamble_rel8.rnti, + eNB->preamble_list[0].preamble_rel13.rach_resource_type); + oai_nfapi_rach_ind(&eNB->UL_INFO.rach_ind); + eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 0; + } + + pthread_mutex_unlock(&eNB->UL_INFO_mutex); + } // max_preamble_energy > prach_I0 + 100 + else { + eNB->measurements.prach_I0 = ((eNB->measurements.prach_I0*900)>>10) + ((avg_preamble_energy[0]*124)>>10); + + if (eNB->prach_energy_counter < 100) + eNB->prach_energy_counter++; + } +} + /* Append one UL HARQ/SR element to the UCI (fs6ULcch) block of eNB->FS6bufferZone. The existing blocks are walked with alignedSize() to find the first free byte, asserting that block IDs are consecutive; a new block is created and counted in the first header's nbBlocks when the frame/subframe key differs from current_fsf. uci and harq_ack may be NULL: a NULL uci is recorded as uci.ue_id=0xFFFF. NOTE(review): current_fsf is initialised to -1 and never assigned in this function, so the comparison is true on every call unless fsf itself is -1 and the else branch is effectively unreachable - confirm whether one block per call is intended. */ +void sendFs6Ulharq(enum pckType type, int UEid, PHY_VARS_eNB *eNB, LTE_eNB_UCI *uci, int frame, int subframe, uint8_t *harq_ack, uint8_t tdd_mapping_mode, uint16_t tdd_multiplexing_mask, + uint16_t rnti, + int32_t stat) { + static int current_fsf=-1; + int fsf=frame*16+subframe; + uint8_t *bufferZone=eNB->FS6bufferZone; + commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone; + // move to the end + uint8_t *firstFreeByte=bufferZone; + int curBlock=0; + + if ( current_fsf != fsf ) { + for (int i=0; i < FirstUDPheader->nbBlocks; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + FirstUDPheader->nbBlocks++; + newUDPheader->blockID=curBlock; + newUDPheader->contentBytes=sizeof(fs6_ul_t)+sizeof(fs6_ul_uespec_uci_t); + hULUEuci(newUDPheader)->type=fs6ULcch; + hULUEuci(newUDPheader)->nb_active_ue=0; + } else + for (int i=0; i < FirstUDPheader->nbBlocks-1; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + LOG_D(PHY,"FS6 du, block: %d: adding ul harq/sr: %d, rnti: %d, ueid: %d\n", + curBlock,
type, rnti, UEid); + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + fs6_ul_uespec_uci_element_t *tmp=(fs6_ul_uespec_uci_element_t *)(hULUEuci(newUDPheader)+1); + tmp+=hULUEuci(newUDPheader)->nb_active_ue; + tmp->type=type; + tmp->UEid=UEid; + tmp->frame=frame; + tmp->subframe=subframe; + + if (uci != NULL) + memcpy(&tmp->uci, uci, sizeof(*uci)); + else + tmp->uci.ue_id=0xFFFF; + + if (harq_ack != NULL) + memcpy(tmp->harq_ack, harq_ack, 4); + + tmp->tdd_mapping_mode=tdd_mapping_mode; + tmp->tdd_multiplexing_mask=tdd_multiplexing_mask; + tmp->n0_subband_power_dB=eNB->measurements.n0_subband_power_dB[0][0]; + tmp->rnti=rnti; + tmp->stat=stat; + hULUEuci(newUDPheader)->nb_active_ue++; + newUDPheader->contentBytes+=sizeof(fs6_ul_uespec_uci_element_t); +} + + +void sendFs6Ul(PHY_VARS_eNB *eNB, int UE_id, int harq_pid, int segmentID, int16_t *data, int dataLen, int r_offset) { + uint8_t *bufferZone=eNB->FS6bufferZone; + commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone; + // move to the end + uint8_t *firstFreeByte=bufferZone; + int curBlock=0; + + for (int i=0; i < FirstUDPheader->nbBlocks; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + FirstUDPheader->nbBlocks++; + newUDPheader->blockID=curBlock; + newUDPheader->contentBytes=sizeof(fs6_ul_t)+sizeof(fs6_ul_uespec_t) + dataLen; + hULUE(newUDPheader)->type=fs6ULsch; + hULUE(newUDPheader)->UE_id=UE_id; + hULUE(newUDPheader)->harq_id=harq_pid; + memcpy(hULUE(newUDPheader)->ulsch_power, + eNB->pusch_vars[UE_id]->ulsch_power, + sizeof(int)*2); + hULUE(newUDPheader)->cqi_crc_status=eNB->ulsch[UE_id]->harq_processes[harq_pid]->cqi_crc_status; + hULUE(newUDPheader)->O_ACK=eNB->ulsch[UE_id]->harq_processes[harq_pid]->O_ACK; + memcpy(hULUE(newUDPheader)->o_ACK, eNB->ulsch[UE_id]->harq_processes[harq_pid]->o_ACK, + 
sizeof(eNB->ulsch[UE_id]->harq_processes[harq_pid]->o_ACK)); + hULUE(newUDPheader)->ta=lte_est_timing_advance_pusch(eNB, UE_id); + hULUE(newUDPheader)->segment=segmentID; + memcpy(hULUE(newUDPheader)->o, eNB->ulsch[UE_id]->harq_processes[harq_pid]->o, + sizeof(eNB->ulsch[UE_id]->harq_processes[harq_pid]->o)); + memcpy(hULUE(newUDPheader)+1, data, dataLen); + hULUE(newUDPheader)->segLen=dataLen; + hULUE(newUDPheader)->r_offset=r_offset; + hULUE(newUDPheader)->G=eNB->ulsch[UE_id]->harq_processes[harq_pid]->G; +} + +void pusch_procedures_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) { + uint32_t harq_pid; + LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms; + const int subframe = proc->subframe_rx; + const int frame = proc->frame_rx; + + for (int i = 0; i < NUMBER_OF_UE_MAX; i++) { + LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i]; + + if (ulsch->ue_type > NOCE) + harq_pid = 0; + else + harq_pid= subframe2harq_pid(&eNB->frame_parms,frame,subframe); + + LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid]; + + if (ulsch->rnti>0) + LOG_D(PHY,"eNB->ulsch[%d]->harq_processes[harq_pid:%d] SFN/SF:%04d%d: PUSCH procedures, UE %d/%x ulsch_harq[status:%d SFN/SF:%04d%d active: %d handled:%d]\n", + i, harq_pid, frame,subframe,i,ulsch->rnti, + ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->status, ulsch_harq->handled); + + if ((ulsch) && + (ulsch->rnti>0) && + (ulsch_harq->status == ACTIVE) && + ((ulsch_harq->frame == frame) || (ulsch_harq->repetition_number >1) ) && + ((ulsch_harq->subframe == subframe) || (ulsch_harq->repetition_number >1) ) && + (ulsch_harq->handled == 0)) { + // UE has ULSCH scheduling + for (int rb=0; + rb<=ulsch_harq->nb_rb; + rb++) { + int rb2 = rb+ulsch_harq->first_rb; + eNB->rb_mask_ul[rb2>>5] |= (1<<(rb2&31)); + } + + LOG_D(PHY,"[eNB %d] frame %d, subframe %d: Scheduling ULSCH Reception for UE %d \n", + eNB->Mod_id, frame, subframe, i); + uint8_t nPRS= 
fp->pusch_config_common.ul_ReferenceSignalsPUSCH.nPRS[subframe<<1]; + ulsch->cyclicShift = (ulsch_harq->n_DMRS2 + + fp->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift + + nPRS)%12; + AssertFatal(ulsch_harq->TBS>0,"illegal TBS %d\n",ulsch_harq->TBS); + LOG_D(PHY, + "[eNB %d][PUSCH %d] Frame %d Subframe %d Demodulating PUSCH: dci_alloc %d, rar_alloc %d, round %d, first_rb %d, nb_rb %d, Qm %d, TBS %d, rv %d, cyclic_shift %d (n_DMRS2 %d, cyclicShift_common %d, ), O_ACK %d, beta_cqi %d \n", + eNB->Mod_id,harq_pid,frame,subframe, + ulsch_harq->dci_alloc, + ulsch_harq->rar_alloc, + ulsch_harq->round, + ulsch_harq->first_rb, + ulsch_harq->nb_rb, + ulsch_harq->Qm, + ulsch_harq->TBS, + ulsch_harq->rvidx, + ulsch->cyclicShift, + ulsch_harq->n_DMRS2, + fp->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift, + ulsch_harq->O_ACK, + ulsch->beta_offset_cqi_times8); + start_meas(&eNB->ulsch_demodulation_stats); + eNB->FS6bufferZone=bufferZone; + rx_ulsch(eNB, proc, i); + stop_meas(&eNB->ulsch_demodulation_stats); + // TBD: add datablock for transmission + start_meas(&eNB->ulsch_decoding_stats); + ulsch_decoding(eNB,proc, + i, + 0, // control_only_flag + ulsch_harq->V_UL_DAI, + ulsch_harq->nb_rb>20 ? 
1 : 0); + stop_meas(&eNB->ulsch_decoding_stats); + } + } +} + +void phy_procedures_eNB_uespec_RX_tosplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) { + //RX processing for ue-specific resources + LTE_DL_FRAME_PARMS *fp = &eNB->frame_parms; + const int subframe = proc->subframe_rx; + const int frame = proc->frame_rx; + /* TODO: use correct rxdata */ + + if ((fp->frame_type == TDD) && (subframe_select(fp,subframe)!=SF_UL)) return; + + LOG_D (PHY, "[eNB %d] Frame %d: Doing phy_procedures_eNB_uespec_RX(%d)\n", eNB->Mod_id, frame, subframe); + eNB->rb_mask_ul[0] = 0; + eNB->rb_mask_ul[1] = 0; + eNB->rb_mask_ul[2] = 0; + eNB->rb_mask_ul[3] = 0; + // Fix me here, these should be locked + eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus = 0; + eNB->UL_INFO.crc_ind.crc_indication_body.number_of_crcs = 0; + // Call SRS first since all others depend on presence of SRS or lack thereof + srs_procedures (eNB, proc); + eNB->first_run_I0_measurements = 0; + uci_procedures (eNB, proc); + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { // If PNF or monolithic + pusch_procedures_tosplit(bufferZone, bufSize, eNB,proc); + } + + lte_eNB_I0_measurements (eNB, subframe, 0, eNB->first_run_I0_measurements); + int min_I0=1000,max_I0=0; + + if ((frame==0) && (subframe==4)) { + for (int i=0; i<eNB->frame_parms.N_RB_UL; i++) { + if (i==(eNB->frame_parms.N_RB_UL>>1) - 1) i+=2; + + if (eNB->measurements.n0_subband_power_tot_dB[i]<min_I0) + min_I0 = eNB->measurements.n0_subband_power_tot_dB[i]; + + if (eNB->measurements.n0_subband_power_tot_dB[i]>max_I0) + max_I0 = eNB->measurements.n0_subband_power_tot_dB[i]; + } + + LOG_I (PHY, "max_I0 %d, min_I0 %d\n", max_I0, min_I0); + } + + return; +} + + +void fill_rx_indication_from_split(uint8_t *bufferZone, PHY_VARS_eNB *eNB,int UE_id,int frame,int subframe, ul_propagation_t *ul_propa) { + nfapi_rx_indication_pdu_t *pdu; + int timing_advance_update; + uint32_t harq_pid; + + if 
(eNB->ulsch[UE_id]->ue_type > 0) + harq_pid = 0; + else + harq_pid = subframe2harq_pid (&eNB->frame_parms, + frame, subframe); + + pthread_mutex_lock(&eNB->UL_INFO_mutex); + eNB->UL_INFO.rx_ind.sfn_sf = frame<<4| subframe; + eNB->UL_INFO.rx_ind.rx_indication_body.tl.tag = NFAPI_RX_INDICATION_BODY_TAG; + pdu = &eNB->UL_INFO.rx_ind.rx_indication_body.rx_pdu_list[eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus]; + // pdu->rx_ue_information.handle = eNB->ulsch[UE_id]->handle; + pdu->rx_ue_information.tl.tag = NFAPI_RX_UE_INFORMATION_TAG; + pdu->rx_ue_information.rnti = eNB->ulsch[UE_id]->rnti; + pdu->rx_indication_rel8.tl.tag = NFAPI_RX_INDICATION_REL8_TAG; + pdu->rx_indication_rel8.length = eNB->ulsch[UE_id]->harq_processes[harq_pid]->TBS>>3; + pdu->rx_indication_rel8.offset = 1; // DJP - I dont understand - but broken unless 1 ???? 0; // filled in at the end of the UL_INFO formation + pdu->data = eNB->ulsch[UE_id]->harq_processes[harq_pid]->decodedBytes; + // estimate timing advance for MAC + timing_advance_update = ul_propa[UE_id].ta; + + // if (timing_advance_update > 10) { dump_ulsch(eNB,frame,subframe,UE_id); exit(-1);} + // if (timing_advance_update < -10) { dump_ulsch(eNB,frame,subframe,UE_id); exit(-1);} + switch (eNB->frame_parms.N_RB_DL) { + case 6: /* nothing to do */ + break; + + case 15: + timing_advance_update /= 2; + break; + + case 25: + timing_advance_update /= 4; + break; + + case 50: + timing_advance_update /= 8; + break; + + case 75: + timing_advance_update /= 12; + break; + + case 100: + timing_advance_update /= 16; + break; + + default: + abort (); + } + + // put timing advance command in 0..63 range + timing_advance_update += 31; + + if (timing_advance_update < 0) + timing_advance_update = 0; + + if (timing_advance_update > 63) + timing_advance_update = 63; + + pdu->rx_indication_rel8.timing_advance = timing_advance_update; + // estimate UL_CQI for MAC (from antenna port 0 only) + int SNRtimes10 = 
dB_fixed_times10(eNB->pusch_vars[UE_id]->ulsch_power[0]) - 10 * eNB->measurements.n0_subband_power_dB[0][0]; + + if (SNRtimes10 < -640) + pdu->rx_indication_rel8.ul_cqi = 0; + else if (SNRtimes10 > 635) + pdu->rx_indication_rel8.ul_cqi = 255; + else + pdu->rx_indication_rel8.ul_cqi = (640 + SNRtimes10) / 5; + + LOG_D(PHY,"[PUSCH %d] Frame %d Subframe %d Filling RX_indication with SNR %d (%d), timing_advance %d (update %d)\n", + harq_pid,frame,subframe,SNRtimes10,pdu->rx_indication_rel8.ul_cqi,pdu->rx_indication_rel8.timing_advance, + timing_advance_update); + eNB->UL_INFO.rx_ind.rx_indication_body.number_of_pdus++; + eNB->UL_INFO.rx_ind.sfn_sf = frame<<4 | subframe; + pthread_mutex_unlock(&eNB->UL_INFO_mutex); +} + +void pusch_procedures_fromsplit(uint8_t *bufferZone, int bufSize, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, ul_propagation_t *ul_propa) { + //LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms; + const int subframe = proc->subframe_rx; + const int frame = proc->frame_rx; + uint32_t harq_pid; + uint32_t harq_pid0 = subframe2harq_pid(&eNB->frame_parms,frame,subframe); + + for (int i = 0; i < NUMBER_OF_UE_MAX; i++) { + LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i]; + + if (ulsch->ue_type > NOCE) harq_pid = 0; + else harq_pid=harq_pid0; + + LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid]; + + if (ulsch->rnti>0) + LOG_D(PHY,"eNB->ulsch[%d]->harq_processes[harq_pid:%d] SFN/SF:%04d%d: PUSCH procedures, UE %d/%x ulsch_harq[status:%d SFN/SF:%04d%d handled:%d]\n", + i, harq_pid, frame,subframe,i,ulsch->rnti, + ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->handled); + + if ((ulsch) && + (ulsch->rnti>0) && + (ulsch_harq->status == ACTIVE) && + (ulsch_harq->frame == frame) && + (ulsch_harq->subframe == subframe) && + (ulsch_harq->handled == 0)) { + // UE has ULSCH scheduling + for (int rb=0; + rb<=ulsch_harq->nb_rb; + rb++) { + int rb2 = rb+ulsch_harq->first_rb; + eNB->rb_mask_ul[rb2>>5] |= (1<<(rb2&31)); + } + + 
start_meas(&eNB->ulsch_decoding_stats); + // This is a new packet, so compute quantities regarding segmentation + ulsch_harq->B = ulsch_harq->TBS+24; + lte_segmentation(NULL, + NULL, + ulsch_harq->B, + &ulsch_harq->C, + &ulsch_harq->Cplus, + &ulsch_harq->Cminus, + &ulsch_harq->Kplus, + &ulsch_harq->Kminus, + &ulsch_harq->F); + ulsch_decoding_data(eNB, proc, i, harq_pid, + ulsch_harq->nb_rb>20 ? 1 : 0); + stop_meas(&eNB->ulsch_decoding_stats); + } // if ((ulsch) && + // (ulsch->rnti>0) && + // (ulsch_harq->status == ACTIVE)) + else if ((ulsch) && + (ulsch->rnti>0) && + (ulsch_harq->status == ACTIVE) && + (ulsch_harq->frame == frame) && + (ulsch_harq->subframe == subframe) && + (ulsch_harq->handled == 1)) { + // this harq process is stale, kill it, this 1024 frames later (10s), consider reducing that + ulsch_harq->status = SCH_IDLE; + ulsch_harq->handled = 0; + ulsch->harq_mask &= ~(1 << harq_pid); + LOG_W (PHY, "Removing stale ULSCH config for UE %x harq_pid %d (harq_mask is now 0x%2.2x)\n", ulsch->rnti, harq_pid, ulsch->harq_mask); + } + } // for (i=0; i<NUMBER_OF_UE_MAX; i++) + + while (proc->nbDecode > 0) { + notifiedFIFO_elt_t *req=pullTpool(proc->respDecode, proc->threadPool); + postDecode(proc, req); + delNotifiedFIFO_elt(req); + } +} + /* Walk the nbBlocks received UL blocks stored back to back in bufferZone (stepping with alignedSize()) and copy their content into the eNB state: fs6ULsch blocks carry a data segment plus per-UE fields, fs6ULcch blocks carry UCI elements. Blocks whose contentBytes does not exceed sizeof(fs6_ul_t) are skipped. NOTE(review): UE_id, harq_id, r_offset and segLen are read from the received block and used as array indexes and memcpy offset/length without a range check in this function - confirm they are validated before this point. */ +void recvFs6Ul(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, ul_propagation_t *ul_propa) { + void *bufPtr=bufferZone; + + for (int i=0; i < nbBlocks; i++) { //nbBlocks is the actual received blocks + if ( ((commonUDP_t *)bufPtr)->contentBytes > sizeof(fs6_ul_t) ) { + int type=hULUE(bufPtr)->type; + + if ( type == fs6ULsch) { + LTE_eNB_ULSCH_t *ulsch =eNB->ulsch[hULUE(bufPtr)->UE_id]; + LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[hULUE(bufPtr)->harq_id]; + memcpy(ulsch_harq->eUL+hULUE(bufPtr)->r_offset, + hULUE(bufPtr)+1, + hULUE(bufPtr)->segLen); + memcpy(eNB->pusch_vars[hULUE(bufPtr)->UE_id]->ulsch_power, + hULUE(bufPtr)->ulsch_power, + sizeof(int)*2); + ulsch_harq->G=hULUE(bufPtr)->G; +
ulsch_harq->cqi_crc_status=hULUE(bufPtr)->cqi_crc_status; + //ulsch_harq->O_ACK= hULUE(bufPtr)->O_ACK; + memcpy(ulsch_harq->o_ACK, hULUE(bufPtr)->o_ACK, + sizeof(ulsch_harq->o_ACK)); + memcpy(ulsch_harq->o,hULUE(bufPtr)->o, sizeof(ulsch_harq->o)); + ul_propa[hULUE(bufPtr)->UE_id].ta=hULUE(bufPtr)->ta; + LOG_D(PHY,"Received ulsch data for: rnti:%x, cqi_crc_status %d O_ACK: %d, segment: %d, seglen: %d \n", + ulsch->rnti, ulsch_harq->cqi_crc_status, ulsch_harq->O_ACK,hULUE(bufPtr)->segment, hULUE(bufPtr)->segLen); + } else if ( type == fs6ULcch ) { + int nb_uci=hULUEuci(bufPtr)->nb_active_ue; + fs6_ul_uespec_uci_element_t *tmp=(fs6_ul_uespec_uci_element_t *)(hULUEuci(bufPtr)+1); + + for (int j=0; j < nb_uci ; j++) { + LOG_D(PHY,"FS6 cu, block: %d/%d: received ul harq/sr: %d, rnti: %d, ueid: %d\n", + i, j, type, tmp->rnti, tmp->UEid); + eNB->measurements.n0_subband_power_dB[0][0]=tmp->n0_subband_power_dB; + + if (tmp->uci.ue_id != 0xFFFF) + memcpy(&eNB->uci_vars[tmp->UEid],&tmp->uci, sizeof(tmp->uci)); + + if ( tmp->type == fs6ULindicationHarq ) + fill_uci_harq_indication (tmp->UEid, eNB, &eNB->uci_vars[tmp->UEid], + tmp->frame, tmp->subframe, tmp->harq_ack, + tmp->tdd_mapping_mode, tmp->tdd_multiplexing_mask); + else if ( tmp->type == fs6ULindicationSr ) + fill_sr_indication(tmp->UEid, eNB,tmp->rnti,tmp->frame,tmp->subframe,tmp->stat); + else + LOG_E(PHY, "Split FS6: impossible UL harq type\n"); + + tmp++; + } + } else + LOG_E(PHY, "FS6 ul packet type impossible\n" ); + } + + bufPtr+=alignedSize(bufPtr); + } +} + +void phy_procedures_eNB_uespec_RX_fromsplit(uint8_t *bufferZone, int nbBlocks,PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc) { + // The configuration arrived in Dl, so we can extract the UL data + ul_propagation_t ul_propa[NUMBER_OF_UE_MAX]; + recvFs6Ul(bufferZone, nbBlocks, eNB, ul_propa); + + // dirty memory allocation in OAI... 
+ for (int i = 0; i < NUMBER_OF_UCI_VARS_MAX; i++) + if ( eNB->uci_vars[i].frame == proc->frame_rx && + eNB->uci_vars[i].subframe == proc->subframe_rx ) + eNB->uci_vars[i].active=0; + + pusch_procedures_fromsplit(bufferZone, nbBlocks, eNB, proc, ul_propa); +} + +void rcvFs6DL(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, int frame, int subframe) { + void *bufPtr=bufferZone; + + for (int i=0; i < nbBlocks; i++) { //nbBlocks is the actual received blocks + if ( ((commonUDP_t *)bufPtr)->contentBytes > sizeof(fs6_dl_t) ) { + int type=hDLUE(bufPtr)->type; + + if ( type == fs6DlConfig) { + int curUE=hDLUE(bufPtr)->UE_id; + LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[curUE][0]; + LTE_DL_eNB_HARQ_t *dlsch_harq=dlsch0->harq_processes[hDLUE(bufPtr)->harq_pid]; +#ifdef PHY_TX_THREAD + dlsch0->active[subframe] = 1; +#else + dlsch0->active = 1; +#endif + dlsch0->harq_ids[frame%2][subframe]=hDLUE(bufPtr)->harq_pid; + dlsch0->rnti=hDLUE(bufPtr)->rnti; + dlsch0->sqrt_rho_a=hDLUE(bufPtr)->sqrt_rho_a; + dlsch0->sqrt_rho_b=hDLUE(bufPtr)->sqrt_rho_b; + dlsch_harq->nb_rb=hDLUE(bufPtr)->nb_rb; + memcpy(dlsch_harq->rb_alloc, hDLUE(bufPtr)->rb_alloc, sizeof(hDLUE(bufPtr)->rb_alloc)); + dlsch_harq->Qm=hDLUE(bufPtr)->Qm; + dlsch_harq->Nl=hDLUE(bufPtr)->Nl; + dlsch_harq->pdsch_start=hDLUE(bufPtr)->pdsch_start; +#ifdef PHY_TX_THREAD + dlsch_harq->CEmode = hDLUE(bufPtr)->CEmode; + dlsch_harq->i0=hDLUE(bufPtr)->i0; + dlsch_harq->sib1_br_flag=hDLUE(bufPtr)->sib1_br_flag; +#else + dlsch0->i0=hDLUE(bufPtr)->i0; + dlsch0->sib1_br_flag=hDLUE(bufPtr)->sib1_br_flag; +#endif + fs6Dlunpack(dlsch_harq->eDL, + hDLUE(bufPtr)+1, hDLUE(bufPtr)->dataLen); + LOG_D(PHY,"received %d bits, in harq id: %di fsf: %d.%d, sum %d\n", + hDLUE(bufPtr)->dataLen, hDLUE(bufPtr)->harq_pid, frame, subframe, sum(dlsch_harq->eDL, hDLUE(bufPtr)->dataLen)); + } else if (type == fs6UlConfig) { + int nbUE=(((commonUDP_t *)bufPtr)->contentBytes - sizeof(fs6_dl_t)) / sizeof( fs6_dl_ulsched_t ) ; +#define cpyVal(a) 
memcpy(&ulsch_harq->a,&hTxULUE(bufPtr)->a, sizeof(ulsch_harq->a)) + + for ( int i=0; i < nbUE; i++ ) { + int curUE=hTxULUE(bufPtr)->UE_id; + LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[curUE]; + LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[hTxULUE(bufPtr)->harq_pid]; + ulsch->ue_type=hTxULUE(bufPtr)->ue_type; + ulsch->harq_mask=hTxULUE(bufPtr)->harq_mask; + ulsch->Mlimit=hTxULUE(bufPtr)->Mlimit; + ulsch->max_turbo_iterations=hTxULUE(bufPtr)->max_turbo_iterations; + ulsch->bundling=hTxULUE(bufPtr)->bundling; + ulsch->beta_offset_cqi_times8=hTxULUE(bufPtr)->beta_offset_cqi_times8; + ulsch->beta_offset_ri_times8=hTxULUE(bufPtr)->beta_offset_ri_times8; + ulsch->beta_offset_harqack_times8=hTxULUE(bufPtr)->beta_offset_harqack_times8; + ulsch->Msg3_active=hTxULUE(bufPtr)->Msg3_active; + ulsch->cyclicShift=hTxULUE(bufPtr)->cyclicShift; + ulsch->cooperation_flag=hTxULUE(bufPtr)->cooperation_flag; + ulsch->num_active_cba_groups=hTxULUE(bufPtr)->num_active_cba_groups; + memcpy(ulsch->cba_rnti,hTxULUE(bufPtr)->cba_rnti,sizeof(ulsch->cba_rnti));//NUM_MAX_CBA_GROUP]; + ulsch->rnti=hTxULUE(bufPtr)->rnti; + ulsch_harq->nb_rb=hTxULUE(bufPtr)->nb_rb; + ulsch_harq->handled=0; + ulsch_harq->status = ACTIVE; + ulsch_harq->frame = frame; + ulsch_harq->subframe = subframe; + ulsch_harq->first_rb=hTxULUE(bufPtr)->first_rb; + ulsch_harq->O_RI=hTxULUE(bufPtr)->O_RI; + ulsch_harq->Or1=hTxULUE(bufPtr)->Or1; + ulsch_harq->Msc_initial=hTxULUE(bufPtr)->Msc_initial; + ulsch_harq->Nsymb_initial=hTxULUE(bufPtr)->Nsymb_initial; + ulsch_harq->V_UL_DAI=hTxULUE(bufPtr)->V_UL_DAI; + ulsch_harq->Qm=hTxULUE(bufPtr)->Qm; + ulsch_harq->srs_active=hTxULUE(bufPtr)->srs_active; + ulsch_harq->TBS=hTxULUE(bufPtr)->TBS; + ulsch_harq->Nsymb_pusch=hTxULUE(bufPtr)->Nsymb_pusch; + cpyVal(dci_alloc); + cpyVal(rar_alloc); + cpyVal(status); + cpyVal(Msg3_flag); + cpyVal(phich_active); + cpyVal(phich_ACK); + cpyVal(previous_first_rb); + cpyVal(B); + cpyVal(G); + //cpyVal(o); + cpyVal(uci_format); + cpyVal(Or2); + 
cpyVal(o_RI); + cpyVal(o_ACK); + cpyVal(O_ACK); + //cpyVal(q); + cpyVal(o_RCC); + cpyVal(q_ACK); + cpyVal(q_RI); + cpyVal(RTC); + cpyVal(ndi); + cpyVal(round); + cpyVal(rvidx); + cpyVal(Nl); + cpyVal(n_DMRS); + cpyVal(previous_n_DMRS); + cpyVal(n_DMRS2); + cpyVal(delta_TF); + cpyVal(repetition_number ); + cpyVal(total_number_of_repetitions); + LOG_D(PHY,"Received request to perform ulsch for: rnti:%d, fsf: %d/%d, O_ACK: %d\n", + ulsch->rnti, frame, subframe, ulsch_harq->O_ACK); + } + } else if ( type == fs6ULConfigCCH ) { + fs6_dl_uespec_ulcch_element_t *tmp=(fs6_dl_uespec_ulcch_element_t *)(hTxULcch(bufPtr)+1); + + for (int i=0; i< hTxULcch(bufPtr)->nb_active_ue; i++ ) { + memcpy(&eNB->uci_vars[tmp->UE_id], &tmp->cch_vars, sizeof(tmp->cch_vars)); + // elements are packed back to back by appendFs6DLUEcch: step to the next one + tmp++; + } + } else + LOG_E(PHY, "Impossible block in fs6 DL\n"); + } + + bufPtr+=alignedSize(bufPtr); + } +} + +void phy_procedures_eNB_TX_fromsplit(uint8_t *bufferZone, int nbBlocks, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int do_meas ) { + LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms; + int subframe=proc->subframe_tx; + int frame=proc->frame_tx; + //LTE_UL_eNB_HARQ_t *ulsch_harq; + eNB->pdcch_vars[subframe&1].num_pdcch_symbols=hDL(bufferZone)->num_pdcch_symbols; + eNB->pdcch_vars[subframe&1].num_dci=hDL(bufferZone)->num_dci; + uint8_t num_mdci = eNB->mpdcch_vars[subframe&1].num_dci = hDL(bufferZone)->num_mdci; + eNB->pbch_configured=true; + memcpy(eNB->pbch_pdu,hDL(bufferZone)->pbch_pdu, 4); + + // Remove all scheduled DL, we will populate from the CU sending + for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) { + LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0]; + + if ( dlsch0 && dlsch0->rnti>0 ) { +#ifdef PHY_TX_THREAD + dlsch0->active[subframe] = 0; +#else + dlsch0->active = 0; +#endif + } + } + + rcvFs6DL(bufferZone, nbBlocks, eNB, frame, subframe); + + if (do_meas==1) { + start_meas(&eNB->phy_proc_tx); + start_meas(&eNB->dlsch_common_and_dci); + } + + // clear the transmit data array for the current subframe + for (int aa = 0; aa 
< fp->nb_antenna_ports_eNB; aa++) { + memset (&eNB->common_vars.txdataF[aa][subframe * fp->ofdm_symbol_size * (fp->symbols_per_tti)], + 0, fp->ofdm_symbol_size * (fp->symbols_per_tti) * sizeof (int32_t)); + } + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + if (is_pmch_subframe(frame,subframe,fp)) { + pmch_procedures(eNB,proc); + } else { + // this is not a pmch subframe, so generate PSS/SSS/PBCH + common_signal_procedures(eNB,frame, subframe); + } + } + + // clear previous allocation information for all UEs + for (int i = 0; i < NUMBER_OF_UE_MAX; i++) { + //if (eNB->dlsch[i][0]) + //eNB->dlsch[i][0]->subframe_tx[subframe] = 0; + } + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + for (int i=0; i< hDL(bufferZone)->num_dci; i++) + eNB->pdcch_vars[subframe&1].dci_alloc[i]=hDL(bufferZone)->dci_alloc[i]; + + LOG_D (PHY, "Frame %d, subframe %d: Calling generate_dci_top (pdcch) (num_dci %" PRIu8 ")\n", frame, subframe, hDL(bufferZone)->num_dci); + generate_dci_top(hDL(bufferZone)->num_pdcch_symbols, + hDL(bufferZone)->num_dci, + &eNB->pdcch_vars[subframe&1].dci_alloc[0], + 0, + hDL(bufferZone)->amp, + fp, + eNB->common_vars.txdataF, + subframe); + + if (num_mdci > 0) { + LOG_D (PHY, "[eNB %" PRIu8 "] Frame %d, subframe %d: Calling generate_mdci_top (mpdcch) (num_dci %" PRIu8 ")\n", eNB->Mod_id, frame, subframe, num_mdci); + generate_mdci_top (eNB, frame, subframe, AMP, eNB->common_vars.txdataF); + } + } + + for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) { + LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0]; + LTE_eNB_DLSCH_t *dlsch1 = eNB->dlsch[UE_id][1]; + + if ((dlsch0)&&(dlsch0->rnti>0)&& +#ifdef PHY_TX_THREAD + (dlsch0->active[subframe] == 1) +#else + (dlsch0->active == 1) +#endif + ) { + uint64_t sum=0; + + for ( int i= subframe * fp->ofdm_symbol_size * (fp->symbols_per_tti); + i< (subframe+1) * fp->ofdm_symbol_size * (fp->symbols_per_tti); + i++) + sum+=((int32_t *)(eNB->common_vars.txdataF[0]))[i]; + + 
LOG_D(PHY,"frame: %d, subframe: %d, sum of dlsch mod v1: %lx\n", frame, subframe, sum); + int harq_pid=dlsch0->harq_ids[frame%2][subframe]; + pdsch_procedures(eNB, + proc, + harq_pid, + dlsch0, + dlsch1); + } + } + + eNB->phich_vars[subframe&1]=hDL(bufferZone)->phich_vars; + generate_phich_top(eNB, + proc, + AMP); +} + +#define cpyToDu(a) hTxULUE(newUDPheader)->a=ulsch->a +#define cpyToDuHarq(a) hTxULUE(newUDPheader)->a=ulsch_harq->a +#define memcpyToDuHarq(a) memcpy(&hTxULUE(newUDPheader)->a,&ulsch_harq->a, sizeof(ulsch_harq->a)); + +void appendFs6TxULUE(uint8_t *bufferZone, LTE_DL_FRAME_PARMS *fp, int curUE, LTE_eNB_ULSCH_t *ulsch, int frame, int subframe) { + commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone; + // move to the end + uint8_t *firstFreeByte=bufferZone; + int curBlock=0; + + for (int i=0; i < FirstUDPheader->nbBlocks; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + FirstUDPheader->nbBlocks++; + newUDPheader->blockID=curBlock; + newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_ulsched_t); + // We skip the fs6 DL header, that is populated by caller + // This header will be duplicated during sending + hTxULUE(newUDPheader)->type=fs6UlConfig; + hTxULUE(newUDPheader)->UE_id=curUE; + int harq_pid; + + if (ulsch->ue_type > NOCE) + // LTE-M case + harq_pid = 0; + else + harq_pid = subframe2harq_pid(fp, frame, subframe); + + LTE_UL_eNB_HARQ_t *ulsch_harq=ulsch->harq_processes[harq_pid]; + hTxULUE(newUDPheader)->harq_pid=harq_pid; + cpyToDu(ue_type); + cpyToDu(harq_mask); + cpyToDu(Mlimit); + cpyToDu(max_turbo_iterations); + cpyToDu(bundling); + cpyToDu(beta_offset_cqi_times8); + cpyToDu(beta_offset_ri_times8); + cpyToDu(beta_offset_harqack_times8); + cpyToDu(Msg3_active); + cpyToDu(cyclicShift); + cpyToDu(cooperation_flag); + cpyToDu(num_active_cba_groups); + 
memcpy(hTxULUE(newUDPheader)->cba_rnti,ulsch->cba_rnti,sizeof(ulsch->cba_rnti));//NUM_MAX_CBA_GROUP]; + cpyToDu(rnti); + cpyToDuHarq(nb_rb); + cpyToDuHarq(Msc_initial); + cpyToDuHarq(Nsymb_initial); + cpyToDuHarq(O_RI); + cpyToDuHarq(Or1); + cpyToDuHarq(first_rb); + cpyToDuHarq(V_UL_DAI); + cpyToDuHarq(Qm); + cpyToDuHarq(srs_active); + cpyToDuHarq(TBS); + cpyToDuHarq(Nsymb_pusch); + memcpyToDuHarq(dci_alloc); + memcpyToDuHarq(rar_alloc); + memcpyToDuHarq(status); + memcpyToDuHarq(Msg3_flag); + memcpyToDuHarq(phich_active); + memcpyToDuHarq(phich_ACK); + memcpyToDuHarq(previous_first_rb); + memcpyToDuHarq(B); + memcpyToDuHarq(G); + //memcpyToDuHarq(o); + memcpyToDuHarq(uci_format); + memcpyToDuHarq(Or2); + memcpyToDuHarq(o_RI); + memcpyToDuHarq(o_ACK); + memcpyToDuHarq(O_ACK); + //memcpyToDuHarq(q); + memcpyToDuHarq(o_RCC); + memcpyToDuHarq(q_ACK); + memcpyToDuHarq(q_RI); + memcpyToDuHarq(RTC); + memcpyToDuHarq(ndi); + memcpyToDuHarq(round); + memcpyToDuHarq(rvidx); + memcpyToDuHarq(Nl); + memcpyToDuHarq(n_DMRS); + memcpyToDuHarq(previous_n_DMRS); + memcpyToDuHarq(n_DMRS2); + memcpyToDuHarq(delta_TF); + memcpyToDuHarq(repetition_number ); + memcpyToDuHarq(total_number_of_repetitions); + LOG_D(PHY,"Added request to perform ulsch for: rnti:%x, fsf: %d/%d\n", ulsch->rnti, frame, subframe); +} + +void appendFs6DLUE(uint8_t *bufferZone, LTE_DL_FRAME_PARMS *fp, int UE_id, int8_t harq_pid, LTE_eNB_DLSCH_t *dlsch0, LTE_DL_eNB_HARQ_t *harqData, int frame, int subframe) { + commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone; + // move to the end + uint8_t *firstFreeByte=bufferZone; + int curBlock=0; + + for (int i=0; i < FirstUDPheader->nbBlocks; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + int UEdataLen= get_G(fp, + harqData->nb_rb, + harqData->rb_alloc, + harqData->Qm, + harqData->Nl, + harqData->pdsch_start, + frame,subframe, + 0); + 
AssertFatal(firstFreeByte+ceil16_bytes(UEdataLen)+sizeof(fs6_dl_t) <= bufferZone+FS6_BUF_SIZE, ""); + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + FirstUDPheader->nbBlocks++; + newUDPheader->blockID=curBlock; + newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_t) + ceil16_bytes(UEdataLen); + // We skip the fs6 DL header, that is populated by caller + // This header will be duplicated during sending + hDLUE(newUDPheader)->type=fs6DlConfig; + hDLUE(newUDPheader)->UE_id=UE_id; + hDLUE(newUDPheader)->harq_pid=harq_pid; + hDLUE(newUDPheader)->rnti=dlsch0->rnti; + hDLUE(newUDPheader)->sqrt_rho_a=dlsch0->sqrt_rho_a; + hDLUE(newUDPheader)->sqrt_rho_b=dlsch0->sqrt_rho_b; + hDLUE(newUDPheader)->nb_rb=harqData->nb_rb; + memcpy(hDLUE(newUDPheader)->rb_alloc, harqData->rb_alloc, sizeof(harqData->rb_alloc)); + hDLUE(newUDPheader)->Qm=harqData->Qm; + hDLUE(newUDPheader)->Nl=harqData->Nl; + hDLUE(newUDPheader)->pdsch_start=harqData->pdsch_start; +#ifdef PHY_TX_THREAD + hDLUE(newUDPheader)->CEmode=harqData->CEmode; + hDLUE(newUDPheader)->i0=harqData->i0; + hDLUE(newUDPheader)->sib1_br_flag=harqData->sib1_br_flag; +#else + hDLUE(newUDPheader)->i0=dlsch0->i0; + hDLUE(newUDPheader)->sib1_br_flag=dlsch0->sib1_br_flag; +#endif + hDLUE(newUDPheader)->dataLen=UEdataLen; + fs6Dlpack(hDLUE(newUDPheader)+1, harqData->eDL, UEdataLen); + LOG_D(PHY,"sending %d bits, in harq id: %di fsf: %d.%d, sum %d\n", + UEdataLen, harq_pid, frame, subframe, sum(harqData->eDL, UEdataLen)); + //for (int i=0; i < UEdataLen; i++) + //LOG_D(PHY,"buffer ei[%d]:%hhx\n", i, ( (uint8_t *)(hDLUE(newUDPheader)+1) )[i]); +} + +void appendFs6DLUEcch(uint8_t *bufferZone, PHY_VARS_eNB *eNB, int frame, int subframe) { + commonUDP_t *FirstUDPheader=(commonUDP_t *) bufferZone; + // move to the end + uint8_t *firstFreeByte=bufferZone; + int curBlock=0; + + for (int i=0; i < FirstUDPheader->nbBlocks; i++) { + AssertFatal( ((commonUDP_t *) firstFreeByte)->blockID==curBlock,""); + 
firstFreeByte+=alignedSize(firstFreeByte); + curBlock++; + } + + commonUDP_t *newUDPheader=(commonUDP_t *) firstFreeByte; + bool first_UE=true; + + for (int i = 0; i < NUMBER_OF_UCI_VARS_MAX; i++) { + LTE_eNB_UCI *uci = &(eNB->uci_vars[i]); + + if ((uci->active == 1) && (uci->frame == frame) && (uci->subframe == subframe)) { + LOG_D(PHY,"Frame %d, subframe %d: adding uci procedures (type %d) for %d \n", + frame, + subframe, + uci->type, + i); + + if ( first_UE ) { + FirstUDPheader->nbBlocks++; + newUDPheader->blockID=curBlock; + newUDPheader->contentBytes=sizeof(fs6_dl_t)+sizeof(fs6_dl_uespec_ulcch_t); + hTxULcch(newUDPheader)->type=fs6ULConfigCCH; + hTxULcch(newUDPheader)->nb_active_ue=0; + first_UE=false; + } + + fs6_dl_uespec_ulcch_element_t *tmp=(fs6_dl_uespec_ulcch_element_t *)(hTxULcch(newUDPheader)+1); + tmp+=hTxULcch(newUDPheader)->nb_active_ue; + tmp->UE_id=i; + memcpy(&tmp->cch_vars,uci, sizeof(tmp->cch_vars)); + hTxULcch(newUDPheader)->nb_active_ue++; + newUDPheader->contentBytes+=sizeof(fs6_dl_uespec_ulcch_element_t); + } + } +} + +void phy_procedures_eNB_TX_tosplit(uint8_t *bufferZone, PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int do_meas, uint8_t *buf, int bufSize) { + int frame=proc->frame_tx; + int subframe=proc->subframe_tx; + LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms; + + if ((fp->frame_type == TDD) && (subframe_select (fp, subframe) == SF_UL)) { + LOG_W(HW,"no sending in eNB_TX\n"); + return; + } + + // clear previous allocation information for all UEs + for (int i = 0; i < NUMBER_OF_UE_MAX; i++) { + //if (eNB->dlsch[i][0]) + //eNB->dlsch[i][0]->subframe_tx[subframe] = 0; + } + + // Send to DU the UL scheduled for future UL subframe + for (int i=0; i<NUMBER_OF_UE_MAX; i++) { + int harq_pid; + LTE_eNB_ULSCH_t *ulsch = eNB->ulsch[i]; + if (ulsch == NULL) + continue; + + if (ulsch->ue_type > NOCE) + harq_pid = 0; + else + harq_pid= subframe2harq_pid(&eNB->frame_parms,frame,subframe); + + LTE_UL_eNB_HARQ_t *ulsch_harq = ulsch->harq_processes[harq_pid]; 
+ + if (ulsch->rnti>0) { + LOG_D(PHY,"check in UL scheduled harq %d: rnti %d, tx frame %d/%d, ulsch: %d, %d/%d (handled: %d)\n", + harq_pid, ulsch->rnti, frame, subframe, ulsch_harq->status, ulsch_harq->frame, ulsch_harq->subframe, ulsch_harq->handled); + } + + for (int k=0; k<8; k++) { + ulsch_harq = ulsch->harq_processes[k]; + if (ulsch_harq == NULL) + continue; + + if ((ulsch->rnti>0) && + (ulsch_harq->status == ACTIVE) && + (ulsch_harq->frame == frame) && + (ulsch_harq->subframe == subframe) && + (ulsch_harq->handled == 0) + ) + appendFs6TxULUE(bufferZone, + fp, + i, + ulsch, + frame, + subframe + ); + } + } + + appendFs6DLUEcch(bufferZone, + eNB, + frame, + subframe + ); + uint8_t num_pdcch_symbols = eNB->pdcch_vars[subframe&1].num_pdcch_symbols; + uint8_t num_dci = eNB->pdcch_vars[subframe&1].num_dci; + uint8_t num_mdci = eNB->mpdcch_vars[subframe&1].num_dci; + memcpy(hDL(bufferZone)->pbch_pdu,eNB->pbch_pdu,4); + + if ( num_dci <= 8 ) + LOG_D(PHY,"num_pdcch_symbols %"PRIu8",number dci %"PRIu8"\n",num_pdcch_symbols, num_dci); + else { + LOG_E(PHY, "Num dci too large for current FS6 implementation, reducing to 8 dci (was %d)\n", num_dci); + num_dci=8; + } + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + hDL(bufferZone)->num_pdcch_symbols=num_pdcch_symbols; + hDL(bufferZone)->num_dci=num_dci; + hDL(bufferZone)->num_mdci=num_mdci; + hDL(bufferZone)->amp=AMP; + + for (int i=0; i< hDL(bufferZone)->num_dci; i++) + hDL(bufferZone)->dci_alloc[i]=eNB->pdcch_vars[subframe&1].dci_alloc[i]; + + LOG_D(PHY, "pbch configured: %d\n", eNB->pbch_configured); + } + + if (do_meas==1) stop_meas(&eNB->dlsch_common_and_dci); + + if (do_meas==1) start_meas(&eNB->dlsch_ue_specific); + + for (int UE_id=0; UE_id<NUMBER_OF_UE_MAX; UE_id++) { + LTE_eNB_DLSCH_t *dlsch0 = eNB->dlsch[UE_id][0]; + + if ((dlsch0)&&(dlsch0->rnti>0)&& +#ifdef PHY_TX_THREAD + (dlsch0->active[subframe] == 1) +#else + (dlsch0->active == 1) +#endif + ) { + // get harq_pid + int harq_pid = 
dlsch0->harq_ids[frame%2][subframe]; + AssertFatal(harq_pid>=0,"harq_pid is negative\n"); + + if (harq_pid>=8) { + if (dlsch0->ue_type == NOCE) + LOG_E(PHY,"harq_pid:%d corrupt must be 0-7 UE_id:%d frame:%d subframe:%d rnti:%x [ %1d.%1d.%1d.%1d.%1d.%1d.%1d.%1d\n", harq_pid,UE_id,frame,subframe,dlsch0->rnti, + dlsch0->harq_ids[frame%2][0], + dlsch0->harq_ids[frame%2][1], + dlsch0->harq_ids[frame%2][2], + dlsch0->harq_ids[frame%2][3], + dlsch0->harq_ids[frame%2][4], + dlsch0->harq_ids[frame%2][5], + dlsch0->harq_ids[frame%2][6], + dlsch0->harq_ids[frame%2][7]); + } else { + if (dlsch_procedures(eNB, + proc, + harq_pid, + dlsch0, + &eNB->UE_stats[(uint32_t)UE_id])) { + // data in: dlsch0 harq_processes[harq_pid]->e + /* length + get_G(fp, + dlsch_harq->nb_rb, + dlsch_harq->rb_alloc, + dlsch_harq->Qm, + dlsch_harq->Nl, + dlsch_harq->pdsch_start, + frame,subframe, + 0) + need harq_pid + */ + LTE_DL_eNB_HARQ_t *dlsch_harq=dlsch0->harq_processes[harq_pid]; + appendFs6DLUE(bufferZone, + fp, + UE_id, + harq_pid, + dlsch0, + dlsch_harq, + frame, + subframe + ); + } + } + } else if ((dlsch0)&&(dlsch0->rnti>0)&& +#ifdef PHY_TX_THREAD + (dlsch0->active[subframe] == 0) +#else + (dlsch0->active == 0) +#endif + ) { + // clear subframe TX flag since UE is not scheduled for PDSCH in this subframe (so that we don't look for PUCCH later) + //dlsch0->subframe_tx[subframe]=0; + } + } + + hDL(bufferZone)->phich_vars=eNB->phich_vars[subframe&1]; + + if (do_meas==1) stop_meas(&eNB->dlsch_ue_specific); + + if (do_meas==1) stop_meas(&eNB->phy_proc_tx); + + // MBMS is not working in OAI + if (hDL(bufferZone)->num_mdci) abort(); + + return; +} + +void *DL_du_fs6(void *arg) { + RU_t *ru=(RU_t *)arg; + static uint64_t lastTS; + L1_rxtx_proc_t L1proc= {0}; + // We pick the global thread pool from the legacy code global vars + L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool; + L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode; + 
L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode; + initStaticTime(begingWait); + initStaticTime(begingProcessing); + initRefTimes(fullLoop); + initRefTimes(DuHigh); + initRefTimes(DuLow); + initRefTimes(transportTime); + + while (1) { + for (int i=0; i<ru->num_eNB; i++) { + initBufferZone(bufferZone); + pickStaticTime(begingWait); + int nb_blocks=receiveSubFrame(&sockFS6, bufferZone, sizeof(bufferZone), CTsentCUv0 ); + updateTimesReset(begingWait, &fullLoop, 1000, false, "DU wait CU"); + + if (nb_blocks > 0) { + if ( lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti < hUDP(bufferZone)->timestamp) { + LOG_E(HW,"Missed a subframe: expecting: %lu, received %lu\n", + lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti, + hUDP(bufferZone)->timestamp); + } else if ( lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti > hUDP(bufferZone)->timestamp) { + LOG_E(HW,"Received a subframe in past time from CU (dropping it): expecting: %lu, received %lu\n", + lastTS+ru->eNB_list[i]->frame_parms.samples_per_tti, + hUDP(bufferZone)->timestamp); + } + + pickStaticTime(begingProcessing); + lastTS=hUDP(bufferZone)->timestamp; + setAllfromTS(hUDP(bufferZone)->timestamp - sf_ahead*ru->eNB_list[i]->frame_parms.samples_per_tti, &L1proc); + measTransportTime(hDL(bufferZone)->DuClock, hDL(bufferZone)->CuSpentMicroSec, + &transportTime, 1000, false, "Transport time, to CU + from CU for one subframe"); + phy_procedures_eNB_TX_fromsplit( bufferZone, nb_blocks, ru->eNB_list[i], &L1proc, 1); + updateTimesReset(begingProcessing, &DuHigh, 1000, false, "DU high layer1 processing for DL"); + } else + LOG_E(PHY,"DL not received for subframe\n"); + } + + pickStaticTime(begingProcessing); + feptx_prec(ru, L1proc.frame_tx,L1proc.subframe_tx ); + feptx_ofdm(ru, L1proc.frame_tx,L1proc.subframe_tx ); + ocp_tx_rf(ru, &L1proc); + updateTimesReset(begingProcessing, &DuLow, 1000, false, "DU low layer1 processing for DL"); + + if ( IS_SOFTMODEM_RFSIM ) + return NULL; + } + + return NULL; +} + +void 
UL_du_fs6(RU_t *ru, L1_rxtx_proc_t *proc) { + initStaticTime(begingWait); + initRefTimes(fullLoop); + pickStaticTime(begingWait); + rx_rf(ru, proc); + updateTimesReset(begingWait, &fullLoop, 1000, false, "DU wait USRP"); + // front end processing: convert from time domain to frequency domain + // fills rxdataF buffer + fep_full(ru, proc->subframe_rx); + // Fixme: datamodel issue + PHY_VARS_eNB *eNB = RC.eNB[0][0]; + + if (NFAPI_MODE==NFAPI_MODE_PNF) { + // I am a PNF and I need to let nFAPI know that we have a (sub)frame tick + //add_subframe(&frame, &subframe, 4); + //oai_subframe_ind(proc->frame_tx, proc->subframe_tx); + oai_subframe_ind(proc->frame_rx, proc->subframe_rx); + } + + initBufferZone(bufferZone); + hUDP(bufferZone)->timestamp=proc->timestamp_rx; + prach_eNB_tosplit(bufferZone, FS6_BUF_SIZE, eNB, proc ); + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + phy_procedures_eNB_uespec_RX_tosplit(bufferZone, FS6_BUF_SIZE, eNB, proc ); + } + + if (hUDP(bufferZone)->nbBlocks==0) { + hUDP(bufferZone)->nbBlocks=1; // We have to send the signaling, even is there is no user plan data (no UE) + hUDP(bufferZone)->blockID=0; + hUDP(bufferZone)->contentBytes=sizeof(fs6_ul_t); + } + + for (int i=0; i<ru->num_eNB; i++) { + sendSubFrame(&sockFS6, bufferZone, sizeof(fs6_ul_t), CTsentDUv0); + } +} + +void DL_cu_fs6(RU_t *ru, L1_rxtx_proc_t *proc, uint64_t DuClock, uint64_t startCycle) { + initRefTimes(CUprocessing); + // Fixme: datamodel issue + PHY_VARS_eNB *eNB = RC.eNB[0][0]; + pthread_mutex_lock(&eNB->UL_INFO_mutex); + eNB->UL_INFO.frame = proc->frame_rx; + eNB->UL_INFO.subframe = proc->subframe_rx; + eNB->UL_INFO.module_id = eNB->Mod_id; + eNB->UL_INFO.CC_id = eNB->CC_id; + eNB->if_inst->UL_indication(&eNB->UL_INFO, proc); + pthread_mutex_unlock(&eNB->UL_INFO_mutex); + initBufferZone(bufferZone); + phy_procedures_eNB_TX_tosplit(bufferZone, eNB, proc, 1, bufferZone, FS6_BUF_SIZE); + hUDP(bufferZone)->timestamp=proc->timestamp_tx; + + if 
(hUDP(bufferZone)->nbBlocks==0) { + hUDP(bufferZone)->nbBlocks=1; // We have to send the signaling, even is there is no user plan data (no UE) + hUDP(bufferZone)->blockID=0; + hUDP(bufferZone)->contentBytes=sizeof(fs6_dl_t); + } + + hDL(bufferZone)->DuClock=DuClock; + hDL(bufferZone)->CuSpentMicroSec=(rdtsc()-startCycle)/(cpuf*1000); + updateTimesReset(startCycle, &CUprocessing, 1000, true,"CU entire processing from recv to send"); + sendSubFrame(&sockFS6, bufferZone, sizeof(fs6_dl_t), CTsentCUv0 ); + return; +} + +void UL_cu_fs6(RU_t *ru, L1_rxtx_proc_t *proc, uint64_t *TS, uint64_t *DuClock, uint64_t *startProcessing) { + initBufferZone(bufferZone); + initStaticTime(begingWait); + initRefTimes(fullLoop); + pickStaticTime(begingWait); + int nb_blocks=receiveSubFrame(&sockFS6, bufferZone, sizeof(bufferZone), CTsentDUv0 ); + * DuClock=hUDP(bufferZone)->senderClock; + * startProcessing=rdtsc(); + updateTimesReset(begingWait, &fullLoop, 1000, false, "CU wait DU"); + + if (nb_blocks ==0) { + LOG_W(PHY, "CU lost a subframe\n"); + return; + } + + if (nb_blocks != hUDP(bufferZone)->nbBlocks ) + LOG_W(PHY, "received %d blocks for %d expected\n", nb_blocks, hUDP(bufferZone)->nbBlocks); + + if ( *TS != hUDP(bufferZone)->timestamp ) { + LOG_W(HW, "CU received time: %lu instead of %lu expected\n", hUDP(bufferZone)->timestamp, *TS); + *TS=hUDP(bufferZone)->timestamp; + } + + setAllfromTS(hUDP(bufferZone)->timestamp, proc); + PHY_VARS_eNB *eNB = RC.eNB[0][0]; + + if (is_prach_subframe(&eNB->frame_parms, proc->frame_prach,proc->subframe_prach)>0) + prach_eNB_fromsplit(bufferZone, sizeof(bufferZone), eNB, proc); + + release_UE_in_freeList(eNB->Mod_id); + + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + phy_procedures_eNB_uespec_RX_fromsplit(bufferZone, nb_blocks, eNB, proc); + } +} + +void *cu_fs6(void *arg) { + setbuf(stdout, NULL); + setbuf(stderr, NULL); + RU_t *ru = (RU_t *)arg; + //RU_proc_t *proc = &ru->proc; + fill_rf_config(ru,ru->rf_config_file); + 
init_frame_parms(ru->frame_parms,1); + phy_init_RU(ru); + wait_sync("ru_thread"); + // zero-filled: strncpy() below copies at most 1023 bytes and does not terminate a longer input + char remoteIP[1024]= {0}; + strncpy(remoteIP,get_softmodem_params()->split73+3, 1023); //three first char should be cu: or du: + char port_def[256]=DU_PORT; + + for (int i=0; i <1000; i++) + if (remoteIP[i]==':') { + strncpy(port_def,remoteIP+i+1,255); + remoteIP[i]=0; + break; + } + + AssertFatal(createUDPsock(NULL, CU_PORT, remoteIP, port_def, &sockFS6), ""); + L1_rxtx_proc_t L1proc= {0}; + // We pick the global thread pool from the legacy code global vars + L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool; + L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode; + L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode; + uint64_t timeStamp=0; + initStaticTime(begingWait); + initStaticTime(begingWait2); + initRefTimes(waitDUAndProcessingUL); + initRefTimes(makeSendDL); + initRefTimes(fullLoop); + uint64_t DuClock=0, startProcessing=0; + + while(1) { + timeStamp+=ru->frame_parms->samples_per_tti; + updateTimesReset(begingWait, &fullLoop, 1000, true, "CU for full SubFrame (must be less 1ms)"); + pickStaticTime(begingWait); + UL_cu_fs6(ru, &L1proc, &timeStamp, &DuClock, &startProcessing); + updateTimesReset(begingWait, &waitDUAndProcessingUL, 1000, true,"CU Time in wait Rx + Ul processing"); + pickStaticTime(begingWait2); + DL_cu_fs6(ru, &L1proc, DuClock, startProcessing); + updateTimesReset(begingWait2, &makeSendDL, 1000, true,"CU Time in DL build+send"); + } + + return NULL; +} + +void *du_fs6(void *arg) { + setbuf(stdout, NULL); + setbuf(stderr, NULL); + RU_t *ru = (RU_t *)arg; + //RU_proc_t *proc = &ru->proc; + fill_rf_config(ru,ru->rf_config_file); + init_frame_parms(ru->frame_parms,1); + phy_init_RU(ru); + init_rf(ru); + wait_sync("ru_thread"); + // zero-filled: strncpy() below copies at most 1023 bytes and does not terminate a longer input + char remoteIP[1024]= {0}; + strncpy(remoteIP,get_softmodem_params()->split73+3,1023); //three first char should be cu: or du: + char port_def[256]=CU_PORT; + + for (int i=0; i <1000; i++) + if (remoteIP[i]==':') { + 
strncpy(port_def,remoteIP+i+1,255); + remoteIP[i]=0; + break; + } + + AssertFatal(createUDPsock(NULL, DU_PORT, remoteIP, port_def, &sockFS6), ""); + + if (ru->rfdevice.trx_start_func(&ru->rfdevice) != 0) + LOG_E(HW,"Could not start the RF device\n"); + else + LOG_I(PHY,"RU %d rf device ready\n",ru->idx); + + initStaticTime(begingWait); + initRefTimes(waitRxAndProcessingUL); + initRefTimes(fullLoop); + pthread_t t; + + if ( !IS_SOFTMODEM_RFSIM ) + threadCreate(&t, DL_du_fs6, (void *)ru, "MainDuTx", -1, OAI_PRIORITY_RT_MAX); + + L1_rxtx_proc_t L1proc= {0}; + // We pick the global thread pool from the legacy code global vars + L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool; + L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode; + L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode; + + while(!oai_exit) { + updateTimesReset(begingWait, &fullLoop, 1000, true,"DU for full SubFrame (must be less 1ms)"); + pickStaticTime(begingWait); + UL_du_fs6(ru, &L1proc); + + if ( IS_SOFTMODEM_RFSIM ) + DL_du_fs6((void *)ru); + + updateTimesReset(begingWait, &waitRxAndProcessingUL, 1000, true,"DU Time in wait Rx + Ul processing"); + } + + ru->rfdevice.trx_end_func(&ru->rfdevice); + LOG_I(PHY,"RU %d rf device stopped\n",ru->idx); + return NULL; +} diff --git a/executables/main-ocp.c b/executables/main-ocp.c new file mode 100644 index 0000000000000000000000000000000000000000..d2ee9418dcaa2efb4059324a1e177d25f27967fe --- /dev/null +++ b/executables/main-ocp.c @@ -0,0 +1,1401 @@ +/* +* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The OpenAirInterface Software Alliance licenses this file to You under +* the OAI Public License, Version 1.1 (the "License"); you may not use this file +* except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.openairinterface.org/?page_id=698 +* +* Author and copyright: Laurent Thomas, open-cells.com +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*------------------------------------------------------------------------------- +* For more information about the OpenAirInterface (OAI) Software Alliance: +* contact@openairinterface.org +*/ + + +/* + * This file replaces + * targets/RT/USER/lte-softmodem.c + * targets/RT/USER/rt_wrapper.c + * targets/RT/USER/lte-ru.c + * targets/RT/USER/lte-enb.c + * targets/RT/USER/ru_control.c + * openair1/SCHED/prach_procedures.c + * The merger of OpenAir central code to this branch + * should check if these 3 files are modified and analyze if code code has to be copied in here + */ +#define _GNU_SOURCE +#include <pthread.h> + +#include <common/utils/LOG/log.h> +#include <common/utils/system.h> +#include <common/utils/assertions.h> +static int DEFBANDS[] = {7}; +static int DEFENBS[] = {0}; +#include <common/config/config_userapi.h> +#include <targets/RT/USER/lte-softmodem.h> +#include <openair1/PHY/defs_eNB.h> +#include <openair1/PHY/phy_extern.h> +#include <nfapi/oai_integration/vendor_ext.h> +#include <openair1/SCHED/fapi_l1.h> +#include <openair1/PHY/INIT/phy_init.h> +#include <openair2/LAYER2/MAC/mac_extern.h> +#include <openair1/PHY/LTE_REFSIG/lte_refsig.h> +#include <nfapi/oai_integration/nfapi_pnf.h> +#include <executables/split_headers.h> +#include <common/utils/threadPool/thread-pool.h> +#include <openair2/ENB_APP/NB_IoT_interface.h> +#include <common/utils/load_module_shlib.h> +#include <targets/COMMON/create_tasks.h> +#include <openair1/PHY/TOOLS/phy_scope_interface.h> +#include 
<openair2/UTIL/OPT/opt.h> +#include <openair1/SIMULATION/TOOLS/sim.h> +#include <openair1/PHY/phy_vars.h> +#include <openair1/SCHED/sched_common_vars.h> +#include <openair2/LAYER2/MAC/mac_vars.h> +#include <openair2/RRC/LTE/rrc_vars.h> + +pthread_cond_t nfapi_sync_cond; +pthread_mutex_t nfapi_sync_mutex; +int nfapi_sync_var=-1; //!< protected by mutex \ref nfapi_sync_mutex +pthread_cond_t sync_cond; +pthread_mutex_t sync_mutex; +int sync_var=-1; //!< protected by mutex \ref sync_mutex. +int config_sync_var=-1; +volatile int oai_exit = 0; +double cpuf; +uint16_t sf_ahead=4; +int otg_enabled; +uint64_t downlink_frequency[MAX_NUM_CCs][4]; +int32_t uplink_frequency_offset[MAX_NUM_CCs][4]; +int split73; +char * split73_config; +int split73; + +static void *ru_thread( void *param ); +void kill_RU_proc(RU_t *ru) { +} +void kill_eNB_proc(int inst) { +} +void free_transport(PHY_VARS_eNB *eNB) { +} +void reset_opp_meas(void) { +} +extern void phy_free_RU(RU_t *); + +void exit_function(const char *file, const char *function, const int line, const char *s) { + if (s != NULL) { + printf("%s:%d %s() Exiting OAI softmodem: %s\n",file,line, function, s); + } + + close_log_mem(); + oai_exit = 1; + + if (RC.ru == NULL) + exit(-1); // likely init not completed, prevent crash or hang, exit now... 
+ + for (int ru_id=0; ru_id<RC.nb_RU; ru_id++) { + if (RC.ru[ru_id] && RC.ru[ru_id]->rfdevice.trx_end_func) { + RC.ru[ru_id]->rfdevice.trx_end_func(&RC.ru[ru_id]->rfdevice); + RC.ru[ru_id]->rfdevice.trx_end_func = NULL; + } + + if (RC.ru[ru_id] && RC.ru[ru_id]->ifdevice.trx_end_func) { + RC.ru[ru_id]->ifdevice.trx_end_func(&RC.ru[ru_id]->ifdevice); + RC.ru[ru_id]->ifdevice.trx_end_func = NULL; + } + } + + sleep(1); //allow lte-softmodem threads to exit first + exit(1); +} + +// Fixme: there are many mistakes in the datamodel and in redundant variables +// TDD is also more complex +void setAllfromTS(uint64_t TS, L1_rxtx_proc_t *proc) { + for (int i=0; i < RC.nb_inst; i++) { + for (int j=0; j<RC.nb_CC[i]; j++) { + LTE_DL_FRAME_PARMS *fp=&RC.eNB[i][j]->frame_parms; + uint64_t TStx=TS+(sf_ahead)*fp->samples_per_tti; + uint64_t TSrach=TS;//-fp->samples_per_tti; + proc->timestamp_rx= TS; + proc->timestamp_tx= TStx; + proc->subframe_rx= (TS / fp->samples_per_tti)%10; + proc->subframe_prach=(TSrach / fp->samples_per_tti)%10; + proc->subframe_prach_br=(TSrach / fp->samples_per_tti)%10; + proc->frame_rx= (TS / (fp->samples_per_tti*10))&1023; + proc->frame_prach= (TSrach / (fp->samples_per_tti*10))&1023; + proc->frame_prach_br=(TSrach / (fp->samples_per_tti*10))&1023; + proc->frame_tx= (TStx / (fp->samples_per_tti*10))&1023; + proc->subframe_tx= (TStx / fp->samples_per_tti)%10; + } + } + + return; +} + +void init_RU_proc(RU_t *ru) { + pthread_t t; + + switch(split73) { + case SPLIT73_CU: + threadCreate(&t, cu_fs6, (void *)ru, "MainCu", -1, OAI_PRIORITY_RT_MAX); + break; + case SPLIT73_DU: + threadCreate(&t, du_fs6, (void *)ru, "MainDuRx", -1, OAI_PRIORITY_RT_MAX); + break; + default: + threadCreate(&t, ru_thread, (void *)ru, "MainRu", -1, OAI_PRIORITY_RT_MAX); + } +} + +// Create per UE structures +void init_transport(PHY_VARS_eNB *eNB) { + LTE_DL_FRAME_PARMS *fp = &eNB->frame_parms; + LOG_I(PHY, "Initialise transport\n"); + + for (int i=0; i<NUMBER_OF_UE_MAX; i++) { + 
LOG_D(PHY,"Allocating Transport Channel Buffers for DLSCH, UE %d\n",i); + + for (int j=0; j<2; j++) { + AssertFatal( (eNB->dlsch[i][j] = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL,0,fp)) != NULL, + "Can't get eNB dlsch structures for UE %d \n", i); + eNB->dlsch[i][j]->rnti=0; + LOG_D(PHY,"dlsch[%d][%d] => %p rnti:%d\n",i,j,eNB->dlsch[i][j], eNB->dlsch[i][j]->rnti); + } + + LOG_D(PHY,"Allocating Transport Channel Buffer for ULSCH, UE %d\n",i); + AssertFatal((eNB->ulsch[1+i] = new_eNB_ulsch(MAX_TURBO_ITERATIONS,fp->N_RB_UL, 0)) != NULL, + "Can't get eNB ulsch structures\n"); + // this is the transmission mode for the signalling channels + // this will be overwritten with the real transmission mode by the RRC once the UE is connected + eNB->transmission_mode[i] = fp->nb_antenna_ports_eNB==1 ? 1 : 2; + } + + // ULSCH for RA + AssertFatal( (eNB->ulsch[0] = new_eNB_ulsch(MAX_TURBO_ITERATIONS, fp->N_RB_UL, 0)) !=NULL, + "Can't get eNB ulsch structures\n"); + eNB->dlsch_SI = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp); + LOG_D(PHY,"eNB %d.%d : SI %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_SI); + eNB->dlsch_ra = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp); + LOG_D(PHY,"eNB %d.%d : RA %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_ra); + eNB->dlsch_MCH = new_eNB_dlsch(1,8,NSOFT,fp->N_RB_DL, 0, fp); + LOG_D(PHY,"eNB %d.%d : MCH %p\n",eNB->Mod_id,eNB->CC_id,eNB->dlsch_MCH); + eNB->rx_total_gain_dB=130; + + for(int i=0; i<NUMBER_OF_UE_MAX; i++) + eNB->mu_mimo_mode[i].dl_pow_off = 2; + + eNB->check_for_total_transmissions = 0; + eNB->check_for_MUMIMO_transmissions = 0; + eNB->FULL_MUMIMO_transmissions = 0; + eNB->check_for_SUMIMO_transmissions = 0; + fp->pucch_config_common.deltaPUCCH_Shift = 1; +} + +void init_eNB_afterRU(void) { + for (int inst=0; inst<RC.nb_inst; inst++) { + for (int CC_id=0; CC_id<RC.nb_CC[inst]; CC_id++) { + PHY_VARS_eNB *eNB = RC.eNB[inst][CC_id]; + phy_init_lte_eNB(eNB,0,0); + eNB->frame_parms.nb_antennas_rx = 0; + eNB->frame_parms.nb_antennas_tx = 0; + 
eNB->prach_vars.rxsigF[0] = (int16_t **)malloc16(64*sizeof(int16_t *)); + + for (int ce_level=0; ce_level<4; ce_level++) { + eNB->prach_vars_br.rxsigF[ce_level] = (int16_t **)malloc16(64*sizeof(int16_t *)); + } + + for (int ru_id=0,aa=0; ru_id<eNB->num_RU; ru_id++) { + eNB->frame_parms.nb_antennas_rx += eNB->RU_list[ru_id]->nb_rx; + eNB->frame_parms.nb_antennas_tx += eNB->RU_list[ru_id]->nb_tx; + AssertFatal(eNB->RU_list[ru_id]->common.rxdataF!=NULL, + "RU %d : common.rxdataF is NULL\n", + eNB->RU_list[ru_id]->idx); + AssertFatal(eNB->RU_list[ru_id]->prach_rxsigF!=NULL, + "RU %d : prach_rxsigF is NULL\n", + eNB->RU_list[ru_id]->idx); + + for (int i=0; i<eNB->RU_list[ru_id]->nb_rx; aa++,i++) { + LOG_I(PHY,"Attaching RU %d antenna %d to eNB antenna %d\n",eNB->RU_list[ru_id]->idx,i,aa); + eNB->prach_vars.rxsigF[0][aa] = eNB->RU_list[ru_id]->prach_rxsigF[i]; + + for (int ce_level=0; ce_level<4; ce_level++) + eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[ru_id]->prach_rxsigF_br[ce_level][i]; + + eNB->common_vars.rxdataF[aa] = eNB->RU_list[ru_id]->common.rxdataF[i]; + } + } + + AssertFatal( eNB->frame_parms.nb_antennas_rx > 0 && eNB->frame_parms.nb_antennas_rx < 4, ""); + AssertFatal( eNB->frame_parms.nb_antennas_tx > 0 && eNB->frame_parms.nb_antennas_tx < 4, ""); /* TX antenna count summed over the RUs must be 1..3, same bound as RX above */ + LOG_I(PHY,"inst %d, CC_id %d : nb_antennas_rx %d\n",inst,CC_id,eNB->frame_parms.nb_antennas_rx); + init_transport(eNB); + //init_precoding_weights(RC.eNB[inst][CC_id]); + } + } +} + +void init_eNB(int single_thread_flag,int wait_for_sync) { + AssertFatal(RC.eNB != NULL,"RC.eNB must have been allocated\n"); + + for (int inst=0; inst<RC.nb_L1_inst; inst++) { + AssertFatal(RC.eNB[inst] != NULL,"RC.eNB[%d] must have been allocated\n", inst); + + for (int CC_id=0; CC_id<RC.nb_L1_CC[inst]; CC_id++) { + AssertFatal(RC.eNB[inst][CC_id] != NULL,"RC.eNB[%d][%d] must have been allocated\n", inst, CC_id); + PHY_VARS_eNB *eNB = RC.eNB[inst][CC_id]; + eNB->abstraction_flag = 0; + eNB->single_thread_flag = 
single_thread_flag; + AssertFatal((eNB->if_inst = IF_Module_init(inst))!=NULL,"Cannot register interface"); + eNB->if_inst->schedule_response = schedule_response; + eNB->if_inst->PHY_config_req = phy_config_request; + memset((void *)&eNB->UL_INFO,0,sizeof(eNB->UL_INFO)); + memset((void *)&eNB->Sched_INFO,0,sizeof(eNB->Sched_INFO)); + pthread_mutex_init( &eNB->UL_INFO_mutex, NULL); + LOG_I(PHY,"Setting indication lists\n"); + eNB->UL_INFO.rx_ind.rx_indication_body.rx_pdu_list = eNB->rx_pdu_list; + eNB->UL_INFO.crc_ind.crc_indication_body.crc_pdu_list = eNB->crc_pdu_list; + eNB->UL_INFO.sr_ind.sr_indication_body.sr_pdu_list = eNB->sr_pdu_list; + eNB->UL_INFO.harq_ind.harq_indication_body.harq_pdu_list = eNB->harq_pdu_list; + eNB->UL_INFO.cqi_ind.cqi_indication_body.cqi_pdu_list = eNB->cqi_pdu_list; + eNB->UL_INFO.cqi_ind.cqi_indication_body.cqi_raw_pdu_list = eNB->cqi_raw_pdu_list; + eNB->prach_energy_counter = 0; + } + } + + SET_LOG_DEBUG(PRACH); +} + +void stop_eNB(int nb_inst) { + for (int inst=0; inst<nb_inst; inst++) { + LOG_I(PHY,"Killing eNB %d processing threads\n",inst); + kill_eNB_proc(inst); + } +} + +// this is for RU with local RF unit +void fill_rf_config(RU_t *ru, char *rf_config_file) { + int i; + LTE_DL_FRAME_PARMS *fp = ru->frame_parms; + openair0_config_t *cfg = &ru->openair0_cfg; + //printf("////////////////numerology in config = %d\n",numerology); + int numerology = get_softmodem_params()->numerology; + + if(fp->N_RB_DL == 100) { + if(numerology == 0) { + if (fp->threequarter_fs) { + cfg->sample_rate=23.04e6; + cfg->samples_per_frame = 230400; + cfg->tx_bw = 10e6; + cfg->rx_bw = 10e6; + } else { + cfg->sample_rate=30.72e6; + cfg->samples_per_frame = 307200; + cfg->tx_bw = 10e6; + cfg->rx_bw = 10e6; + } + } else if(numerology == 1) { + cfg->sample_rate=61.44e6; + cfg->samples_per_frame = 307200; + cfg->tx_bw = 20e6; + cfg->rx_bw = 20e6; + } else if(numerology == 2) { + cfg->sample_rate=122.88e6; + cfg->samples_per_frame = 307200; + cfg->tx_bw = 
40e6; + cfg->rx_bw = 40e6; + } else { + LOG_E(PHY,"Wrong input for numerology %d\n setting to 20MHz normal CP configuration",numerology); + cfg->sample_rate=30.72e6; + cfg->samples_per_frame = 307200; + cfg->tx_bw = 10e6; + cfg->rx_bw = 10e6; + } + } else if(fp->N_RB_DL == 50) { + cfg->sample_rate=15.36e6; + cfg->samples_per_frame = 153600; + cfg->tx_bw = 5e6; + cfg->rx_bw = 5e6; + } else if (fp->N_RB_DL == 25) { + cfg->sample_rate=7.68e6; + cfg->samples_per_frame = 76800; + cfg->tx_bw = 2.5e6; + cfg->rx_bw = 2.5e6; + } else if (fp->N_RB_DL == 6) { + cfg->sample_rate=1.92e6; + cfg->samples_per_frame = 19200; + cfg->tx_bw = 1.5e6; + cfg->rx_bw = 1.5e6; + } else AssertFatal(1==0,"Unknown N_RB_DL %d\n",fp->N_RB_DL); + + if (fp->frame_type==TDD) + cfg->duplex_mode = duplex_mode_TDD; + else //FDD + cfg->duplex_mode = duplex_mode_FDD; + + cfg->Mod_id = 0; + cfg->num_rb_dl=fp->N_RB_DL; + cfg->tx_num_channels=ru->nb_tx; + cfg->rx_num_channels=ru->nb_rx; + cfg->clock_source=get_softmodem_params()->clock_source; + + for (i=0; i<ru->nb_tx; i++) { + cfg->tx_freq[i] = (double)fp->dl_CarrierFreq; + cfg->rx_freq[i] = (double)fp->ul_CarrierFreq; + cfg->tx_gain[i] = (double)ru->att_tx; + cfg->rx_gain[i] = ru->max_rxgain-(double)ru->att_rx; + cfg->configFilename = rf_config_file; + LOG_I(PHY,"channel %d, Setting tx_gain offset %f, rx_gain offset %f, tx_freq %f, rx_freq %f\n", + i, cfg->tx_gain[i], + cfg->rx_gain[i], + cfg->tx_freq[i], + cfg->rx_freq[i]); + } +} + +/* this function maps the RU tx and rx buffers to the available rf chains. + Each rf chain is is addressed by the card number and the chain on the card. The + rf_map specifies for each antenna port, on which rf chain the mapping should start. Multiple + antennas are mapped to successive RF chains on the same card. 
*/ +int setup_RU_buffers(RU_t *ru) { + //uint16_t N_TA_offset = 0; + LTE_DL_FRAME_PARMS *frame_parms; + AssertFatal(ru, "ru is NULL"); + frame_parms = ru->frame_parms; + LOG_I(PHY,"setup_RU_buffers: frame_parms = %p\n",frame_parms); + + if (frame_parms->frame_type == TDD) { + if (frame_parms->N_RB_DL == 100) { + ru->N_TA_offset = 624; + } else if (frame_parms->N_RB_DL == 50) { + ru->N_TA_offset = 624/2; + ru->sf_extension /= 2; + ru->end_of_burst_delay /= 2; + } else if (frame_parms->N_RB_DL == 25) { + ru->N_TA_offset = 624/4; + ru->sf_extension /= 4; + ru->end_of_burst_delay /= 4; + } else { + LOG_E(PHY,"not handled, todo\n"); + exit(1); + } + } else { + ru->N_TA_offset = 0; + ru->sf_extension = 0; + ru->end_of_burst_delay = 0; + } + + return(0); +} + +void init_precoding_weights(PHY_VARS_eNB *eNB) { + int layer,ru_id,aa,re,ue,tb; + LTE_DL_FRAME_PARMS *fp=&eNB->frame_parms; + RU_t *ru; + LTE_eNB_DLSCH_t *dlsch; + + // init precoding weigths + for (ue=0; ue<NUMBER_OF_UE_MAX; ue++) { + for (tb=0; tb<2; tb++) { + dlsch = eNB->dlsch[ue][tb]; + + for (layer=0; layer<4; layer++) { + int nb_tx=0; + + for (ru_id=0; ru_id<RC.nb_RU; ru_id++) { + ru = RC.ru[ru_id]; + nb_tx+=ru->nb_tx; + } + + dlsch->ue_spec_bf_weights[layer] = (int32_t **)malloc16(nb_tx*sizeof(int32_t *)); + + for (aa=0; aa<nb_tx; aa++) { + dlsch->ue_spec_bf_weights[layer][aa] = (int32_t *)malloc16(fp->ofdm_symbol_size*sizeof(int32_t)); + + for (re=0; re<fp->ofdm_symbol_size; re++) { + dlsch->ue_spec_bf_weights[layer][aa][re] = 0x00007fff; + } + } + } + } + } +} + +void ocp_rx_prach(PHY_VARS_eNB *eNB, + L1_rxtx_proc_t *proc, + RU_t *ru, + uint16_t *max_preamble, + uint16_t *max_preamble_energy, + uint16_t *max_preamble_delay, + uint16_t *avg_preamble_energy, + uint16_t Nf, + uint8_t tdd_mapindex, + uint8_t br_flag) { + int i; + int prach_mask=0; + + if (br_flag == 0) { + rx_prach0(eNB,ru,proc->frame_prach, proc->subframe_prach, + 
max_preamble,max_preamble_energy,max_preamble_delay,avg_preamble_energy,Nf,tdd_mapindex,0,0); + } else { // This is procedure for eMTC, basically handling the repetitions + prach_mask = is_prach_subframe(&eNB->frame_parms,proc->frame_prach_br,proc->subframe_prach_br); + + for (i=0; i<4; i++) { + if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) && + ((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now + + // if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2) + if (eNB->prach_vars_br.repetition_number[i]==0) eNB->prach_vars_br.first_frame[i]=proc->frame_prach_br; + + // increment repetition number + eNB->prach_vars_br.repetition_number[i]++; + // do basic PRACH reception + rx_prach0(eNB,ru,proc->frame_prach, proc->subframe_prach_br, + max_preamble,max_preamble_energy,max_preamble_delay,avg_preamble_energy,Nf,tdd_mapindex,1,i); + + // if last repetition, clear counter + if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) { + eNB->prach_vars_br.repetition_number[i]=0; + } + } + } /* for i ... 
*/ + } /* else br_flag == 0 */ +} + +void prach_procedures_ocp(PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int br_flag) { + uint16_t max_preamble[4],max_preamble_energy[4],max_preamble_delay[4],avg_preamble_energy[4]; + RU_t *ru; + int aa=0; + int ru_aa; + + for (int i=0; i<eNB->num_RU; i++) { + ru=eNB->RU_list[i]; + + for (ru_aa=0,aa=0; ru_aa<ru->nb_rx; ru_aa++,aa++) { + eNB->prach_vars.rxsigF[0][aa] = eNB->RU_list[i]->prach_rxsigF[ru_aa]; + int ce_level; + + if (br_flag==1) + for (ce_level=0; ce_level<4; ce_level++) + eNB->prach_vars_br.rxsigF[ce_level][aa] = eNB->RU_list[i]->prach_rxsigF_br[ce_level][ru_aa]; + } + } + + // run PRACH detection for CE-level 0 only for now when br_flag is set + ocp_rx_prach(eNB, + proc, + eNB->RU_list[0], + &max_preamble[0], + &max_preamble_energy[0], + &max_preamble_delay[0], + &avg_preamble_energy[0], + proc->frame_prach, + 0 + ,br_flag + ); + LOG_D(PHY,"RACH detection index 0: max preamble: %u, energy: %u, delay: %u, avg energy: %u\n", + max_preamble[0], + max_preamble_energy[0], + max_preamble_delay[0], + avg_preamble_energy[0] + ); + + if (br_flag==1) { + int prach_mask; + prach_mask = is_prach_subframe (&eNB->frame_parms, proc->frame_prach_br, proc->subframe_prach_br); + eNB->UL_INFO.rach_ind_br.rach_indication_body.preamble_list = eNB->preamble_list_br; + int ind = 0; + int ce_level = 0; + /* Save for later, it doesn't work + for (int ind=0,ce_level=0;ce_level<4;ce_level++) { + + if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1)&& + (prach_mask&(1<<(1+ce_level)) > 0) && // prach is active and CE level has finished its repetitions + (eNB->prach_vars_br.repetition_number[ce_level]== + eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) { + + */ + + if (eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[0] == 1) { + if ((eNB->prach_energy_counter == 100) && (max_preamble_energy[0] > 
eNB->measurements.prach_I0 + eNB->prach_DTX_threshold_emtc[0])) { + eNB->UL_INFO.rach_ind_br.rach_indication_body.number_of_preambles++; + eNB->preamble_list_br[ind].preamble_rel8.timing_advance = max_preamble_delay[ind]; // + eNB->preamble_list_br[ind].preamble_rel8.preamble = max_preamble[ind]; + // note: fid is implicitly 0 here, this is the rule for eMTC RA-RNTI from 36.321, Section 5.1.4 + eNB->preamble_list_br[ind].preamble_rel8.rnti = 1 + proc->subframe_prach + (60*(eNB->prach_vars_br.first_frame[ce_level] % 40)); + eNB->preamble_list_br[ind].instance_length = 0; //don't know exactly what this is + eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type = 1 + ce_level; // CE Level + LOG_I (PHY, "Filling NFAPI indication for RACH %d CELevel %d (mask %x) : TA %d, Preamble %d, rnti %x, rach_resource_type %d\n", + ind, + ce_level, + prach_mask, + eNB->preamble_list_br[ind].preamble_rel8.timing_advance, + eNB->preamble_list_br[ind].preamble_rel8.preamble, eNB->preamble_list_br[ind].preamble_rel8.rnti, eNB->preamble_list_br[ind].preamble_rel13.rach_resource_type); + } + } + + /* + ind++; + } + } */// ce_level + } else if ((eNB->prach_energy_counter == 100) && + (max_preamble_energy[0] > eNB->measurements.prach_I0+eNB->prach_DTX_threshold)) { + LOG_I(PHY,"[eNB %d/%d][RAPROC] Frame %d, subframe %d Initiating RA procedure with preamble %d, energy %d.%d dB, delay %d\n", + eNB->Mod_id, + eNB->CC_id, + proc->frame_prach, + proc->subframe_prach, + max_preamble[0], + max_preamble_energy[0]/10, + max_preamble_energy[0]%10, + max_preamble_delay[0]); + pthread_mutex_lock(&eNB->UL_INFO_mutex); + eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 1; + eNB->UL_INFO.rach_ind.rach_indication_body.preamble_list = &eNB->preamble_list[0]; + eNB->UL_INFO.rach_ind.rach_indication_body.tl.tag = NFAPI_RACH_INDICATION_BODY_TAG; + eNB->UL_INFO.rach_ind.header.message_id = NFAPI_RACH_INDICATION; + eNB->UL_INFO.rach_ind.sfn_sf = proc->frame_prach<<4 | 
proc->subframe_prach; + eNB->preamble_list[0].preamble_rel8.tl.tag = NFAPI_PREAMBLE_REL8_TAG; + eNB->preamble_list[0].preamble_rel8.timing_advance = max_preamble_delay[0]; + eNB->preamble_list[0].preamble_rel8.preamble = max_preamble[0]; + eNB->preamble_list[0].preamble_rel8.rnti = 1+proc->subframe_prach; // note: fid is implicitly 0 here + eNB->preamble_list[0].preamble_rel13.rach_resource_type = 0; + eNB->preamble_list[0].instance_length = 0; //don't know exactly what this is + + if (NFAPI_MODE==NFAPI_MODE_PNF) { // If NFAPI PNF then we need to send the message to the VNF + LOG_D(PHY,"Filling NFAPI indication for RACH : SFN_SF:%d TA %d, Preamble %d, rnti %x, rach_resource_type %d\n", + NFAPI_SFNSF2DEC(eNB->UL_INFO.rach_ind.sfn_sf), + eNB->preamble_list[0].preamble_rel8.timing_advance, + eNB->preamble_list[0].preamble_rel8.preamble, + eNB->preamble_list[0].preamble_rel8.rnti, + eNB->preamble_list[0].preamble_rel13.rach_resource_type); + oai_nfapi_rach_ind(&eNB->UL_INFO.rach_ind); + eNB->UL_INFO.rach_ind.rach_indication_body.number_of_preambles = 0; + } + + pthread_mutex_unlock(&eNB->UL_INFO_mutex); + } // max_preamble_energy > prach_I0 + 100 + else { + eNB->measurements.prach_I0 = ((eNB->measurements.prach_I0*900)>>10) + ((avg_preamble_energy[0]*124)>>10); + + if (eNB->prach_energy_counter < 100) + eNB->prach_energy_counter++; + } +} // else br_flag + +void prach_eNB(PHY_VARS_eNB *eNB, L1_rxtx_proc_t *proc, int frame,int subframe) { + // check if we have to detect PRACH first + if (is_prach_subframe(&eNB->frame_parms, frame,subframe)>0) { + prach_procedures_ocp(eNB, proc, 0); + prach_procedures_ocp(eNB, proc, 1); + } +} + +static inline int rxtx(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc, char *thread_name) { + AssertFatal( eNB !=NULL, ""); + + if (NFAPI_MODE==NFAPI_MODE_PNF) { + // I am a PNF and I need to let nFAPI know that we have a (sub)frame tick + //add_subframe(&frame, &subframe, 4); + //oai_subframe_ind(proc->frame_tx, proc->subframe_tx); + 
oai_subframe_ind(proc->frame_rx, proc->subframe_rx); + } + + AssertFatal( !(NFAPI_MODE==NFAPI_MODE_PNF && + eNB->pdcch_vars[proc->subframe_tx&1].num_pdcch_symbols == 0), ""); + prach_eNB(eNB,proc,proc->frame_rx,proc->subframe_rx); + release_UE_in_freeList(eNB->Mod_id); + + // UE-specific RX processing for subframe n + if (NFAPI_MODE==NFAPI_MONOLITHIC || NFAPI_MODE==NFAPI_MODE_PNF) { + phy_procedures_eNB_uespec_RX(eNB, proc); + } + + pthread_mutex_lock(&eNB->UL_INFO_mutex); + eNB->UL_INFO.frame = proc->frame_rx; + eNB->UL_INFO.subframe = proc->subframe_rx; + eNB->UL_INFO.module_id = eNB->Mod_id; + eNB->UL_INFO.CC_id = eNB->CC_id; + eNB->if_inst->UL_indication(&eNB->UL_INFO, proc); + pthread_mutex_unlock(&eNB->UL_INFO_mutex); + phy_procedures_eNB_TX(eNB, proc, 1); + return(0); +} + +void rx_rf(RU_t *ru, L1_rxtx_proc_t *proc) { + LTE_DL_FRAME_PARMS *fp = ru->frame_parms; + void *rxp[ru->nb_rx]; + unsigned int rxs; + int i; + openair0_timestamp ts=0, timestamp_rx; + static openair0_timestamp old_ts=0; + + for (i=0; i<ru->nb_rx; i++) + //receive in the next slot + rxp[i] = (void *)&ru->common.rxdata[i][((proc->subframe_rx+1)%10)*fp->samples_per_tti]; + + rxs = ru->rfdevice.trx_read_func(&ru->rfdevice, + &ts, + rxp, + fp->samples_per_tti, + ru->nb_rx); + timestamp_rx = ts-ru->ts_offset; + + // AssertFatal(rxs == fp->samples_per_tti, + // "rx_rf: Asked for %d samples, got %d from SDR\n",fp->samples_per_tti,rxs); + if(rxs != fp->samples_per_tti) { + LOG_E(PHY,"rx_rf: Asked for %d samples, got %d from SDR\n",fp->samples_per_tti,rxs); +#if defined(USRP_REC_PLAY) + exit_fun("Exiting IQ record/playback"); +#else + //exit_fun( "problem receiving samples" ); + LOG_E(PHY, "problem receiving samples"); +#endif + } + + if (old_ts != 0 && timestamp_rx - old_ts != fp->samples_per_tti) { + LOG_E(HW,"impossible shift in rx stream, rx: %ld, previous rx distance: %ld, should be %d\n", timestamp_rx, timestamp_rx - old_ts, fp->samples_per_tti); /* report the distance the guard above tested; proc->timestamp_rx is only refreshed by setAllfromTS() below */ + //ru->ts_offset += 
(proc->timestamp_rx - old_ts - fp->samples_per_tti); + //proc->timestamp_rx = ts-ru->ts_offset; + } + + old_ts=timestamp_rx; + setAllfromTS(timestamp_rx, proc); +} + +void ocp_tx_rf(RU_t *ru, L1_rxtx_proc_t *proc) { + LTE_DL_FRAME_PARMS *fp = ru->frame_parms; + void *txp[ru->nb_tx]; + int i; + lte_subframe_t SF_type = subframe_select(fp,proc->subframe_tx%10); + lte_subframe_t prevSF_type = subframe_select(fp,(proc->subframe_tx+9)%10); + int sf_extension = 0; + + if ((SF_type == SF_DL) || + (SF_type == SF_S)) { + int siglen=fp->samples_per_tti,flags=1; + + if (SF_type == SF_S) { + /* end_of_burst_delay is used to stop TX only "after a while". + * If we stop right after effective signal, with USRP B210 and + * B200mini, we observe a high EVM on the S subframe (on the + * PSS). + * A value of 400 (for 30.72MHz) solves this issue. This is + * the default. + */ + siglen = (fp->ofdm_symbol_size + fp->nb_prefix_samples0) + + (fp->dl_symbols_in_S_subframe - 1) * (fp->ofdm_symbol_size + fp->nb_prefix_samples) + + ru->end_of_burst_delay; + flags=3; // end of burst + } + + if (fp->frame_type == TDD && + SF_type == SF_DL && + prevSF_type == SF_UL) { + flags = 2; // start of burst + sf_extension = ru->sf_extension; + } + +#if defined(__x86_64) || defined(__i386__) +#ifdef __AVX2__ + sf_extension = (sf_extension)&0xfffffff8; +#else + sf_extension = (sf_extension)&0xfffffffc; +#endif +#elif defined(__arm__) + sf_extension = (sf_extension)&0xfffffffc; +#endif + + for (i=0; i<ru->nb_tx; i++) + txp[i] = (void *)&ru->common.txdata[i][(proc->subframe_tx*fp->samples_per_tti)-sf_extension]; + + /* add fail safe for late command end */ + // prepare tx buffer pointers + ru->rfdevice.trx_write_func(&ru->rfdevice, + proc->timestamp_tx+ru->ts_offset-ru->openair0_cfg.tx_sample_advance-sf_extension, + txp, + siglen+sf_extension, + ru->nb_tx, + flags); + LOG_D(PHY,"[TXPATH] RU %d tx_rf, writing to TS %llu, frame %d, subframe %d\n",ru->idx, + (long long unsigned 
int)proc->timestamp_tx,proc->frame_tx,proc->subframe_tx); + } + + return; +} + +static void *ru_thread( void *param ) { + setbuf(stdout, NULL); + setbuf(stderr, NULL); + RU_t *ru = (RU_t *)param; + L1_rxtx_proc_t L1proc= {0}; + // We pick the global thread pool from the legacy code global vars + L1proc.threadPool=RC.eNB[0][0]->proc.L1_proc.threadPool; + L1proc.respEncode=RC.eNB[0][0]->proc.L1_proc.respEncode; + L1proc.respDecode=RC.eNB[0][0]->proc.L1_proc.respDecode; + + if (ru->if_south == LOCAL_RF) { // configure RF parameters only + fill_rf_config(ru,ru->rf_config_file); + init_frame_parms(ru->frame_parms,1); + phy_init_RU(ru); + init_rf(ru); + } + + AssertFatal(setup_RU_buffers(ru)==0, "Exiting, cannot initialize RU Buffers\n"); + LOG_I(PHY, "Signaling main thread that RU %d is ready\n",ru->idx); + wait_sync("ru_thread"); + + // Start RF device if any + if (ru->rfdevice.trx_start_func(&ru->rfdevice) != 0) + LOG_E(HW,"Could not start the RF device\n"); + else LOG_I(PHY,"RU %d rf device ready\n",ru->idx); + + // This is a forever while loop, it loops over subframes which are scheduled by incoming samples from HW devices + while (!oai_exit) { + // synchronization on input FH interface, acquire signals/data and block + rx_rf(ru, &L1proc); + // do RX front-end processing (frequency-shift, dft) if needed + fep_full(ru, L1proc.subframe_rx); + + // At this point, all information for subframe has been received on FH interface + // If this proc is to provide synchronization, do so + // Fixme: not used + // wakeup_slaves(proc); + for (int i=0; i<ru->num_eNB; i++) { + char string[20]; + snprintf(string,sizeof(string),"Incoming RU %d",ru->idx); /* bounded write: the buffer is only 20 bytes */ + + if (rxtx(ru->eNB_list[i],&L1proc,string) < 0) + LOG_E(PHY,"eNB %d CC_id %d failed during execution\n", + ru->eNB_list[i]->Mod_id,ru->eNB_list[i]->CC_id); + } + + // do TX front-end processing if needed (precoding and/or IDFTs) + feptx_prec(ru, L1proc.frame_tx, L1proc.subframe_tx); + // do OFDM if needed + feptx_ofdm(ru, L1proc.frame_tx, 
L1proc.subframe_tx); + // do outgoing fronthaul (south) if needed + ocp_tx_rf(ru, &L1proc); + } + + LOG_W(PHY,"Exiting ru_thread \n"); + ru->rfdevice.trx_end_func(&ru->rfdevice); + LOG_I(PHY,"RU %d rf device stopped\n",ru->idx); + return NULL; +} + +int init_rf(RU_t *ru) { + char name[256]; + pthread_getname_np(pthread_self(),name, 255); + pthread_setname_np(pthread_self(),"UHD for OAI"); + int ret=openair0_device_load(&ru->rfdevice,&ru->openair0_cfg); + pthread_setname_np(pthread_self(),name); + return ret; +} + +void init_RU(char *rf_config_file, int send_dmrssync) { + RU_t *ru; + PHY_VARS_eNB *eNB0= (PHY_VARS_eNB *)NULL; + int i; + int CC_id; + // read in configuration file) + LOG_I(PHY,"configuring RU from file\n"); + LOG_I(PHY,"number of L1 instances %d, number of RU %d, number of CPU cores %d\n", + RC.nb_L1_inst,RC.nb_RU,get_nprocs()); + + if (RC.nb_CC != 0) + for (i=0; i<RC.nb_L1_inst; i++) + for (CC_id=0; CC_id<RC.nb_CC[i]; CC_id++) + RC.eNB[i][CC_id]->num_RU=0; + + LOG_D(PHY,"Process RUs RC.nb_RU:%d\n",RC.nb_RU); + + for (int ru_id=0; ru_id<RC.nb_RU; ru_id++) { + LOG_D(PHY,"Process RC.ru[%d]\n",ru_id); + ru = RC.ru[ru_id]; + ru->rf_config_file = rf_config_file; + ru->idx = ru_id; + ru->ts_offset = 0; + + if (ru->is_slave == 1) { + ru->in_synch = 0; + ru->generate_dmrs_sync = 0; + } else { + ru->in_synch = 1; + ru->generate_dmrs_sync=send_dmrssync; + } + + ru->cmd = EMPTY; + ru->south_out_cnt= 0; + + // ru->generate_dmrs_sync = (ru->is_slave == 0) ? 
1 : 0; + if (ru->generate_dmrs_sync == 1) { + generate_ul_ref_sigs(); + ru->dmrssync = (int16_t *)malloc16_clear(ru->frame_parms->ofdm_symbol_size*2*sizeof(int16_t)); + } + + ru->wakeup_L1_sleeptime = 2000; + ru->wakeup_L1_sleep_cnt_max = 3; + + if (ru->num_eNB > 0) { + LOG_D(PHY, "%s() RC.ru[%d].num_eNB:%d ru->eNB_list[0]:%p RC.eNB[0][0]:%p rf_config_file:%s\n", + __FUNCTION__, ru_id, ru->num_eNB, ru->eNB_list[0], RC.eNB[0][0], ru->rf_config_file); + AssertFatal(ru->eNB_list[0], "ru->eNB_list is not initialized\n"); + } else { + LOG_E(PHY,"Wrong data model, assigning eNB 0, carrier 0 to RU 0\n"); + ru->eNB_list[0] = RC.eNB[0][0]; + ru->num_eNB=1; + } + + eNB0 = ru->eNB_list[0]; + // datamodel error in regular OAI: a RU uses one single eNB carrier parameters! + ru->frame_parms = &eNB0->frame_parms; + + for (i=0; i<ru->num_eNB; i++) { + eNB0 = ru->eNB_list[i]; + int ruIndex=eNB0->num_RU++; + eNB0->RU_list[ruIndex] = ru; + } + } // for ru_id +} + +void stop_RU(int nb_ru) { + for (int inst = 0; inst < nb_ru; inst++) { + LOG_I(PHY, "Stopping RU %d processing threads\n", inst); + kill_RU_proc(RC.ru[inst]); + } +} + +/* --------------------------------------------------------*/ +/* from here function to use configuration module */ +static int DEFBFW[] = {0x00007fff}; +void RCconfig_RU(void) { + paramdef_t RUParams[] = RUPARAMS_DESC; + paramlist_def_t RUParamList = {CONFIG_STRING_RU_LIST,NULL,0}; + config_getlist( &RUParamList,RUParams,sizeof(RUParams)/sizeof(paramdef_t), NULL); + + if ( RUParamList.numelt == 0 ) { + LOG_W(PHY, "Calling RCconfig_RU while no ru\n"); + RC.nb_RU = 0; + return; + } // setting != NULL + + if ( RC.ru != NULL ) { + LOG_W(PHY, "Calling RCconfig_RU twice (nb ru=%d), ignoring the second call data structure is %p\n", + RUParamList.numelt,RC.ru); + return; + } + + RC.ru = (RU_t **)malloc(RC.nb_RU*sizeof(RU_t *)); + + for (int j = 0; j < RC.nb_RU; j++) { + RC.ru[j] = (RU_t *)calloc(sizeof(RU_t), 1); + RC.ru[j]->idx = j; + LOG_I(PHY,"Creating 
RC.ru[%d]:%p\n", j, RC.ru[j]); + RC.ru[j]->if_timing = synch_to_ext_device; + paramdef_t *vals=RUParamList.paramarray[j]; + + if (RC.nb_L1_inst >0) + RC.ru[j]->num_eNB = vals[RU_ENB_LIST_IDX].numelt; + else + RC.ru[j]->num_eNB = 0; + + for (int i=0; i<RC.ru[j]->num_eNB; i++) + RC.ru[j]->eNB_list[i] = RC.eNB[vals[RU_ENB_LIST_IDX].iptr[i]][0]; + + if (config_isparamset(vals, RU_SDR_ADDRS)) { + RC.ru[j]->openair0_cfg.sdr_addrs = strdup(*(vals[RU_SDR_ADDRS].strptr)); + } + + if (config_isparamset(vals, RU_SDR_CLK_SRC)) { + char *paramVal=*(vals[RU_SDR_CLK_SRC].strptr); + LOG_D(PHY, "RU clock source set as %s\n", paramVal); + + if (strcmp(paramVal, "internal") == 0) { + RC.ru[j]->openair0_cfg.clock_source = internal; + } else if (strcmp(paramVal, "external") == 0) { + RC.ru[j]->openair0_cfg.clock_source = external; + } else if (strcmp(paramVal, "gpsdo") == 0) { + RC.ru[j]->openair0_cfg.clock_source = gpsdo; + } else { + LOG_E(PHY, "Erroneous RU clock source in the provided configuration file: '%s'\n", paramVal); + } + } + + if (strcmp(*(vals[RU_LOCAL_RF_IDX].strptr), "yes") == 0) { + if ( !(config_isparamset(vals,RU_LOCAL_IF_NAME_IDX)) ) { + RC.ru[j]->if_south = LOCAL_RF; + RC.ru[j]->function = eNodeB_3GPP; + LOG_I(PHY, "Setting function for RU %d to eNodeB_3GPP\n",j); + } else { + RC.ru[j]->eth_params.local_if_name = strdup(*(vals[RU_LOCAL_IF_NAME_IDX].strptr)); + RC.ru[j]->eth_params.my_addr = strdup(*(vals[RU_LOCAL_ADDRESS_IDX].strptr)); + RC.ru[j]->eth_params.remote_addr = strdup(*(vals[RU_REMOTE_ADDRESS_IDX].strptr)); + RC.ru[j]->eth_params.my_portc = *(vals[RU_LOCAL_PORTC_IDX].uptr); + RC.ru[j]->eth_params.remote_portc = *(vals[RU_REMOTE_PORTC_IDX].uptr); + RC.ru[j]->eth_params.my_portd = *(vals[RU_LOCAL_PORTD_IDX].uptr); + RC.ru[j]->eth_params.remote_portd = *(vals[RU_REMOTE_PORTD_IDX].uptr); + } + + RC.ru[j]->max_pdschReferenceSignalPower = *(vals[RU_MAX_RS_EPRE_IDX].uptr);; + RC.ru[j]->max_rxgain = *(vals[RU_MAX_RXGAIN_IDX].uptr); + RC.ru[j]->num_bands = 
vals[RU_BAND_LIST_IDX].numelt; + /* sf_extension is in unit of samples for 30.72MHz here, has to be scaled later */ + RC.ru[j]->sf_extension = *(vals[RU_SF_EXTENSION_IDX].uptr); + RC.ru[j]->end_of_burst_delay = *(vals[RU_END_OF_BURST_DELAY_IDX].uptr); + + for (int i=0; i<RC.ru[j]->num_bands; i++) RC.ru[j]->band[i] = vals[RU_BAND_LIST_IDX].iptr[i]; + } else { + LOG_I(PHY,"RU %d: Transport %s\n",j,*(vals[RU_TRANSPORT_PREFERENCE_IDX].strptr)); + RC.ru[j]->eth_params.local_if_name = strdup(*(vals[RU_LOCAL_IF_NAME_IDX].strptr)); + RC.ru[j]->eth_params.my_addr = strdup(*(vals[RU_LOCAL_ADDRESS_IDX].strptr)); + RC.ru[j]->eth_params.remote_addr = strdup(*(vals[RU_REMOTE_ADDRESS_IDX].strptr)); + RC.ru[j]->eth_params.my_portc = *(vals[RU_LOCAL_PORTC_IDX].uptr); + RC.ru[j]->eth_params.remote_portc = *(vals[RU_REMOTE_PORTC_IDX].uptr); + RC.ru[j]->eth_params.my_portd = *(vals[RU_LOCAL_PORTD_IDX].uptr); + RC.ru[j]->eth_params.remote_portd = *(vals[RU_REMOTE_PORTD_IDX].uptr); + } /* strcmp(local_rf, "yes") != 0 */ + + RC.ru[j]->nb_tx = *(vals[RU_NB_TX_IDX].uptr); + RC.ru[j]->nb_rx = *(vals[RU_NB_RX_IDX].uptr); + RC.ru[j]->att_tx = *(vals[RU_ATT_TX_IDX].uptr); + RC.ru[j]->att_rx = *(vals[RU_ATT_RX_IDX].uptr); + }// j=0..num_rus + + return; +} + + +static void get_options(void) { + CONFIG_SETRTFLAG(CONFIG_NOEXITONHELP); + get_common_options(SOFTMODEM_ENB_BIT); + CONFIG_CLEARRTFLAG(CONFIG_NOEXITONHELP); + + if ( !(CONFIG_ISFLAGSET(CONFIG_ABORT)) ) { + memset((void *)&RC,0,sizeof(RC)); + /* Read RC configuration file */ + RCConfig(); + NB_eNB_INST = RC.nb_inst; + printf("Configuration: nb_rrc_inst %d, nb_L1_inst %d, nb_ru %d\n",NB_eNB_INST,RC.nb_L1_inst,RC.nb_RU); + + if (!IS_SOFTMODEM_NONBIOT) { + load_NB_IoT(); + printf(" nb_nbiot_rrc_inst %d, nb_nbiot_L1_inst %d, nb_nbiot_macrlc_inst %d\n", + RC.nb_nb_iot_rrc_inst, RC.nb_nb_iot_L1_inst, RC.nb_nb_iot_macrlc_inst); + } else { + printf("All Nb-IoT instances disabled\n"); + 
RC.nb_nb_iot_rrc_inst=RC.nb_nb_iot_L1_inst=RC.nb_nb_iot_macrlc_inst=0; + } + } +} + +void set_default_frame_parms(LTE_DL_FRAME_PARMS *frame_parms[MAX_NUM_CCs]) { + int CC_id; + + for (CC_id=0; CC_id<MAX_NUM_CCs; CC_id++) { + frame_parms[CC_id] = (LTE_DL_FRAME_PARMS *) malloc(sizeof(LTE_DL_FRAME_PARMS)); + /* Set some default values that may be overwritten while reading options */ + frame_parms[CC_id]->frame_type = FDD; + frame_parms[CC_id]->tdd_config = 3; + frame_parms[CC_id]->tdd_config_S = 0; + frame_parms[CC_id]->N_RB_DL = 100; + frame_parms[CC_id]->N_RB_UL = 100; + frame_parms[CC_id]->Ncp = NORMAL; + frame_parms[CC_id]->Ncp_UL = NORMAL; + frame_parms[CC_id]->Nid_cell = 0; + frame_parms[CC_id]->num_MBSFN_config = 0; + frame_parms[CC_id]->nb_antenna_ports_eNB = 1; + frame_parms[CC_id]->nb_antennas_tx = 1; + frame_parms[CC_id]->nb_antennas_rx = 1; + frame_parms[CC_id]->nushift = 0; + frame_parms[CC_id]->phich_config_common.phich_resource = oneSixth; + frame_parms[CC_id]->phich_config_common.phich_duration = normal; + // UL RS Config + frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift = 0;//n_DMRS1 set to 0 + frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.groupHoppingEnabled = 0; + frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.sequenceHoppingEnabled = 0; + frame_parms[CC_id]->pusch_config_common.ul_ReferenceSignalsPUSCH.groupAssignmentPUSCH = 0; + frame_parms[CC_id]->prach_config_common.rootSequenceIndex=22; + frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig=1; + frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.prach_ConfigIndex=0; + frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.highSpeedFlag=0; + frame_parms[CC_id]->prach_config_common.prach_ConfigInfo.prach_FreqOffset=0; + // downlink_frequency[CC_id][0] = 2680000000; // Use float to avoid issue with frequency over 2^31. 
+ // downlink_frequency[CC_id][1] = downlink_frequency[CC_id][0]; + // downlink_frequency[CC_id][2] = downlink_frequency[CC_id][0]; + // downlink_frequency[CC_id][3] = downlink_frequency[CC_id][0]; + //printf("Downlink for CC_id %d frequency set to %u\n", CC_id, downlink_frequency[CC_id][0]); + frame_parms[CC_id]->dl_CarrierFreq=downlink_frequency[CC_id][0]; + } +} + +void init_pdcp(void) { + if (!NODE_IS_DU(RC.rrc[0]->node_type)) { + pdcp_layer_init(); + uint32_t pdcp_initmask = (IS_SOFTMODEM_NOS1) ? + (PDCP_USE_NETLINK_BIT | LINK_ENB_PDCP_TO_IP_DRIVER_BIT) : LINK_ENB_PDCP_TO_GTPV1U_BIT; + + if (IS_SOFTMODEM_NOS1) + pdcp_initmask = pdcp_initmask | ENB_NAS_USE_TUN_BIT | SOFTMODEM_NOKRNMOD_BIT ; + + pdcp_initmask = pdcp_initmask | ENB_NAS_USE_TUN_W_MBMS_BIT; + + if ( split73!=SPLIT73_DU) + pdcp_module_init(pdcp_initmask); + + if (NODE_IS_CU(RC.rrc[0]->node_type)) { + pdcp_set_rlc_data_req_func((send_rlc_data_req_func_t)proto_agent_send_rlc_data_req); + } else { + pdcp_set_rlc_data_req_func((send_rlc_data_req_func_t) rlc_data_req); + pdcp_set_pdcp_data_ind_func((pdcp_data_ind_func_t) pdcp_data_ind); + } + } else { + pdcp_set_pdcp_data_ind_func((pdcp_data_ind_func_t) proto_agent_send_pdcp_data_ind); + } +} + +static void wait_nfapi_init(char *thread_name) { + printf( "waiting for NFAPI PNF connection and population of global structure (%s)\n",thread_name); + pthread_mutex_lock( &nfapi_sync_mutex ); + + while (nfapi_sync_var<0) + pthread_cond_wait( &nfapi_sync_cond, &nfapi_sync_mutex ); + + pthread_mutex_unlock(&nfapi_sync_mutex); + printf( "NFAPI: got sync (%s)\n", thread_name); +} + +void terminate_task(module_id_t mod_id, task_id_t from, task_id_t to) { + LOG_I(ENB_APP, "sending TERMINATE_MESSAGE from task %s (%d) to task %s (%d)\n", + itti_get_task_name(from), from, itti_get_task_name(to), to); + MessageDef *msg; + msg = itti_alloc_new_message (from, TERMINATE_MESSAGE); + itti_send_msg_to_task (to, ENB_MODULE_ID_TO_INSTANCE(mod_id), msg); +} + +int 
stop_L1L2(module_id_t enb_id) { + LOG_W(ENB_APP, "stopping lte-softmodem\n"); + + if (!RC.ru) { + LOG_UI(ENB_APP, "no RU configured\n"); + return -1; + } + + /* these tasks need to pick up new configuration */ + terminate_task(enb_id, TASK_ENB_APP, TASK_RRC_ENB); + oai_exit = 1; + LOG_I(ENB_APP, "calling kill_RU_proc() for instance %d\n", enb_id); + kill_RU_proc(RC.ru[enb_id]); + LOG_I(ENB_APP, "calling kill_eNB_proc() for instance %d\n", enb_id); + kill_eNB_proc(enb_id); + oai_exit = 0; + + for (int cc_id = 0; cc_id < RC.nb_CC[enb_id]; cc_id++) { + free_transport(RC.eNB[enb_id][cc_id]); + phy_free_lte_eNB(RC.eNB[enb_id][cc_id]); + } + + phy_free_RU(RC.ru[enb_id]); + free_lte_top(); + return 0; +} + +/* + * Restart the lte-softmodem after it has been soft-stopped with stop_L1L2() + */ +int restart_L1L2(module_id_t enb_id) { + RU_t *ru = RC.ru[enb_id]; + MessageDef *msg_p = NULL; + LOG_W(ENB_APP, "restarting lte-softmodem\n"); + /* block threads */ + pthread_mutex_lock(&sync_mutex); + sync_var = -1; + pthread_mutex_unlock(&sync_mutex); + RC.ru_mask |= (1 << ru->idx); + /* copy the changed frame parameters to the RU */ + /* TODO this should be done for all RUs associated to this eNB */ + memcpy(&ru->frame_parms, &RC.eNB[enb_id][0]->frame_parms, sizeof(LTE_DL_FRAME_PARMS)); + /* reset the list of connected UEs in the MAC, since in this process with + * loose all UEs (have to reconnect) */ + init_UE_info(&RC.mac[enb_id]->UE_info); + LOG_I(ENB_APP, "attempting to create ITTI tasks\n"); + // No more rrc thread, as many race conditions are hidden behind + rrc_enb_init(); + itti_mark_task_ready(TASK_RRC_ENB); + /* pass a reconfiguration request which will configure everything down to + * RC.eNB[i][j]->frame_parms, too */ + msg_p = itti_alloc_new_message(TASK_ENB_APP, RRC_CONFIGURATION_REQ); + RRC_CONFIGURATION_REQ(msg_p) = RC.rrc[enb_id]->configuration; + itti_send_msg_to_task(TASK_RRC_ENB, ENB_MODULE_ID_TO_INSTANCE(enb_id), msg_p); + /* TODO XForms might need to be 
restarted, but it is currently (09/02/18) + * broken, so we cannot test it */ + init_RU_proc(ru); + ru->rf_map.card = 0; + ru->rf_map.chain = 0; /* CC_id + chain_offset;*/ + init_eNB_afterRU(); + printf("Sending sync to all threads\n"); + pthread_mutex_lock(&sync_mutex); + sync_var=0; + pthread_cond_broadcast(&sync_cond); + pthread_mutex_unlock(&sync_mutex); + return 0; +} + +int main ( int argc, char **argv ) { + int i; + int CC_id = 0; + int node_type = ngran_eNB; + AssertFatal(load_configmodule(argc,argv,0), "[SOFTMODEM] Error, configuration module init failed\n"); + logInit(); + printf("Reading in command-line options\n"); + get_options (); + AssertFatal(!CONFIG_ISFLAGSET(CONFIG_ABORT),"Getting configuration failed\n"); + EPC_MODE_ENABLED = !IS_SOFTMODEM_NOS1; +#if T_TRACER + T_Config_Init(); +#endif + configure_linux(); + cpuf=get_cpu_freq_GHz(); + set_taus_seed (0); + + if (opp_enabled ==1) + reset_opp_meas(); + + itti_init(TASK_MAX, THREAD_MAX, MESSAGES_ID_MAX, tasks_info, messages_info); + init_opt(); +#ifndef PACKAGE_VERSION +# define PACKAGE_VERSION "UNKNOWN-EXPERIMENTAL" +#endif + LOG_I(HW, "Version: %s\n", PACKAGE_VERSION); + + /* Read configuration */ + if (RC.nb_inst > 0) { + // Allocate memory from RC variable + read_config_and_init(); + } else { + printf("RC.nb_inst = 0, Initializing L1\n"); + RCconfig_L1(); + } + + /* We need to read RU configuration before FlexRAN starts so it knows what + * splits to report. Actual RU start comes later. 
*/ + if (RC.nb_RU > 0 && NFAPI_MODE != NFAPI_MODE_VNF) { + RCconfig_RU(); + LOG_I(PHY, + "number of L1 instances %d, number of RU %d, number of CPU cores %d\n", + RC.nb_L1_inst, RC.nb_RU, get_nprocs()); + } + + if ( strlen(get_softmodem_params()->split73) > 0 ) { + char tmp[1024]={0}; + strncpy(tmp,get_softmodem_params()->split73, 1023); + tmp[2]=0; + if ( strncasecmp(tmp,"cu", 2)==0 ) + split73=SPLIT73_CU; + else if ( strncasecmp(tmp,"du", 2)==0 ) + split73=SPLIT73_DU; + else + AssertFatal(false,"split73 syntax: <cu|du>:<remote ip addr>[:<ip port>] (string found: %s) \n",get_softmodem_params()->split73); + } + + if (RC.nb_inst > 0) { + /* Start the agent. If it is turned off in the configuration, it won't start */ + for (i = 0; i < RC.nb_inst; i++) { + flexran_agent_start(i); + } + + /* initializes PDCP and sets correct RLC Request/PDCP Indication callbacks + * for monolithic/F1 modes */ + init_pdcp(); + AssertFatal(create_tasks(1)==0,"cannot create ITTI tasks\n"); + + for (int enb_id = 0; enb_id < RC.nb_inst; enb_id++) { + MessageDef *msg_p = itti_alloc_new_message (TASK_ENB_APP, RRC_CONFIGURATION_REQ); + RRC_CONFIGURATION_REQ(msg_p) = RC.rrc[enb_id]->configuration; + itti_send_msg_to_task (TASK_RRC_ENB, ENB_MODULE_ID_TO_INSTANCE(enb_id), msg_p); + } + + node_type = RC.rrc[0]->node_type; + } + + if (RC.nb_inst > 0 && NODE_IS_CU(node_type)) { + protocol_ctxt_t ctxt; + ctxt.module_id = 0 ; + ctxt.instance = 0; + ctxt.rnti = 0; + ctxt.enb_flag = 1; + ctxt.frame = 0; + ctxt.subframe = 0; + pdcp_run(&ctxt); + } + + /* start threads if only L1 or not a CU */ + if (RC.nb_inst == 0 || !NODE_IS_CU(node_type) || NFAPI_MODE == NFAPI_MODE_PNF || NFAPI_MODE == NFAPI_MODE_VNF) { + // init UE_PF_PO and mutex lock + pthread_mutex_init(&ue_pf_po_mutex, NULL); + memset (&UE_PF_PO[0][0], 0, sizeof(UE_PF_PO_t)*MAX_MOBILES_PER_ENB*MAX_NUM_CCs); + pthread_cond_init(&sync_cond,NULL); + pthread_mutex_init(&sync_mutex, NULL); + + if (NFAPI_MODE!=NFAPI_MONOLITHIC) { + 
LOG_I(ENB_APP,"NFAPI*** - mutex and cond created - will block shortly for completion of PNF connection\n"); + pthread_cond_init(&sync_cond,NULL); + pthread_mutex_init(&sync_mutex, NULL); + } + + if (NFAPI_MODE==NFAPI_MODE_VNF) {// VNF +#if defined(PRE_SCD_THREAD) + init_ru_vnf(); // ru pointer is necessary for pre_scd. +#endif + wait_nfapi_init("main?"); + } + + LOG_I(ENB_APP,"START MAIN THREADS\n"); + // start the main threads + number_of_cards = 1; + printf("RC.nb_L1_inst:%d\n", RC.nb_L1_inst); + + if (RC.nb_L1_inst > 0) { + printf("Initializing eNB threads single_thread_flag:%d wait_for_sync:%d\n", + get_softmodem_params()->single_thread_flag, + get_softmodem_params()->wait_for_sync); + init_eNB(get_softmodem_params()->single_thread_flag, + get_softmodem_params()->wait_for_sync); + } + + for (int x=0; x < RC.nb_L1_inst; x++) + for (int CC_id=0; CC_id<RC.nb_L1_CC[x]; CC_id++) { + L1_rxtx_proc_t *L1proc= &RC.eNB[x][CC_id]->proc.L1_proc; + L1proc->threadPool=(tpool_t *)malloc(sizeof(tpool_t)); + L1proc->respEncode=(notifiedFIFO_t *) malloc(sizeof(notifiedFIFO_t)); + L1proc->respDecode=(notifiedFIFO_t *) malloc(sizeof(notifiedFIFO_t)); + + if ( strlen(get_softmodem_params()->threadPoolConfig) > 0 ) + initTpool(get_softmodem_params()->threadPoolConfig, L1proc->threadPool, true); + else + initTpool("n", L1proc->threadPool, true); + + initNotifiedFIFO(L1proc->respEncode); + initNotifiedFIFO(L1proc->respDecode); + } + } + + printf("About to Init RU threads RC.nb_RU:%d\n", RC.nb_RU); + + // RU thread and some L1 procedure aren't necessary in VNF or L2 FAPI simulator. + // but RU thread deals with pre_scd and this is necessary in VNF and simulator. + // some initialization is necessary and init_ru_vnf do this. 
+ if (RC.nb_RU >0 && NFAPI_MODE!=NFAPI_MODE_VNF) { + printf("Initializing RU threads\n"); + init_RU(get_softmodem_params()->rf_config_file, + get_softmodem_params()->send_dmrs_sync); + + for (int ru_id=0; ru_id<RC.nb_RU; ru_id++) { + RC.ru[ru_id]->rf_map.card=0; + RC.ru[ru_id]->rf_map.chain=CC_id+(get_softmodem_params()->chain_offset); + LOG_I(PHY,"Starting ru_thread %d\n",ru_id); + init_RU_proc(RC.ru[ru_id]); + } + + config_sync_var=0; + + if (NFAPI_MODE==NFAPI_MODE_PNF) { // PNF + wait_nfapi_init("main?"); + } + + LOG_I(ENB_APP,"RC.nb_RU:%d\n", RC.nb_RU); + // once all RUs are ready intiailize the rest of the eNBs ((dependence on final RU parameters after configuration) + printf("ALL RUs ready - init eNBs\n"); + + if (NFAPI_MODE!=NFAPI_MODE_PNF && NFAPI_MODE!=NFAPI_MODE_VNF) { + LOG_I(ENB_APP,"Not NFAPI mode - call init_eNB_afterRU()\n"); + init_eNB_afterRU(); + } else { + LOG_I(ENB_APP,"NFAPI mode - DO NOT call init_eNB_afterRU()\n"); + } + + LOG_UI(ENB_APP,"ALL RUs ready - ALL eNBs ready\n"); + // connect the TX/RX buffers + sleep(1); /* wait for thread activation */ + LOG_I(ENB_APP,"Sending sync to all threads\n"); + pthread_mutex_lock(&sync_mutex); + sync_var=0; + pthread_cond_broadcast(&sync_cond); + pthread_mutex_unlock(&sync_mutex); + config_check_unknown_cmdlineopt(CONFIG_CHECKALLSECTIONS); + } + + create_tasks_mbms(1); + // wait for end of program + LOG_UI(ENB_APP,"TYPE <CTRL-C> TO TERMINATE\n"); + // CI -- Flushing the std outputs for the previous marker to show on the eNB / DU / CU log file + fflush(stdout); + fflush(stderr); + + // end of CI modifications + //getchar(); + if(IS_SOFTMODEM_DOFORMS) + load_softscope("enb"); + + itti_wait_tasks_end(); + oai_exit=1; + LOG_I(ENB_APP,"oai_exit=%d\n",oai_exit); + // stop threads + + if (RC.nb_inst == 0 || !NODE_IS_CU(node_type)) { + if(IS_SOFTMODEM_DOFORMS) + end_forms(); + + LOG_I(ENB_APP,"stopping MODEM threads\n"); + stop_eNB(NB_eNB_INST); + stop_RU(RC.nb_RU); + + /* release memory used by the RU/eNB 
threads (incomplete), after all + * threads have been stopped (they partially use the same memory) */ + for (int inst = 0; inst < NB_eNB_INST; inst++) { + for (int cc_id = 0; cc_id < RC.nb_CC[inst]; cc_id++) { + free_transport(RC.eNB[inst][cc_id]); + phy_free_lte_eNB(RC.eNB[inst][cc_id]); + } + } + + for (int inst = 0; inst < RC.nb_RU; inst++) { + phy_free_RU(RC.ru[inst]); + } + + free_lte_top(); + end_configmodule(); + pthread_cond_destroy(&sync_cond); + pthread_mutex_destroy(&sync_mutex); + pthread_cond_destroy(&nfapi_sync_cond); + pthread_mutex_destroy(&nfapi_sync_mutex); + pthread_mutex_destroy(&ue_pf_po_mutex); + + for(int ru_id=0; ru_id<RC.nb_RU; ru_id++) { + if (RC.ru[ru_id]->rfdevice.trx_end_func) { + RC.ru[ru_id]->rfdevice.trx_end_func(&RC.ru[ru_id]->rfdevice); + RC.ru[ru_id]->rfdevice.trx_end_func = NULL; + } + + if (RC.ru[ru_id]->ifdevice.trx_end_func) { + RC.ru[ru_id]->ifdevice.trx_end_func(&RC.ru[ru_id]->ifdevice); + RC.ru[ru_id]->ifdevice.trx_end_func = NULL; + } + } + } + + terminate_opt(); + logClean(); + printf("Bye.\n"); + return 0; +} diff --git a/executables/split_headers.h b/executables/split_headers.h new file mode 100644 index 0000000000000000000000000000000000000000..4e328f74c418c7f902014b99630dd4fbe2130e7a --- /dev/null +++ b/executables/split_headers.h @@ -0,0 +1,329 @@ +/* +* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The OpenAirInterface Software Alliance licenses this file to You under +* the OAI Public License, Version 1.1 (the "License"); you may not use this file +* except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.openairinterface.org/?page_id=698 +* +* Author and copyright: Laurent Thomas, open-cells.com +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*------------------------------------------------------------------------------- +* For more information about the OpenAirInterface (OAI) Software Alliance: +* contact@openairinterface.org +*/ + + +#ifndef __SPLIT_HEADERS_H +#define __SPLIT_HEADERS_H + +#include <stdint.h> +#include <stdbool.h> +#include <openair1/PHY/defs_eNB.h> + +#define CU_PORT "7878" +#define DU_PORT "8787" +#define SPLIT73_CU 1 +#define SPLIT73_DU 2 +extern int split73; + +#define MTU 65536 +#define UDP_TIMEOUT 900000L // in micro second (struct timeval, NOT struct timespec) +// linux may timeout for a much longer time (up to 10ms) +#define blockAlign 32 //bytes to align memory for SIMD copy (256 bits vectors) + +// FS6 transport configuration and handler +typedef struct { + char *sourceIP; + char *sourcePort; + char *destIP; + char *destPort; + struct addrinfo *destAddr; + int sockHandler; +} UDPsock_t; + +#define CTsentCUv0 0xA500 +#define CTsentDUv0 0x5A00 + +// Main FS6 transport layer header +// All packets starts with this header +typedef struct commonUDP_s { + uint64_t timestamp; // id of the group (subframe for LTE) + uint16_t nbBlocks; // total number of blocks for this timestamp + uint16_t blockID; // id: 0..nbBocks-1 + uint16_t contentType; // defines the content format + uint16_t contentBytes; // will be sent in a UDP packet, so must be < 2^16 bytes + uint64_t senderClock; +} commonUDP_t; + +// FS6 UL common header (DU to CU) +// gives the RACH detection data and is always sent to inform the CU that a subframe 
arrived +typedef struct { + uint16_t max_preamble[4]; + uint16_t max_preamble_energy[4]; + uint16_t max_preamble_delay[4]; + uint16_t avg_preamble_energy[4]; +} fs6_ul_t; + +// FS6 DL common header (CU to DU) +// gives the DCI configuration from each subframe +typedef struct { + uint8_t pbch_pdu[4]; + int num_pdcch_symbols; + int num_dci; + DCI_ALLOC_t dci_alloc[8]; + int num_mdci; + int amp; + LTE_eNB_PHICH phich_vars; + uint64_t DuClock; + uint64_t CuSpentMicroSec; +} fs6_dl_t; + +// a value to type all sub packets, +// to detect errors, and to be able to extend to other versions +// the first byte of each sub structure should match one of these values +enum pckType { + fs6UlConfig=25, + fs6DlConfig=26, + fs6ULConfigCCH=27, + fs6ULsch=28, + fs6ULcch=29, + fs6ULindicationHarq=40, + fs6ULindicationSr=41, +}; + +// CU to DU definition of a future UL subframe decode +// defines a UE future data plane +typedef struct { + enum pckType type:8; + uint16_t UE_id; + int8_t harq_pid; + UE_type_t ue_type; + + uint8_t dci_alloc; + uint8_t rar_alloc; + SCH_status_t status; + uint8_t Msg3_flag; + uint8_t subframe; + uint32_t frame; + uint8_t handled; + uint8_t phich_active; + uint8_t phich_ACK; + uint16_t previous_first_rb; + uint32_t B; + uint32_t G; + UCI_format_t uci_format; + uint8_t Or2; + uint8_t o_RI[2]; + uint8_t o_ACK[4]; + uint8_t O_ACK; + uint8_t o_RCC; + int16_t q_ACK[MAX_ACK_PAYLOAD]; + int16_t q_RI[MAX_RI_PAYLOAD]; + uint32_t RTC[MAX_NUM_ULSCH_SEGMENTS]; + uint8_t ndi; + uint8_t round; + uint8_t rvidx; + uint8_t Nl; + uint8_t n_DMRS; + uint8_t previous_n_DMRS; + uint8_t n_DMRS2; + int32_t delta_TF; + uint32_t repetition_number ; + uint32_t total_number_of_repetitions; + + uint16_t harq_mask; + uint16_t nb_rb; + uint8_t Qm; + uint16_t first_rb; + uint8_t O_RI; + uint8_t Or1; + uint16_t Msc_initial; + uint8_t Nsymb_initial; + uint8_t V_UL_DAI; + uint8_t srs_active; + uint32_t TBS; + uint8_t Nsymb_pusch; + uint8_t Mlimit; + uint8_t max_turbo_iterations; + uint8_t 
bundling; + uint16_t beta_offset_cqi_times8; + uint16_t beta_offset_ri_times8; + uint16_t beta_offset_harqack_times8; + uint8_t Msg3_active; + uint16_t rnti; + uint8_t cyclicShift; + uint8_t cooperation_flag; + uint8_t num_active_cba_groups; + uint16_t cba_rnti[4];//NUM_MAX_CBA_GROUP]; +} fs6_dl_ulsched_t; + +// CU to DU defintion of a DL packet for a given UE +// The data itself is padded at the end of this structure +typedef struct { + enum pckType type:8; + int UE_id; + int8_t harq_pid; + uint16_t rnti; + int16_t sqrt_rho_a; + int16_t sqrt_rho_b; + CEmode_t CEmode:8; + uint16_t nb_rb; + uint8_t Qm; + int8_t Nl; + uint8_t pdsch_start; + uint8_t sib1_br_flag; + uint16_t i0; + uint32_t rb_alloc[4]; + int dataLen; +} fs6_dl_uespec_t; + +// CU to DU definition of CCH channel +typedef struct { + int16_t UE_id; + LTE_eNB_UCI cch_vars; +} fs6_dl_uespec_ulcch_element_t; + +// header to group all UE CCH channels definitions in one UDP packet +typedef struct { + enum pckType type:8; + int16_t nb_active_ue; +} fs6_dl_uespec_ulcch_t; + +// code internal, not transmitted as this +typedef struct { + int ta; +} ul_propagation_t; + +// One UE UL data, data plane, UE data appended after the header +typedef struct { + enum pckType type:8; + short UE_id; + uint8_t harq_id; + uint8_t segment; + int segLen; + int r_offset; + int G; + int ulsch_power[2]; + uint8_t o_ACK[4]; + uint8_t O_ACK; + int ta; + uint8_t o[MAX_CQI_BYTES]; + uint8_t cqi_crc_status; +} fs6_ul_uespec_t; + +// UL UCI (control plane), per UE +typedef struct { + enum pckType type:8; + int UEid; + int frame; + int subframe; + LTE_eNB_UCI uci; + uint8_t harq_ack[4]; + uint8_t tdd_mapping_mode; + uint16_t tdd_multiplexing_mask; + unsigned short n0_subband_power_dB; + uint16_t rnti; + int32_t stat; +} fs6_ul_uespec_uci_element_t; + +// all segments UCI grouped in one UDP packet +typedef struct { + enum pckType type:8; + int16_t nb_active_ue; +} fs6_ul_uespec_uci_t; + + +bool createUDPsock (char *sourceIP, char 
*sourcePort, char *destIP, char *destPort, UDPsock_t *result); +int receiveSubFrame(UDPsock_t *sock, void *bufferZone, int bufferSize, uint16_t contentType); +int sendSubFrame(UDPsock_t *sock, void *bufferZone, ssize_t secondHeaderSize, uint16_t contentType); + +#define initBufferZone(xBuf) \ + uint8_t xBuf[FS6_BUF_SIZE]; \ + ((commonUDP_t *)xBuf)->nbBlocks=0; + +#define hUDP(xBuf) ((commonUDP_t *)xBuf) +#define hDL(xBuf) ((fs6_dl_t*)(((commonUDP_t *)xBuf)+1)) +#define hUL(xBuf) ((fs6_ul_t*)(((commonUDP_t *)xBuf)+1)) +#define hDLUE(xBuf) ((fs6_dl_uespec_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1)) +#define hTxULUE(xBuf) ((fs6_dl_ulsched_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1)) +#define hTxULcch(xBuf) ((fs6_dl_uespec_ulcch_t*) (((fs6_dl_t*)(((commonUDP_t *)xBuf)+1))+1)) +#define hULUE(xBuf) ((fs6_ul_uespec_t*) (((fs6_ul_t*)(((commonUDP_t *)xBuf)+1))+1)) +#define hULUEuci(xBuf) ((fs6_ul_uespec_uci_t*) (((fs6_ul_t*)(((commonUDP_t *)xBuf)+1))+1)) + +static inline size_t alignedSize(uint8_t *ptr) { + commonUDP_t *header=(commonUDP_t *) ptr; + return ((header->contentBytes+sizeof(commonUDP_t)+blockAlign-1)/blockAlign)*blockAlign; +} + +static inline void *commonUDPdata(uint8_t *ptr) { + return (void *) (((commonUDP_t *)ptr)+1); +} + +void setAllfromTS(uint64_t TS, L1_rxtx_proc_t *proc); +void sendFs6Ulharq(enum pckType type, int UEid, PHY_VARS_eNB *eNB,LTE_eNB_UCI *uci, int frame, int subframe, uint8_t *harq_ack, uint8_t tdd_mapping_mode, uint16_t tdd_multiplexing_mask, + uint16_t rnti, int32_t stat); +void sendFs6Ul(PHY_VARS_eNB *eNB, int UE_id, int harq_pid, int segmentID, int16_t *data, int dataLen, int r_offset); +void *cu_fs6(void *arg); +void *du_fs6(void *arg); +void fill_rf_config(RU_t *ru, char *rf_config_file); +int init_rf(RU_t *ru); +void rx_rf(RU_t *ru, L1_rxtx_proc_t *proc); +void tx_rf(RU_t *ru, L1_rxtx_proc_t *proc); +void common_signal_procedures (PHY_VARS_eNB *eNB,int frame, int subframe); +void pmch_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t 
*proc); +bool dlsch_procedures(PHY_VARS_eNB *eNB, + L1_rxtx_proc_t *proc, + int harq_pid, + LTE_eNB_DLSCH_t *dlsch, + LTE_eNB_UE_stats *ue_stats) ; +void postDecode(L1_rxtx_proc_t *proc, notifiedFIFO_elt_t *req); +void pdsch_procedures(PHY_VARS_eNB *eNB, + L1_rxtx_proc_t *proc, + int harq_pid, + LTE_eNB_DLSCH_t *dlsch, + LTE_eNB_DLSCH_t *dlsch1); +void srs_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc); +void uci_procedures(PHY_VARS_eNB *eNB, + L1_rxtx_proc_t *proc); +void ocp_rx_prach(PHY_VARS_eNB *eNB, + L1_rxtx_proc_t *proc, + RU_t *ru, + uint16_t *max_preamble, + uint16_t *max_preamble_energy, + uint16_t *max_preamble_delay, + uint16_t *avg_preamble_energy, + uint16_t Nf, + uint8_t tdd_mapindex, + uint8_t br_flag); +void rx_prach0(PHY_VARS_eNB *eNB, + RU_t *ru, + int frame_prach, + int subframe, + uint16_t *max_preamble, + uint16_t *max_preamble_energy, + uint16_t *max_preamble_delay, + uint16_t *avg_preamble_energy, + uint16_t Nf, + uint8_t tdd_mapindex, + uint8_t br_flag, + uint8_t ce_level + ); +void ocp_tx_rf(RU_t *ru, L1_rxtx_proc_t *proc); + +// mistakes in main OAI +void phy_init_RU(RU_t *); +void fep_full(RU_t *ru, int subframe); +void feptx_prec(RU_t *ru,int frame,int subframe); +void feptx_ofdm(RU_t *ru, int frame, int subframe); +void oai_subframe_ind(uint16_t sfn, uint16_t sf); +extern uint16_t sf_ahead; +#endif diff --git a/executables/transport_split.c b/executables/transport_split.c new file mode 100644 index 0000000000000000000000000000000000000000..f380eb90e36977727e7b493b200999fa3edf6316 --- /dev/null +++ b/executables/transport_split.c @@ -0,0 +1,195 @@ +/* +* Licensed to the OpenAirInterface (OAI) Software Alliance under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. 
+* The OpenAirInterface Software Alliance licenses this file to You under +* the OAI Public License, Version 1.1 (the "License"); you may not use this file +* except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.openairinterface.org/?page_id=698 +* +* Author and copyright: Laurent Thomas, open-cells.com +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*------------------------------------------------------------------------------- +* For more information about the OpenAirInterface (OAI) Software Alliance: +* contact@openairinterface.org +*/ + + + +#include <executables/split_headers.h> +#include <sys/types.h> /* See NOTES */ +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <netdb.h> +#include <targets/RT/USER/lte-softmodem.h> + +bool createUDPsock (char *sourceIP, char *sourcePort, char *destIP, char *destPort, UDPsock_t *result) { + struct addrinfo hints= {0}, *servinfo, *p; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + hints.ai_flags = AI_PASSIVE; + int status; + + if ((status = getaddrinfo(sourceIP, sourcePort, &hints, &servinfo)) != 0) { + LOG_E(GTPU,"getaddrinfo error: %s\n", gai_strerror(status)); + return false; + } + + // loop through all the results and bind to the first we can + for(p = servinfo; p != NULL; p = p->ai_next) { + if ((result->sockHandler = socket(p->ai_family, p->ai_socktype, + p->ai_protocol)) == -1) { + LOG_W(GTPU,"socket: %s\n", strerror(errno)); + continue; + } + + if (bind(result->sockHandler, p->ai_addr, p->ai_addrlen) == -1) { + close(result->sockHandler); + LOG_W(GTPU,"bind: %s\n", strerror(errno)); + continue; + } + + break; // if we get here, we must have 
connected successfully + } + + if (p == NULL) { + // looped off the end of the list with no successful bind + LOG_E(GTPU,"failed to bind socket: %s %s \n",sourceIP,sourcePort); + return false; + } + + freeaddrinfo(servinfo); // all done with this structure + + if ((status = getaddrinfo(destIP, destPort, &hints, &servinfo)) != 0) { + LOG_E(GTPU,"getaddrinfo error: %s\n", gai_strerror(status)); + return false; + } + + if (servinfo) { + result->destAddr=servinfo; + } else { + LOG_E(PHY,"No valid UDP addr: %s:%s\n",destIP, destPort); + return false; + } + + int enable=1; + AssertFatal(setsockopt(result->sockHandler, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))==0,""); + struct timeval tv= {0,UDP_TIMEOUT}; + + if (IS_SOFTMODEM_RFSIM) + tv.tv_sec=2; //debug: wait 2 seconds for human understanding + + AssertFatal(setsockopt(result->sockHandler, SOL_SOCKET, SO_RCVTIMEO,&tv,sizeof(tv)) ==0,""); + // Make a send/recv buffer larger than a a couple of subframe + // so the kernel will store for us in and out paquets + int buff=1000*1000*10; + AssertFatal ( setsockopt(result->sockHandler, SOL_SOCKET, SO_SNDBUF, &buff, sizeof(buff)) == 0, ""); + AssertFatal ( setsockopt(result->sockHandler, SOL_SOCKET, SO_RCVBUF, &buff, sizeof(buff)) == 0, ""); + return true; +} + +// sock: udp socket +// bufferZone: a reception area of bufferSize +int receiveSubFrame(UDPsock_t *sock, void *bufferZone, int bufferSize, uint16_t contentType) { + int rcved=0; + commonUDP_t *bufOrigin=(commonUDP_t *)bufferZone; + static uint8_t crossData[65536]; + static int crossDataSize=0; + + if (crossDataSize) { + LOG_D(HW,"copy a block received in previous subframe\n"); + memcpy(bufferZone, crossData, crossDataSize); + rcved=1; + bufferZone+=crossDataSize; + crossDataSize=0; + } + + do { + //read all subframe data from the control unit + int ret=recv(sock->sockHandler, bufferZone, bufferSize, 0); + + if ( ret==-1) { + if ( errno == EWOULDBLOCK || errno== EINTR ) { + LOG_I(HW,"Received: Timeout, subframe 
incomplete\n"); + return rcved; + } else { + LOG_E(HW,"Critical issue in socket: %s\n", strerror(errno)); + return -1; + } + } else { + if (hUDP(bufferZone)->contentType != contentType) + abort(); + + if (rcved && bufOrigin->timestamp != hUDP(bufferZone)->timestamp ) { + if ( hUDP(bufferZone)->timestamp > bufOrigin->timestamp ) { + LOG_W(HW,"Received data for TS: %lu before end of TS : %lu completion\n", + hUDP(bufferZone)->timestamp, + bufOrigin->timestamp); + memcpy(crossData, bufferZone, ret ); + crossDataSize=ret; + return rcved; + } else { + LOG_W(HW,"Dropping late packet\n"); + continue; + } + } + + rcved++; + bufferZone+=ret; + } + + LOG_D(HW,"Received: blocks: %d/%d, size %d, TS: %lu\n", + rcved, bufOrigin->nbBlocks, ret, bufOrigin->timestamp); + } while ( rcved == 0 || rcved < bufOrigin->nbBlocks ); + + return rcved; +} + +int sendSubFrame(UDPsock_t *sock, void *bufferZone, ssize_t secondHeaderSize, uint16_t contentType) { + commonUDP_t *UDPheader=(commonUDP_t *)bufferZone ; + UDPheader->contentType=contentType; + UDPheader->senderClock=rdtsc(); + int nbBlocks=UDPheader->nbBlocks; + int blockId=0; + + if (nbBlocks <= 0 ) { + LOG_E(PHY,"FS6: can't send blocks: %d\n", nbBlocks); + return 0; + } + + do { + if (blockId > 0 ) { + commonUDP_t *currentHeader=(commonUDP_t *)bufferZone; + currentHeader->timestamp=UDPheader->timestamp; + currentHeader->nbBlocks=UDPheader->nbBlocks; + currentHeader->blockID=blockId; + currentHeader->contentType=UDPheader->contentType; + memcpy(commonUDPdata((void *)currentHeader), commonUDPdata(bufferZone), secondHeaderSize); + } + + blockId++; + int sz=alignedSize(bufferZone); + // Let's use the first address returned by getaddrinfo() + int ret=sendto(sock->sockHandler, bufferZone, sz, 0, + sock->destAddr->ai_addr, sock->destAddr->ai_addrlen); + + if ( ret != sz ) + LOG_W(HW,"Wrote socket doesn't return size %d (val: %d, errno:%d, %s)\n", + sz, ret, errno, strerror(errno)); + + LOG_D(HW,"Sent: TS: %lu, blocks %d/%d, block size : %d 
\n", + UDPheader->timestamp, UDPheader->nbBlocks-nbBlocks, UDPheader->nbBlocks, sz); + bufferZone+=sz; + nbBlocks--; + } while (nbBlocks); + + return 0; +} diff --git a/openair1/PHY/NR_REFSIG/scrambling_luts.c b/openair1/PHY/NR_REFSIG/scrambling_luts.c new file mode 100644 index 0000000000000000000000000000000000000000..9d957de0f716f2f6929110c63015b3d6e4a9c970 --- /dev/null +++ b/openair1/PHY/NR_REFSIG/scrambling_luts.c @@ -0,0 +1,62 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/* Lookup tables for 3GPP scrambling/unscrambling */ + +/* Author R. 
Knopp / EURECOM / OpenAirInterface.org */ +#ifndef __SCRAMBLING_LUTS__C__ +#define __SCRAMBLING_LUTS__C__ + +#include "PHY/impl_defs_nr.h" +#include "PHY/sse_intrin.h" + +__m64 byte2m64_re[256]; /* indexed by byte value; filled by init_byte2m64() */ +__m64 byte2m64_im[256]; /* indexed by byte value; filled by init_byte2m64() */ + +void init_byte2m64(void) { /* Fill both tables: for byte value s, 16-bit lane k of byte2m64_re[s] is 1-2*bit(2k) of s and lane k of byte2m64_im[s] is 1-2*bit(2k+1) of s, i.e. +1 for a 0 bit and -1 for a 1 bit */ + + for (int s=0;s<256;s++) { + byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*(s&1)),0); + byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>1)&1)),0); + byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>2)&1)),1); + byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>3)&1)),1); + byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>4)&1)),2); + byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>5)&1)),2); + byte2m64_re[s] = _mm_insert_pi16(byte2m64_re[s],(1-2*((s>>6)&1)),3); + byte2m64_im[s] = _mm_insert_pi16(byte2m64_im[s],(1-2*((s>>7)&1)),3); + printf("init_scrambling_luts: s %x (%d) ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", + ((uint16_t*)&s)[0], + (1-2*(s&1)), + ((int16_t*)&byte2m64_re[s])[0],((int16_t*)&byte2m64_im[s])[0], + ((int16_t*)&byte2m64_re[s])[1],((int16_t*)&byte2m64_im[s])[1], + ((int16_t*)&byte2m64_re[s])[2],((int16_t*)&byte2m64_im[s])[2], + ((int16_t*)&byte2m64_re[s])[3],((int16_t*)&byte2m64_im[s])[3]); /* unconditional trace: one line is printed for each of the 256 entries */ + + } +} + +void init_scrambling_luts(void) { /* Entry point for table setup; currently only calls init_byte2m64() */ + + init_byte2m64(); + +} + +#endif diff --git a/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c b/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c new file mode 100644 index 0000000000000000000000000000000000000000..2b0cbe72b3a91c8757c12d3ccbf194d5d08f740c --- /dev/null +++ b/openair1/PHY/NR_TRANSPORT/nr_uci_tools_common.c @@ -0,0 +1,167 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/NR_TRANSPORT/nr_uci_tools_common.c + * \brief + * \author + * \date 2020 + * \version 0.1 + * \company Eurecom + * \email: + * \note + * \warning + */ + +#include "nr_dci.h" + +void nr_group_sequence_hopping (pucch_GroupHopping_t PUCCH_GroupHopping, + uint32_t n_id, + uint8_t n_hop, + int nr_tti_tx, + uint8_t *u, + uint8_t *v) { + /* + * Implements TS 38.211 subclause 6.3.2.2.1 Group and sequence hopping + * The following variables are set by higher layers: + * - PUCCH_GroupHopping: neither, enable or disable; selects which of f_gh, f_ss and *v are computed below + * - n_id: higher-layer parameter hoppingId + * - n_hop: frequency hopping index + * if intra-slot frequency hopping is disabled by the higher-layer parameter PUCCH-frequency-hopping + * n_hop=0 + * if frequency hopping is enabled by the higher-layer parameter PUCCH-frequency-hopping + * n_hop=0 for the first hop + * n_hop=1 for the second hop; results are written to *u = (f_gh+f_ss)%30 and *v, both reset to 0 first + */ + // depending on the value of the PUCCH_GroupHopping, we will obtain different values for u,v + //pucch_GroupHopping_t PUCCH_GroupHopping = ue->pucch_config_common_nr->pucch_GroupHopping; // from higher layers FIXME!!! 
+ // n_id defined as per TS 38.211 subclause 6.3.2.2.1 (is given by the higher-layer parameter hoppingId) + // it is hoppingId from PUCCH-ConfigCommon: + // Cell-Specific scrambling ID for group hopping and sequence hopping if enabled + // Corresponds to L1 parameter 'HoppingID' (see 38.211, section 6.3.2.2) BIT STRING (SIZE (10)) + //uint16_t n_id = ue->pucch_config_common_nr->hoppingId; // from higher layers FIXME!!! +#ifdef DEBUG_NR_PUCCH_TX + printf("\t\t [nr_group_sequence_hopping] PUCCH_GroupHopping=%u, n_id=%u \n",PUCCH_GroupHopping,n_id); +#endif + uint8_t f_ss=0,f_gh=0; + *u=0; + *v=0; + uint32_t c_init = 0; + uint32_t x1,s; // TS 38.211 Subclause 5.2.1 + int l = 32, minShift = ((2*nr_tti_tx+n_hop)<<3); + int tmpShift =0; +#ifdef DEBUG_NR_PUCCH_TX + printf("\t\t [nr_group_sequence_hopping] calculating u,v -> "); +#endif + + if (PUCCH_GroupHopping == neither) { // PUCCH_GroupHopping 'neither' + f_ss = n_id%30; + } + + if (PUCCH_GroupHopping == enable) { // PUCCH_GroupHopping 'enabled' + c_init = floor(n_id/30); // we initialize c_init to calculate u,v according to 6.3.2.2.1 of 38.211 + s = lte_gold_generic(&x1, &c_init, 1); // TS 38.211 Subclause 5.2.1 + for (int m=0; m<8; m++) { + while(minShift >= l) { + s = lte_gold_generic(&x1, &c_init, 0); + l = l+32; + } + + tmpShift = (minShift&((1<<5)-1)); //minShift%32; + f_gh = f_gh + ((1<<m)*((uint8_t)((s>>tmpShift)&1))); + minShift ++; + } + + f_gh = f_gh%30; + f_ss = n_id%30; + /* for (int m=0; m<8; m++){ + f_gh = f_gh + ((1<<m)*((uint8_t)((s>>(8*(2*nr_tti_tx+n_hop)+m))&1))); // Not sure we have to use nr_tti_tx FIXME!!! 
+ } + f_gh = f_gh%30; + f_ss = n_id%30;*/ + } + + if (PUCCH_GroupHopping == disable) { // PUCCH_GroupHopping 'disabled' + c_init = (1<<5)*floor(n_id/30)+(n_id%30); // we initialize c_init to calculate u,v + s = lte_gold_generic(&x1, &c_init, 1); // TS 38.211 Subclause 5.2.1 + f_ss = n_id%30; + l = 32, minShift = (2*nr_tti_tx+n_hop); + + while(minShift >= l) { + s = lte_gold_generic(&x1, &c_init, 0); + l = l+32; + } + + tmpShift = (minShift&((1<<5)-1)); //minShift%32; + *v = (uint8_t)((s>>tmpShift)&1); + // *v = (uint8_t)((s>>(2*nr_tti_tx+n_hop))&1); // Not sure we have to use nr_tti_tx FIXME!!! + } + + *u = (f_gh+f_ss)%30; +#ifdef DEBUG_NR_PUCCH_TX + printf("%d,%d\n",*u,*v); +#endif +} + +double nr_cyclic_shift_hopping(uint32_t n_id, + uint8_t m0, + uint8_t mcs, + uint8_t lnormal, + uint8_t lprime, + int nr_tti_tx) { + /* + * Implements TS 38.211 subclause 6.3.2.2.2 Cyclic shift hopping + * - n_id: higher-layer parameter hoppingId + * - m0: provided by higher layer parameter PUCCH-F0-F1-initial-cyclic-shift of PUCCH-F0-resource-config + * - mcs: mcs=0 except for PUCCH format 0 when it depends on information to be transmitted according to TS 38.213 subclause 9.2 + * - lnormal: lnormal is the OFDM symbol number in the PUCCH transmission where l=0 corresponds to the first OFDM symbol of the PUCCH transmission + * - lprime: lprime is the index of the OFDM symbol in the slot that corresponds to the first OFDM symbol of the PUCCH transmission in the slot given by [5, TS 38.213] + */ + // alpha_init initialized to 2*PI/12=0.5235987756 + double alpha = 0.5235987756; + uint32_t c_init = n_id; // we initialize c_init again to calculate n_cs + + uint32_t x1,s = lte_gold_generic(&x1, &c_init, 1); // TS 38.211 Subclause 5.2.1 + uint8_t n_cs=0; + int l = 32, minShift = (14*8*nr_tti_tx )+ 8*(lnormal+lprime); + int tmpShift =0; +#ifdef DEBUG_NR_PUCCH_TX + printf("\t\t [nr_cyclic_shift_hopping] calculating alpha (cyclic shift) using c_init=%u -> \n",c_init); +#endif + + for (int 
m=0; m<8; m++) { + while(minShift >= l) { + s = lte_gold_generic(&x1, &c_init, 0); + l = l+32; + } + + tmpShift = (minShift&((1<<5)-1)); //minShift%32; + minShift ++; + n_cs = n_cs+((1<<m)*((uint8_t)((s>>tmpShift)&1))); + // calculating n_cs (Not sure we have to use nr_tti_tx FIXME!!!) + // n_cs = n_cs+((1<<m)*((uint8_t)((s>>((14*8*nr_tti_tx) + 8*(lnormal+lprime) + m))&1))); + } + + alpha = (alpha * (double)((m0+mcs+n_cs)%12)); +#ifdef DEBUG_NR_PUCCH_TX + printf("n_cs=%d -> %lf\n",n_cs,alpha); +#endif + return(alpha); /* returned value is 0.5235987756 * ((m0+mcs+n_cs) % 12) */ +} diff --git a/openair1/PHY/TOOLS/dfts_load.c b/openair1/PHY/TOOLS/dfts_load.c new file mode 100644 index 0000000000000000000000000000000000000000..d758f46a4330977dd7cd3a667ede8f8998ef6614 --- /dev/null +++ b/openair1/PHY/TOOLS/dfts_load.c @@ -0,0 +1,65 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! 
\file openair1/PHY/CODING/coding_nr_load.c + * \brief: load library implementing coding/decoding algorithms + * \author Francois TABURET + * \date 2020 + * \version 0.1 + * \company NOKIA BellLabs France + * \email: francois.taburet@nokia-bell-labs.com + * \note + * \warning + */ +#define _GNU_SOURCE +#include <sys/types.h> +#include <stdlib.h> +#include <malloc.h> +#include "assertions.h" +#include "common/utils/LOG/log.h" +#define OAIDFTS_LOADER +#include "tools_defs.h" +#include "common/config/config_userapi.h" +#include "common/utils/load_module_shlib.h" + + +/* function description array, to be used when loading the dfts/idfts lib */ +static loader_shlibfunc_t shlib_fdesc[2]; +static char *arg[64]={"phytest","-O","cmdlineonly::dbgl0"}; + + +int load_dftslib(void) { + + char *ptr = (char*)config_get_if(); + if ( ptr==NULL ) {// phy simulators, config module possibly not loaded + load_configmodule(3,(char **)arg,CONFIG_ENABLECMDLINEONLY) ; + logInit(); + } + shlib_fdesc[0].fname = "dft"; + shlib_fdesc[1].fname = "idft"; + int ret=load_module_shlib("dfts",shlib_fdesc,sizeof(shlib_fdesc)/sizeof(loader_shlibfunc_t),NULL); + AssertFatal( (ret >= 0),"Error loading dftsc decoder"); + dft = (dftfunc_t)shlib_fdesc[0].fptr; + idft = (idftfunc_t)shlib_fdesc[1].fptr; +return 0; +} + + diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c new file mode 100644 index 0000000000000000000000000000000000000000..bd516cd1e05c8aa973826feb34c760745996f7f4 --- /dev/null +++ b/openair1/PHY/TOOLS/oai_dfts.c @@ -0,0 +1,9707 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <stdint.h> +#include <math.h> +#include <pthread.h> +#include <execinfo.h> + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif +#define OAIDFTS_MAIN +#ifndef MR_MAIN +#include "PHY/defs_common.h" +#include "PHY/impl_defs_top.h" +#else +#include "time_meas.h" +#include "LOG/log.h" +#define debug_msg +#define ONE_OVER_SQRT2_Q15 23170 + +int oai_exit=0; +#endif + +#define ONE_OVER_SQRT3_Q15 18919 + +#include "../sse_intrin.h" + +#include "assertions.h" + +#define print_shorts(s,x) printf("%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7]) +#define print_shorts256(s,x) printf("%s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7],(x)[8],(x)[9],(x)[10],(x)[11],(x)[12],(x)[13],(x)[14],(x)[15]) + +#define print_ints(s,x) printf("%s %d %d %d %d\n",s,(x)[0],(x)[1],(x)[2],(x)[3]) + + +const static int16_t conjugatedft[32] __attribute__((aligned(32))) = {-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1}; + + +const static int16_t reflip[32] __attribute__((aligned(32))) = {1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1}; + + + + + + +#if defined(__x86_64__) || defined(__i386__) +static inline void cmac(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); +static 
inline void cmac(__m128i a,__m128i b, __m128i *re32, __m128i *im32) +{ /* Accumulate a*b on interleaved int16 (re,im) samples: *re32 += Re(a*b), *im32 += Im(a*b), as 32-bit sums */ + + __m128i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32; + + cmac_tmp = _mm_sign_epi16(b,*(__m128i*)reflip); + cmac_tmp_re32 = _mm_madd_epi16(a,cmac_tmp); + + + // cmac_tmp = _mm_shufflelo_epi16(b,_MM_SHUFFLE(2,3,0,1)); + // cmac_tmp = _mm_shufflehi_epi16(cmac_tmp,_MM_SHUFFLE(2,3,0,1)); + cmac_tmp = _mm_shuffle_epi8(b,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + cmac_tmp_im32 = _mm_madd_epi16(cmac_tmp,a); + + *re32 = _mm_add_epi32(*re32,cmac_tmp_re32); + *im32 = _mm_add_epi32(*im32,cmac_tmp_im32); +} + +static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); +static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) +{ /* Accumulate a*conj(b): *re32 += Re(a*conj(b)), *im32 += Im(a*conj(b)) */ + + __m128i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32; + + + cmac_tmp_re32 = _mm_madd_epi16(a,b); + + + cmac_tmp = _mm_sign_epi16(b,*(__m128i*)reflip); + // cmac_tmp = _mm_shufflelo_epi16(b,_MM_SHUFFLE(2,3,0,1)); + // cmac_tmp = _mm_shufflehi_epi16(cmac_tmp,_MM_SHUFFLE(2,3,0,1)); + cmac_tmp = _mm_shuffle_epi8(cmac_tmp,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + cmac_tmp_im32 = _mm_madd_epi16(cmac_tmp,a); + + *re32 = _mm_add_epi32(*re32,cmac_tmp_re32); + *im32 = _mm_add_epi32(*im32,cmac_tmp_im32); +} + +#ifdef __AVX2__ +static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline)); +static inline void cmac_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) +{ /* 256-bit counterpart of cmac: accumulates Re(a*b) into *re32 and Im(a*b) into *im32 */ + + __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32; + __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); /* swaps the re/im int16 halves of every complex sample; bytes 21,20 must read 23,22 (was 19,18), matching perm_mask in cmult_256 */ + + cmac_tmp = _mm256_sign_epi16(b,*(__m256i*)reflip); + cmac_tmp_re32 = _mm256_madd_epi16(a,cmac_tmp); + + cmac_tmp = _mm256_shuffle_epi8(b,imshuffle); + cmac_tmp_im32 = _mm256_madd_epi16(cmac_tmp,a); + + *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32); + *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32); +} + +static inline 
void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline)); +static inline void cmacc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) +{ /* 256-bit counterpart of cmacc: accumulates Re(a*conj(b)) into *re32 and Im(a*conj(b)) into *im32 */ + + __m256i cmac_tmp,cmac_tmp_re32,cmac_tmp_im32; + __m256i imshuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); /* swaps the re/im int16 halves of every complex sample; bytes 21,20 must read 23,22 (was 19,18), matching perm_mask in cmult_256 */ + + cmac_tmp_re32 = _mm256_madd_epi16(a,b); + + + cmac_tmp = _mm256_sign_epi16(b,*(__m256i*)reflip); + cmac_tmp = _mm256_shuffle_epi8(cmac_tmp,imshuffle); /* shuffle the sign-flipped copy as the 128-bit cmacc does; shuffling b discarded the conjugation */ + cmac_tmp_im32 = _mm256_madd_epi16(cmac_tmp,a); + + *re32 = _mm256_add_epi32(*re32,cmac_tmp_re32); + *im32 = _mm256_add_epi32(*im32,cmac_tmp_im32); +} + +#endif + +static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); + +static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) +{ + + register __m128i mmtmpb; + + mmtmpb = _mm_sign_epi16(b,*(__m128i*)reflip); + *re32 = _mm_madd_epi16(a,mmtmpb); + // mmtmpb = _mm_shufflelo_epi16(b,_MM_SHUFFLE(2,3,0,1)); + // mmtmpb = _mm_shufflehi_epi16(mmtmpb,_MM_SHUFFLE(2,3,0,1)); + mmtmpb = _mm_shuffle_epi8(b,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + *im32 = _mm_madd_epi16(a,mmtmpb); + +} + +#ifdef __AVX2__ +static inline void cmult_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline)); + +static inline void cmult_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) +{ + + register __m256i mmtmpb; + __m256i const perm_mask = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + mmtmpb = _mm256_sign_epi16(b,*(__m256i*)reflip); + *re32 = _mm256_madd_epi16(a,mmtmpb); + mmtmpb = _mm256_shuffle_epi8(b,perm_mask); + *im32 = _mm256_madd_epi16(a,mmtmpb); + +} + +#endif + +static inline void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); + +static inline void cmultc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) +{ + + 
register __m128i mmtmpb; + + *re32 = _mm_madd_epi16(a,b); + mmtmpb = _mm_sign_epi16(b,*(__m128i*)reflip); + mmtmpb = _mm_shuffle_epi8(mmtmpb,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + *im32 = _mm_madd_epi16(a,mmtmpb); + +} + +#ifdef __AVX2__ +static inline void cmultc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) __attribute__((always_inline)); + +static inline void cmultc_256(__m256i a,__m256i b, __m256i *re32, __m256i *im32) +{ + + register __m256i mmtmpb; + __m256i const perm_mask = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + *re32 = _mm256_madd_epi16(a,b); + mmtmpb = _mm256_sign_epi16(b,*(__m256i*)reflip); + mmtmpb = _mm256_shuffle_epi8(mmtmpb,perm_mask); + *im32 = _mm256_madd_epi16(a,mmtmpb); + +} + +#endif + +static inline __m128i cpack(__m128i xre,__m128i xim) __attribute__((always_inline)); + +static inline __m128i cpack(__m128i xre,__m128i xim) +{ + + register __m128i cpack_tmp1,cpack_tmp2; + + cpack_tmp1 = _mm_unpacklo_epi32(xre,xim); + cpack_tmp2 = _mm_unpackhi_epi32(xre,xim); + return(_mm_packs_epi32(_mm_srai_epi32(cpack_tmp1,15),_mm_srai_epi32(cpack_tmp2,15))); + +} + +#ifdef __AVX2__ +static inline __m256i cpack_256(__m256i xre,__m256i xim) __attribute__((always_inline)); + +static inline __m256i cpack_256(__m256i xre,__m256i xim) +{ + + register __m256i cpack_tmp1,cpack_tmp2; + + cpack_tmp1 = _mm256_unpacklo_epi32(xre,xim); + cpack_tmp2 = _mm256_unpackhi_epi32(xre,xim); + return(_mm256_packs_epi32(_mm256_srai_epi32(cpack_tmp1,15),_mm256_srai_epi32(cpack_tmp2,15))); + +} + +#endif + +static inline void packed_cmult(__m128i a,__m128i b, __m128i *c) __attribute__((always_inline)); + +static inline void packed_cmult(__m128i a,__m128i b, __m128i *c) +{ + + __m128i cre,cim; + cmult(a,b,&cre,&cim); + *c = cpack(cre,cim); + +} + +#ifdef __AVX2__ +static inline void packed_cmult_256(__m256i a,__m256i b, __m256i *c) __attribute__((always_inline)); + +static inline void 
packed_cmult_256(__m256i a,__m256i b, __m256i *c) +{ + + __m256i cre,cim; + cmult_256(a,b,&cre,&cim); + *c = cpack_256(cre,cim); + +} +#endif + +static inline void packed_cmultc(__m128i a,__m128i b, __m128i *c) __attribute__((always_inline)); + +static inline void packed_cmultc(__m128i a,__m128i b, __m128i *c) +{ + + __m128i cre,cim; + + cmultc(a,b,&cre,&cim); + *c = cpack(cre,cim); + +} + +#ifdef __AVX2__ +static inline void packed_cmultc_256(__m256i a,__m256i b, __m256i *c) __attribute__((always_inline)); + +static inline void packed_cmultc_256(__m256i a,__m256i b, __m256i *c) +{ + + __m256i cre,cim; + + cmultc_256(a,b,&cre,&cim); + *c = cpack_256(cre,cim); + +} +#endif + +static inline __m128i packed_cmult2(__m128i a,__m128i b,__m128i b2) __attribute__((always_inline)); + +static inline __m128i packed_cmult2(__m128i a,__m128i b,__m128i b2) +{ + + + register __m128i cre,cim; + + cre = _mm_madd_epi16(a,b); + cim = _mm_madd_epi16(a,b2); + + return(cpack(cre,cim)); + +} + +#ifdef __AVX2__ +static inline __m256i packed_cmult2_256(__m256i a,__m256i b,__m256i b2) __attribute__((always_inline)); + +static inline __m256i packed_cmult2_256(__m256i a,__m256i b,__m256i b2) +{ + + + register __m256i cre,cim; + + cre = _mm256_madd_epi16(a,b); + cim = _mm256_madd_epi16(a,b2); + + return(cpack_256(cre,cim)); + +} +#endif + +#elif defined (__arm__) +static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline)); +static inline void cmac(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) +{ + + + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1; + int16x8_t bflip = vrev32q_s16(b); + int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip); + + ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bconj)[0]); + ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bconj)[1]); + ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bflip)[0]); + ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bflip)[1]); + *re32 = 
vqaddq_s32(*re32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]), + vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1]))); + *im32 = vqaddq_s32(*im32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]), + vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1]))); +} + +static inline void cmacc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline)); +static inline void cmacc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) +{ + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1; + int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip); + int16x8_t bflip = vrev32q_s16(bconj); + + ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b)[0]); + ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b)[1]); + ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&bflip)[0]); + ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&bflip)[1]); + *re32 = vqaddq_s32(*re32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]), + vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1]))); + *im32 = vqaddq_s32(*im32,vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]), + vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1]))); + +} + +static inline void cmult(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline)); +static inline void cmult(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) +{ + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1; + int16x8_t bflip = vrev32q_s16(b); + int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip); + int16x4_t al,ah,bcl,bch,bfl,bfh; + int32x2_t abr0l,abr0h,abr1l,abr1h,abi0l,abi0h,abi1l,abi1h; + + al = vget_low_s16(a); ah = vget_high_s16(a); + bcl = vget_low_s16(bconj); bch = vget_high_s16(bconj); + bfl = vget_low_s16(bflip); bfh = vget_high_s16(bflip); + + ab_re0 = vmull_s16(al,bcl); + ab_re1 = vmull_s16(ah,bch); + ab_im0 = vmull_s16(al,bfl); + ab_im1 = vmull_s16(ah,bfh); + 
abr0l = vget_low_s32(ab_re0); abr0h = vget_high_s32(ab_re0); + abr1l = vget_low_s32(ab_re1); abr1h = vget_high_s32(ab_re1); + abi0l = vget_low_s32(ab_im0); abi0h = vget_high_s32(ab_im0); + abi1l = vget_low_s32(ab_im1); abi1h = vget_high_s32(ab_im1); + + *re32 = vcombine_s32(vpadd_s32(abr0l,abr0h), + vpadd_s32(abr1l,abr1h)); + *im32 = vcombine_s32(vpadd_s32(abi0l,abi0h), + vpadd_s32(abi1l,abi1h)); +} + +static inline void cmultc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) __attribute__((always_inline)); + +static inline void cmultc(int16x8_t a,int16x8_t b, int32x4_t *re32, int32x4_t *im32) +{ + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1; + int16x8_t bconj = vmulq_s16(b,*(int16x8_t *)reflip); + int16x8_t bflip = vrev32q_s16(bconj); + int16x4_t al,ah,bl,bh,bfl,bfh; + int32x2_t abr0l,abr0h,abr1l,abr1h,abi0l,abi0h,abi1l,abi1h; + al = vget_low_s16(a); ah = vget_high_s16(a); + bl = vget_low_s16(b); bh = vget_high_s16(b); + bfl = vget_low_s16(bflip); bfh = vget_high_s16(bflip); + + ab_re0 = vmull_s16(al,bl); + ab_re1 = vmull_s16(ah,bh); + ab_im0 = vmull_s16(al,bfl); + ab_im1 = vmull_s16(ah,bfh); + + abr0l = vget_low_s32(ab_re0); abr0h = vget_high_s32(ab_re0); + abr1l = vget_low_s32(ab_re1); abr1h = vget_high_s32(ab_re1); + abi0l = vget_low_s32(ab_im0); abi0h = vget_high_s32(ab_im0); + abi1l = vget_low_s32(ab_im1); abi1h = vget_high_s32(ab_im1); + + *re32 = vcombine_s32(vpadd_s32(abr0l,abr0h), + vpadd_s32(abr1l,abr1h)); + *im32 = vcombine_s32(vpadd_s32(abi0l,abi0h), + vpadd_s32(abi1l,abi1h)); + +} + + +static inline int16x8_t cpack(int32x4_t xre,int32x4_t xim) __attribute__((always_inline)); + +static inline int16x8_t cpack(int32x4_t xre,int32x4_t xim) +{ + int32x4x2_t xtmp; + + xtmp = vzipq_s32(xre,xim); + return(vcombine_s16(vqshrn_n_s32(xtmp.val[0],15),vqshrn_n_s32(xtmp.val[1],15))); + +} + + +static inline void packed_cmult(int16x8_t a,int16x8_t b, int16x8_t *c) __attribute__((always_inline)); + +static inline void packed_cmult(int16x8_t a,int16x8_t b, 
int16x8_t *c) +{ + + int32x4_t cre,cim; + cmult(a,b,&cre,&cim); + *c = cpack(cre,cim); + +} + + +static inline void packed_cmultc(int16x8_t a,int16x8_t b, int16x8_t *c) __attribute__((always_inline)); + +static inline void packed_cmultc(int16x8_t a,int16x8_t b, int16x8_t *c) +{ + + int32x4_t cre,cim; + + cmultc(a,b,&cre,&cim); + *c = cpack(cre,cim); + +} + +static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b, int16x8_t b2) __attribute__((always_inline)); + +static inline int16x8_t packed_cmult2(int16x8_t a,int16x8_t b, int16x8_t b2) +{ + + + + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,cre,cim; + + ab_re0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b)[0]); + ab_re1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b)[1]); + ab_im0 = vmull_s16(((int16x4_t*)&a)[0],((int16x4_t*)&b2)[0]); + ab_im1 = vmull_s16(((int16x4_t*)&a)[1],((int16x4_t*)&b2)[1]); + cre = vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]), + vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1])); + cim = vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]), + vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1])); + return(cpack(cre,cim)); + +} + +#endif + +const static int16_t W0s[16]__attribute__((aligned(32))) = {32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0,32767,0}; + +const static int16_t W13s[16]__attribute__((aligned(32))) = {-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378,-16384,-28378}; +const static int16_t W23s[16]__attribute__((aligned(32))) = {-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378,-16384,28378}; + +const static int16_t W15s[16]__attribute__((aligned(32))) = {10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163,10126,-31163}; +const static int16_t W25s[16]__attribute__((aligned(32))) = 
{-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260,-26509,-19260}; +const static int16_t W35s[16]__attribute__((aligned(32))) = {-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260,-26510,19260}; +const static int16_t W45s[16]__attribute__((aligned(32))) = {10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163,10126,31163}; + +#if defined(__x86_64__) || defined(__i386__) +const __m128i *W0 = (__m128i *)W0s; +const __m128i *W13 = (__m128i *)W13s; +const __m128i *W23 = (__m128i *)W23s; +const __m128i *W15 = (__m128i *)W15s; +const __m128i *W25 = (__m128i *)W25s; +const __m128i *W35 = (__m128i *)W35s; +const __m128i *W45 = (__m128i *)W45s; + +#ifdef __AVX2__ +const __m256i *W0_256 = (__m256i *)W0s; +const __m256i *W13_256 = (__m256i *)W13s; +const __m256i *W23_256 = (__m256i *)W23s; +const __m256i *W15_256 = (__m256i *)W15s; +const __m256i *W25_256 = (__m256i *)W25s; +const __m256i *W35_256 = (__m256i *)W35s; +const __m256i *W45_256 = (__m256i *)W45s; +#endif + +#elif defined(__arm__) +int16x8_t *W0 = (int16x8_t *)W0s; +int16x8_t *W13 = (int16x8_t *)W13s; +int16x8_t *W23 = (int16x8_t *)W23s; +int16x8_t *W15 = (int16x8_t *)W15s; +int16x8_t *W25 = (int16x8_t *)W25s; +int16x8_t *W35 = (int16x8_t *)W35s; +int16x8_t *W45 = (int16x8_t *)W45s; +#endif +const static int16_t dft_norm_table[16] = {9459, //12 + 6689,//24 + 5461,//36 + 4729,//482 + 4230,//60 + 23170,//72 + 3344,//96 + 3153,//108 + 2991,//120 + 18918,//sqrt(3),//144 + 18918,//sqrt(3),//180 + 16384,//2, //192 + 18918,//sqrt(3), // 216 + 16384,//2, //240 + 18918,//sqrt(3), // 288 + 14654 +}; //sqrt(5) //300 + + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)__attribute__((always_inline)); + +static inline void bfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw) +{ + + __m128i 
x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + __m128i bfly2_tmp1,bfly2_tmp2; + + cmult(*(x0),*(W0),&x0r_2,&x0i_2); + cmult(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = _mm_srai_epi32(_mm_add_epi32(x0r_2,x1r_2),15); + dy1r = _mm_srai_epi32(_mm_sub_epi32(x0r_2,x1r_2),15); + dy0i = _mm_srai_epi32(_mm_add_epi32(x0i_2,x1i_2),15); + // printf("y0i %d\n",((int16_t *)y0i)[0]); + dy1i = _mm_srai_epi32(_mm_sub_epi32(x0i_2,x1i_2),15); + + bfly2_tmp1 = _mm_unpacklo_epi32(dy0r,dy0i); + bfly2_tmp2 = _mm_unpackhi_epi32(dy0r,dy0i); + *y0 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2); + + bfly2_tmp1 = _mm_unpacklo_epi32(dy1r,dy1i); + bfly2_tmp2 = _mm_unpackhi_epi32(dy1r,dy1i); + *y1 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2); +} + +#ifdef __AVX2__ + +static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline)); + +static inline void bfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw) +{ + + __m256i x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + __m256i bfly2_tmp1,bfly2_tmp2; + + cmult_256(*(x0),*(W0_256),&x0r_2,&x0i_2); + cmult_256(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = _mm256_srai_epi32(_mm256_add_epi32(x0r_2,x1r_2),15); + dy1r = _mm256_srai_epi32(_mm256_sub_epi32(x0r_2,x1r_2),15); + dy0i = _mm256_srai_epi32(_mm256_add_epi32(x0i_2,x1i_2),15); + // printf("y0i %d\n",((int16_t *)y0i)[0]); + dy1i = _mm256_srai_epi32(_mm256_sub_epi32(x0i_2,x1i_2),15); + + bfly2_tmp1 = _mm256_unpacklo_epi32(dy0r,dy0i); + bfly2_tmp2 = _mm256_unpackhi_epi32(dy0r,dy0i); + *y0 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2); + + bfly2_tmp1 = _mm256_unpacklo_epi32(dy1r,dy1i); + bfly2_tmp2 = _mm256_unpackhi_epi32(dy1r,dy1i); + *y1 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2); +} + +#endif + +#elif defined(__arm__) + +static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw)__attribute__((always_inline)); + +static inline void bfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw) 
+{ + + int32x4_t x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + + cmult(*(x0),*(W0),&x0r_2,&x0i_2); + cmult(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = vqaddq_s32(x0r_2,x1r_2); + dy1r = vqsubq_s32(x0r_2,x1r_2); + dy0i = vqaddq_s32(x0i_2,x1i_2); + dy1i = vqsubq_s32(x0i_2,x1i_2); + + *y0 = cpack(dy0r,dy0i); + *y1 = cpack(dy1r,dy1i); +} + + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1)__attribute__((always_inline)); + +static inline void bfly2_tw1(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1) +{ + + *y0 = _mm_adds_epi16(*x0,*x1); + *y1 = _mm_subs_epi16(*x0,*x1); + +} + +#elif defined(__arm__) + +static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1)__attribute__((always_inline)); + +static inline void bfly2_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1) +{ + + *y0 = vqaddq_s16(*x0,*x1); + *y1 = vqsubq_s16(*x0,*x1); + +} +#endif + +#if defined(__x86_64__) || defined(__i386__) + + + +static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb)__attribute__((always_inline)); + +static inline void bfly2_16(__m128i *x0, __m128i *x1, __m128i *y0, __m128i *y1, __m128i *tw, __m128i *twb) +{ + + // register __m128i x1t; + __m128i x1t; + + x1t = packed_cmult2(*(x1),*(tw),*(twb)); + /* + print_shorts("x0",(int16_t*)x0); + print_shorts("x1",(int16_t*)x1); + print_shorts("tw",(int16_t*)tw); + print_shorts("twb",(int16_t*)twb); + print_shorts("x1t",(int16_t*)&x1t);*/ + *y0 = _mm_adds_epi16(*x0,x1t); + *y1 = _mm_subs_epi16(*x0,x1t); + /* print_shorts("y0",(int16_t*)y0); + print_shorts("y1",(int16_t*)y1);*/ +} + +#ifdef __AVX2__ + +static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb)__attribute__((always_inline)); + +static inline void bfly2_16_256(__m256i *x0, __m256i *x1, __m256i *y0, __m256i *y1, __m256i *tw, __m256i *twb) +{ + + // 
register __m256i x1t; + __m256i x1t; + + x1t = packed_cmult2_256(*(x1),*(tw),*(twb)); + /* + print_shorts256("x0",(int16_t*)x0); + print_shorts256("x1",(int16_t*)x1); + print_shorts256("tw",(int16_t*)tw); + print_shorts256("twb",(int16_t*)twb); + print_shorts256("x1t",(int16_t*)&x1t);*/ + *y0 = _mm256_adds_epi16(*x0,x1t); + *y1 = _mm256_subs_epi16(*x0,x1t); + + /*print_shorts256("y0",(int16_t*)y0); + print_shorts256("y1",(int16_t*)y1);*/ +} +#endif + + +#elif defined(__arm__) + +static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb)__attribute__((always_inline)); + +static inline void bfly2_16(int16x8_t *x0, int16x8_t *x1, int16x8_t *y0, int16x8_t *y1, int16x8_t *tw, int16x8_t *twb) +{ + + *y0 = vqaddq_s16(*x0,*x1); + *y1 = vqsubq_s16(*x0,*x1); + +} +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw)__attribute__((always_inline)); + +static inline void ibfly2(__m128i *x0, __m128i *x1,__m128i *y0, __m128i *y1,__m128i *tw) +{ + + __m128i x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + __m128i bfly2_tmp1,bfly2_tmp2; + + cmultc(*(x0),*(W0),&x0r_2,&x0i_2); + cmultc(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = _mm_srai_epi32(_mm_add_epi32(x0r_2,x1r_2),15); + dy1r = _mm_srai_epi32(_mm_sub_epi32(x0r_2,x1r_2),15); + dy0i = _mm_srai_epi32(_mm_add_epi32(x0i_2,x1i_2),15); + // printf("y0i %d\n",((int16_t *)y0i)[0]); + dy1i = _mm_srai_epi32(_mm_sub_epi32(x0i_2,x1i_2),15); + + bfly2_tmp1 = _mm_unpacklo_epi32(dy0r,dy0i); + bfly2_tmp2 = _mm_unpackhi_epi32(dy0r,dy0i); + *y0 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2); + + bfly2_tmp1 = _mm_unpacklo_epi32(dy1r,dy1i); + bfly2_tmp2 = _mm_unpackhi_epi32(dy1r,dy1i); + *y1 = _mm_packs_epi32(bfly2_tmp1,bfly2_tmp2); +} + +#ifdef __AVX2__ +static inline void ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw)__attribute__((always_inline)); + +static inline void 
ibfly2_256(__m256i *x0, __m256i *x1,__m256i *y0, __m256i *y1,__m256i *tw) +{ + + __m256i x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + __m256i bfly2_tmp1,bfly2_tmp2; + + cmultc_256(*(x0),*(W0_256),&x0r_2,&x0i_2); + cmultc_256(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = _mm256_srai_epi32(_mm256_add_epi32(x0r_2,x1r_2),15); + dy1r = _mm256_srai_epi32(_mm256_sub_epi32(x0r_2,x1r_2),15); + dy0i = _mm256_srai_epi32(_mm256_add_epi32(x0i_2,x1i_2),15); + // printf("y0i %d\n",((int16_t *)y0i)[0]); + dy1i = _mm256_srai_epi32(_mm256_sub_epi32(x0i_2,x1i_2),15); + + bfly2_tmp1 = _mm256_unpacklo_epi32(dy0r,dy0i); + bfly2_tmp2 = _mm256_unpackhi_epi32(dy0r,dy0i); + *y0 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2); + + bfly2_tmp1 = _mm256_unpacklo_epi32(dy1r,dy1i); + bfly2_tmp2 = _mm256_unpackhi_epi32(dy1r,dy1i); + *y1 = _mm256_packs_epi32(bfly2_tmp1,bfly2_tmp2); +} +#endif + +#elif defined(__arm__) +static inline void ibfly2(int16x8_t *x0, int16x8_t *x1,int16x8_t *y0, int16x8_t *y1,int16x8_t *tw) +{ + + int32x4_t x0r_2,x0i_2,x1r_2,x1i_2,dy0r,dy1r,dy0i,dy1i; + + cmultc(*(x0),*(W0),&x0r_2,&x0i_2); + cmultc(*(x1),*(tw),&x1r_2,&x1i_2); + + dy0r = vqaddq_s32(x0r_2,x1r_2); + dy1r = vqsubq_s32(x0r_2,x1r_2); + dy0i = vqaddq_s32(x0i_2,x1i_2); + dy1i = vqsubq_s32(x0i_2,x1i_2); + + *y0 = cpack(dy0r,dy0i); + *y1 = cpack(dy1r,dy1i); + +} + +#endif + + + + +// This is the radix-3 butterfly (fft) + +#if defined(__x86_64__) || defined(__i386__) + +static inline void bfly3(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2, + __m128i *tw1,__m128i *tw2) __attribute__((always_inline)); + +static inline void bfly3(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2, + __m128i *tw1,__m128i *tw2) +{ + + __m128i tmpre,tmpim,x1_2,x2_2; + + packed_cmult(*(x1),*(tw1),&x1_2); + packed_cmult(*(x2),*(tw2),&x2_2); + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1_2,x2_2)); + cmult(x1_2,*(W13),&tmpre,&tmpim); + cmac(x2_2,*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); 
+ *(y1) = _mm_adds_epi16(*(x0),*(y1)); + cmult(x1_2,*(W23),&tmpre,&tmpim); + cmac(x2_2,*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = _mm_adds_epi16(*(x0),*(y2)); +} + +#ifdef __AVX2__ + +static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2, + __m256i *tw1,__m256i *tw2) __attribute__((always_inline)); + +static inline void bfly3_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2, + __m256i *tw1,__m256i *tw2) +{ + + __m256i tmpre,tmpim,x1_2,x2_2; + + packed_cmult_256(*(x1),*(tw1),&x1_2); + packed_cmult_256(*(x2),*(tw2),&x2_2); + *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2)); + cmult_256(x1_2,*(W13_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W23_256),&tmpre,&tmpim); + *(y1) = cpack_256(tmpre,tmpim); + *(y1) = _mm256_adds_epi16(*(x0),*(y1)); + cmult_256(x1_2,*(W23_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W13_256),&tmpre,&tmpim); + *(y2) = cpack_256(tmpre,tmpim); + *(y2) = _mm256_adds_epi16(*(x0),*(y2)); +} +#endif + +#elif defined(__arm__) +static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2, + int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline)); + +static inline void bfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2, + int16x8_t *tw1,int16x8_t *tw2) +{ + + int32x4_t tmpre,tmpim; + int16x8_t x1_2,x2_2; + + packed_cmult(*(x1),*(tw1),&x1_2); + packed_cmult(*(x2),*(tw2),&x2_2); + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(x1_2,x2_2)); + cmult(x1_2,*(W13),&tmpre,&tmpim); + cmac(x2_2,*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = vqaddq_s16(*(x0),*(y1)); + cmult(x1_2,*(W23),&tmpre,&tmpim); + cmac(x2_2,*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = vqaddq_s16(*(x0),*(y2)); +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2, + 
__m128i *tw1,__m128i *tw2) __attribute__((always_inline)); + +static inline void ibfly3(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2, + __m128i *tw1,__m128i *tw2) +{ + + __m128i tmpre,tmpim,x1_2,x2_2; + + packed_cmultc(*(x1),*(tw1),&x1_2); + packed_cmultc(*(x2),*(tw2),&x2_2); + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1_2,x2_2)); + cmultc(x1_2,*(W13),&tmpre,&tmpim); + cmacc(x2_2,*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = _mm_adds_epi16(*(x0),*(y1)); + cmultc(x1_2,*(W23),&tmpre,&tmpim); + cmacc(x2_2,*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = _mm_adds_epi16(*(x0),*(y2)); +} + +#ifdef __AVX2__ + +static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2, + __m256i *tw1,__m256i *tw2) __attribute__((always_inline)); + +static inline void ibfly3_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2, + __m256i *tw1,__m256i *tw2) +{ + + __m256i tmpre,tmpim,x1_2,x2_2; + + packed_cmultc_256(*(x1),*(tw1),&x1_2); + packed_cmultc_256(*(x2),*(tw2),&x2_2); + *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,x2_2)); + cmultc_256(x1_2,*(W13_256),&tmpre,&tmpim); + cmacc_256(x2_2,*(W23_256),&tmpre,&tmpim); + *(y1) = cpack_256(tmpre,tmpim); + *(y1) = _mm256_adds_epi16(*(x0),*(y1)); + cmultc_256(x1_2,*(W23_256),&tmpre,&tmpim); + cmacc_256(x2_2,*(W13_256),&tmpre,&tmpim); + *(y2) = cpack_256(tmpre,tmpim); + *(y2) = _mm256_adds_epi16(*(x0),*(y2)); +} +#endif + +#elif defined(__arm__) +static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2, + int16x8_t *tw1,int16x8_t *tw2) __attribute__((always_inline)); + +static inline void ibfly3(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2, + int16x8_t *tw1,int16x8_t *tw2) +{ + + int32x4_t tmpre,tmpim; + int16x8_t x1_2,x2_2; + + packed_cmultc(*(x1),*(tw1),&x1_2); + packed_cmultc(*(x2),*(tw2),&x2_2); + 
*(y0) = vqaddq_s16(*(x0),vqaddq_s16(x1_2,x2_2)); + cmultc(x1_2,*(W13),&tmpre,&tmpim); + cmacc(x2_2,*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = vqaddq_s16(*(x0),*(y1)); + cmultc(x1_2,*(W23),&tmpre,&tmpim); + cmacc(x2_2,*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = vqaddq_s16(*(x0),*(y2)); +} +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2) __attribute__((always_inline)); + +static inline void bfly3_tw1(__m128i *x0,__m128i *x1,__m128i *x2, + __m128i *y0,__m128i *y1,__m128i *y2) +{ + + __m128i tmpre,tmpim; + + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(*(x1),*(x2))); + cmult(*(x1),*(W13),&tmpre,&tmpim); + cmac(*(x2),*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = _mm_adds_epi16(*(x0),*(y1)); + cmult(*(x1),*(W23),&tmpre,&tmpim); + cmac(*(x2),*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = _mm_adds_epi16(*(x0),*(y2)); +} + +#ifdef __AVX2__ + +static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2) __attribute__((always_inline)); + +static inline void bfly3_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2, + __m256i *y0,__m256i *y1,__m256i *y2) +{ + + __m256i tmpre,tmpim; + + *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(*(x1),*(x2))); + cmult_256(*(x1),*(W13_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W23_256),&tmpre,&tmpim); + *(y1) = cpack_256(tmpre,tmpim); + *(y1) = _mm256_adds_epi16(*(x0),*(y1)); + cmult_256(*(x1),*(W23_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W13_256),&tmpre,&tmpim); + *(y2) = cpack_256(tmpre,tmpim); + *(y2) = _mm256_adds_epi16(*(x0),*(y2)); +} +#endif + +#elif defined(__arm__) +static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2) __attribute__((always_inline)); + +static inline void bfly3_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2, + int16x8_t *y0,int16x8_t 
*y1,int16x8_t *y2) +{ + + int32x4_t tmpre,tmpim; + + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),*(x2))); + cmult(*(x1),*(W13),&tmpre,&tmpim); + cmac(*(x2),*(W23),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = vqaddq_s16(*(x0),*(y1)); + cmult(*(x1),*(W23),&tmpre,&tmpim); + cmac(*(x2),*(W13),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = vqaddq_s16(*(x0),*(y2)); + +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3)__attribute__((always_inline)); + +static inline void bfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3) +{ + + __m128i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + // cmult(*(x0),*(W0),&x0r_2,&x0i_2); + cmult(*(x1),*(tw1),&x1r_2,&x1i_2); + cmult(*(x2),*(tw2),&x2r_2,&x2i_2); + cmult(*(x3),*(tw3),&x3r_2,&x3i_2); + // dy0r = _mm_add_epi32(x0r_2,_mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2))); + // dy0i = _mm_add_epi32(x0i_2,_mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2))); + // *(y0) = cpack(dy0r,dy0i); + dy0r = _mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2)); + dy0i = _mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2)); + *(y0) = _mm_add_epi16(*(x0),cpack(dy0r,dy0i)); + // dy1r = _mm_add_epi32(x0r_2,_mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2))); + // dy1i = _mm_sub_epi32(x0i_2,_mm_add_epi32(x1r_2,_mm_sub_epi32(x2i_2,x3r_2))); + // *(y1) = cpack(dy1r,dy1i); + dy1r = _mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2)); + dy1i = _mm_sub_epi32(_mm_sub_epi32(x3r_2,x2i_2),x1r_2); + *(y1) = _mm_add_epi16(*(x0),cpack(dy1r,dy1i)); + // dy2r = _mm_sub_epi32(x0r_2,_mm_sub_epi32(x1r_2,_mm_sub_epi32(x2r_2,x3r_2))); + // dy2i = _mm_sub_epi32(x0i_2,_mm_sub_epi32(x1i_2,_mm_sub_epi32(x2i_2,x3i_2))); + // *(y2) = cpack(dy2r,dy2i); + dy2r = 
_mm_sub_epi32(_mm_sub_epi32(x2r_2,x3r_2),x1r_2); + dy2i = _mm_sub_epi32(_mm_sub_epi32(x2i_2,x3i_2),x1i_2); + *(y2) = _mm_add_epi16(*(x0),cpack(dy2r,dy2i)); + // dy3r = _mm_sub_epi32(x0r_2,_mm_add_epi32(x1i_2,_mm_sub_epi32(x2r_2,x3i_2))); + // dy3i = _mm_add_epi32(x0i_2,_mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2))); + // *(y3) = cpack(dy3r,dy3i); + dy3r = _mm_sub_epi32(_mm_sub_epi32(x3i_2,x2r_2),x1i_2); + dy3i = _mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2)); + *(y3) = _mm_add_epi16(*(x0),cpack(dy3r,dy3i)); +} + +#ifdef __AVX2__ +static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline)); + +static inline void bfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3) +{ + + __m256i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + // cmult(*(x0),*(W0),&x0r_2,&x0i_2); + cmult_256(*(x1),*(tw1),&x1r_2,&x1i_2); + cmult_256(*(x2),*(tw2),&x2r_2,&x2i_2); + cmult_256(*(x3),*(tw3),&x3r_2,&x3i_2); + // dy0r = _mm_add_epi32(x0r_2,_mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2))); + // dy0i = _mm_add_epi32(x0i_2,_mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2))); + // *(y0) = cpack(dy0r,dy0i); + dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2)); + dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2)); + *(y0) = _mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i)); + // dy1r = _mm_add_epi32(x0r_2,_mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2))); + // dy1i = _mm_sub_epi32(x0i_2,_mm_add_epi32(x1r_2,_mm_sub_epi32(x2i_2,x3r_2))); + // *(y1) = cpack(dy1r,dy1i); + dy1r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2)); + dy1i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2); + *(y1) = _mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i)); + // dy2r = 
_mm_sub_epi32(x0r_2,_mm_sub_epi32(x1r_2,_mm_sub_epi32(x2r_2,x3r_2))); + // dy2i = _mm_sub_epi32(x0i_2,_mm_sub_epi32(x1i_2,_mm_sub_epi32(x2i_2,x3i_2))); + // *(y2) = cpack(dy2r,dy2i); + dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2); + dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2); + *(y2) = _mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i)); + // dy3r = _mm_sub_epi32(x0r_2,_mm_add_epi32(x1i_2,_mm_sub_epi32(x2r_2,x3i_2))); + // dy3i = _mm_add_epi32(x0i_2,_mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2))); + // *(y3) = cpack(dy3r,dy3i); + dy3r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2); + dy3i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2)); + *(y3) = _mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i)); +} +#endif +#elif defined(__arm__) +static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)__attribute__((always_inline)); + +static inline void bfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3) +{ + + int32x4_t x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + // cmult(*(x0),*(W0),&x0r_2,&x0i_2); + cmult(*(x1),*(tw1),&x1r_2,&x1i_2); + cmult(*(x2),*(tw2),&x2r_2,&x2i_2); + cmult(*(x3),*(tw3),&x3r_2,&x3i_2); + // dy0r = _mm_add_epi32(x0r_2,_mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2))); + // dy0i = _mm_add_epi32(x0i_2,_mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2))); + // *(y0) = cpack(dy0r,dy0i); + dy0r = vqaddq_s32(x1r_2,vqaddq_s32(x2r_2,x3r_2)); + dy0i = vqaddq_s32(x1i_2,vqaddq_s32(x2i_2,x3i_2)); + *(y0) = vqaddq_s16(*(x0),cpack(dy0r,dy0i)); + // dy1r = _mm_add_epi32(x0r_2,_mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2))); + // dy1i = _mm_sub_epi32(x0i_2,_mm_add_epi32(x1r_2,_mm_sub_epi32(x2i_2,x3r_2))); + // *(y1) = cpack(dy1r,dy1i); + dy1r = 
vqsubq_s32(x1i_2,vqaddq_s32(x2r_2,x3i_2)); + dy1i = vqsubq_s32(vqsubq_s32(x3r_2,x2i_2),x1r_2); + *(y1) = vqaddq_s16(*(x0),cpack(dy1r,dy1i)); + // dy2r = _mm_sub_epi32(x0r_2,_mm_sub_epi32(x1r_2,_mm_sub_epi32(x2r_2,x3r_2))); + // dy2i = _mm_sub_epi32(x0i_2,_mm_sub_epi32(x1i_2,_mm_sub_epi32(x2i_2,x3i_2))); + // *(y2) = cpack(dy2r,dy2i); + dy2r = vqsubq_s32(vqsubq_s32(x2r_2,x3r_2),x1r_2); + dy2i = vqsubq_s32(vqsubq_s32(x2i_2,x3i_2),x1i_2); + *(y2) = vqaddq_s16(*(x0),cpack(dy2r,dy2i)); + // dy3r = _mm_sub_epi32(x0r_2,_mm_add_epi32(x1i_2,_mm_sub_epi32(x2r_2,x3i_2))); + // dy3i = _mm_add_epi32(x0i_2,_mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2))); + // *(y3) = cpack(dy3r,dy3i); + dy3r = vqsubq_s32(vqsubq_s32(x3i_2,x2r_2),x1i_2); + dy3i = vqsubq_s32(x1r_2,vqaddq_s32(x2i_2,x3r_2)); + *(y3) = vqaddq_s16(*(x0),cpack(dy3r,dy3i)); +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3)__attribute__((always_inline)); + +static inline void ibfly4(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3) +{ + + __m128i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + + cmultc(*(x1),*(tw1),&x1r_2,&x1i_2); + cmultc(*(x2),*(tw2),&x2r_2,&x2i_2); + cmultc(*(x3),*(tw3),&x3r_2,&x3i_2); + + dy0r = _mm_add_epi32(x1r_2,_mm_add_epi32(x2r_2,x3r_2)); + dy0i = _mm_add_epi32(x1i_2,_mm_add_epi32(x2i_2,x3i_2)); + *(y0) = _mm_add_epi16(*(x0),cpack(dy0r,dy0i)); + dy3r = _mm_sub_epi32(x1i_2,_mm_add_epi32(x2r_2,x3i_2)); + dy3i = _mm_sub_epi32(_mm_sub_epi32(x3r_2,x2i_2),x1r_2); + *(y3) = _mm_add_epi16(*(x0),cpack(dy3r,dy3i)); + dy2r = _mm_sub_epi32(_mm_sub_epi32(x2r_2,x3r_2),x1r_2); + dy2i = _mm_sub_epi32(_mm_sub_epi32(x2i_2,x3i_2),x1i_2); + *(y2) = _mm_add_epi16(*(x0),cpack(dy2r,dy2i)); + dy1r = 
_mm_sub_epi32(_mm_sub_epi32(x3i_2,x2r_2),x1i_2); + dy1i = _mm_sub_epi32(x1r_2,_mm_add_epi32(x2i_2,x3r_2)); + *(y1) = _mm_add_epi16(*(x0),cpack(dy1r,dy1i)); +} + +#ifdef __AVX2__ + +static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3)__attribute__((always_inline)); + +static inline void ibfly4_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3) +{ + + __m256i x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + + cmultc_256(*(x1),*(tw1),&x1r_2,&x1i_2); + cmultc_256(*(x2),*(tw2),&x2r_2,&x2i_2); + cmultc_256(*(x3),*(tw3),&x3r_2,&x3i_2); + + dy0r = _mm256_add_epi32(x1r_2,_mm256_add_epi32(x2r_2,x3r_2)); + dy0i = _mm256_add_epi32(x1i_2,_mm256_add_epi32(x2i_2,x3i_2)); + *(y0) = _mm256_add_epi16(*(x0),cpack_256(dy0r,dy0i)); + dy3r = _mm256_sub_epi32(x1i_2,_mm256_add_epi32(x2r_2,x3i_2)); + dy3i = _mm256_sub_epi32(_mm256_sub_epi32(x3r_2,x2i_2),x1r_2); + *(y3) = _mm256_add_epi16(*(x0),cpack_256(dy3r,dy3i)); + dy2r = _mm256_sub_epi32(_mm256_sub_epi32(x2r_2,x3r_2),x1r_2); + dy2i = _mm256_sub_epi32(_mm256_sub_epi32(x2i_2,x3i_2),x1i_2); + *(y2) = _mm256_add_epi16(*(x0),cpack_256(dy2r,dy2i)); + dy1r = _mm256_sub_epi32(_mm256_sub_epi32(x3i_2,x2r_2),x1i_2); + dy1i = _mm256_sub_epi32(x1r_2,_mm256_add_epi32(x2i_2,x3r_2)); + *(y1) = _mm256_add_epi16(*(x0),cpack_256(dy1r,dy1i)); +} + +#endif +#elif defined(__arm__) + +static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3)__attribute__((always_inline)); + +static inline void ibfly4(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3) +{ + + int32x4_t 
x1r_2,x1i_2,x2r_2,x2i_2,x3r_2,x3i_2,dy0r,dy0i,dy1r,dy1i,dy2r,dy2i,dy3r,dy3i; + + + cmultc(*(x1),*(tw1),&x1r_2,&x1i_2); + cmultc(*(x2),*(tw2),&x2r_2,&x2i_2); + cmultc(*(x3),*(tw3),&x3r_2,&x3i_2); + + dy0r = vqaddq_s32(x1r_2,vqaddq_s32(x2r_2,x3r_2)); + dy0i = vqaddq_s32(x1i_2,vqaddq_s32(x2i_2,x3i_2)); + *(y0) = vqaddq_s16(*(x0),cpack(dy0r,dy0i)); + dy3r = vqsubq_s32(x1i_2,vqaddq_s32(x2r_2,x3i_2)); + dy3i = vqsubq_s32(vqsubq_s32(x3r_2,x2i_2),x1r_2); + *(y3) = vqaddq_s16(*(x0),cpack(dy3r,dy3i)); + dy2r = vqsubq_s32(vqsubq_s32(x2r_2,x3r_2),x1r_2); + dy2i = vqsubq_s32(vqsubq_s32(x2i_2,x3i_2),x1i_2); + *(y2) = vqaddq_s16(*(x0),cpack(dy2r,dy2i)); + dy1r = vqsubq_s32(vqsubq_s32(x3i_2,x2r_2),x1i_2); + dy1i = vqsubq_s32(x1r_2,vqaddq_s32(x2i_2,x3r_2)); + *(y1) = vqaddq_s16(*(x0),cpack(dy1r,dy1i)); +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) + +static inline void bfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)__attribute__((always_inline)); + +static inline void bfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3) +{ + register __m128i x1_flip,x3_flip,x02t,x13t; + register __m128i complex_shuffle = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + x02t = _mm_adds_epi16(*(x0),*(x2)); + x13t = _mm_adds_epi16(*(x1),*(x3)); + *(y0) = _mm_adds_epi16(x02t,x13t); + *(y2) = _mm_subs_epi16(x02t,x13t); + x1_flip = _mm_sign_epi16(*(x1),*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm_sign_epi16(*(x3),*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm_subs_epi16(*(x0),*(x2)); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + *(y1) = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y3) = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + /* + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(*(x1),_mm_adds_epi16(*(x2),*(x3)))); + x1_flip = 
_mm_sign_epi16(*(x1),*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(*(x3),*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + *(y1) = _mm_adds_epi16(*(x0),_mm_subs_epi16(x1_flip,_mm_adds_epi16(*(x2),x3_flip))); + *(y2) = _mm_subs_epi16(*(x0),_mm_subs_epi16(*(x1),_mm_subs_epi16(*(x2),*(x3)))); + *(y3) = _mm_subs_epi16(*(x0),_mm_adds_epi16(x1_flip,_mm_subs_epi16(*(x2),x3_flip))); + */ +} + +#ifdef __AVX2__ + +static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3)__attribute__((always_inline)); + +static inline void bfly4_tw1_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3) +{ + register __m256i x1_flip,x3_flip,x02t,x13t; + register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + x02t = _mm256_adds_epi16(*(x0),*(x2)); + x13t = _mm256_adds_epi16(*(x1),*(x3)); + *(y0) = _mm256_adds_epi16(x02t,x13t); + *(y2) = _mm256_subs_epi16(x02t,x13t); + x1_flip = _mm256_sign_epi16(*(x1),*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(*(x3),*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(*(x0),*(x2)); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + *(y1) = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y3) = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f +} +#endif + +#elif defined(__arm__) + +static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3)__attribute__((always_inline)); + +static inline void bfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t 
*y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3) +{ + + register int16x8_t x1_flip,x3_flip; + + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),vqaddq_s16(*(x2),*(x3)))); + x1_flip = vrev32q_s16(vmulq_s16(*(x1),*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(*(x3),*(int16x8_t*)conjugatedft)); + *(y1) = vqaddq_s16(*(x0),vqsubq_s16(x1_flip,vqaddq_s16(*(x2),x3_flip))); + *(y2) = vqsubq_s16(*(x0),vqsubq_s16(*(x1),vqsubq_s16(*(x2),*(x3)))); + *(y3) = vqsubq_s16(*(x0),vqaddq_s16(x1_flip,vqsubq_s16(*(x2),x3_flip))); +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) + +static inline void ibfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3)__attribute__((always_inline)); + +static inline void ibfly4_tw1(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3) +{ + + register __m128i x1_flip,x3_flip; + + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(*(x1),_mm_adds_epi16(*(x2),*(x3)))); + + x1_flip = _mm_sign_epi16(*(x1),*(__m128i*)conjugatedft); + // x1_flip = _mm_shufflelo_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + // x1_flip = _mm_shufflehi_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(*(x3),*(__m128i*)conjugatedft); + // x3_flip = _mm_shufflelo_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + // x3_flip = _mm_shufflehi_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + *(y1) = _mm_subs_epi16(*(x0),_mm_adds_epi16(x1_flip,_mm_subs_epi16(*(x2),x3_flip))); + *(y2) = _mm_subs_epi16(*(x0),_mm_subs_epi16(*(x1),_mm_subs_epi16(*(x2),*(x3)))); + *(y3) = _mm_adds_epi16(*(x0),_mm_subs_epi16(x1_flip,_mm_adds_epi16(*(x2),x3_flip))); +} + + +#elif defined(__arm__) +static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t 
*y2,int16x8_t *y3)__attribute__((always_inline)); + +static inline void ibfly4_tw1(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3) +{ + + register int16x8_t x1_flip,x3_flip; + + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),vqaddq_s16(*(x2),*(x3)))); + x1_flip = vrev32q_s16(vmulq_s16(*(x1),*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(*(x3),*(int16x8_t*)conjugatedft)); + *(y1) = vqsubq_s16(*(x0),vqaddq_s16(x1_flip,vqsubq_s16(*(x2),x3_flip))); + *(y2) = vqsubq_s16(*(x0),vqsubq_s16(*(x1),vqsubq_s16(*(x2),*(x3)))); + *(y3) = vqaddq_s16(*(x0),vqsubq_s16(x1_flip,vqaddq_s16(*(x2),x3_flip))); +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3, + __m128i *tw1b,__m128i *tw2b,__m128i *tw3b)__attribute__((always_inline)); + +static inline void bfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3, + __m128i *tw1b,__m128i *tw2b,__m128i *tw3b) +{ + + register __m128i x1t,x2t,x3t,x02t,x13t; + register __m128i x1_flip,x3_flip; + + x1t = packed_cmult2(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2(*(x3),*(tw3),*(tw3b)); + + + // bfly4_tw1(x0,&x1t,&x2t,&x3t,y0,y1,y2,y3); + x02t = _mm_adds_epi16(*(x0),x2t); + x13t = _mm_adds_epi16(x1t,x3t); + /* + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1t,_mm_adds_epi16(x2t,x3t))); + *(y2) = _mm_subs_epi16(*(x0),_mm_subs_epi16(x1t,_mm_subs_epi16(x2t,x3t))); + */ + *(y0) = _mm_adds_epi16(x02t,x13t); + *(y2) = _mm_subs_epi16(x02t,x13t); + + x1_flip = _mm_sign_epi16(x1t,*(__m128i*)conjugatedft); + // x1_flip = _mm_shufflelo_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + // x1_flip = _mm_shufflehi_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + x1_flip = 
_mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(x3t,*(__m128i*)conjugatedft); + // x3_flip = _mm_shufflelo_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + // x3_flip = _mm_shufflehi_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x02t = _mm_subs_epi16(*(x0),x2t); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + /* + *(y1) = _mm_adds_epi16(*(x0),_mm_subs_epi16(x1_flip,_mm_adds_epi16(x2t,x3_flip))); // x0 + x1f - x2 - x3f + *(y3) = _mm_subs_epi16(*(x0),_mm_adds_epi16(x1_flip,_mm_subs_epi16(x2t,x3_flip))); // x0 - x1f - x2 + x3f + */ + *(y1) = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y3) = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +} + +#ifdef __AVX2__ +static inline void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3, + __m256i *tw1b,__m256i *tw2b,__m256i *tw3b)__attribute__((always_inline)); + +static inline void bfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3, + __m256i *tw1b,__m256i *tw2b,__m256i *tw3b) +{ + + register __m256i x1t,x2t,x3t,x02t,x13t; + register __m256i x1_flip,x3_flip; + register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + // each input xi is assumed to be to consecutive vectors xi0 xi1 on which to perform the 8 butterflies + // [xi00 xi01 xi02 xi03 xi10 xi20 xi30 xi40] + // each output yi is the same + + x1t = packed_cmult2_256(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2_256(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2_256(*(x3),*(tw3),*(tw3b)); + + x02t = _mm256_adds_epi16(*(x0),x2t); + x13t = _mm256_adds_epi16(x1t,x3t); + *(y0) = _mm256_adds_epi16(x02t,x13t); + *(y2) = _mm256_subs_epi16(x02t,x13t); + 
+ x1_flip = _mm256_sign_epi16(x1t,*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(x3t,*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(*(x0),x2t); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + *(y1) = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y3) = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +} + +#endif + +#elif defined(__arm__) + +static inline void bfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3, + int16x8_t *tw1b,int16x8_t *tw2b,int16x8_t *tw3b)__attribute__((always_inline)); + +static inline void bfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3, + int16x8_t *tw1b,int16x8_t *tw2b,int16x8_t *tw3b) +{ + + register int16x8_t x1t,x2t,x3t,x02t,x13t; + register int16x8_t x1_flip,x3_flip; + + x1t = packed_cmult2(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2(*(x3),*(tw3),*(tw3b)); + + + + x02t = vqaddq_s16(*(x0),x2t); + x13t = vqaddq_s16(x1t,x3t); + *(y0) = vqaddq_s16(x02t,x13t); + *(y2) = vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(x1t,*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(x3t,*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(*(x0),x2t); + x13t = vqsubq_s16(x1_flip,x3_flip); + *(y1) = vqaddq_s16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y3) = vqsubq_s16(x02t,x13t); // x0 - x1f - x2 + x3f +} +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void ibfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3, + __m128i *tw1b,__m128i *tw2b,__m128i *tw3b)__attribute__((always_inline)); + 
+static inline void ibfly4_16(__m128i *x0,__m128i *x1,__m128i *x2,__m128i *x3, + __m128i *y0,__m128i *y1,__m128i *y2,__m128i *y3, + __m128i *tw1,__m128i *tw2,__m128i *tw3, + __m128i *tw1b,__m128i *tw2b,__m128i *tw3b) +{ + + register __m128i x1t,x2t,x3t,x02t,x13t; + register __m128i x1_flip,x3_flip; + + x1t = packed_cmult2(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2(*(x3),*(tw3),*(tw3b)); + + + // bfly4_tw1(x0,&x1t,&x2t,&x3t,y0,y1,y2,y3); + x02t = _mm_adds_epi16(*(x0),x2t); + x13t = _mm_adds_epi16(x1t,x3t); + /* + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1t,_mm_adds_epi16(x2t,x3t))); + *(y2) = _mm_subs_epi16(*(x0),_mm_subs_epi16(x1t,_mm_subs_epi16(x2t,x3t))); + */ + *(y0) = _mm_adds_epi16(x02t,x13t); + *(y2) = _mm_subs_epi16(x02t,x13t); + + x1_flip = _mm_sign_epi16(x1t,*(__m128i*)conjugatedft); + // x1_flip = _mm_shufflelo_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + // x1_flip = _mm_shufflehi_epi16(x1_flip,_MM_SHUFFLE(2,3,0,1)); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(x3t,*(__m128i*)conjugatedft); + // x3_flip = _mm_shufflelo_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + // x3_flip = _mm_shufflehi_epi16(x3_flip,_MM_SHUFFLE(2,3,0,1)); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x02t = _mm_subs_epi16(*(x0),x2t); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + /* + *(y1) = _mm_adds_epi16(*(x0),_mm_subs_epi16(x1_flip,_mm_adds_epi16(x2t,x3_flip))); // x0 + x1f - x2 - x3f + *(y3) = _mm_subs_epi16(*(x0),_mm_adds_epi16(x1_flip,_mm_subs_epi16(x2t,x3_flip))); // x0 - x1f - x2 + x3f + */ + *(y3) = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y1) = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +} + +#ifdef __AVX2__ +static inline void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3, + __m256i 
*tw1b,__m256i *tw2b,__m256i *tw3b)__attribute__((always_inline)); + +static inline void ibfly4_16_256(__m256i *x0,__m256i *x1,__m256i *x2,__m256i *x3, + __m256i *y0,__m256i *y1,__m256i *y2,__m256i *y3, + __m256i *tw1,__m256i *tw2,__m256i *tw3, + __m256i *tw1b,__m256i *tw2b,__m256i *tw3b) +{ + + register __m256i x1t,x2t,x3t,x02t,x13t; + register __m256i x1_flip,x3_flip; + register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + // each input xi is assumed to be to consecutive vectors xi0 xi1 on which to perform the 8 butterflies + // [xi00 xi01 xi02 xi03 xi10 xi20 xi30 xi40] + // each output yi is the same + + x1t = packed_cmult2_256(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2_256(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2_256(*(x3),*(tw3),*(tw3b)); + + x02t = _mm256_adds_epi16(*(x0),x2t); + x13t = _mm256_adds_epi16(x1t,x3t); + *(y0) = _mm256_adds_epi16(x02t,x13t); + *(y2) = _mm256_subs_epi16(x02t,x13t); + + x1_flip = _mm256_sign_epi16(x1t,*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(x3t,*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(*(x0),x2t); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + *(y3) = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + *(y1) = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +} +#endif + +#elif defined(__arm__) +static inline void ibfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3, + int16x8_t *tw1b,int16x8_t *tw2b,int16x8_t *tw3b)__attribute__((always_inline)); + +static inline void ibfly4_16(int16x8_t *x0,int16x8_t *x1,int16x8_t *x2,int16x8_t *x3, + int16x8_t *y0,int16x8_t *y1,int16x8_t *y2,int16x8_t *y3, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3, + int16x8_t *tw1b,int16x8_t 
*tw2b,int16x8_t *tw3b) +{ + + register int16x8_t x1t,x2t,x3t,x02t,x13t; + register int16x8_t x1_flip,x3_flip; + + x1t = packed_cmult2(*(x1),*(tw1),*(tw1b)); + x2t = packed_cmult2(*(x2),*(tw2),*(tw2b)); + x3t = packed_cmult2(*(x3),*(tw3),*(tw3b)); + + x02t = vqaddq_s16(*(x0),x2t); + x13t = vqaddq_s16(x1t,x3t); + *(y0) = vqaddq_s16(x02t,x13t); + *(y2) = vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(x1t,*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(x3t,*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(*(x0),x2t); + x13t = vqsubq_s16(x1_flip,x3_flip); + *(y3) = vqaddq_s16(x02t,x13t); // x0 - x1f - x2 + x3f + *(y1) = vqsubq_s16(x02t,x13t); // x0 + x1f - x2 - x3f +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) +static inline void bfly5(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4, + __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4, + __m128i *tw1,__m128i *tw2,__m128i *tw3,__m128i *tw4)__attribute__((always_inline)); + +static inline void bfly5(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4, + __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4, + __m128i *tw1,__m128i *tw2,__m128i *tw3,__m128i *tw4) +{ + + + + __m128i x1_2,x2_2,x3_2,x4_2,tmpre,tmpim; + + packed_cmult(*(x1),*(tw1),&x1_2); + packed_cmult(*(x2),*(tw2),&x2_2); + packed_cmult(*(x3),*(tw3),&x3_2); + packed_cmult(*(x4),*(tw4),&x4_2); + + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(x1_2,_mm_adds_epi16(x2_2,_mm_adds_epi16(x3_2,x4_2)))); + cmult(x1_2,*(W15),&tmpre,&tmpim); + cmac(x2_2,*(W25),&tmpre,&tmpim); + cmac(x3_2,*(W35),&tmpre,&tmpim); + cmac(x4_2,*(W45),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = _mm_adds_epi16(*(x0),*(y1)); + + cmult(x1_2,*(W25),&tmpre,&tmpim); + cmac(x2_2,*(W45),&tmpre,&tmpim); + cmac(x3_2,*(W15),&tmpre,&tmpim); + cmac(x4_2,*(W35),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = _mm_adds_epi16(*(x0),*(y2)); + + cmult(x1_2,*(W35),&tmpre,&tmpim); + 
cmac(x2_2,*(W15),&tmpre,&tmpim); + cmac(x3_2,*(W45),&tmpre,&tmpim); + cmac(x4_2,*(W25),&tmpre,&tmpim); + *(y3) = cpack(tmpre,tmpim); + *(y3) = _mm_adds_epi16(*(x0),*(y3)); + + cmult(x1_2,*(W45),&tmpre,&tmpim); + cmac(x2_2,*(W35),&tmpre,&tmpim); + cmac(x3_2,*(W25),&tmpre,&tmpim); + cmac(x4_2,*(W15),&tmpre,&tmpim); + *(y4) = cpack(tmpre,tmpim); + *(y4) = _mm_adds_epi16(*(x0),*(y4)); + + +} + +#ifdef __AVX2__ + +static inline void bfly5_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4, + __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4, + __m256i *tw1,__m256i *tw2,__m256i *tw3,__m256i *tw4)__attribute__((always_inline)); + +static inline void bfly5_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4, + __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4, + __m256i *tw1,__m256i *tw2,__m256i *tw3,__m256i *tw4) +{ + + + + __m256i x1_2,x2_2,x3_2,x4_2,tmpre,tmpim; + + packed_cmult_256(*(x1),*(tw1),&x1_2); + packed_cmult_256(*(x2),*(tw2),&x2_2); + packed_cmult_256(*(x3),*(tw3),&x3_2); + packed_cmult_256(*(x4),*(tw4),&x4_2); + + *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(x1_2,_mm256_adds_epi16(x2_2,_mm256_adds_epi16(x3_2,x4_2)))); + cmult_256(x1_2,*(W15_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W25_256),&tmpre,&tmpim); + cmac_256(x3_2,*(W35_256),&tmpre,&tmpim); + cmac_256(x4_2,*(W45_256),&tmpre,&tmpim); + *(y1) = cpack_256(tmpre,tmpim); + *(y1) = _mm256_adds_epi16(*(x0),*(y1)); + + cmult_256(x1_2,*(W25_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W45_256),&tmpre,&tmpim); + cmac_256(x3_2,*(W15_256),&tmpre,&tmpim); + cmac_256(x4_2,*(W35_256),&tmpre,&tmpim); + *(y2) = cpack_256(tmpre,tmpim); + *(y2) = _mm256_adds_epi16(*(x0),*(y2)); + + cmult_256(x1_2,*(W35_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W15_256),&tmpre,&tmpim); + cmac_256(x3_2,*(W45_256),&tmpre,&tmpim); + cmac_256(x4_2,*(W25_256),&tmpre,&tmpim); + *(y3) = cpack_256(tmpre,tmpim); + *(y3) = _mm256_adds_epi16(*(x0),*(y3)); + + 
cmult_256(x1_2,*(W45_256),&tmpre,&tmpim); + cmac_256(x2_2,*(W35_256),&tmpre,&tmpim); + cmac_256(x3_2,*(W25_256),&tmpre,&tmpim); + cmac_256(x4_2,*(W15_256),&tmpre,&tmpim); + *(y4) = cpack_256(tmpre,tmpim); + *(y4) = _mm256_adds_epi16(*(x0),*(y4)); + + +} +#endif + +#elif defined(__arm__) +static inline void bfly5(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4, + int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,int16x8_t *tw4)__attribute__((always_inline)); + +static inline void bfly5(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4, + int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4, + int16x8_t *tw1,int16x8_t *tw2,int16x8_t *tw3,int16x8_t *tw4) +{ + + + + int16x8_t x1_2,x2_2,x3_2,x4_2; + int32x4_t tmpre,tmpim; + + packed_cmult(*(x1),*(tw1),&x1_2); + packed_cmult(*(x2),*(tw2),&x2_2); + packed_cmult(*(x3),*(tw3),&x3_2); + packed_cmult(*(x4),*(tw4),&x4_2); + + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(x1_2,vqaddq_s16(x2_2,vqaddq_s16(x3_2,x4_2)))); + cmult(x1_2,*(W15),&tmpre,&tmpim); + cmac(x2_2,*(W25),&tmpre,&tmpim); + cmac(x3_2,*(W35),&tmpre,&tmpim); + cmac(x4_2,*(W45),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = vqaddq_s16(*(x0),*(y1)); + + cmult(x1_2,*(W25),&tmpre,&tmpim); + cmac(x2_2,*(W45),&tmpre,&tmpim); + cmac(x3_2,*(W15),&tmpre,&tmpim); + cmac(x4_2,*(W35),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = vqaddq_s16(*(x0),*(y2)); + + cmult(x1_2,*(W35),&tmpre,&tmpim); + cmac(x2_2,*(W15),&tmpre,&tmpim); + cmac(x3_2,*(W45),&tmpre,&tmpim); + cmac(x4_2,*(W25),&tmpre,&tmpim); + *(y3) = cpack(tmpre,tmpim); + *(y3) = vqaddq_s16(*(x0),*(y3)); + + cmult(x1_2,*(W45),&tmpre,&tmpim); + cmac(x2_2,*(W35),&tmpre,&tmpim); + cmac(x3_2,*(W25),&tmpre,&tmpim); + cmac(x4_2,*(W15),&tmpre,&tmpim); + *(y4) = cpack(tmpre,tmpim); + *(y4) = vqaddq_s16(*(x0),*(y4)); + + +} + + +#endif + +#if defined(__x86_64__) || defined(__i386__) 
+static inline void bfly5_tw1(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4, + __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4) __attribute__((always_inline)); + +static inline void bfly5_tw1(__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3,__m128i *x4, + __m128i *y0, __m128i *y1, __m128i *y2, __m128i *y3,__m128i *y4) +{ + + __m128i tmpre,tmpim; + + *(y0) = _mm_adds_epi16(*(x0),_mm_adds_epi16(*(x1),_mm_adds_epi16(*(x2),_mm_adds_epi16(*(x3),*(x4))))); + cmult(*(x1),*(W15),&tmpre,&tmpim); + cmac(*(x2),*(W25),&tmpre,&tmpim); + cmac(*(x3),*(W35),&tmpre,&tmpim); + cmac(*(x4),*(W45),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = _mm_adds_epi16(*(x0),*(y1)); + cmult(*(x1),*(W25),&tmpre,&tmpim); + cmac(*(x2),*(W45),&tmpre,&tmpim); + cmac(*(x3),*(W15),&tmpre,&tmpim); + cmac(*(x4),*(W35),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = _mm_adds_epi16(*(x0),*(y2)); + cmult(*(x1),*(W35),&tmpre,&tmpim); + cmac(*(x2),*(W15),&tmpre,&tmpim); + cmac(*(x3),*(W45),&tmpre,&tmpim); + cmac(*(x4),*(W25),&tmpre,&tmpim); + *(y3) = cpack(tmpre,tmpim); + *(y3) = _mm_adds_epi16(*(x0),*(y3)); + cmult(*(x1),*(W45),&tmpre,&tmpim); + cmac(*(x2),*(W35),&tmpre,&tmpim); + cmac(*(x3),*(W25),&tmpre,&tmpim); + cmac(*(x4),*(W15),&tmpre,&tmpim); + *(y4) = cpack(tmpre,tmpim); + *(y4) = _mm_adds_epi16(*(x0),*(y4)); +} + +#ifdef __AVX2__ +static inline void bfly5_tw1_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4, + __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4) __attribute__((always_inline)); + +static inline void bfly5_tw1_256(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3,__m256i *x4, + __m256i *y0, __m256i *y1, __m256i *y2, __m256i *y3,__m256i *y4) +{ + + __m256i tmpre,tmpim; + + *(y0) = _mm256_adds_epi16(*(x0),_mm256_adds_epi16(*(x1),_mm256_adds_epi16(*(x2),_mm256_adds_epi16(*(x3),*(x4))))); + cmult_256(*(x1),*(W15_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W25_256),&tmpre,&tmpim); + 
cmac_256(*(x3),*(W35_256),&tmpre,&tmpim); + cmac_256(*(x4),*(W45_256),&tmpre,&tmpim); + *(y1) = cpack_256(tmpre,tmpim); + *(y1) = _mm256_adds_epi16(*(x0),*(y1)); + cmult_256(*(x1),*(W25_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W45_256),&tmpre,&tmpim); + cmac_256(*(x3),*(W15_256),&tmpre,&tmpim); + cmac_256(*(x4),*(W35_256),&tmpre,&tmpim); + *(y2) = cpack_256(tmpre,tmpim); + *(y2) = _mm256_adds_epi16(*(x0),*(y2)); + cmult_256(*(x1),*(W35_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W15_256),&tmpre,&tmpim); + cmac_256(*(x3),*(W45_256),&tmpre,&tmpim); + cmac_256(*(x4),*(W25_256),&tmpre,&tmpim); + *(y3) = cpack_256(tmpre,tmpim); + *(y3) = _mm256_adds_epi16(*(x0),*(y3)); + cmult_256(*(x1),*(W45_256),&tmpre,&tmpim); + cmac_256(*(x2),*(W35_256),&tmpre,&tmpim); + cmac_256(*(x3),*(W25_256),&tmpre,&tmpim); + cmac_256(*(x4),*(W15_256),&tmpre,&tmpim); + *(y4) = cpack_256(tmpre,tmpim); + *(y4) = _mm256_adds_epi16(*(x0),*(y4)); +} +#endif +#elif defined(__arm__) +static inline void bfly5_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4, + int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4) __attribute__((always_inline)); + +static inline void bfly5_tw1(int16x8_t *x0, int16x8_t *x1, int16x8_t *x2, int16x8_t *x3,int16x8_t *x4, + int16x8_t *y0, int16x8_t *y1, int16x8_t *y2, int16x8_t *y3,int16x8_t *y4) +{ + + int32x4_t tmpre,tmpim; + + *(y0) = vqaddq_s16(*(x0),vqaddq_s16(*(x1),vqaddq_s16(*(x2),vqaddq_s16(*(x3),*(x4))))); + cmult(*(x1),*(W15),&tmpre,&tmpim); + cmac(*(x2),*(W25),&tmpre,&tmpim); + cmac(*(x3),*(W35),&tmpre,&tmpim); + cmac(*(x4),*(W45),&tmpre,&tmpim); + *(y1) = cpack(tmpre,tmpim); + *(y1) = vqaddq_s16(*(x0),*(y1)); + cmult(*(x1),*(W25),&tmpre,&tmpim); + cmac(*(x2),*(W45),&tmpre,&tmpim); + cmac(*(x3),*(W15),&tmpre,&tmpim); + cmac(*(x4),*(W35),&tmpre,&tmpim); + *(y2) = cpack(tmpre,tmpim); + *(y2) = vqaddq_s16(*(x0),*(y2)); + cmult(*(x1),*(W35),&tmpre,&tmpim); + cmac(*(x2),*(W15),&tmpre,&tmpim); + cmac(*(x3),*(W45),&tmpre,&tmpim); 
+ cmac(*(x4),*(W25),&tmpre,&tmpim); + *(y3) = cpack(tmpre,tmpim); + *(y3) = vqaddq_s16(*(x0),*(y3)); + cmult(*(x1),*(W45),&tmpre,&tmpim); + cmac(*(x2),*(W35),&tmpre,&tmpim); + cmac(*(x3),*(W25),&tmpre,&tmpim); + cmac(*(x4),*(W15),&tmpre,&tmpim); + *(y4) = cpack(tmpre,tmpim); + *(y4) = vqaddq_s16(*(x0),*(y4)); +} + +#endif +// performs 4x4 transpose of input x (complex interleaved) using 128bit SIMD intrinsics +// i.e. x = [x0r x0i x1r x1i ... x15r x15i], y = [x0r x0i x4r x4i x8r x8i x12r x12i x1r x1i x5r x5i x9r x9i x13r x13i x2r x2i ... x15r x15i] + +#if defined(__x86_64__) || defined(__i386__) +static inline void transpose16(__m128i *x,__m128i *y) __attribute__((always_inline)); +static inline void transpose16(__m128i *x,__m128i *y) +{ + register __m128i ytmp0,ytmp1,ytmp2,ytmp3; + + ytmp0 = _mm_unpacklo_epi32(x[0],x[1]); + ytmp1 = _mm_unpackhi_epi32(x[0],x[1]); + ytmp2 = _mm_unpacklo_epi32(x[2],x[3]); + ytmp3 = _mm_unpackhi_epi32(x[2],x[3]); + y[0] = _mm_unpacklo_epi64(ytmp0,ytmp2); + y[1] = _mm_unpackhi_epi64(ytmp0,ytmp2); + y[2] = _mm_unpacklo_epi64(ytmp1,ytmp3); + y[3] = _mm_unpackhi_epi64(ytmp1,ytmp3); +} + +#elif defined(__arm__) +static inline void transpose16(int16x8_t *x,int16x8_t *y) __attribute__((always_inline)); +static inline void transpose16(int16x8_t *x,int16x8_t *y) +{ + register uint32x4x2_t ytmp0,ytmp1; + + ytmp0 = vtrnq_u32((uint32x4_t)(x[0]),(uint32x4_t)(x[1])); + ytmp1 = vtrnq_u32((uint32x4_t)(x[2]),(uint32x4_t)(x[3])); + + y[0] = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0])); + y[1] = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0])); + y[2] = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1])); + y[3] = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1])); +} + +# endif +// same as above but output is offset by off +#if defined(__x86_64__) || defined(__i386__) +static inline void 
transpose16_ooff(__m128i *x,__m128i *y,int off) __attribute__((always_inline)); + +static inline void transpose16_ooff(__m128i *x,__m128i *y,int off) +{ + register __m128i ytmp0,ytmp1,ytmp2,ytmp3; + __m128i *y2=y; + + ytmp0 = _mm_unpacklo_epi32(x[0],x[1]); // x00 x10 x01 x11 + ytmp1 = _mm_unpackhi_epi32(x[0],x[1]); // x02 x12 x03 x13 + ytmp2 = _mm_unpacklo_epi32(x[2],x[3]); // x20 x30 x21 x31 + ytmp3 = _mm_unpackhi_epi32(x[2],x[3]); // x22 x32 x23 x33 + *y2 = _mm_unpacklo_epi64(ytmp0,ytmp2); // x00 x10 x20 x30 + y2+=off; + *y2 = _mm_unpackhi_epi64(ytmp0,ytmp2); // x01 x11 x21 x31 + y2+=off; + *y2 = _mm_unpacklo_epi64(ytmp1,ytmp3); // x02 x12 x22 x32 + y2+=off; + *y2 = _mm_unpackhi_epi64(ytmp1,ytmp3); // x03 x13 x23 x33 +} + +#ifdef __AVX2__ + +static inline void transpose16_ooff_simd256(__m256i *x,__m256i *y,int off) __attribute__((always_inline)); +static inline void transpose16_ooff_simd256(__m256i *x,__m256i *y,int off) +{ + register __m256i ytmp0,ytmp1,ytmp2,ytmp3,ytmp4,ytmp5,ytmp6,ytmp7; + __m256i *y2=y; + __m256i const perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0); + + ytmp0 = _mm256_permutevar8x32_epi32(x[0],perm_mask); // x00 x10 x01 x11 x02 x12 x03 x13 + ytmp1 = _mm256_permutevar8x32_epi32(x[1],perm_mask); // x20 x30 x21 x31 x22 x32 x23 x33 + ytmp2 = _mm256_permutevar8x32_epi32(x[2],perm_mask); // x40 x50 x41 x51 x42 x52 x43 x53 + ytmp3 = _mm256_permutevar8x32_epi32(x[3],perm_mask); // x60 x70 x61 x71 x62 x72 x63 x73 + ytmp4 = _mm256_unpacklo_epi64(ytmp0,ytmp1); // x00 x10 x20 x30 x01 x11 x21 x31 + ytmp5 = _mm256_unpackhi_epi64(ytmp0,ytmp1); // x02 x12 x22 x32 x03 x13 x23 x33 + ytmp6 = _mm256_unpacklo_epi64(ytmp2,ytmp3); // x40 x50 x60 x70 x41 x51 x61 x71 + ytmp7 = _mm256_unpackhi_epi64(ytmp2,ytmp3); // x42 x52 x62 x72 x43 x53 x63 x73 + + *y2 = _mm256_insertf128_si256(ytmp4,_mm256_extracti128_si256(ytmp6,0),1); //x00 x10 x20 x30 x40 x50 x60 x70 + y2+=off; + *y2 = _mm256_insertf128_si256(ytmp6,_mm256_extracti128_si256(ytmp4,1),0); //x01 x11 x21 x31 
x41 x51 x61 x71 + y2+=off; + *y2 = _mm256_insertf128_si256(ytmp5,_mm256_extracti128_si256(ytmp7,0),1); //x00 x10 x20 x30 x40 x50 x60 x70 + y2+=off; + *y2 = _mm256_insertf128_si256(ytmp7,_mm256_extracti128_si256(ytmp5,1),0); //x01 x11 x21 x31 x41 x51 x61 x71 +} +#endif + +#elif defined(__arm__) +static inline void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off) __attribute__((always_inline)); + +static inline void transpose16_ooff(int16x8_t *x,int16x8_t *y,int off) +{ + int16x8_t *y2=y; + register uint32x4x2_t ytmp0,ytmp1; + + ytmp0 = vtrnq_u32((uint32x4_t)(x[0]),(uint32x4_t)(x[1])); + ytmp1 = vtrnq_u32((uint32x4_t)(x[2]),(uint32x4_t)(x[3])); + + *y2 = (int16x8_t)vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0])); y2+=off; + *y2 = (int16x8_t)vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1])); y2+=off; + *y2 = (int16x8_t)vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0])); y2+=off; + *y2 = (int16x8_t)vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1])); + + +} + +#endif + +#if defined(__x86_64__) || defined(__i386__) + +static inline void transpose4_ooff(__m64 *x,__m64 *y,int off)__attribute__((always_inline)); +static inline void transpose4_ooff(__m64 *x,__m64 *y,int off) +{ + y[0] = _mm_unpacklo_pi32(x[0],x[1]); + y[off] = _mm_unpackhi_pi32(x[0],x[1]); + + // x[0] = [x0 x1] + // x[1] = [x2 x3] + // y[0] = [x0 x2] + // y[off] = [x1 x3] +} +#ifdef __AVX2__ +static inline void transpose4_ooff_simd256(__m256i *x,__m256i *y,int off)__attribute__((always_inline)); +static inline void transpose4_ooff_simd256(__m256i *x,__m256i *y,int off) +{ + __m256i const perm_mask = _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0); + __m256i perm_tmp0,perm_tmp1; + + // x[0] = [x0 x1 x2 x3 x4 x5 x6 x7] + // x[1] = [x8 x9 x10 x11 x12 x13 x14] + // y[0] = [x0 x2 x4 x6 x8 x10 x12 x14] + // y[off] = [x1 x3 x5 x7 x9 x11 x13 x15] + perm_tmp0 
= _mm256_permutevar8x32_epi32(x[0],perm_mask); + perm_tmp1 = _mm256_permutevar8x32_epi32(x[1],perm_mask); + y[0] = _mm256_insertf128_si256(perm_tmp0,_mm256_extracti128_si256(perm_tmp1,0),1); + y[off] = _mm256_insertf128_si256(perm_tmp1,_mm256_extracti128_si256(perm_tmp0,1),0); +} +#endif +#elif (__arm__) + +static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off)__attribute__((always_inline)); +static inline void transpose4_ooff(int16x4_t *x,int16x4_t *y,int off) +{ + uint32x2x2_t ytmp = vtrn_u32((uint32x2_t)x[0],(uint32x2_t)x[1]); + + y[0] = (int16x4_t)ytmp.val[0]; + y[off] = (int16x4_t)ytmp.val[1]; +} + +#endif + +// 16-point optimized DFT kernel + +const static int16_t tw16[24] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273, + 32767,0,23169,-23170,0 ,-32767,-23170,-23170, + 32767,0,12539,-30273,-23170,-23170,-30273,12539 + }; + +const static int16_t tw16a[24] __attribute__((aligned(32))) = {32767,0,30272,12540,23169 ,23170,12539 ,30273, + 32767,0,23169,23170,0 ,32767,-23170,23170, + 32767,0,12539,30273,-23170,23170,-30273,-12539 + }; + +const static int16_t tw16b[24] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539, + 0,32767,-23170,23169,-32767,0 ,-23170,-23170, + 0,32767,-30273,12539,-23170,-23170,12539 ,-30273 + }; + +const static int16_t tw16c[24] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539, + 0,32767,23170,23169,32767,0 ,23170 ,-23170, + 0,32767,30273,12539,23170,-23170,-12539,-30273 + }; + +#ifdef __AVX2__ + +const static int16_t tw16rep[48] __attribute__((aligned(32))) = { 32767,0,30272,-12540,23169 ,-23170,12539 ,-30273,32767,0,30272,-12540,23169 ,-23170,12539 ,-30273, + 32767,0,23169,-23170,0 ,-32767,-23170,-23170,32767,0,23169,-23170,0 ,-32767,-23170,-23170, + 32767,0,12539,-30273,-23170,-23170,-30273,12539,32767,0,12539,-30273,-23170,-23170,-30273,12539 + }; + +const static int16_t tw16arep[48] __attribute__((aligned(32))) = 
{32767,0,30272,12540,23169 ,23170,12539 ,30273,32767,0,30272,12540,23169 ,23170,12539 ,30273, + 32767,0,23169,23170,0 ,32767,-23170,23170,32767,0,23169,23170,0 ,32767,-23170,23170, + 32767,0,12539,30273,-23170,23170,-30273,-12539,32767,0,12539,30273,-23170,23170,-30273,-12539 + }; + +const static int16_t tw16brep[48] __attribute__((aligned(32))) = { 0,32767,-12540,30272,-23170,23169 ,-30273,12539,0,32767,-12540,30272,-23170,23169 ,-30273,12539, + 0,32767,-23170,23169,-32767,0 ,-23170,-23170,0,32767,-23170,23169,-32767,0 ,-23170,-23170, + 0,32767,-30273,12539,-23170,-23170,12539 ,-30273,0,32767,-30273,12539,-23170,-23170,12539 ,-30273 + }; + +const static int16_t tw16crep[48] __attribute__((aligned(32))) = { 0,32767,12540,30272,23170,23169 ,30273 ,12539,0,32767,12540,30272,23170,23169 ,30273 ,12539, + 0,32767,23170,23169,32767,0 ,23170 ,-23170,0,32767,23170,23169,32767,0 ,23170 ,-23170, + 0,32767,30273,12539,23170,-23170,-12539,-30273,0,32767,30273,12539,23170,-23170,-12539,-30273 + }; + +#endif /* __AVX2__ */ + + + +static inline void dft16(int16_t *x,int16_t *y) __attribute__((always_inline)); + +static inline void dft16(int16_t *x,int16_t *y) +{ + +#if defined(__x86_64__) || defined(__i386__) + + __m128i *tw16a_128=(__m128i *)tw16a,*tw16b_128=(__m128i *)tw16b,*x128=(__m128i *)x,*y128=(__m128i *)y; + + + + /* This is the original version before unrolling + + bfly4_tw1(x128,x128+1,x128+2,x128+3, + y128,y128+1,y128+2,y128+3); + + transpose16(y128,ytmp); + + bfly4_16(ytmp,ytmp+1,ytmp+2,ytmp+3, + y128,y128+1,y128+2,y128+3, + tw16_128,tw16_128+1,tw16_128+2); + */ + + register __m128i x1_flip,x3_flip,x02t,x13t; + register __m128i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3; + register __m128i complex_shuffle = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = _mm_adds_epi16(x128[0],x128[2]); + x13t = _mm_adds_epi16(x128[1],x128[3]); + xtmp0 = _mm_adds_epi16(x02t,x13t); + xtmp2 = 
_mm_subs_epi16(x02t,x13t); + x1_flip = _mm_sign_epi16(x128[1],*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm_sign_epi16(x128[3],*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm_subs_epi16(x128[0],x128[2]); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + xtmp1 = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp3 = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + ytmp0 = _mm_unpacklo_epi32(xtmp0,xtmp1); + ytmp1 = _mm_unpackhi_epi32(xtmp0,xtmp1); + ytmp2 = _mm_unpacklo_epi32(xtmp2,xtmp3); + ytmp3 = _mm_unpackhi_epi32(xtmp2,xtmp3); + xtmp0 = _mm_unpacklo_epi64(ytmp0,ytmp2); + xtmp1 = _mm_unpackhi_epi64(ytmp0,ytmp2); + xtmp2 = _mm_unpacklo_epi64(ytmp1,ytmp3); + xtmp3 = _mm_unpackhi_epi64(ytmp1,ytmp3); + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2(xtmp1,tw16a_128[0],tw16b_128[0]); + xtmp2 = packed_cmult2(xtmp2,tw16a_128[1],tw16b_128[1]); + xtmp3 = packed_cmult2(xtmp3,tw16a_128[2],tw16b_128[2]); + + x02t = _mm_adds_epi16(xtmp0,xtmp2); + x13t = _mm_adds_epi16(xtmp1,xtmp3); + y128[0] = _mm_adds_epi16(x02t,x13t); + y128[2] = _mm_subs_epi16(x02t,x13t); + x1_flip = _mm_sign_epi16(xtmp1,*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(xtmp3,*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x02t = _mm_subs_epi16(xtmp0,xtmp2); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + y128[1] = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + y128[3] = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +#elif defined(__arm__) + + int16x8_t *tw16a_128=(int16x8_t *)tw16a,*tw16b_128=(int16x8_t *)tw16b,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y; + + /* This is the original version before unrolling + + bfly4_tw1(x128,x128+1,x128+2,x128+3, + y128,y128+1,y128+2,y128+3); + + 
transpose16(y128,ytmp); + + bfly4_16(ytmp,ytmp+1,ytmp+2,ytmp+3, + y128,y128+1,y128+2,y128+3, + tw16_128,tw16_128+1,tw16_128+2); + */ + + register int16x8_t x1_flip,x3_flip,x02t,x13t; + register int16x8_t xtmp0,xtmp1,xtmp2,xtmp3; + register uint32x4x2_t ytmp0,ytmp1; + register int16x8_t ytmp0b,ytmp1b,ytmp2b,ytmp3b; + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = vqaddq_s16(x128[0],x128[2]); + x13t = vqaddq_s16(x128[1],x128[3]); + xtmp0 = vqaddq_s16(x02t,x13t); + xtmp2 = vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(x128[1],*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(x128[3],*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(x128[0],x128[2]); + x13t = vqsubq_s16(x1_flip,x3_flip); + xtmp1 = vqaddq_s16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp3 = vqsubq_s16(x02t,x13t); // x0 - x1f - x2 + x3f + + ytmp0 = vtrnq_u32((uint32x4_t)(xtmp0),(uint32x4_t)(xtmp1)); +// y0[0] = [x00 x10 x02 x12], y0[1] = [x01 x11 x03 x13] + ytmp1 = vtrnq_u32((uint32x4_t)(xtmp2),(uint32x4_t)(xtmp3)); +// y1[0] = [x20 x30 x22 x32], y1[1] = [x21 x31 x23 x33] + + + ytmp0b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0])); +// y0 = [x00 x10 x20 x30] + ytmp1b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1])); +// t1 = [x01 x11 x21 x31] + ytmp2b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0])); +// t2 = [x02 x12 x22 x32] + ytmp3b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1])); +// t3 = [x03 x13 x23 x33] + + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2(ytmp1b,tw16a_128[0],tw16b_128[0]); + xtmp2 = packed_cmult2(ytmp2b,tw16a_128[1],tw16b_128[1]); + xtmp3 = packed_cmult2(ytmp3b,tw16a_128[2],tw16b_128[2]); + + x02t = vqaddq_s16(ytmp0b,xtmp2); + x13t = vqaddq_s16(xtmp1,xtmp3); + y128[0] = vqaddq_s16(x02t,x13t); + y128[2] = 
vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(xtmp1,*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(xtmp3,*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(ytmp0b,xtmp2); + x13t = vqsubq_s16(x1_flip,x3_flip); + y128[1] = vqaddq_s16(x02t,x13t); // x0 + x1f - x2 - x3f + y128[3] = vqsubq_s16(x02t,x13t); // x0 - x1f - x2 + x3f + + +#endif +} + +#if defined(__x86_64__) || defined(__i386__) +#ifdef __AVX2__ +// Does two 16-point DFTS (x[0 .. 15] is 128 LSBs of input vector, x[16..31] is in 128 MSBs) +static inline void dft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline)); +static inline void dft16_simd256(int16_t *x,int16_t *y) +{ + + __m256i *tw16a_256=(__m256i *)tw16arep,*tw16b_256=(__m256i *)tw16brep,*x256=(__m256i *)x,*y256=(__m256i *)y; + + __m256i x1_flip,x3_flip,x02t,x13t; + __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3; + register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = _mm256_adds_epi16(x256[0],x256[2]); + x13t = _mm256_adds_epi16(x256[1],x256[3]); + xtmp0 = _mm256_adds_epi16(x02t,x13t); + xtmp2 = _mm256_subs_epi16(x02t,x13t); + x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(x256[0],x256[2]); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + xtmp1 = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp3 = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + /* print_shorts256("xtmp0",(int16_t*)&xtmp0); + print_shorts256("xtmp1",(int16_t*)&xtmp1); + print_shorts256("xtmp2",(int16_t*)&xtmp2); + print_shorts256("xtmp3",(int16_t*)&xtmp3);*/ + + ytmp0 = _mm256_unpacklo_epi32(xtmp0,xtmp1); + ytmp1 = 
_mm256_unpackhi_epi32(xtmp0,xtmp1); + ytmp2 = _mm256_unpacklo_epi32(xtmp2,xtmp3); + ytmp3 = _mm256_unpackhi_epi32(xtmp2,xtmp3); + xtmp0 = _mm256_unpacklo_epi64(ytmp0,ytmp2); + xtmp1 = _mm256_unpackhi_epi64(ytmp0,ytmp2); + xtmp2 = _mm256_unpacklo_epi64(ytmp1,ytmp3); + xtmp3 = _mm256_unpackhi_epi64(ytmp1,ytmp3); + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]); + xtmp2 = packed_cmult2_256(xtmp2,tw16a_256[1],tw16b_256[1]); + xtmp3 = packed_cmult2_256(xtmp3,tw16a_256[2],tw16b_256[2]); + + /* print_shorts256("xtmp0",(int16_t*)&xtmp0); + print_shorts256("xtmp1",(int16_t*)&xtmp1); + print_shorts256("xtmp2",(int16_t*)&xtmp2); + print_shorts256("xtmp3",(int16_t*)&xtmp3);*/ + + x02t = _mm256_adds_epi16(xtmp0,xtmp2); + x13t = _mm256_adds_epi16(xtmp1,xtmp3); + ytmp0 = _mm256_adds_epi16(x02t,x13t); + ytmp2 = _mm256_subs_epi16(x02t,x13t); + x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(xtmp0,xtmp2); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + ytmp1 = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + ytmp3 = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + + // [y0 y1 y2 y3 y16 y17 y18 y19] + // [y4 y5 y6 y7 y20 y21 y22 y23] + // [y8 y9 y10 y11 y24 y25 y26 y27] + // [y12 y13 y14 y15 y28 y29 y30 y31] + + y256[0] = _mm256_insertf128_si256(ytmp0,_mm256_extracti128_si256(ytmp1,0),1); + y256[1] = _mm256_insertf128_si256(ytmp2,_mm256_extracti128_si256(ytmp3,0),1); + y256[2] = _mm256_insertf128_si256(ytmp1,_mm256_extracti128_si256(ytmp0,1),0); + y256[3] = _mm256_insertf128_si256(ytmp3,_mm256_extracti128_si256(ytmp2,1),0); + + // [y0 y1 y2 y3 y4 y5 y6 y7] + // [y8 y9 y10 y11 y12 y13 y14 y15] + // [y16 y17 y18 y19 y20 y21 y22 y23] + // [y24 y25 y26 y27 y28 y29 y30 y31] +} + +#endif 
+#endif +static inline void idft16(int16_t *x,int16_t *y) __attribute__((always_inline)); + +static inline void idft16(int16_t *x,int16_t *y) +{ + +#if defined(__x86_64__) || defined(__i386__) + __m128i *tw16a_128=(__m128i *)tw16,*tw16b_128=(__m128i *)tw16c,*x128=(__m128i *)x,*y128=(__m128i *)y; + + /* + bfly4_tw1(x128,x128+1,x128+2,x128+3, + y128,y128+1,y128+2,y128+3); + + transpose16(y128,ytmp); + + bfly4_16(ytmp,ytmp+1,ytmp+2,ytmp+3, + y128,y128+1,y128+2,y128+3, + tw16_128,tw16_128+1,tw16_128+2); + */ + + register __m128i x1_flip,x3_flip,x02t,x13t; + register __m128i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3; + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = _mm_adds_epi16(x128[0],x128[2]); + x13t = _mm_adds_epi16(x128[1],x128[3]); + xtmp0 = _mm_adds_epi16(x02t,x13t); + xtmp2 = _mm_subs_epi16(x02t,x13t); + x1_flip = _mm_sign_epi16(x128[1],*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(x128[3],*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x02t = _mm_subs_epi16(x128[0],x128[2]); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + xtmp3 = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp1 = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + ytmp0 = _mm_unpacklo_epi32(xtmp0,xtmp1); + ytmp1 = _mm_unpackhi_epi32(xtmp0,xtmp1); + ytmp2 = _mm_unpacklo_epi32(xtmp2,xtmp3); + ytmp3 = _mm_unpackhi_epi32(xtmp2,xtmp3); + xtmp0 = _mm_unpacklo_epi64(ytmp0,ytmp2); + xtmp1 = _mm_unpackhi_epi64(ytmp0,ytmp2); + xtmp2 = _mm_unpacklo_epi64(ytmp1,ytmp3); + xtmp3 = _mm_unpackhi_epi64(ytmp1,ytmp3); + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2(xtmp1,tw16a_128[0],tw16b_128[0]); + xtmp2 = packed_cmult2(xtmp2,tw16a_128[1],tw16b_128[1]); + xtmp3 = packed_cmult2(xtmp3,tw16a_128[2],tw16b_128[2]); + + x02t = _mm_adds_epi16(xtmp0,xtmp2); + x13t = 
_mm_adds_epi16(xtmp1,xtmp3); + y128[0] = _mm_adds_epi16(x02t,x13t); + y128[2] = _mm_subs_epi16(x02t,x13t); + x1_flip = _mm_sign_epi16(xtmp1,*(__m128i*)conjugatedft); + x1_flip = _mm_shuffle_epi8(x1_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x3_flip = _mm_sign_epi16(xtmp3,*(__m128i*)conjugatedft); + x3_flip = _mm_shuffle_epi8(x3_flip,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2)); + x02t = _mm_subs_epi16(xtmp0,xtmp2); + x13t = _mm_subs_epi16(x1_flip,x3_flip); + y128[3] = _mm_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + y128[1] = _mm_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + +#elif defined(__arm__) + int16x8_t *tw16a_128=(int16x8_t *)tw16,*tw16b_128=(int16x8_t *)tw16c,*x128=(int16x8_t *)x,*y128=(int16x8_t *)y; + + /* This is the original version before unrolling + + bfly4_tw1(x128,x128+1,x128+2,x128+3, + y128,y128+1,y128+2,y128+3); + + transpose16(y128,ytmp); + + bfly4_16(ytmp,ytmp+1,ytmp+2,ytmp+3, + y128,y128+1,y128+2,y128+3, + tw16_128,tw16_128+1,tw16_128+2); + */ + + register int16x8_t x1_flip,x3_flip,x02t,x13t; + register int16x8_t xtmp0,xtmp1,xtmp2,xtmp3; + register uint32x4x2_t ytmp0,ytmp1; + register int16x8_t ytmp0b,ytmp1b,ytmp2b,ytmp3b; + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = vqaddq_s16(x128[0],x128[2]); + x13t = vqaddq_s16(x128[1],x128[3]); + xtmp0 = vqaddq_s16(x02t,x13t); + xtmp2 = vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(x128[1],*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(x128[3],*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(x128[0],x128[2]); + x13t = vqsubq_s16(x1_flip,x3_flip); + xtmp3 = vqaddq_s16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp1 = vqsubq_s16(x02t,x13t); // x0 - x1f - x2 + x3f + + ytmp0 = vtrnq_u32((uint32x4_t)(xtmp0),(uint32x4_t)(xtmp1)); +// y0[0] = [x00 x10 x02 x12], y0[1] = [x01 x11 x03 x13] + ytmp1 = vtrnq_u32((uint32x4_t)(xtmp2),(uint32x4_t)(xtmp3)); +// y1[0] = [x20 x30 x22 x32], y1[1] = [x21 x31 x23 x33] + + + ytmp0b = 
vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[0]),vget_low_s16((int16x8_t)ytmp1.val[0])); +// y0 = [x00 x10 x20 x30] + ytmp1b = vcombine_s16(vget_low_s16((int16x8_t)ytmp0.val[1]),vget_low_s16((int16x8_t)ytmp1.val[1])); +// t1 = [x01 x11 x21 x31] + ytmp2b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[0]),vget_high_s16((int16x8_t)ytmp1.val[0])); +// t2 = [x02 x12 x22 x32] + ytmp3b = vcombine_s16(vget_high_s16((int16x8_t)ytmp0.val[1]),vget_high_s16((int16x8_t)ytmp1.val[1])); +// t3 = [x03 x13 x23 x33] + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2(ytmp1b,tw16a_128[0],tw16b_128[0]); + xtmp2 = packed_cmult2(ytmp2b,tw16a_128[1],tw16b_128[1]); + xtmp3 = packed_cmult2(ytmp3b,tw16a_128[2],tw16b_128[2]); + + x02t = vqaddq_s16(ytmp0b,xtmp2); + x13t = vqaddq_s16(xtmp1,xtmp3); + y128[0] = vqaddq_s16(x02t,x13t); + y128[2] = vqsubq_s16(x02t,x13t); + x1_flip = vrev32q_s16(vmulq_s16(xtmp1,*(int16x8_t*)conjugatedft)); + x3_flip = vrev32q_s16(vmulq_s16(xtmp3,*(int16x8_t*)conjugatedft)); + x02t = vqsubq_s16(ytmp0b,xtmp2); + x13t = vqsubq_s16(x1_flip,x3_flip); + y128[3] = vqaddq_s16(x02t,x13t); // x0 + x1f - x2 - x3f + y128[1] = vqsubq_s16(x02t,x13t); // x0 - x1f - x2 + x3f + +#endif +} + +void idft16f(int16_t *x,int16_t *y) { + idft16(x,y); +} + +#if defined(__x86_64__) || defined(__i386__) +#ifdef __AVX2__ +// Does two 16-point IDFTS (x[0 .. 
15] is 128 LSBs of input vector, x[16..31] is in 128 MSBs) +static inline void idft16_simd256(int16_t *x,int16_t *y) __attribute__((always_inline)); +static inline void idft16_simd256(int16_t *x,int16_t *y) +{ + + __m256i *tw16a_256=(__m256i *)tw16rep,*tw16b_256=(__m256i *)tw16crep,*x256=(__m256i *)x,*y256=(__m256i *)y; + register __m256i x1_flip,x3_flip,x02t,x13t; + register __m256i ytmp0,ytmp1,ytmp2,ytmp3,xtmp0,xtmp1,xtmp2,xtmp3; + register __m256i complex_shuffle = _mm256_set_epi8(29,28,31,30,25,24,27,26,21,20,23,22,17,16,19,18,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2); + + // First stage : 4 Radix-4 butterflies without input twiddles + + x02t = _mm256_adds_epi16(x256[0],x256[2]); + x13t = _mm256_adds_epi16(x256[1],x256[3]); + xtmp0 = _mm256_adds_epi16(x02t,x13t); + xtmp2 = _mm256_subs_epi16(x02t,x13t); + x1_flip = _mm256_sign_epi16(x256[1],*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(x256[3],*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(x256[0],x256[2]); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + xtmp3 = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + xtmp1 = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + ytmp0 = _mm256_unpacklo_epi32(xtmp0,xtmp1); + ytmp1 = _mm256_unpackhi_epi32(xtmp0,xtmp1); + ytmp2 = _mm256_unpacklo_epi32(xtmp2,xtmp3); + ytmp3 = _mm256_unpackhi_epi32(xtmp2,xtmp3); + xtmp0 = _mm256_unpacklo_epi64(ytmp0,ytmp2); + xtmp1 = _mm256_unpackhi_epi64(ytmp0,ytmp2); + xtmp2 = _mm256_unpacklo_epi64(ytmp1,ytmp3); + xtmp3 = _mm256_unpackhi_epi64(ytmp1,ytmp3); + + // Second stage : 4 Radix-4 butterflies with input twiddles + xtmp1 = packed_cmult2_256(xtmp1,tw16a_256[0],tw16b_256[0]); + xtmp2 = packed_cmult2_256(xtmp2,tw16a_256[1],tw16b_256[1]); + xtmp3 = packed_cmult2_256(xtmp3,tw16a_256[2],tw16b_256[2]); + + x02t = _mm256_adds_epi16(xtmp0,xtmp2); + x13t = _mm256_adds_epi16(xtmp1,xtmp3); + ytmp0 = 
_mm256_adds_epi16(x02t,x13t); + ytmp2 = _mm256_subs_epi16(x02t,x13t); + x1_flip = _mm256_sign_epi16(xtmp1,*(__m256i*)conjugatedft); + x1_flip = _mm256_shuffle_epi8(x1_flip,complex_shuffle); + x3_flip = _mm256_sign_epi16(xtmp3,*(__m256i*)conjugatedft); + x3_flip = _mm256_shuffle_epi8(x3_flip,complex_shuffle); + x02t = _mm256_subs_epi16(xtmp0,xtmp2); + x13t = _mm256_subs_epi16(x1_flip,x3_flip); + ytmp3 = _mm256_adds_epi16(x02t,x13t); // x0 + x1f - x2 - x3f + ytmp1 = _mm256_subs_epi16(x02t,x13t); // x0 - x1f - x2 + x3f + + // [y0 y1 y2 y3 y16 y17 y18 y19] + // [y4 y5 y6 y7 y20 y21 y22 y23] + // [y8 y9 y10 y11 y24 y25 y26 y27] + // [y12 y13 y14 y15 y28 y29 y30 y31] + + y256[0] = _mm256_insertf128_si256(ytmp0,_mm256_extracti128_si256(ytmp1,0),1); + y256[1] = _mm256_insertf128_si256(ytmp2,_mm256_extracti128_si256(ytmp3,0),1); + y256[2] = _mm256_insertf128_si256(ytmp1,_mm256_extracti128_si256(ytmp0,1),0); + y256[3] = _mm256_insertf128_si256(ytmp3,_mm256_extracti128_si256(ytmp2,1),0); + +} +#endif +#endif + +// 64-point optimized DFT + +const static int16_t tw64[96] __attribute__((aligned(32))) = { +32767,0,32609,-3212,32137,-6393,31356,-9512, +30272,-12540,28897,-15447,27244,-18205,25329,-20788, +23169,-23170,20787,-25330,18204,-27245,15446,-28898, +12539,-30273,9511,-31357,6392,-32138,3211,-32610, +32767,0,32137,-6393,30272,-12540,27244,-18205, +23169,-23170,18204,-27245,12539,-30273,6392,-32138, +0,-32767,-6393,-32138,-12540,-30273,-18205,-27245, +-23170,-23170,-27245,-18205,-30273,-12540,-32138,-6393, +32767,0,31356,-9512,27244,-18205,20787,-25330, +12539,-30273,3211,-32610,-6393,-32138,-15447,-28898, +-23170,-23170,-28898,-15447,-32138,-6393,-32610,3211, +-30273,12539,-25330,20787,-18205,27244,-9512,31356 + }; +const static int16_t tw64a[96] __attribute__((aligned(32))) = { +32767,0,32609,3212,32137,6393,31356,9512, +30272,12540,28897,15447,27244,18205,25329,20788, +23169,23170,20787,25330,18204,27245,15446,28898, +12539,30273,9511,31357,6392,32138,3211,32610, 
+32767,0,32137,6393,30272,12540,27244,18205, +23169,23170,18204,27245,12539,30273,6392,32138, +0,32767,-6393,32138,-12540,30273,-18205,27245, +-23170,23170,-27245,18205,-30273,12540,-32138,6393, +32767,0,31356,9512,27244,18205,20787,25330, +12539,30273,3211,32610,-6393,32138,-15447,28898, +-23170,23170,-28898,15447,-32138,6393,-32610,-3211, +-30273,-12539,-25330,-20787,-18205,-27244,-9512,-31356 + }; +const static int16_t tw64b[96] __attribute__((aligned(32))) = { +0,32767,-3212,32609,-6393,32137,-9512,31356, +-12540,30272,-15447,28897,-18205,27244,-20788,25329, +-23170,23169,-25330,20787,-27245,18204,-28898,15446, +-30273,12539,-31357,9511,-32138,6392,-32610,3211, +0,32767,-6393,32137,-12540,30272,-18205,27244, +-23170,23169,-27245,18204,-30273,12539,-32138,6392, +-32767,0,-32138,-6393,-30273,-12540,-27245,-18205, +-23170,-23170,-18205,-27245,-12540,-30273,-6393,-32138, +0,32767,-9512,31356,-18205,27244,-25330,20787, +-30273,12539,-32610,3211,-32138,-6393,-28898,-15447, +-23170,-23170,-15447,-28898,-6393,-32138,3211,-32610, +12539,-30273,20787,-25330,27244,-18205,31356,-9512 + }; +const static int16_t tw64c[96] __attribute__((aligned(32))) = { +0,32767,3212,32609,6393,32137,9512,31356, +12540,30272,15447,28897,18205,27244,20788,25329, +23170,23169,25330,20787,27245,18204,28898,15446, +30273,12539,31357,9511,32138,6392,32610,3211, +0,32767,6393,32137,12540,30272,18205,27244, +23170,23169,27245,18204,30273,12539,32138,6392, +32767,0,32138,-6393,30273,-12540,27245,-18205, +23170,-23170,18205,-27245,12540,-30273,6393,-32138, +0,32767,9512,31356,18205,27244,25330,20787, +30273,12539,32610,3211,32138,-6393,28898,-15447, +23170,-23170,15447,-28898,6393,-32138,-3211,-32610, +-12539,-30273,-20787,-25330,-27244,-18205,-31356,-9512 + }; +#if defined(__x86_64__) || defined(__i386__) +#define simd_q15_t __m128i +#define simdshort_q15_t __m64 +#define shiftright_int16(a,shift) _mm_srai_epi16(a,shift) +#define set1_int16(a) _mm_set1_epi16(a); +#define mulhi_int16(a,b) 
_mm_mulhrs_epi16 (a,b) +#ifdef __AVX2__ +#define simd256_q15_t __m256i +#define shiftright_int16_simd256(a,shift) _mm256_srai_epi16(a,shift) +#define set1_int16_simd256(a) _mm256_set1_epi16(a); +#define mulhi_int16_simd256(a,b) _mm256_mulhrs_epi16(a,b); //_mm256_slli_epi16(_mm256_mulhi_epi16(a,b),1); +#endif + +#elif defined(__arm__) +#define simd_q15_t int16x8_t +#define simdshort_q15_t int16x4_t +#define shiftright_int16(a,shift) vshrq_n_s16(a,shift) +#define set1_int16(a) vdupq_n_s16(a) +#define mulhi_int16(a,b) vqdmulhq_s16(a,b); +#define _mm_empty() +#define _m_empty() + +#endif + +#ifndef __AVX2__ +void dft64(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[16],ytmp[16],*tw64a_128=(simd_q15_t *)tw64a,*tw64b_128=(simd_q15_t *)tw64b,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y; + + +#ifdef D64STATS + time_stats_t ts_t,ts_d,ts_b; + + reset_meas(&ts_t); + reset_meas(&ts_d); + reset_meas(&ts_b); + start_meas(&ts_t); +#endif + + + transpose16_ooff(x128,xtmp,4); + // xtmp0 = x00 x10 x20 x30 + // xtmp4 = x01 x11 x21 x31 + // xtmp8 = x02 x12 x22 x32 + // xtmp12 = x03 x13 x23 x33 + transpose16_ooff(x128+4,xtmp+1,4); + // xtmp1 = x40 x50 x60 x70 + // xtmp5 = x41 x51 x61 x71 + // xtmp9 = x42 x52 x62 x72 + // xtmp13 = x43 x53 x63 x73 + transpose16_ooff(x128+8,xtmp+2,4); + // xtmp2 = x80 x90 xa0 xb0 + // xtmp6 = x41 x51 x61 x71 + // xtmp10 = x82 x92 xa2 xb2 + // xtmp14 = x83 x93 xa3 xb3 + transpose16_ooff(x128+12,xtmp+3,4); + // xtmp3 = xc0 xd0 xe0 xf0 + // xtmp7 = xc1 xd1 xe1 xf1 + // xtmp11 = xc2 xd2 xe2 xf2 + // xtmp15 = xc3 xd3 xe3 xf3 + +#ifdef D64STATS + stop_meas(&ts_t); + start_meas(&ts_d); +#endif + + // xtmp0 = x00 x10 x20 x30 + // xtmp1 = x40 x50 x60 x70 + // xtmp2 = x80 x90 xa0 xb0 + // xtmp3 = xc0 xd0 xe0 xf0 + dft16((int16_t*)(xtmp),(int16_t*)ytmp); + + // xtmp4 = x01 x11 x21 x31 + // xtmp5 = x41 x51 x61 x71 + // xtmp6 = x81 x91 xa1 xb1 + // xtmp7 = xc1 xd1 xe1 xf1 + dft16((int16_t*)(xtmp+4),(int16_t*)(ytmp+4)); + 
dft16((int16_t*)(xtmp+8),(int16_t*)(ytmp+8)); + dft16((int16_t*)(xtmp+12),(int16_t*)(ytmp+12)); + + +#ifdef D64STATS + stop_meas(&ts_d); + start_meas(&ts_b); +#endif + + + bfly4_16(ytmp,ytmp+4,ytmp+8,ytmp+12, + y128,y128+4,y128+8,y128+12, + tw64a_128,tw64a_128+4,tw64a_128+8, + tw64b_128,tw64b_128+4,tw64b_128+8); + + bfly4_16(ytmp+1,ytmp+5,ytmp+9,ytmp+13, + y128+1,y128+5,y128+9,y128+13, + tw64a_128+1,tw64a_128+5,tw64a_128+9, + tw64b_128+1,tw64b_128+5,tw64b_128+9); + + bfly4_16(ytmp+2,ytmp+6,ytmp+10,ytmp+14, + y128+2,y128+6,y128+10,y128+14, + tw64a_128+2,tw64a_128+6,tw64a_128+10, + tw64b_128+2,tw64b_128+6,tw64b_128+10); + + bfly4_16(ytmp+3,ytmp+7,ytmp+11,ytmp+15, + y128+3,y128+7,y128+11,y128+15, + tw64a_128+3,tw64a_128+7,tw64a_128+11, + tw64b_128+3,tw64b_128+7,tw64b_128+11); + +#ifdef D64STATS + stop_meas(&ts_b); + printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff); +#endif + + + if (scale>0) { + y128[0] = shiftright_int16(y128[0],3); + y128[1] = shiftright_int16(y128[1],3); + y128[2] = shiftright_int16(y128[2],3); + y128[3] = shiftright_int16(y128[3],3); + y128[4] = shiftright_int16(y128[4],3); + y128[5] = shiftright_int16(y128[5],3); + y128[6] = shiftright_int16(y128[6],3); + y128[7] = shiftright_int16(y128[7],3); + y128[8] = shiftright_int16(y128[8],3); + y128[9] = shiftright_int16(y128[9],3); + y128[10] = shiftright_int16(y128[10],3); + y128[11] = shiftright_int16(y128[11],3); + y128[12] = shiftright_int16(y128[12],3); + y128[13] = shiftright_int16(y128[13],3); + y128[14] = shiftright_int16(y128[14],3); + y128[15] = shiftright_int16(y128[15],3); + } + + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ +void dft64(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[16],ytmp[16],*tw64a_256=(simd256_q15_t *)tw64a,*tw64b_256=(simd256_q15_t *)tw64b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y; + simd256_q15_t xintl0,xintl1,xintl2,xintl3,xintl4,xintl5,xintl6,xintl7; + simd256_q15_t const perm_mask = 
_mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0); + + +#ifdef D64STATS + time_stats_t ts_t,ts_d,ts_b; + + reset_meas(&ts_t); + reset_meas(&ts_d); + reset_meas(&ts_b); + start_meas(&ts_t); +#endif + +#ifdef D64STATS + stop_meas(&ts_t); + start_meas(&ts_d); +#endif + /* + print_shorts256("x2560",(int16_t*)x256); + print_shorts256("x2561",(int16_t*)(x256+1)); + print_shorts256("x2562",(int16_t*)(x256+2)); + print_shorts256("x2563",(int16_t*)(x256+3)); + print_shorts256("x2564",(int16_t*)(x256+4)); + print_shorts256("x2565",(int16_t*)(x256+5)); + print_shorts256("x2566",(int16_t*)(x256+6)); + print_shorts256("x2567",(int16_t*)(x256+7)); + */ + xintl0 = _mm256_permutevar8x32_epi32(x256[0],perm_mask); // x0 x4 x1 x5 x2 x6 x3 x7 + xintl1 = _mm256_permutevar8x32_epi32(x256[1],perm_mask); // x8 x12 x9 x13 x10 x14 x11 x15 + xintl2 = _mm256_permutevar8x32_epi32(x256[2],perm_mask); // x16 x20 x17 x21 x18 x22 x19 x23 + xintl3 = _mm256_permutevar8x32_epi32(x256[3],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + xintl4 = _mm256_permutevar8x32_epi32(x256[4],perm_mask); // x32 x28 x25 x29 x26 x30 x27 x31 + xintl5 = _mm256_permutevar8x32_epi32(x256[5],perm_mask); // x40 x28 x25 x29 x26 x30 x27 x31 + xintl6 = _mm256_permutevar8x32_epi32(x256[6],perm_mask); // x48 x28 x25 x29 x26 x30 x27 x31 + xintl7 = _mm256_permutevar8x32_epi32(x256[7],perm_mask); // x56 x28 x25 x29 x26 x30 x27 x31 + /* + print_shorts256("xintl0",(int16_t*)&xintl0); + print_shorts256("xintl1",(int16_t*)&xintl1); + print_shorts256("xintl2",(int16_t*)&xintl2); + print_shorts256("xintl3",(int16_t*)&xintl3); + print_shorts256("xintl4",(int16_t*)&xintl4); + print_shorts256("xintl5",(int16_t*)&xintl5); + print_shorts256("xintl6",(int16_t*)&xintl6); + print_shorts256("xintl7",(int16_t*)&xintl7); + */ + xtmp[0] = _mm256_unpacklo_epi64(xintl0,xintl1); // x0 x4 x8 x12 x1 x5 x9 x13 + xtmp[4] = _mm256_unpackhi_epi64(xintl0,xintl1); // x2 x6 x10 x14 x3 x7 x11 x15 + xtmp[1] = _mm256_unpacklo_epi64(xintl2,xintl3); // x16 x20 x24 x28 x17 
x21 x25 x29 + xtmp[5] = _mm256_unpackhi_epi64(xintl2,xintl3); // x18 x22 x26 x30 x19 x23 x27 x31 + xtmp[2] = _mm256_unpacklo_epi64(xintl4,xintl5); // x32 x36 x40 x44 x33 x37 x41 x45 + xtmp[6] = _mm256_unpackhi_epi64(xintl4,xintl5); // x34 x38 x42 x46 x35 x39 x43 x47 + xtmp[3] = _mm256_unpacklo_epi64(xintl6,xintl7); // x48 x52 x56 x60 x49 x53 x57 x61 + xtmp[7] = _mm256_unpackhi_epi64(xintl6,xintl7); // x50 x54 x58 x62 x51 x55 x59 x63 + /* + print_shorts256("xtmp0",(int16_t*)xtmp); + print_shorts256("xtmp1",(int16_t*)(xtmp+1)); + print_shorts256("xtmp2",(int16_t*)(xtmp+2)); + print_shorts256("xtmp3",(int16_t*)(xtmp+3)); + print_shorts256("xtmp4",(int16_t*)(xtmp+4)); + print_shorts256("xtmp5",(int16_t*)(xtmp+5)); + print_shorts256("xtmp6",(int16_t*)(xtmp+6)); + print_shorts256("xtmp7",(int16_t*)(xtmp+7)); + */ + dft16_simd256((int16_t*)(xtmp),(int16_t*)ytmp); + // [y0 y1 y2 y3 y4 y5 y6 y7] + // [y8 y9 y10 y11 y12 y13 y14 y15] + // [y16 y17 y18 y19 y20 y21 y22 y23] + // [y24 y25 y26 y27 y28 y29 y30 y31] + /* + print_shorts256("ytmp0",(int16_t*)ytmp); + print_shorts256("ytmp1",(int16_t*)(ytmp+1)); + print_shorts256("ytmp2",(int16_t*)(ytmp+2)); + print_shorts256("ytmp3",(int16_t*)(ytmp+3)); + */ + dft16_simd256((int16_t*)(xtmp+4),(int16_t*)(ytmp+4)); + // [y32 y33 y34 y35 y36 y37 y38 y39] + // [y40 y41 y42 y43 y44 y45 y46 y47] + // [y48 y49 y50 y51 y52 y53 y54 y55] + // [y56 y57 y58 y59 y60 y61 y62 y63] + /* + print_shorts256("ytmp4",(int16_t*)(ytmp+4)); + print_shorts256("ytmp5",(int16_t*)(ytmp+5)); + print_shorts256("ytmp6",(int16_t*)(ytmp+6)); + print_shorts256("ytmp7",(int16_t*)(ytmp+7)); + */ +#ifdef D64STATS + stop_meas(&ts_d); + start_meas(&ts_b); +#endif + + + bfly4_16_256(ytmp,ytmp+2,ytmp+4,ytmp+6, + y256,y256+2,y256+4,y256+6, + tw64a_256,tw64a_256+2,tw64a_256+4, + tw64b_256,tw64b_256+2,tw64b_256+4); + // [y0 y1 y2 y3 y4 y5 y6 y7] + // [y16 y17 y18 y19 y20 y21 y22 y23] + // [y32 y33 y34 y35 y36 y37 y38 y39] + // [y48 y49 y50 y51 y52 y53 y54 y55] + + 
bfly4_16_256(ytmp+1,ytmp+3,ytmp+5,ytmp+7, + y256+1,y256+3,y256+5,y256+7, + tw64a_256+1,tw64a_256+3,tw64a_256+5, + tw64b_256+1,tw64b_256+3,tw64b_256+5); + // [y8 y9 y10 y11 y12 y13 y14 y15] + // [y24 y25 y26 y27 y28 y29 y30 y31] + // [y40 y41 y42 y43 y44 y45 y46 y47] + // [y56 y57 y58 y59 y60 y61 y62 y63] + /* + print_shorts256("y256_0",(int16_t*)&y256[0]); + print_shorts256("y256_1",(int16_t*)&y256[1]); + print_shorts256("y256_2",(int16_t*)&y256[2]); + print_shorts256("y256_3",(int16_t*)&y256[3]); + print_shorts256("y256_4",(int16_t*)&y256[4]); + print_shorts256("y256_5",(int16_t*)&y256[5]); + print_shorts256("y256_6",(int16_t*)&y256[6]); + print_shorts256("y256_7",(int16_t*)&y256[7]); + */ + +#ifdef D64STATS + stop_meas(&ts_b); + printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff); +#endif + + + if (scale>0) { + y256[0] = shiftright_int16_simd256(y256[0],3); + y256[1] = shiftright_int16_simd256(y256[1],3); + y256[2] = shiftright_int16_simd256(y256[2],3); + y256[3] = shiftright_int16_simd256(y256[3],3); + y256[4] = shiftright_int16_simd256(y256[4],3); + y256[5] = shiftright_int16_simd256(y256[5],3); + y256[6] = shiftright_int16_simd256(y256[6],3); + y256[7] = shiftright_int16_simd256(y256[7],3); + } + + _mm_empty(); + _m_empty(); + + +} +#endif + +#ifndef __AVX2__ +void idft64(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[16],ytmp[16],*tw64a_128=(simd_q15_t *)tw64,*tw64b_128=(simd_q15_t *)tw64c,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y; + + +#ifdef D64STATS + time_stats_t ts_t,ts_d,ts_b; + + reset_meas(&ts_t); + reset_meas(&ts_d); + reset_meas(&ts_b); + start_meas(&ts_t); +#endif + + + transpose16_ooff(x128,xtmp,4); + transpose16_ooff(x128+4,xtmp+1,4); + transpose16_ooff(x128+8,xtmp+2,4); + transpose16_ooff(x128+12,xtmp+3,4); + + +#ifdef D64STATS + stop_meas(&ts_t); + start_meas(&ts_d); +#endif + + + idft16((int16_t*)(xtmp),(int16_t*)ytmp); + idft16((int16_t*)(xtmp+4),(int16_t*)(ytmp+4)); + 
idft16((int16_t*)(xtmp+8),(int16_t*)(ytmp+8)); + idft16((int16_t*)(xtmp+12),(int16_t*)(ytmp+12)); + + +#ifdef D64STATS + stop_meas(&ts_d); + start_meas(&ts_b); +#endif + + + ibfly4_16(ytmp,ytmp+4,ytmp+8,ytmp+12, + y128,y128+4,y128+8,y128+12, + tw64a_128,tw64a_128+4,tw64a_128+8, + tw64b_128,tw64b_128+4,tw64b_128+8); + ibfly4_16(ytmp+1,ytmp+5,ytmp+9,ytmp+13, + y128+1,y128+5,y128+9,y128+13, + tw64a_128+1,tw64a_128+5,tw64a_128+9, + tw64b_128+1,tw64b_128+5,tw64b_128+9); + + ibfly4_16(ytmp+2,ytmp+6,ytmp+10,ytmp+14, + y128+2,y128+6,y128+10,y128+14, + tw64a_128+2,tw64a_128+6,tw64a_128+10, + tw64b_128+2,tw64b_128+6,tw64b_128+10); + + ibfly4_16(ytmp+3,ytmp+7,ytmp+11,ytmp+15, + y128+3,y128+7,y128+11,y128+15, + tw64a_128+3,tw64a_128+7,tw64a_128+11, + tw64b_128+3,tw64b_128+7,tw64b_128+11); + +#ifdef D64STATS + stop_meas(&ts_b); + printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff); +#endif + + + if (scale>0) { + + y128[0] = shiftright_int16(y128[0],3); + y128[1] = shiftright_int16(y128[1],3); + y128[2] = shiftright_int16(y128[2],3); + y128[3] = shiftright_int16(y128[3],3); + y128[4] = shiftright_int16(y128[4],3); + y128[5] = shiftright_int16(y128[5],3); + y128[6] = shiftright_int16(y128[6],3); + y128[7] = shiftright_int16(y128[7],3); + y128[8] = shiftright_int16(y128[8],3); + y128[9] = shiftright_int16(y128[9],3); + y128[10] = shiftright_int16(y128[10],3); + y128[11] = shiftright_int16(y128[11],3); + y128[12] = shiftright_int16(y128[12],3); + y128[13] = shiftright_int16(y128[13],3); + y128[14] = shiftright_int16(y128[14],3); + y128[15] = shiftright_int16(y128[15],3); + + } + + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ +void idft64(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[16],ytmp[16],*tw64a_256=(simd256_q15_t *)tw64,*tw64b_256=(simd256_q15_t *)tw64c,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y; + register simd256_q15_t xintl0,xintl1,xintl2,xintl3,xintl4,xintl5,xintl6,xintl7; + simd256_q15_t const 
perm_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0); + + +#ifdef D64STATS + time_stats_t ts_t,ts_d,ts_b; + + reset_meas(&ts_t); + reset_meas(&ts_d); + reset_meas(&ts_b); + start_meas(&ts_t); +#endif + +#ifdef D64STATS + stop_meas(&ts_t); + start_meas(&ts_d); +#endif + + xintl0 = _mm256_permutevar8x32_epi32(x256[0],perm_mask); // x0 x4 x1 x5 x2 x6 x3 x7 + xintl1 = _mm256_permutevar8x32_epi32(x256[1],perm_mask); // x8 x12 x9 x13 x10 x14 x11 x15 + xintl2 = _mm256_permutevar8x32_epi32(x256[2],perm_mask); // x16 x20 x17 x21 x18 x22 x19 x23 + xintl3 = _mm256_permutevar8x32_epi32(x256[3],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + xintl4 = _mm256_permutevar8x32_epi32(x256[4],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + xintl5 = _mm256_permutevar8x32_epi32(x256[5],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + xintl6 = _mm256_permutevar8x32_epi32(x256[6],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + xintl7 = _mm256_permutevar8x32_epi32(x256[7],perm_mask); // x24 x28 x25 x29 x26 x30 x27 x31 + + xtmp[0] = _mm256_unpacklo_epi64(xintl0,xintl1); // x0 x4 x8 x12 x1 x5 x9 x13 + xtmp[4] = _mm256_unpackhi_epi64(xintl0,xintl1); // x2 x6 x10 x14 x3 x7 x11 x15 + xtmp[1] = _mm256_unpacklo_epi64(xintl2,xintl3); // x16 x20 x24 x28 x17 x21 x25 x29 + xtmp[5] = _mm256_unpackhi_epi64(xintl2,xintl3); // x18 x22 x26 x30 x19 x23 x27 x31 + xtmp[2] = _mm256_unpacklo_epi64(xintl4,xintl5); // x32 x36 x40 x44 x33 x37 x41 x45 + xtmp[6] = _mm256_unpackhi_epi64(xintl4,xintl5); // x34 x38 x42 x46 x35 x39 x43 x47 + xtmp[3] = _mm256_unpacklo_epi64(xintl6,xintl7); // x48 x52 x56 x60 x49 x53 x57 x61 + xtmp[7] = _mm256_unpackhi_epi64(xintl6,xintl7); // x50 x54 x58 x62 x51 x55 x59 x63 + + + idft16_simd256((int16_t*)(xtmp),(int16_t*)ytmp); + // [y0 y1 y2 y3 y16 y17 y18 y19] + // [y4 y5 y6 y7 y20 y21 y22 y23] + // [y8 y9 y10 y11 y24 y25 y26 y27] + // [y12 y13 y14 y15 y28 y29 y30 y31] + + idft16_simd256((int16_t*)(xtmp+4),(int16_t*)(ytmp+4)); + // [y32 y33 y34 y35 y48 y49 y50 y51] + // [y36 y37 
y38 y39 y52 y53 y54 y55] + // [y40 y41 y42 y43 y56 y57 y58 y59] + // [y44 y45 y46 y47 y60 y61 y62 y63] + +#ifdef D64STATS + stop_meas(&ts_d); + start_meas(&ts_b); +#endif + + + ibfly4_16_256(ytmp,ytmp+2,ytmp+4,ytmp+6, + y256,y256+2,y256+4,y256+6, + tw64a_256,tw64a_256+2,tw64a_256+4, + tw64b_256,tw64b_256+2,tw64b_256+4); + // [y0 y1 y2 y3 y4 y5 y6 y7] + // [y16 y17 y18 y19 y20 y21 y22 y23] + // [y32 y33 y34 y35 y36 y37 y38 y39] + // [y48 y49 y50 y51 y52 y53 y54 y55] + + ibfly4_16_256(ytmp+1,ytmp+3,ytmp+5,ytmp+7, + y256+1,y256+3,y256+5,y256+7, + tw64a_256+1,tw64a_256+3,tw64a_256+5, + tw64b_256+1,tw64b_256+3,tw64b_256+5); + // [y8 y9 y10 y11 y12 y13 y14 y15] + // [y24 y25 y26 y27 y28 y29 y30 y31] + // [y40 y41 y42 y43 y44 y45 y46 y47] + // [y56 y57 y58 y59 y60 y61 y62 y63] + + +#ifdef D64STATS + stop_meas(&ts_b); + printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff); +#endif + + + if (scale>0) { + y256[0] = shiftright_int16_simd256(y256[0],3); + y256[1] = shiftright_int16_simd256(y256[1],3); + y256[2] = shiftright_int16_simd256(y256[2],3); + y256[3] = shiftright_int16_simd256(y256[3],3); + y256[4] = shiftright_int16_simd256(y256[4],3); + y256[5] = shiftright_int16_simd256(y256[5],3); + y256[6] = shiftright_int16_simd256(y256[6],3); + y256[7] = shiftright_int16_simd256(y256[7],3); + } + + _mm_empty(); + _m_empty(); + +} +#endif + +int16_t tw128[128] __attribute__((aligned(32))) = { 
32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608}; + +int16_t tw128a[128] __attribute__((aligned(32))) = { 32767,0,32727,1608,32609,3212,32412,4808,32137,6393,31785,7962,31356,9512,30851,11039,30272,12540,29621,14010,28897,15447,28105,16846,27244,18205,26318,19520,25329,20788,24278,22005,23169,23170,22004,24279,20787,25330,19519,26319,18204,27245,16845,28106,15446,28898,14009,29622,12539,30273,11038,30852,9511,31357,7961,31786,6392,32138,4807,32413,3211,32610,1607,32728,0,32767,-1608,32728,-3212,32610,-4808,32413,-6393,32138,-7962,31786,-9512,31357,-11039,30852,-12540,30273,-14010,29622,-15447,28898,-16846,28106,-18205,27245,-19520,26319,-20788,25330,-22005,24279,-23170,23170,-24279,22005,-25330,20788,-26319,19520,-27245,18205,-28106,16846,-28898,15447,-29622,14010,-30273,12540,-30852,11039,-31357,9512,-31786,7962,-32138,6393,-32413,4808,-32610,3212,-32728,1608}; + +int16_t tw128b[128] __attribute__((aligned(32))) = 
{0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,-32767,0,-32728,-1608,-32610,-3212,-32413,-4808,-32138,-6393,-31786,-7962,-31357,-9512,-30852,-11039,-30273,-12540,-29622,-14010,-28898,-15447,-28106,-16846,-27245,-18205,-26319,-19520,-25330,-20788,-24279,-22005,-23170,-23170,-22005,-24279,-20788,-25330,-19520,-26319,-18205,-27245,-16846,-28106,-15447,-28898,-14010,-29622,-12540,-30273,-11039,-30852,-9512,-31357,-7962,-31786,-6393,-32138,-4808,-32413,-3212,-32610,-1608,-32728}; + +int16_t tw128c[128] __attribute__((aligned(32))) = {0,32767,1608,32727,3212,32609,4808,32412,6393,32137,7962,31785,9512,31356,11039,30851,12540,30272,14010,29621,15447,28897,16846,28105,18205,27244,19520,26318,20788,25329,22005,24278,23170,23169,24279,22004,25330,20787,26319,19519,27245,18204,28106,16845,28898,15446,29622,14009,30273,12539,30852,11038,31357,9511,31786,7961,32138,6392,32413,4807,32610,3211,32728,1607,32767,0,32728,-1608,32610,-3212,32413,-4808,32138,-6393,31786,-7962,31357,-9512,30852,-11039,30273,-12540,29622,-14010,28898,-15447,28106,-16846,27245,-18205,26319,-19520,25330,-20788,24279,-22005,23170,-23170,22005,-24279,20788,-25330,19520,-26319,18205,-27245,16846,-28106,15447,-28898,14010,-29622,12540,-30273,11039,-30852,9512,-31357,7962,-31786,6393,-32138,4808,-32413,3212,-32610,1608,-32728}; + +#ifndef __AVX2__ +void dft128(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[64],*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[32],*tw128a_128p=(simd_q15_t *)tw128a,*tw128b_128p=(simd_q15_t *)tw128b,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t 
ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + + transpose4_ooff(x64 ,xtmp,32); + transpose4_ooff(x64+2,xtmp+1,32); + transpose4_ooff(x64+4,xtmp+2,32); + transpose4_ooff(x64+6,xtmp+3,32); + transpose4_ooff(x64+8,xtmp+4,32); + transpose4_ooff(x64+10,xtmp+5,32); + transpose4_ooff(x64+12,xtmp+6,32); + transpose4_ooff(x64+14,xtmp+7,32); + transpose4_ooff(x64+16,xtmp+8,32); + transpose4_ooff(x64+18,xtmp+9,32); + transpose4_ooff(x64+20,xtmp+10,32); + transpose4_ooff(x64+22,xtmp+11,32); + transpose4_ooff(x64+24,xtmp+12,32); + transpose4_ooff(x64+26,xtmp+13,32); + transpose4_ooff(x64+28,xtmp+14,32); + transpose4_ooff(x64+30,xtmp+15,32); + transpose4_ooff(x64+32,xtmp+16,32); + transpose4_ooff(x64+34,xtmp+17,32); + transpose4_ooff(x64+36,xtmp+18,32); + transpose4_ooff(x64+38,xtmp+19,32); + transpose4_ooff(x64+40,xtmp+20,32); + transpose4_ooff(x64+42,xtmp+21,32); + transpose4_ooff(x64+44,xtmp+22,32); + transpose4_ooff(x64+46,xtmp+23,32); + transpose4_ooff(x64+48,xtmp+24,32); + transpose4_ooff(x64+50,xtmp+25,32); + transpose4_ooff(x64+52,xtmp+26,32); + transpose4_ooff(x64+54,xtmp+27,32); + transpose4_ooff(x64+56,xtmp+28,32); + transpose4_ooff(x64+58,xtmp+29,32); + transpose4_ooff(x64+60,xtmp+30,32); + transpose4_ooff(x64+62,xtmp+31,32); + + dft64((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1); + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft128a.m","dfta",ytmp,64,1,1); + LOG_M("dft128b.m","dftb",ytmp+16,64,1,1); + } + for (i=0; i<16; i++) { + bfly2_16(ytmpp,ytmpp+16, + y128p,y128p+16, + tw128a_128p, + tw128b_128p); + tw128a_128p++; + tw128b_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + y128[0] = mulhi_int16(y128[0],ONE_OVER_SQRT2_Q15_128); + y128[1] = mulhi_int16(y128[1],ONE_OVER_SQRT2_Q15_128); + y128[2] = mulhi_int16(y128[2],ONE_OVER_SQRT2_Q15_128); + y128[3] = mulhi_int16(y128[3],ONE_OVER_SQRT2_Q15_128); + y128[4] = mulhi_int16(y128[4],ONE_OVER_SQRT2_Q15_128); + y128[5] = 
mulhi_int16(y128[5],ONE_OVER_SQRT2_Q15_128); + y128[6] = mulhi_int16(y128[6],ONE_OVER_SQRT2_Q15_128); + y128[7] = mulhi_int16(y128[7],ONE_OVER_SQRT2_Q15_128); + y128[8] = mulhi_int16(y128[8],ONE_OVER_SQRT2_Q15_128); + y128[9] = mulhi_int16(y128[9],ONE_OVER_SQRT2_Q15_128); + y128[10] = mulhi_int16(y128[10],ONE_OVER_SQRT2_Q15_128); + y128[11] = mulhi_int16(y128[11],ONE_OVER_SQRT2_Q15_128); + y128[12] = mulhi_int16(y128[12],ONE_OVER_SQRT2_Q15_128); + y128[13] = mulhi_int16(y128[13],ONE_OVER_SQRT2_Q15_128); + y128[14] = mulhi_int16(y128[14],ONE_OVER_SQRT2_Q15_128); + y128[15] = mulhi_int16(y128[15],ONE_OVER_SQRT2_Q15_128); + y128[16] = mulhi_int16(y128[16],ONE_OVER_SQRT2_Q15_128); + y128[17] = mulhi_int16(y128[17],ONE_OVER_SQRT2_Q15_128); + y128[18] = mulhi_int16(y128[18],ONE_OVER_SQRT2_Q15_128); + y128[19] = mulhi_int16(y128[19],ONE_OVER_SQRT2_Q15_128); + y128[20] = mulhi_int16(y128[20],ONE_OVER_SQRT2_Q15_128); + y128[21] = mulhi_int16(y128[21],ONE_OVER_SQRT2_Q15_128); + y128[22] = mulhi_int16(y128[22],ONE_OVER_SQRT2_Q15_128); + y128[23] = mulhi_int16(y128[23],ONE_OVER_SQRT2_Q15_128); + y128[24] = mulhi_int16(y128[24],ONE_OVER_SQRT2_Q15_128); + y128[25] = mulhi_int16(y128[25],ONE_OVER_SQRT2_Q15_128); + y128[26] = mulhi_int16(y128[26],ONE_OVER_SQRT2_Q15_128); + y128[27] = mulhi_int16(y128[27],ONE_OVER_SQRT2_Q15_128); + y128[28] = mulhi_int16(y128[28],ONE_OVER_SQRT2_Q15_128); + y128[29] = mulhi_int16(y128[29],ONE_OVER_SQRT2_Q15_128); + y128[30] = mulhi_int16(y128[30],ONE_OVER_SQRT2_Q15_128); + y128[31] = mulhi_int16(y128[31],ONE_OVER_SQRT2_Q15_128); + + + } + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft128out.m","dft128",y,128,1,1); + exit(-1); + } + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ +void dft128(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[16],*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[16],*y256=(simd256_q15_t*)y; + simd256_q15_t *tw128a_256p=(simd256_q15_t *)tw128a,*tw128b_256p=(simd256_q15_t 
*)tw128b,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + transpose4_ooff_simd256(x256 ,xtmp,8); + transpose4_ooff_simd256(x256+2,xtmp+1,8); + transpose4_ooff_simd256(x256+4,xtmp+2,8); + transpose4_ooff_simd256(x256+6,xtmp+3,8); + transpose4_ooff_simd256(x256+8,xtmp+4,8); + transpose4_ooff_simd256(x256+10,xtmp+5,8); + transpose4_ooff_simd256(x256+12,xtmp+6,8); + transpose4_ooff_simd256(x256+14,xtmp+7,8); + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft128ina_256.m","dftina",xtmp,64,1,1); + LOG_M("dft128inb_256.m","dftinb",xtmp+8,64,1,1); + } + + dft64((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1); + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft128outa_256.m","dftouta",ytmp,64,1,1); + LOG_M("dft128outb_256.m","dftoutb",ytmp+8,64,1,1); + } + + for (i=0; i<8; i++) { + bfly2_16_256(ytmpp,ytmpp+8, + y256p,y256p+8, + tw128a_256p, + tw128b_256p); + tw128a_256p++; + tw128b_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + y256[0] = mulhi_int16_simd256(y256[0],ONE_OVER_SQRT2_Q15_256); + y256[1] = mulhi_int16_simd256(y256[1],ONE_OVER_SQRT2_Q15_256); + y256[2] = mulhi_int16_simd256(y256[2],ONE_OVER_SQRT2_Q15_256); + y256[3] = mulhi_int16_simd256(y256[3],ONE_OVER_SQRT2_Q15_256); + y256[4] = mulhi_int16_simd256(y256[4],ONE_OVER_SQRT2_Q15_256); + y256[5] = mulhi_int16_simd256(y256[5],ONE_OVER_SQRT2_Q15_256); + y256[6] = mulhi_int16_simd256(y256[6],ONE_OVER_SQRT2_Q15_256); + y256[7] = mulhi_int16_simd256(y256[7],ONE_OVER_SQRT2_Q15_256); + y256[8] = mulhi_int16_simd256(y256[8],ONE_OVER_SQRT2_Q15_256); + y256[9] = mulhi_int16_simd256(y256[9],ONE_OVER_SQRT2_Q15_256); + y256[10] = mulhi_int16_simd256(y256[10],ONE_OVER_SQRT2_Q15_256); + y256[11] = mulhi_int16_simd256(y256[11],ONE_OVER_SQRT2_Q15_256); + y256[12] = mulhi_int16_simd256(y256[12],ONE_OVER_SQRT2_Q15_256); + y256[13] = mulhi_int16_simd256(y256[13],ONE_OVER_SQRT2_Q15_256); + 
y256[14] = mulhi_int16_simd256(y256[14],ONE_OVER_SQRT2_Q15_256); + y256[15] = mulhi_int16_simd256(y256[15],ONE_OVER_SQRT2_Q15_256); + + } + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft128.m","dft",y256,128,1,1); + exit(-1); + } +} + +#endif + +#ifndef __AVX2__ +void idft128(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[64],*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[32],*tw128_128p=(simd_q15_t *)tw128,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + + transpose4_ooff(x64 ,xtmp,32); + transpose4_ooff(x64+2,xtmp+1,32); + transpose4_ooff(x64+4,xtmp+2,32); + transpose4_ooff(x64+6,xtmp+3,32); + transpose4_ooff(x64+8,xtmp+4,32); + transpose4_ooff(x64+10,xtmp+5,32); + transpose4_ooff(x64+12,xtmp+6,32); + transpose4_ooff(x64+14,xtmp+7,32); + transpose4_ooff(x64+16,xtmp+8,32); + transpose4_ooff(x64+18,xtmp+9,32); + transpose4_ooff(x64+20,xtmp+10,32); + transpose4_ooff(x64+22,xtmp+11,32); + transpose4_ooff(x64+24,xtmp+12,32); + transpose4_ooff(x64+26,xtmp+13,32); + transpose4_ooff(x64+28,xtmp+14,32); + transpose4_ooff(x64+30,xtmp+15,32); + transpose4_ooff(x64+32,xtmp+16,32); + transpose4_ooff(x64+34,xtmp+17,32); + transpose4_ooff(x64+36,xtmp+18,32); + transpose4_ooff(x64+38,xtmp+19,32); + transpose4_ooff(x64+40,xtmp+20,32); + transpose4_ooff(x64+42,xtmp+21,32); + transpose4_ooff(x64+44,xtmp+22,32); + transpose4_ooff(x64+46,xtmp+23,32); + transpose4_ooff(x64+48,xtmp+24,32); + transpose4_ooff(x64+50,xtmp+25,32); + transpose4_ooff(x64+52,xtmp+26,32); + transpose4_ooff(x64+54,xtmp+27,32); + transpose4_ooff(x64+56,xtmp+28,32); + transpose4_ooff(x64+58,xtmp+29,32); + transpose4_ooff(x64+60,xtmp+30,32); + transpose4_ooff(x64+62,xtmp+31,32); + + idft64((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+16),1); + + + for (i=0; i<16; i++) { + ibfly2(ytmpp,ytmpp+16, + y128p,y128p+16, + tw128_128p); + tw128_128p++; + 
y128p++; + ytmpp++; + } + + if (scale>0) { + + y128[0] = mulhi_int16(y128[0],ONE_OVER_SQRT2_Q15_128); + y128[1] = mulhi_int16(y128[1],ONE_OVER_SQRT2_Q15_128); + y128[2] = mulhi_int16(y128[2],ONE_OVER_SQRT2_Q15_128); + y128[3] = mulhi_int16(y128[3],ONE_OVER_SQRT2_Q15_128); + y128[4] = mulhi_int16(y128[4],ONE_OVER_SQRT2_Q15_128); + y128[5] = mulhi_int16(y128[5],ONE_OVER_SQRT2_Q15_128); + y128[6] = mulhi_int16(y128[6],ONE_OVER_SQRT2_Q15_128); + y128[7] = mulhi_int16(y128[7],ONE_OVER_SQRT2_Q15_128); + y128[8] = mulhi_int16(y128[8],ONE_OVER_SQRT2_Q15_128); + y128[9] = mulhi_int16(y128[9],ONE_OVER_SQRT2_Q15_128); + y128[10] = mulhi_int16(y128[10],ONE_OVER_SQRT2_Q15_128); + y128[11] = mulhi_int16(y128[11],ONE_OVER_SQRT2_Q15_128); + y128[12] = mulhi_int16(y128[12],ONE_OVER_SQRT2_Q15_128); + y128[13] = mulhi_int16(y128[13],ONE_OVER_SQRT2_Q15_128); + y128[14] = mulhi_int16(y128[14],ONE_OVER_SQRT2_Q15_128); + y128[15] = mulhi_int16(y128[15],ONE_OVER_SQRT2_Q15_128); + y128[16] = mulhi_int16(y128[16],ONE_OVER_SQRT2_Q15_128); + y128[17] = mulhi_int16(y128[17],ONE_OVER_SQRT2_Q15_128); + y128[18] = mulhi_int16(y128[18],ONE_OVER_SQRT2_Q15_128); + y128[19] = mulhi_int16(y128[19],ONE_OVER_SQRT2_Q15_128); + y128[20] = mulhi_int16(y128[20],ONE_OVER_SQRT2_Q15_128); + y128[21] = mulhi_int16(y128[21],ONE_OVER_SQRT2_Q15_128); + y128[22] = mulhi_int16(y128[22],ONE_OVER_SQRT2_Q15_128); + y128[23] = mulhi_int16(y128[23],ONE_OVER_SQRT2_Q15_128); + y128[24] = mulhi_int16(y128[24],ONE_OVER_SQRT2_Q15_128); + y128[25] = mulhi_int16(y128[25],ONE_OVER_SQRT2_Q15_128); + y128[26] = mulhi_int16(y128[26],ONE_OVER_SQRT2_Q15_128); + y128[27] = mulhi_int16(y128[27],ONE_OVER_SQRT2_Q15_128); + y128[28] = mulhi_int16(y128[28],ONE_OVER_SQRT2_Q15_128); + y128[29] = mulhi_int16(y128[29],ONE_OVER_SQRT2_Q15_128); + y128[30] = mulhi_int16(y128[30],ONE_OVER_SQRT2_Q15_128); + y128[31] = mulhi_int16(y128[31],ONE_OVER_SQRT2_Q15_128); + + } + + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ +void idft128(int16_t 
*x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[16],*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[16],*y256=(simd256_q15_t*)y; + simd256_q15_t *tw128_256p=(simd256_q15_t *)tw128,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + + transpose4_ooff_simd256(x256 ,xtmp,8); + transpose4_ooff_simd256(x256+2,xtmp+1,8); + transpose4_ooff_simd256(x256+4,xtmp+2,8); + transpose4_ooff_simd256(x256+6,xtmp+3,8); + transpose4_ooff_simd256(x256+8,xtmp+4,8); + transpose4_ooff_simd256(x256+10,xtmp+5,8); + transpose4_ooff_simd256(x256+12,xtmp+6,8); + transpose4_ooff_simd256(x256+14,xtmp+7,8); + + idft64((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1); + + + for (i=0; i<8; i++) { + ibfly2_256(ytmpp,ytmpp+8, + y256p,y256p+8, + tw128_256p); + tw128_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + y256[0] = mulhi_int16_simd256(y256[0],ONE_OVER_SQRT2_Q15_256); + y256[1] = mulhi_int16_simd256(y256[1],ONE_OVER_SQRT2_Q15_256); + y256[2] = mulhi_int16_simd256(y256[2],ONE_OVER_SQRT2_Q15_256); + y256[3] = mulhi_int16_simd256(y256[3],ONE_OVER_SQRT2_Q15_256); + y256[4] = mulhi_int16_simd256(y256[4],ONE_OVER_SQRT2_Q15_256); + y256[5] = mulhi_int16_simd256(y256[5],ONE_OVER_SQRT2_Q15_256); + y256[6] = mulhi_int16_simd256(y256[6],ONE_OVER_SQRT2_Q15_256); + y256[7] = mulhi_int16_simd256(y256[7],ONE_OVER_SQRT2_Q15_256); + y256[8] = mulhi_int16_simd256(y256[8],ONE_OVER_SQRT2_Q15_256); + y256[9] = mulhi_int16_simd256(y256[9],ONE_OVER_SQRT2_Q15_256); + y256[10] = mulhi_int16_simd256(y256[10],ONE_OVER_SQRT2_Q15_256); + y256[11] = mulhi_int16_simd256(y256[11],ONE_OVER_SQRT2_Q15_256); + y256[12] = mulhi_int16_simd256(y256[12],ONE_OVER_SQRT2_Q15_256); + y256[13] = mulhi_int16_simd256(y256[13],ONE_OVER_SQRT2_Q15_256); + y256[14] = mulhi_int16_simd256(y256[14],ONE_OVER_SQRT2_Q15_256); + y256[15] = 
mulhi_int16_simd256(y256[15],ONE_OVER_SQRT2_Q15_256); + + } + +} + +#endif + +int16_t tw256[384] __attribute__((aligned(32))) = { 32767,0,32757,-805,32727,-1608,32678,-2411,32609,-3212,32520,-4012,32412,-4808,32284,-5602,32137,-6393,31970,-7180,31785,-7962,31580,-8740,31356,-9512,31113,-10279,30851,-11039,30571,-11793,30272,-12540,29955,-13279,29621,-14010,29268,-14733,28897,-15447,28510,-16151,28105,-16846,27683,-17531,27244,-18205,26789,-18868,26318,-19520,25831,-20160,25329,-20788,24811,-21403,24278,-22005,23731,-22595,23169,-23170,22594,-23732,22004,-24279,21402,-24812,20787,-25330,20159,-25832,19519,-26319,18867,-26790,18204,-27245,17530,-27684,16845,-28106,16150,-28511,15446,-28898,14732,-29269,14009,-29622,13278,-29956,12539,-30273,11792,-30572,11038,-30852,10278,-31114,9511,-31357,8739,-31581,7961,-31786,7179,-31971,6392,-32138,5601,-32285,4807,-32413,4011,-32521,3211,-32610,2410,-32679,1607,-32728,804,-32758, + 32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608, + 
32767,0,32678,-2411,32412,-4808,31970,-7180,31356,-9512,30571,-11793,29621,-14010,28510,-16151,27244,-18205,25831,-20160,24278,-22005,22594,-23732,20787,-25330,18867,-26790,16845,-28106,14732,-29269,12539,-30273,10278,-31114,7961,-31786,5601,-32285,3211,-32610,804,-32758,-1608,-32728,-4012,-32521,-6393,-32138,-8740,-31581,-11039,-30852,-13279,-29956,-15447,-28898,-17531,-27684,-19520,-26319,-21403,-24812,-23170,-23170,-24812,-21403,-26319,-19520,-27684,-17531,-28898,-15447,-29956,-13279,-30852,-11039,-31581,-8740,-32138,-6393,-32521,-4012,-32728,-1608,-32758,804,-32610,3211,-32285,5601,-31786,7961,-31114,10278,-30273,12539,-29269,14732,-28106,16845,-26790,18867,-25330,20787,-23732,22594,-22005,24278,-20160,25831,-18205,27244,-16151,28510,-14010,29621,-11793,30571,-9512,31356,-7180,31970,-4808,32412,-2411,32678 + }; + +int16_t tw256a[384] __attribute__((aligned(32))) = { 32767,0,32757,804,32727,1607,32678,2410,32609,3211,32520,4011,32412,4807,32284,5601,32137,6392,31970,7179,31785,7961,31580,8739,31356,9511,31113,10278,30851,11038,30571,11792,30272,12539,29955,13278,29621,14009,29268,14732,28897,15446,28510,16150,28105,16845,27683,17530,27244,18204,26789,18867,26318,19519,25831,20159,25329,20787,24811,21402,24278,22004,23731,22594,23169,23169,22594,23731,22004,24278,21402,24811,20787,25329,20159,25831,19519,26318,18867,26789,18204,27244,17530,27683,16845,28105,16150,28510,15446,28897,14732,29268,14009,29621,13278,29955,12539,30272,11792,30571,11038,30851,10278,31113,9511,31356,8739,31580,7961,31785,7179,31970,6392,32137,5601,32284,4807,32412,4011,32520,3211,32609,2410,32678,1607,32727,804,32757, + 
32767,0,32727,1607,32609,3211,32412,4807,32137,6392,31785,7961,31356,9511,30851,11038,30272,12539,29621,14009,28897,15446,28105,16845,27244,18204,26318,19519,25329,20787,24278,22004,23169,23169,22004,24278,20787,25329,19519,26318,18204,27244,16845,28105,15446,28897,14009,29621,12539,30272,11038,30851,9511,31356,7961,31785,6392,32137,4807,32412,3211,32609,1607,32727,0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607, + 32767,0,32678,2410,32412,4807,31970,7179,31356,9511,30571,11792,29621,14009,28510,16150,27244,18204,25831,20159,24278,22004,22594,23731,20787,25329,18867,26789,16845,28105,14732,29268,12539,30272,10278,31113,7961,31785,5601,32284,3211,32609,804,32757,-1608,32727,-4012,32520,-6393,32137,-8740,31580,-11039,30851,-13279,29955,-15447,28897,-17531,27683,-19520,26318,-21403,24811,-23170,23169,-24812,21402,-26319,19519,-27684,17530,-28898,15446,-29956,13278,-30852,11038,-31581,8739,-32138,6392,-32521,4011,-32728,1607,-32758,-805,-32610,-3212,-32285,-5602,-31786,-7962,-31114,-10279,-30273,-12540,-29269,-14733,-28106,-16846,-26790,-18868,-25330,-20788,-23732,-22595,-22005,-24279,-20160,-25832,-18205,-27245,-16151,-28511,-14010,-29622,-11793,-30572,-9512,-31357,-7180,-31971,-4808,-32413,-2411,-32679 + }; + +int16_t tw256b[384] __attribute__((aligned(32))) = 
{0,32767,-805,32757,-1608,32727,-2411,32678,-3212,32609,-4012,32520,-4808,32412,-5602,32284,-6393,32137,-7180,31970,-7962,31785,-8740,31580,-9512,31356,-10279,31113,-11039,30851,-11793,30571,-12540,30272,-13279,29955,-14010,29621,-14733,29268,-15447,28897,-16151,28510,-16846,28105,-17531,27683,-18205,27244,-18868,26789,-19520,26318,-20160,25831,-20788,25329,-21403,24811,-22005,24278,-22595,23731,-23170,23169,-23732,22594,-24279,22004,-24812,21402,-25330,20787,-25832,20159,-26319,19519,-26790,18867,-27245,18204,-27684,17530,-28106,16845,-28511,16150,-28898,15446,-29269,14732,-29622,14009,-29956,13278,-30273,12539,-30572,11792,-30852,11038,-31114,10278,-31357,9511,-31581,8739,-31786,7961,-31971,7179,-32138,6392,-32285,5601,-32413,4807,-32521,4011,-32610,3211,-32679,2410,-32728,1607,-32758,804, + 0,32767,-1608,32727,-3212,32609,-4808,32412,-6393,32137,-7962,31785,-9512,31356,-11039,30851,-12540,30272,-14010,29621,-15447,28897,-16846,28105,-18205,27244,-19520,26318,-20788,25329,-22005,24278,-23170,23169,-24279,22004,-25330,20787,-26319,19519,-27245,18204,-28106,16845,-28898,15446,-29622,14009,-30273,12539,-30852,11038,-31357,9511,-31786,7961,-32138,6392,-32413,4807,-32610,3211,-32728,1607,-32767,0,-32728,-1608,-32610,-3212,-32413,-4808,-32138,-6393,-31786,-7962,-31357,-9512,-30852,-11039,-30273,-12540,-29622,-14010,-28898,-15447,-28106,-16846,-27245,-18205,-26319,-19520,-25330,-20788,-24279,-22005,-23170,-23170,-22005,-24279,-20788,-25330,-19520,-26319,-18205,-27245,-16846,-28106,-15447,-28898,-14010,-29622,-12540,-30273,-11039,-30852,-9512,-31357,-7962,-31786,-6393,-32138,-4808,-32413,-3212,-32610,-1608,-32728, + 
0,32767,-2411,32678,-4808,32412,-7180,31970,-9512,31356,-11793,30571,-14010,29621,-16151,28510,-18205,27244,-20160,25831,-22005,24278,-23732,22594,-25330,20787,-26790,18867,-28106,16845,-29269,14732,-30273,12539,-31114,10278,-31786,7961,-32285,5601,-32610,3211,-32758,804,-32728,-1608,-32521,-4012,-32138,-6393,-31581,-8740,-30852,-11039,-29956,-13279,-28898,-15447,-27684,-17531,-26319,-19520,-24812,-21403,-23170,-23170,-21403,-24812,-19520,-26319,-17531,-27684,-15447,-28898,-13279,-29956,-11039,-30852,-8740,-31581,-6393,-32138,-4012,-32521,-1608,-32728,804,-32758,3211,-32610,5601,-32285,7961,-31786,10278,-31114,12539,-30273,14732,-29269,16845,-28106,18867,-26790,20787,-25330,22594,-23732,24278,-22005,25831,-20160,27244,-18205,28510,-16151,29621,-14010,30571,-11793,31356,-9512,31970,-7180,32412,-4808,32678,-2411 + }; +#ifndef __AVX2__ +void dft256(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[64],ytmp[64],*tw256a_128p=(simd_q15_t *)tw256a,*tw256b_128p=(simd_q15_t *)tw256b,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + +#ifdef D256STATS + time_stats_t ts_t,ts_d,ts_b; + + reset_meas(&ts_t); + reset_meas(&ts_d); + reset_meas(&ts_b); + start_meas(&ts_t); +#endif + /* + for (i=0,j=0;i<64;i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,16); + } + */ + transpose16_ooff(x128+0,xtmp+0,16); + transpose16_ooff(x128+4,xtmp+1,16); + transpose16_ooff(x128+8,xtmp+2,16); + transpose16_ooff(x128+12,xtmp+3,16); + transpose16_ooff(x128+16,xtmp+4,16); + transpose16_ooff(x128+20,xtmp+5,16); + transpose16_ooff(x128+24,xtmp+6,16); + transpose16_ooff(x128+28,xtmp+7,16); + transpose16_ooff(x128+32,xtmp+8,16); + transpose16_ooff(x128+36,xtmp+9,16); + transpose16_ooff(x128+40,xtmp+10,16); + transpose16_ooff(x128+44,xtmp+11,16); + transpose16_ooff(x128+48,xtmp+12,16); + transpose16_ooff(x128+52,xtmp+13,16); + transpose16_ooff(x128+56,xtmp+14,16); + transpose16_ooff(x128+60,xtmp+15,16); + +#ifdef D256STATS + 
stop_meas(&ts_t); + start_meas(&ts_d); +#endif + + dft64((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1); + dft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + dft64((int16_t*)(xtmp+48),(int16_t*)(ytmp+48),1); + +#ifdef D256STATS + stop_meas(&ts_d); + start_meas(&ts_b); +#endif + + for (i=0; i<16; i+=4) { + bfly4_16(ytmpp,ytmpp+16,ytmpp+32,ytmpp+48, + y128p,y128p+16,y128p+32,y128p+48, + tw256a_128p,tw256a_128p+16,tw256a_128p+32, + tw256b_128p,tw256b_128p+16,tw256b_128p+32); + bfly4_16(ytmpp+1,ytmpp+17,ytmpp+33,ytmpp+49, + y128p+1,y128p+17,y128p+33,y128p+49, + tw256a_128p+1,tw256a_128p+17,tw256a_128p+33, + tw256b_128p+1,tw256b_128p+17,tw256b_128p+33); + bfly4_16(ytmpp+2,ytmpp+18,ytmpp+34,ytmpp+50, + y128p+2,y128p+18,y128p+34,y128p+50, + tw256a_128p+2,tw256a_128p+18,tw256a_128p+34, + tw256b_128p+2,tw256b_128p+18,tw256b_128p+34); + bfly4_16(ytmpp+3,ytmpp+19,ytmpp+35,ytmpp+51, + y128p+3,y128p+19,y128p+35,y128p+51, + tw256a_128p+3,tw256a_128p+19,tw256a_128p+35, + tw256b_128p+3,tw256b_128p+19,tw256b_128p+35); + tw256a_128p+=4; + tw256b_128p+=4; + y128p+=4; + ytmpp+=4; + } + +#ifdef D256STATS + stop_meas(&ts_b); + printf("t: %llu cycles, d: %llu cycles, b: %llu cycles\n",ts_t.diff,ts_d.diff,ts_b.diff); +#endif + + if (scale>0) { + + for (i=0; i<4; i++) { + y128[0] = shiftright_int16(y128[0],1); + y128[1] = shiftright_int16(y128[1],1); + y128[2] = shiftright_int16(y128[2],1); + y128[3] = shiftright_int16(y128[3],1); + y128[4] = shiftright_int16(y128[4],1); + y128[5] = shiftright_int16(y128[5],1); + y128[6] = shiftright_int16(y128[6],1); + y128[7] = shiftright_int16(y128[7],1); + y128[8] = shiftright_int16(y128[8],1); + y128[9] = shiftright_int16(y128[9],1); + y128[10] = shiftright_int16(y128[10],1); + y128[11] = shiftright_int16(y128[11],1); + y128[12] = shiftright_int16(y128[12],1); + y128[13] = shiftright_int16(y128[13],1); + y128[14] = shiftright_int16(y128[14],1); + y128[15] = shiftright_int16(y128[15],1); + + y128+=16; + } + + 
} + + _mm_empty(); + _m_empty(); + +} + + + +void idft256(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[64],ytmp[64],*tw256_128p=(simd_q15_t *)tw256,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<64; i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,16); + } + + + idft64((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1); + idft64((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + idft64((int16_t*)(xtmp+48),(int16_t*)(ytmp+48),1); + + for (i=0; i<16; i++) { + ibfly4(ytmpp,ytmpp+16,ytmpp+32,ytmpp+48, + y128p,y128p+16,y128p+32,y128p+48, + tw256_128p,tw256_128p+16,tw256_128p+32); + tw256_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<4; i++) { + y128[0] = shiftright_int16(y128[0],1); + y128[1] = shiftright_int16(y128[1],1); + y128[2] = shiftright_int16(y128[2],1); + y128[3] = shiftright_int16(y128[3],1); + y128[4] = shiftright_int16(y128[4],1); + y128[5] = shiftright_int16(y128[5],1); + y128[6] = shiftright_int16(y128[6],1); + y128[7] = shiftright_int16(y128[7],1); + y128[8] = shiftright_int16(y128[8],1); + y128[9] = shiftright_int16(y128[9],1); + y128[10] = shiftright_int16(y128[10],1); + y128[11] = shiftright_int16(y128[11],1); + y128[12] = shiftright_int16(y128[12],1); + y128[13] = shiftright_int16(y128[13],1); + y128[14] = shiftright_int16(y128[14],1); + y128[15] = shiftright_int16(y128[15],1); + + y128+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +#else //__AVX2__ + +void dft256(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[32],ytmp[32],*tw256a_256p=(simd256_q15_t *)tw256a,*tw256b_256p=(simd256_q15_t *)tw256b,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + + transpose16_ooff_simd256(x256+0,xtmp+0,8); + transpose16_ooff_simd256(x256+4,xtmp+1,8); + transpose16_ooff_simd256(x256+8,xtmp+2,8); + 
transpose16_ooff_simd256(x256+12,xtmp+3,8); + transpose16_ooff_simd256(x256+16,xtmp+4,8); + transpose16_ooff_simd256(x256+20,xtmp+5,8); + transpose16_ooff_simd256(x256+24,xtmp+6,8); + transpose16_ooff_simd256(x256+28,xtmp+7,8); + /* + char vname[10]; + for (i=0;i<32;i++) { + sprintf(vname,"xtmp%d",i); + print_shorts256(vname,(int16_t*)(xtmp+i)); + } + exit(-1);*/ + + dft64((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1); + dft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1); + dft64((int16_t*)(xtmp+24),(int16_t*)(ytmp+24),1); + + + bfly4_16_256(ytmpp,ytmpp+8,ytmpp+16,ytmpp+24, + y256p,y256p+8,y256p+16,y256p+24, + tw256a_256p,tw256a_256p+8,tw256a_256p+16, + tw256b_256p,tw256b_256p+8,tw256b_256p+16); + bfly4_16_256(ytmpp+1,ytmpp+9,ytmpp+17,ytmpp+25, + y256p+1,y256p+9,y256p+17,y256p+25, + tw256a_256p+1,tw256a_256p+9,tw256a_256p+17, + tw256b_256p+1,tw256b_256p+9,tw256b_256p+17); + bfly4_16_256(ytmpp+2,ytmpp+10,ytmpp+18,ytmpp+26, + y256p+2,y256p+10,y256p+18,y256p+26, + tw256a_256p+2,tw256a_256p+10,tw256a_256p+18, + tw256b_256p+2,tw256b_256p+10,tw256b_256p+18); + bfly4_16_256(ytmpp+3,ytmpp+11,ytmpp+19,ytmpp+27, + y256p+3,y256p+11,y256p+19,y256p+27, + tw256a_256p+3,tw256a_256p+11,tw256a_256p+19, + tw256b_256p+3,tw256b_256p+11,tw256b_256p+19); + bfly4_16_256(ytmpp+4,ytmpp+12,ytmpp+20,ytmpp+28, + y256p+4,y256p+12,y256p+20,y256p+28, + tw256a_256p+4,tw256a_256p+12,tw256a_256p+20, + tw256b_256p+4,tw256b_256p+12,tw256b_256p+20); + bfly4_16_256(ytmpp+5,ytmpp+13,ytmpp+21,ytmpp+29, + y256p+5,y256p+13,y256p+21,y256p+29, + tw256a_256p+5,tw256a_256p+13,tw256a_256p+21, + tw256b_256p+5,tw256b_256p+13,tw256b_256p+21); + bfly4_16_256(ytmpp+6,ytmpp+14,ytmpp+22,ytmpp+30, + y256p+6,y256p+14,y256p+22,y256p+30, + tw256a_256p+6,tw256a_256p+14,tw256a_256p+22, + tw256b_256p+6,tw256b_256p+14,tw256b_256p+22); + bfly4_16_256(ytmpp+7,ytmpp+15,ytmpp+23,ytmpp+31, + y256p+7,y256p+15,y256p+23,y256p+31, + tw256a_256p+7,tw256a_256p+15,tw256a_256p+23, + 
tw256b_256p+7,tw256b_256p+15,tw256b_256p+23); + + if (scale>0) { + + for (i=0; i<2; i++) { + y256[0] = shiftright_int16_simd256(y256[0],1); + y256[1] = shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +void idft256(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[32],ytmp[32],*tw256_256p=(simd256_q15_t *)tw256,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + + transpose16_ooff_simd256(x256+0,xtmp+0,8); + transpose16_ooff_simd256(x256+4,xtmp+1,8); + transpose16_ooff_simd256(x256+8,xtmp+2,8); + transpose16_ooff_simd256(x256+12,xtmp+3,8); + transpose16_ooff_simd256(x256+16,xtmp+4,8); + transpose16_ooff_simd256(x256+20,xtmp+5,8); + transpose16_ooff_simd256(x256+24,xtmp+6,8); + transpose16_ooff_simd256(x256+28,xtmp+7,8); + + idft64((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft64((int16_t*)(xtmp+8),(int16_t*)(ytmp+8),1); + idft64((int16_t*)(xtmp+16),(int16_t*)(ytmp+16),1); + idft64((int16_t*)(xtmp+24),(int16_t*)(ytmp+24),1); + + + ibfly4_256(ytmpp,ytmpp+8,ytmpp+16,ytmpp+24, + y256p,y256p+8,y256p+16,y256p+24, + tw256_256p,tw256_256p+8,tw256_256p+16); + + ibfly4_256(ytmpp+1,ytmpp+9,ytmpp+17,ytmpp+25, + y256p+1,y256p+9,y256p+17,y256p+25, + 
tw256_256p+1,tw256_256p+9,tw256_256p+17); + + ibfly4_256(ytmpp+2,ytmpp+10,ytmpp+18,ytmpp+26, + y256p+2,y256p+10,y256p+18,y256p+26, + tw256_256p+2,tw256_256p+10,tw256_256p+18); + + ibfly4_256(ytmpp+3,ytmpp+11,ytmpp+19,ytmpp+27, + y256p+3,y256p+11,y256p+19,y256p+27, + tw256_256p+3,tw256_256p+11,tw256_256p+19); + + ibfly4_256(ytmpp+4,ytmpp+12,ytmpp+20,ytmpp+28, + y256p+4,y256p+12,y256p+20,y256p+28, + tw256_256p+4,tw256_256p+12,tw256_256p+20); + + ibfly4_256(ytmpp+5,ytmpp+13,ytmpp+21,ytmpp+29, + y256p+5,y256p+13,y256p+21,y256p+29, + tw256_256p+5,tw256_256p+13,tw256_256p+21); + + ibfly4_256(ytmpp+6,ytmpp+14,ytmpp+22,ytmpp+30, + y256p+6,y256p+14,y256p+22,y256p+30, + tw256_256p+6,tw256_256p+14,tw256_256p+22); + + ibfly4_256(ytmpp+7,ytmpp+15,ytmpp+23,ytmpp+31, + y256p+7,y256p+15,y256p+23,y256p+31, + tw256_256p+7,tw256_256p+15,tw256_256p+23); + + + if (scale>0) { + + for (i=0; i<2; i++) { + y256[0] = shiftright_int16_simd256(y256[0],1); + y256[1] = shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +#endif +int16_t tw512[512] __attribute__((aligned(32))) = { + 
32767,0,32764,-403,32757,-805,32744,-1207,32727,-1608,32705,-2010,32678,-2411,32646,-2812,32609,-3212,32567,-3612,32520,-4012,32468,-4410,32412,-4808,32350,-5206,32284,-5602,32213,-5998,32137,-6393,32056,-6787,31970,-7180,31880,-7572,31785,-7962,31684,-8352,31580,-8740,31470,-9127,31356,-9512,31236,-9896,31113,-10279,30984,-10660,30851,-11039,30713,-11417,30571,-11793,30424,-12167,30272,-12540,30116,-12910,29955,-13279,29790,-13646,29621,-14010,29446,-14373,29268,-14733,29085,-15091,28897,-15447,28706,-15800,28510,-16151,28309,-16500,28105,-16846,27896,-17190,27683,-17531,27466,-17869,27244,-18205,27019,-18538,26789,-18868,26556,-19195,26318,-19520,26077,-19841,25831,-20160,25582,-20475,25329,-20788,25072,-21097,24811,-21403,24546,-21706,24278,-22005,24006,-22302,23731,-22595,23452,-22884,23169,-23170,22883,-23453,22594,-23732,22301,-24007,22004,-24279,21705,-24547,21402,-24812,21096,-25073,20787,-25330,20474,-25583,20159,-25832,19840,-26078,19519,-26319,19194,-26557,18867,-26790,18537,-27020,18204,-27245,17868,-27467,17530,-27684,17189,-27897,16845,-28106,16499,-28310,16150,-28511,15799,-28707,15446,-28898,15090,-29086,14732,-29269,14372,-29447,14009,-29622,13645,-29791,13278,-29956,12909,-30117,12539,-30273,12166,-30425,11792,-30572,11416,-30714,11038,-30852,10659,-30985,10278,-31114,9895,-31237,9511,-31357,9126,-31471,8739,-31581,8351,-31685,7961,-31786,7571,-31881,7179,-31971,6786,-32057,6392,-32138,5997,-32214,5601,-32285,5205,-32351,4807,-32413,4409,-32469,4011,-32521,3611,-32568,3211,-32610,2811,-32647,2410,-32679,2009,-32706,1607,-32728,1206,-32745,804,-32758,402,-32765,0,-32767,-403,-32765,-805,-32758,-1207,-32745,-1608,-32728,-2010,-32706,-2411,-32679,-2812,-32647,-3212,-32610,-3612,-32568,-4012,-32521,-4410,-32469,-4808,-32413,-5206,-32351,-5602,-32285,-5998,-32214,-6393,-32138,-6787,-32057,-7180,-31971,-7572,-31881,-7962,-31786,-8352,-31685,-8740,-31581,-9127,-31471,-9512,-31357,-9896,-31237,-10279,-31114,-10660,-30985,-11039,-30852,-11417,-30714,-11793,
-30572,-12167,-30425,-12540,-30273,-12910,-30117,-13279,-29956,-13646,-29791,-14010,-29622,-14373,-29447,-14733,-29269,-15091,-29086,-15447,-28898,-15800,-28707,-16151,-28511,-16500,-28310,-16846,-28106,-17190,-27897,-17531,-27684,-17869,-27467,-18205,-27245,-18538,-27020,-18868,-26790,-19195,-26557,-19520,-26319,-19841,-26078,-20160,-25832,-20475,-25583,-20788,-25330,-21097,-25073,-21403,-24812,-21706,-24547,-22005,-24279,-22302,-24007,-22595,-23732,-22884,-23453,-23170,-23170,-23453,-22884,-23732,-22595,-24007,-22302,-24279,-22005,-24547,-21706,-24812,-21403,-25073,-21097,-25330,-20788,-25583,-20475,-25832,-20160,-26078,-19841,-26319,-19520,-26557,-19195,-26790,-18868,-27020,-18538,-27245,-18205,-27467,-17869,-27684,-17531,-27897,-17190,-28106,-16846,-28310,-16500,-28511,-16151,-28707,-15800,-28898,-15447,-29086,-15091,-29269,-14733,-29447,-14373,-29622,-14010,-29791,-13646,-29956,-13279,-30117,-12910,-30273,-12540,-30425,-12167,-30572,-11793,-30714,-11417,-30852,-11039,-30985,-10660,-31114,-10279,-31237,-9896,-31357,-9512,-31471,-9127,-31581,-8740,-31685,-8352,-31786,-7962,-31881,-7572,-31971,-7180,-32057,-6787,-32138,-6393,-32214,-5998,-32285,-5602,-32351,-5206,-32413,-4808,-32469,-4410,-32521,-4012,-32568,-3612,-32610,-3212,-32647,-2812,-32679,-2411,-32706,-2010,-32728,-1608,-32745,-1207,-32758,-805,-32765,-403 +}; + +int16_t tw512a[512] __attribute__((aligned(32))) = { + 
32767,0,32764,403,32757,805,32744,1207,32727,1608,32705,2010,32678,2411,32646,2812,32609,3212,32567,3612,32520,4012,32468,4410,32412,4808,32350,5206,32284,5602,32213,5998,32137,6393,32056,6787,31970,7180,31880,7572,31785,7962,31684,8352,31580,8740,31470,9127,31356,9512,31236,9896,31113,10279,30984,10660,30851,11039,30713,11417,30571,11793,30424,12167,30272,12540,30116,12910,29955,13279,29790,13646,29621,14010,29446,14373,29268,14733,29085,15091,28897,15447,28706,15800,28510,16151,28309,16500,28105,16846,27896,17190,27683,17531,27466,17869,27244,18205,27019,18538,26789,18868,26556,19195,26318,19520,26077,19841,25831,20160,25582,20475,25329,20788,25072,21097,24811,21403,24546,21706,24278,22005,24006,22302,23731,22595,23452,22884,23169,23170,22883,23453,22594,23732,22301,24007,22004,24279,21705,24547,21402,24812,21096,25073,20787,25330,20474,25583,20159,25832,19840,26078,19519,26319,19194,26557,18867,26790,18537,27020,18204,27245,17868,27467,17530,27684,17189,27897,16845,28106,16499,28310,16150,28511,15799,28707,15446,28898,15090,29086,14732,29269,14372,29447,14009,29622,13645,29791,13278,29956,12909,30117,12539,30273,12166,30425,11792,30572,11416,30714,11038,30852,10659,30985,10278,31114,9895,31237,9511,31357,9126,31471,8739,31581,8351,31685,7961,31786,7571,31881,7179,31971,6786,32057,6392,32138,5997,32214,5601,32285,5205,32351,4807,32413,4409,32469,4011,32521,3611,32568,3211,32610,2811,32647,2410,32679,2009,32706,1607,32728,1206,32745,804,32758,402,32765,0,32767,-403,32765,-805,32758,-1207,32745,-1608,32728,-2010,32706,-2411,32679,-2812,32647,-3212,32610,-3612,32568,-4012,32521,-4410,32469,-4808,32413,-5206,32351,-5602,32285,-5998,32214,-6393,32138,-6787,32057,-7180,31971,-7572,31881,-7962,31786,-8352,31685,-8740,31581,-9127,31471,-9512,31357,-9896,31237,-10279,31114,-10660,30985,-11039,30852,-11417,30714,-11793,30572,-12167,30425,-12540,30273,-12910,30117,-13279,29956,-13646,29791,-14010,29622,-14373,29447,-14733,29269,-15091,29086,-15447,28898,-15800,28707,-16151,2
8511,-16500,28310,-16846,28106,-17190,27897,-17531,27684,-17869,27467,-18205,27245,-18538,27020,-18868,26790,-19195,26557,-19520,26319,-19841,26078,-20160,25832,-20475,25583,-20788,25330,-21097,25073,-21403,24812,-21706,24547,-22005,24279,-22302,24007,-22595,23732,-22884,23453,-23170,23170,-23453,22884,-23732,22595,-24007,22302,-24279,22005,-24547,21706,-24812,21403,-25073,21097,-25330,20788,-25583,20475,-25832,20160,-26078,19841,-26319,19520,-26557,19195,-26790,18868,-27020,18538,-27245,18205,-27467,17869,-27684,17531,-27897,17190,-28106,16846,-28310,16500,-28511,16151,-28707,15800,-28898,15447,-29086,15091,-29269,14733,-29447,14373,-29622,14010,-29791,13646,-29956,13279,-30117,12910,-30273,12540,-30425,12167,-30572,11793,-30714,11417,-30852,11039,-30985,10660,-31114,10279,-31237,9896,-31357,9512,-31471,9127,-31581,8740,-31685,8352,-31786,7962,-31881,7572,-31971,7180,-32057,6787,-32138,6393,-32214,5998,-32285,5602,-32351,5206,-32413,4808,-32469,4410,-32521,4012,-32568,3612,-32610,3212,-32647,2812,-32679,2411,-32706,2010,-32728,1608,-32745,1207,-32758,805,-32765,403 +}; + + + +int16_t tw512b[512] __attribute__((aligned(32))) = { + 
0,32767,-403,32764,-805,32757,-1207,32744,-1608,32727,-2010,32705,-2411,32678,-2812,32646,-3212,32609,-3612,32567,-4012,32520,-4410,32468,-4808,32412,-5206,32350,-5602,32284,-5998,32213,-6393,32137,-6787,32056,-7180,31970,-7572,31880,-7962,31785,-8352,31684,-8740,31580,-9127,31470,-9512,31356,-9896,31236,-10279,31113,-10660,30984,-11039,30851,-11417,30713,-11793,30571,-12167,30424,-12540,30272,-12910,30116,-13279,29955,-13646,29790,-14010,29621,-14373,29446,-14733,29268,-15091,29085,-15447,28897,-15800,28706,-16151,28510,-16500,28309,-16846,28105,-17190,27896,-17531,27683,-17869,27466,-18205,27244,-18538,27019,-18868,26789,-19195,26556,-19520,26318,-19841,26077,-20160,25831,-20475,25582,-20788,25329,-21097,25072,-21403,24811,-21706,24546,-22005,24278,-22302,24006,-22595,23731,-22884,23452,-23170,23169,-23453,22883,-23732,22594,-24007,22301,-24279,22004,-24547,21705,-24812,21402,-25073,21096,-25330,20787,-25583,20474,-25832,20159,-26078,19840,-26319,19519,-26557,19194,-26790,18867,-27020,18537,-27245,18204,-27467,17868,-27684,17530,-27897,17189,-28106,16845,-28310,16499,-28511,16150,-28707,15799,-28898,15446,-29086,15090,-29269,14732,-29447,14372,-29622,14009,-29791,13645,-29956,13278,-30117,12909,-30273,12539,-30425,12166,-30572,11792,-30714,11416,-30852,11038,-30985,10659,-31114,10278,-31237,9895,-31357,9511,-31471,9126,-31581,8739,-31685,8351,-31786,7961,-31881,7571,-31971,7179,-32057,6786,-32138,6392,-32214,5997,-32285,5601,-32351,5205,-32413,4807,-32469,4409,-32521,4011,-32568,3611,-32610,3211,-32647,2811,-32679,2410,-32706,2009,-32728,1607,-32745,1206,-32758,804,-32765,402,-32767,0,-32765,-403,-32758,-805,-32745,-1207,-32728,-1608,-32706,-2010,-32679,-2411,-32647,-2812,-32610,-3212,-32568,-3612,-32521,-4012,-32469,-4410,-32413,-4808,-32351,-5206,-32285,-5602,-32214,-5998,-32138,-6393,-32057,-6787,-31971,-7180,-31881,-7572,-31786,-7962,-31685,-8352,-31581,-8740,-31471,-9127,-31357,-9512,-31237,-9896,-31114,-10279,-30985,-10660,-30852,-11039,-30714,-11417,-30572,
-11793,-30425,-12167,-30273,-12540,-30117,-12910,-29956,-13279,-29791,-13646,-29622,-14010,-29447,-14373,-29269,-14733,-29086,-15091,-28898,-15447,-28707,-15800,-28511,-16151,-28310,-16500,-28106,-16846,-27897,-17190,-27684,-17531,-27467,-17869,-27245,-18205,-27020,-18538,-26790,-18868,-26557,-19195,-26319,-19520,-26078,-19841,-25832,-20160,-25583,-20475,-25330,-20788,-25073,-21097,-24812,-21403,-24547,-21706,-24279,-22005,-24007,-22302,-23732,-22595,-23453,-22884,-23170,-23170,-22884,-23453,-22595,-23732,-22302,-24007,-22005,-24279,-21706,-24547,-21403,-24812,-21097,-25073,-20788,-25330,-20475,-25583,-20160,-25832,-19841,-26078,-19520,-26319,-19195,-26557,-18868,-26790,-18538,-27020,-18205,-27245,-17869,-27467,-17531,-27684,-17190,-27897,-16846,-28106,-16500,-28310,-16151,-28511,-15800,-28707,-15447,-28898,-15091,-29086,-14733,-29269,-14373,-29447,-14010,-29622,-13646,-29791,-13279,-29956,-12910,-30117,-12540,-30273,-12167,-30425,-11793,-30572,-11417,-30714,-11039,-30852,-10660,-30985,-10279,-31114,-9896,-31237,-9512,-31357,-9127,-31471,-8740,-31581,-8352,-31685,-7962,-31786,-7572,-31881,-7180,-31971,-6787,-32057,-6393,-32138,-5998,-32214,-5602,-32285,-5206,-32351,-4808,-32413,-4410,-32469,-4012,-32521,-3612,-32568,-3212,-32610,-2812,-32647,-2411,-32679,-2010,-32706,-1608,-32728,-1207,-32745,-805,-32758,-403,-32765 +}; + +int16_t tw512c[512] __attribute__((aligned(32))) = { + 
0,32767,403,32764,805,32757,1207,32744,1608,32727,2010,32705,2411,32678,2812,32646,3212,32609,3612,32567,4012,32520,4410,32468,4808,32412,5206,32350,5602,32284,5998,32213,6393,32137,6787,32056,7180,31970,7572,31880,7962,31785,8352,31684,8740,31580,9127,31470,9512,31356,9896,31236,10279,31113,10660,30984,11039,30851,11417,30713,11793,30571,12167,30424,12540,30272,12910,30116,13279,29955,13646,29790,14010,29621,14373,29446,14733,29268,15091,29085,15447,28897,15800,28706,16151,28510,16500,28309,16846,28105,17190,27896,17531,27683,17869,27466,18205,27244,18538,27019,18868,26789,19195,26556,19520,26318,19841,26077,20160,25831,20475,25582,20788,25329,21097,25072,21403,24811,21706,24546,22005,24278,22302,24006,22595,23731,22884,23452,23170,23169,23453,22883,23732,22594,24007,22301,24279,22004,24547,21705,24812,21402,25073,21096,25330,20787,25583,20474,25832,20159,26078,19840,26319,19519,26557,19194,26790,18867,27020,18537,27245,18204,27467,17868,27684,17530,27897,17189,28106,16845,28310,16499,28511,16150,28707,15799,28898,15446,29086,15090,29269,14732,29447,14372,29622,14009,29791,13645,29956,13278,30117,12909,30273,12539,30425,12166,30572,11792,30714,11416,30852,11038,30985,10659,31114,10278,31237,9895,31357,9511,31471,9126,31581,8739,31685,8351,31786,7961,31881,7571,31971,7179,32057,6786,32138,6392,32214,5997,32285,5601,32351,5205,32413,4807,32469,4409,32521,4011,32568,3611,32610,3211,32647,2811,32679,2410,32706,2009,32728,1607,32745,1206,32758,804,32765,402,32767,0,32765,-403,32758,-805,32745,-1207,32728,-1608,32706,-2010,32679,-2411,32647,-2812,32610,-3212,32568,-3612,32521,-4012,32469,-4410,32413,-4808,32351,-5206,32285,-5602,32214,-5998,32138,-6393,32057,-6787,31971,-7180,31881,-7572,31786,-7962,31685,-8352,31581,-8740,31471,-9127,31357,-9512,31237,-9896,31114,-10279,30985,-10660,30852,-11039,30714,-11417,30572,-11793,30425,-12167,30273,-12540,30117,-12910,29956,-13279,29791,-13646,29622,-14010,29447,-14373,29269,-14733,29086,-15091,28898,-15447,28707,-15800,28511,-1
6151,28310,-16500,28106,-16846,27897,-17190,27684,-17531,27467,-17869,27245,-18205,27020,-18538,26790,-18868,26557,-19195,26319,-19520,26078,-19841,25832,-20160,25583,-20475,25330,-20788,25073,-21097,24812,-21403,24547,-21706,24279,-22005,24007,-22302,23732,-22595,23453,-22884,23170,-23170,22884,-23453,22595,-23732,22302,-24007,22005,-24279,21706,-24547,21403,-24812,21097,-25073,20788,-25330,20475,-25583,20160,-25832,19841,-26078,19520,-26319,19195,-26557,18868,-26790,18538,-27020,18205,-27245,17869,-27467,17531,-27684,17190,-27897,16846,-28106,16500,-28310,16151,-28511,15800,-28707,15447,-28898,15091,-29086,14733,-29269,14373,-29447,14010,-29622,13646,-29791,13279,-29956,12910,-30117,12540,-30273,12167,-30425,11793,-30572,11417,-30714,11039,-30852,10660,-30985,10279,-31114,9896,-31237,9512,-31357,9127,-31471,8740,-31581,8352,-31685,7962,-31786,7572,-31881,7180,-31971,6787,-32057,6393,-32138,5998,-32214,5602,-32285,5206,-32351,4808,-32413,4410,-32469,4012,-32521,3612,-32568,3212,-32610,2812,-32647,2411,-32679,2010,-32706,1608,-32728,1207,-32745,805,-32758,403,-32765 +}; + +#ifndef __AVX2__ +void dft512(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[256],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[128],*tw512a_128p=(simd_q15_t *)tw512a,*tw512b_128p=(simd_q15_t *)tw512b,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<4; i++) { + transpose4_ooff(x64 ,xtmpp,128); + transpose4_ooff(x64+2,xtmpp+1,128); + transpose4_ooff(x64+4,xtmpp+2,128); + transpose4_ooff(x64+6,xtmpp+3,128); + transpose4_ooff(x64+8,xtmpp+4,128); + transpose4_ooff(x64+10,xtmpp+5,128); + transpose4_ooff(x64+12,xtmpp+6,128); + transpose4_ooff(x64+14,xtmpp+7,128); + transpose4_ooff(x64+16,xtmpp+8,128); + transpose4_ooff(x64+18,xtmpp+9,128); + transpose4_ooff(x64+20,xtmpp+10,128); + transpose4_ooff(x64+22,xtmpp+11,128); + 
transpose4_ooff(x64+24,xtmpp+12,128); + transpose4_ooff(x64+26,xtmpp+13,128); + transpose4_ooff(x64+28,xtmpp+14,128); + transpose4_ooff(x64+30,xtmpp+15,128); + transpose4_ooff(x64+32,xtmpp+16,128); + transpose4_ooff(x64+34,xtmpp+17,128); + transpose4_ooff(x64+36,xtmpp+18,128); + transpose4_ooff(x64+38,xtmpp+19,128); + transpose4_ooff(x64+40,xtmpp+20,128); + transpose4_ooff(x64+42,xtmpp+21,128); + transpose4_ooff(x64+44,xtmpp+22,128); + transpose4_ooff(x64+46,xtmpp+23,128); + transpose4_ooff(x64+48,xtmpp+24,128); + transpose4_ooff(x64+50,xtmpp+25,128); + transpose4_ooff(x64+52,xtmpp+26,128); + transpose4_ooff(x64+54,xtmpp+27,128); + transpose4_ooff(x64+56,xtmpp+28,128); + transpose4_ooff(x64+58,xtmpp+29,128); + transpose4_ooff(x64+60,xtmpp+30,128); + transpose4_ooff(x64+62,xtmpp+31,128); + x64+=64; + xtmpp+=32; + } + + dft256((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+64),1); + + + for (i=0; i<64; i+=8) { + bfly2_16(ytmpp,ytmpp+64, + y128p,y128p+64, + tw512a_128p, + tw512b_128p); + bfly2_16(ytmpp+1,ytmpp+65, + y128p+1,y128p+65, + tw512a_128p+1, + tw512b_128p+1); + bfly2_16(ytmpp+2,ytmpp+66, + y128p+2,y128p+66, + tw512a_128p+2, + tw512b_128p+2); + bfly2_16(ytmpp+3,ytmpp+67, + y128p+3,y128p+67, + tw512a_128p+3, + tw512b_128p+3); + bfly2_16(ytmpp+4,ytmpp+68, + y128p+4,y128p+68, + tw512a_128p+4, + tw512b_128p+4); + bfly2_16(ytmpp+5,ytmpp+69, + y128p+5,y128p+69, + tw512a_128p+5, + tw512b_128p+5); + bfly2_16(ytmpp+6,ytmpp+70, + y128p+6,y128p+70, + tw512a_128p+6, + tw512b_128p+6); + bfly2_16(ytmpp+7,ytmpp+71, + y128p+7,y128p+71, + tw512a_128p+7, + tw512b_128p+7); + tw512a_128p+=8; + tw512b_128p+=8; + y128p+=8; + ytmpp+=8; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<8; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = 
mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +void idft512(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[256],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[128],*tw512_128p=(simd_q15_t *)tw512,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<4; i++) { + transpose4_ooff(x64 ,xtmpp,128); + transpose4_ooff(x64+2,xtmpp+1,128); + transpose4_ooff(x64+4,xtmpp+2,128); + transpose4_ooff(x64+6,xtmpp+3,128); + transpose4_ooff(x64+8,xtmpp+4,128); + transpose4_ooff(x64+10,xtmpp+5,128); + transpose4_ooff(x64+12,xtmpp+6,128); + transpose4_ooff(x64+14,xtmpp+7,128); + transpose4_ooff(x64+16,xtmpp+8,128); + transpose4_ooff(x64+18,xtmpp+9,128); + transpose4_ooff(x64+20,xtmpp+10,128); + transpose4_ooff(x64+22,xtmpp+11,128); + transpose4_ooff(x64+24,xtmpp+12,128); + transpose4_ooff(x64+26,xtmpp+13,128); + transpose4_ooff(x64+28,xtmpp+14,128); + transpose4_ooff(x64+30,xtmpp+15,128); + transpose4_ooff(x64+32,xtmpp+16,128); + transpose4_ooff(x64+34,xtmpp+17,128); + transpose4_ooff(x64+36,xtmpp+18,128); + transpose4_ooff(x64+38,xtmpp+19,128); + transpose4_ooff(x64+40,xtmpp+20,128); + 
transpose4_ooff(x64+42,xtmpp+21,128); + transpose4_ooff(x64+44,xtmpp+22,128); + transpose4_ooff(x64+46,xtmpp+23,128); + transpose4_ooff(x64+48,xtmpp+24,128); + transpose4_ooff(x64+50,xtmpp+25,128); + transpose4_ooff(x64+52,xtmpp+26,128); + transpose4_ooff(x64+54,xtmpp+27,128); + transpose4_ooff(x64+56,xtmpp+28,128); + transpose4_ooff(x64+58,xtmpp+29,128); + transpose4_ooff(x64+60,xtmpp+30,128); + transpose4_ooff(x64+62,xtmpp+31,128); + x64+=64; + xtmpp+=32; + } + + idft256((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+64),1); + + + for (i=0; i<64; i++) { + ibfly2(ytmpp,ytmpp+64, + y128p,y128p+64, + tw512_128p); + tw512_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<8; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +#else //__AVX2__ + +void dft512(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[64],*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[64],*y256=(simd256_q15_t*)y; + simd256_q15_t 
*tw512_256p=(simd256_q15_t*)tw512,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + + transpose4_ooff_simd256(x256 ,xtmp,32); + transpose4_ooff_simd256(x256+2,xtmp+1,32); + transpose4_ooff_simd256(x256+4,xtmp+2,32); + transpose4_ooff_simd256(x256+6,xtmp+3,32); + transpose4_ooff_simd256(x256+8,xtmp+4,32); + transpose4_ooff_simd256(x256+10,xtmp+5,32); + transpose4_ooff_simd256(x256+12,xtmp+6,32); + transpose4_ooff_simd256(x256+14,xtmp+7,32); + transpose4_ooff_simd256(x256+16,xtmp+8,32); + transpose4_ooff_simd256(x256+18,xtmp+9,32); + transpose4_ooff_simd256(x256+20,xtmp+10,32); + transpose4_ooff_simd256(x256+22,xtmp+11,32); + transpose4_ooff_simd256(x256+24,xtmp+12,32); + transpose4_ooff_simd256(x256+26,xtmp+13,32); + transpose4_ooff_simd256(x256+28,xtmp+14,32); + transpose4_ooff_simd256(x256+30,xtmp+15,32); + transpose4_ooff_simd256(x256+32,xtmp+16,32); + transpose4_ooff_simd256(x256+34,xtmp+17,32); + transpose4_ooff_simd256(x256+36,xtmp+18,32); + transpose4_ooff_simd256(x256+38,xtmp+19,32); + transpose4_ooff_simd256(x256+40,xtmp+20,32); + transpose4_ooff_simd256(x256+42,xtmp+21,32); + transpose4_ooff_simd256(x256+44,xtmp+22,32); + transpose4_ooff_simd256(x256+46,xtmp+23,32); + transpose4_ooff_simd256(x256+48,xtmp+24,32); + transpose4_ooff_simd256(x256+50,xtmp+25,32); + transpose4_ooff_simd256(x256+52,xtmp+26,32); + transpose4_ooff_simd256(x256+54,xtmp+27,32); + transpose4_ooff_simd256(x256+56,xtmp+28,32); + transpose4_ooff_simd256(x256+58,xtmp+29,32); + transpose4_ooff_simd256(x256+60,xtmp+30,32); + transpose4_ooff_simd256(x256+62,xtmp+31,32); + + dft256((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + + + for (i=0; i<32; i++) { + bfly2_256(ytmpp,ytmpp+32, + y256p,y256p+32, + tw512_256p); + tw512_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0;i<4;i++) { + y256[0] = 
mulhi_int16_simd256(y256[0],ONE_OVER_SQRT2_Q15_256); + y256[1] = mulhi_int16_simd256(y256[1],ONE_OVER_SQRT2_Q15_256); + y256[2] = mulhi_int16_simd256(y256[2],ONE_OVER_SQRT2_Q15_256); + y256[3] = mulhi_int16_simd256(y256[3],ONE_OVER_SQRT2_Q15_256); + y256[4] = mulhi_int16_simd256(y256[4],ONE_OVER_SQRT2_Q15_256); + y256[5] = mulhi_int16_simd256(y256[5],ONE_OVER_SQRT2_Q15_256); + y256[6] = mulhi_int16_simd256(y256[6],ONE_OVER_SQRT2_Q15_256); + y256[7] = mulhi_int16_simd256(y256[7],ONE_OVER_SQRT2_Q15_256); + y256[8] = mulhi_int16_simd256(y256[8],ONE_OVER_SQRT2_Q15_256); + y256[9] = mulhi_int16_simd256(y256[9],ONE_OVER_SQRT2_Q15_256); + y256[10] = mulhi_int16_simd256(y256[10],ONE_OVER_SQRT2_Q15_256); + y256[11] = mulhi_int16_simd256(y256[11],ONE_OVER_SQRT2_Q15_256); + y256[12] = mulhi_int16_simd256(y256[12],ONE_OVER_SQRT2_Q15_256); + y256[13] = mulhi_int16_simd256(y256[13],ONE_OVER_SQRT2_Q15_256); + y256[14] = mulhi_int16_simd256(y256[14],ONE_OVER_SQRT2_Q15_256); + y256[15] = mulhi_int16_simd256(y256[15],ONE_OVER_SQRT2_Q15_256); + y256+=16; + } + } + +} + +void idft512(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[64],*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[64],*y256=(simd256_q15_t*)y; + simd256_q15_t *tw512_256p=(simd256_q15_t *)tw512,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_256 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + + transpose4_ooff_simd256(x256 ,xtmp,32); + transpose4_ooff_simd256(x256+2,xtmp+1,32); + transpose4_ooff_simd256(x256+4,xtmp+2,32); + transpose4_ooff_simd256(x256+6,xtmp+3,32); + transpose4_ooff_simd256(x256+8,xtmp+4,32); + transpose4_ooff_simd256(x256+10,xtmp+5,32); + transpose4_ooff_simd256(x256+12,xtmp+6,32); + transpose4_ooff_simd256(x256+14,xtmp+7,32); + transpose4_ooff_simd256(x256+16,xtmp+8,32); + transpose4_ooff_simd256(x256+18,xtmp+9,32); + transpose4_ooff_simd256(x256+20,xtmp+10,32); + transpose4_ooff_simd256(x256+22,xtmp+11,32); + 
transpose4_ooff_simd256(x256+24,xtmp+12,32); + transpose4_ooff_simd256(x256+26,xtmp+13,32); + transpose4_ooff_simd256(x256+28,xtmp+14,32); + transpose4_ooff_simd256(x256+30,xtmp+15,32); + transpose4_ooff_simd256(x256+32,xtmp+16,32); + transpose4_ooff_simd256(x256+34,xtmp+17,32); + transpose4_ooff_simd256(x256+36,xtmp+18,32); + transpose4_ooff_simd256(x256+38,xtmp+19,32); + transpose4_ooff_simd256(x256+40,xtmp+20,32); + transpose4_ooff_simd256(x256+42,xtmp+21,32); + transpose4_ooff_simd256(x256+44,xtmp+22,32); + transpose4_ooff_simd256(x256+46,xtmp+23,32); + transpose4_ooff_simd256(x256+48,xtmp+24,32); + transpose4_ooff_simd256(x256+50,xtmp+25,32); + transpose4_ooff_simd256(x256+52,xtmp+26,32); + transpose4_ooff_simd256(x256+54,xtmp+27,32); + transpose4_ooff_simd256(x256+56,xtmp+28,32); + transpose4_ooff_simd256(x256+58,xtmp+29,32); + transpose4_ooff_simd256(x256+60,xtmp+30,32); + transpose4_ooff_simd256(x256+62,xtmp+31,32); + + idft256((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + + + for (i=0; i<32; i++) { + ibfly2_256(ytmpp,ytmpp+32, + y256p,y256p+32, + tw512_256p); + tw512_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0;i<4;i++) { + y256[0] = mulhi_int16_simd256(y256[0],ONE_OVER_SQRT2_Q15_256); + y256[1] = mulhi_int16_simd256(y256[1],ONE_OVER_SQRT2_Q15_256); + y256[2] = mulhi_int16_simd256(y256[2],ONE_OVER_SQRT2_Q15_256); + y256[3] = mulhi_int16_simd256(y256[3],ONE_OVER_SQRT2_Q15_256); + y256[4] = mulhi_int16_simd256(y256[4],ONE_OVER_SQRT2_Q15_256); + y256[5] = mulhi_int16_simd256(y256[5],ONE_OVER_SQRT2_Q15_256); + y256[6] = mulhi_int16_simd256(y256[6],ONE_OVER_SQRT2_Q15_256); + y256[7] = mulhi_int16_simd256(y256[7],ONE_OVER_SQRT2_Q15_256); + y256[8] = mulhi_int16_simd256(y256[8],ONE_OVER_SQRT2_Q15_256); + y256[9] = mulhi_int16_simd256(y256[9],ONE_OVER_SQRT2_Q15_256); + y256[10] = mulhi_int16_simd256(y256[10],ONE_OVER_SQRT2_Q15_256); + y256[11] = mulhi_int16_simd256(y256[11],ONE_OVER_SQRT2_Q15_256); 
+ y256[12] = mulhi_int16_simd256(y256[12],ONE_OVER_SQRT2_Q15_256); + y256[13] = mulhi_int16_simd256(y256[13],ONE_OVER_SQRT2_Q15_256); + y256[14] = mulhi_int16_simd256(y256[14],ONE_OVER_SQRT2_Q15_256); + y256[15] = mulhi_int16_simd256(y256[15],ONE_OVER_SQRT2_Q15_256); + y256+=16; + } + } + +} + +#endif + +int16_t tw1024[1536] __attribute__((aligned(32))); + +#ifndef __AVX2__ +void dft1024(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<256; i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,64); + } + + + dft256((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1); + dft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + dft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1); + + for (i=0; i<64; i++) { + bfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192, + y128p,y128p+64,y128p+128,y128p+192, + tw1024_128p,tw1024_128p+64,tw1024_128p+128); + tw1024_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<16; i++) { + y128[0] = shiftright_int16(y128[0],1); + y128[1] = shiftright_int16(y128[1],1); + y128[2] = shiftright_int16(y128[2],1); + y128[3] = shiftright_int16(y128[3],1); + y128[4] = shiftright_int16(y128[4],1); + y128[5] = shiftright_int16(y128[5],1); + y128[6] = shiftright_int16(y128[6],1); + y128[7] = shiftright_int16(y128[7],1); + y128[8] = shiftright_int16(y128[8],1); + y128[9] = shiftright_int16(y128[9],1); + y128[10] = shiftright_int16(y128[10],1); + y128[11] = shiftright_int16(y128[11],1); + y128[12] = shiftright_int16(y128[12],1); + y128[13] = shiftright_int16(y128[13],1); + y128[14] = shiftright_int16(y128[14],1); + y128[15] = shiftright_int16(y128[15],1); + + y128+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +void idft1024(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t 
xtmp[256],ytmp[256],*tw1024_128p=(simd_q15_t *)tw1024,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<256; i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,64); + } + + + idft256((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1); + idft256((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + idft256((int16_t*)(xtmp+192),(int16_t*)(ytmp+192),1); + + for (i=0; i<64; i++) { + ibfly4(ytmpp,ytmpp+64,ytmpp+128,ytmpp+192, + y128p,y128p+64,y128p+128,y128p+192, + tw1024_128p,tw1024_128p+64,tw1024_128p+128); + tw1024_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<16; i++) { + y128[0] = shiftright_int16(y128[0],1); + y128[1] = shiftright_int16(y128[1],1); + y128[2] = shiftright_int16(y128[2],1); + y128[3] = shiftright_int16(y128[3],1); + y128[4] = shiftright_int16(y128[4],1); + y128[5] = shiftright_int16(y128[5],1); + y128[6] = shiftright_int16(y128[6],1); + y128[7] = shiftright_int16(y128[7],1); + y128[8] = shiftright_int16(y128[8],1); + y128[9] = shiftright_int16(y128[9],1); + y128[10] = shiftright_int16(y128[10],1); + y128[11] = shiftright_int16(y128[11],1); + y128[12] = shiftright_int16(y128[12],1); + y128[13] = shiftright_int16(y128[13],1); + y128[14] = shiftright_int16(y128[14],1); + y128[15] = shiftright_int16(y128[15],1); + + y128+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +#else //__AVX2__ +void dft1024(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[128],ytmp[128],*tw1024_256p=(simd256_q15_t *)tw1024,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<128; i+=4,j++) { + transpose16_ooff_simd256(x256+i,xtmp+j,32); + } + + + dft256((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + dft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1); + 
dft256((int16_t*)(xtmp+96),(int16_t*)(ytmp+96),1); + + for (i=0; i<32; i++) { + bfly4_256(ytmpp,ytmpp+32,ytmpp+64,ytmpp+96, + y256p,y256p+32,y256p+64,y256p+96, + tw1024_256p,tw1024_256p+32,tw1024_256p+64); + tw1024_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<8; i++) { + y256[0] = shiftright_int16_simd256(y256[0],1); + y256[1] = shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +void idft1024(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[128],ytmp[128],*tw1024_256p=(simd256_q15_t *)tw1024,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<128; i+=4,j++) { + transpose16_ooff_simd256(x256+i,xtmp+j,32); + } + + + idft256((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft256((int16_t*)(xtmp+32),(int16_t*)(ytmp+32),1); + idft256((int16_t*)(xtmp+64),(int16_t*)(ytmp+64),1); + idft256((int16_t*)(xtmp+96),(int16_t*)(ytmp+96),1); + + for (i=0; i<32; i++) { + ibfly4_256(ytmpp,ytmpp+32,ytmpp+64,ytmpp+96, + y256p,y256p+32,y256p+64,y256p+96, + tw1024_256p,tw1024_256p+32,tw1024_256p+64); + tw1024_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<8; i++) { + y256[0] = 
shiftright_int16_simd256(y256[0],1); + y256[1] = shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} +#endif + +int16_t tw2048[2048] __attribute__((aligned(32))); + +#ifndef __AVX2__ +void dft2048(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<16; i++) { + transpose4_ooff(x64 ,xtmpp,512); + transpose4_ooff(x64+2,xtmpp+1,512); + transpose4_ooff(x64+4,xtmpp+2,512); + transpose4_ooff(x64+6,xtmpp+3,512); + transpose4_ooff(x64+8,xtmpp+4,512); + transpose4_ooff(x64+10,xtmpp+5,512); + transpose4_ooff(x64+12,xtmpp+6,512); + transpose4_ooff(x64+14,xtmpp+7,512); + transpose4_ooff(x64+16,xtmpp+8,512); + transpose4_ooff(x64+18,xtmpp+9,512); + transpose4_ooff(x64+20,xtmpp+10,512); + transpose4_ooff(x64+22,xtmpp+11,512); + transpose4_ooff(x64+24,xtmpp+12,512); + transpose4_ooff(x64+26,xtmpp+13,512); + transpose4_ooff(x64+28,xtmpp+14,512); + transpose4_ooff(x64+30,xtmpp+15,512); + transpose4_ooff(x64+32,xtmpp+16,512); + 
transpose4_ooff(x64+34,xtmpp+17,512); + transpose4_ooff(x64+36,xtmpp+18,512); + transpose4_ooff(x64+38,xtmpp+19,512); + transpose4_ooff(x64+40,xtmpp+20,512); + transpose4_ooff(x64+42,xtmpp+21,512); + transpose4_ooff(x64+44,xtmpp+22,512); + transpose4_ooff(x64+46,xtmpp+23,512); + transpose4_ooff(x64+48,xtmpp+24,512); + transpose4_ooff(x64+50,xtmpp+25,512); + transpose4_ooff(x64+52,xtmpp+26,512); + transpose4_ooff(x64+54,xtmpp+27,512); + transpose4_ooff(x64+56,xtmpp+28,512); + transpose4_ooff(x64+58,xtmpp+29,512); + transpose4_ooff(x64+60,xtmpp+30,512); + transpose4_ooff(x64+62,xtmpp+31,512); + x64+=64; + xtmpp+=32; + } + + dft1024((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1); + + + for (i=0; i<256; i++) { + bfly2(ytmpp,ytmpp+256, + y128p,y128p+256, + tw2048_128p); + tw2048_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<32; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + + +} + +void idft2048(int16_t *x,int16_t *y,unsigned char scale) 
+{ + + simdshort_q15_t xtmp[1024],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[512],*tw2048_128p=(simd_q15_t *)tw2048,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<16; i++) { + transpose4_ooff(x64 ,xtmpp,512); + transpose4_ooff(x64+2,xtmpp+1,512); + transpose4_ooff(x64+4,xtmpp+2,512); + transpose4_ooff(x64+6,xtmpp+3,512); + transpose4_ooff(x64+8,xtmpp+4,512); + transpose4_ooff(x64+10,xtmpp+5,512); + transpose4_ooff(x64+12,xtmpp+6,512); + transpose4_ooff(x64+14,xtmpp+7,512); + transpose4_ooff(x64+16,xtmpp+8,512); + transpose4_ooff(x64+18,xtmpp+9,512); + transpose4_ooff(x64+20,xtmpp+10,512); + transpose4_ooff(x64+22,xtmpp+11,512); + transpose4_ooff(x64+24,xtmpp+12,512); + transpose4_ooff(x64+26,xtmpp+13,512); + transpose4_ooff(x64+28,xtmpp+14,512); + transpose4_ooff(x64+30,xtmpp+15,512); + transpose4_ooff(x64+32,xtmpp+16,512); + transpose4_ooff(x64+34,xtmpp+17,512); + transpose4_ooff(x64+36,xtmpp+18,512); + transpose4_ooff(x64+38,xtmpp+19,512); + transpose4_ooff(x64+40,xtmpp+20,512); + transpose4_ooff(x64+42,xtmpp+21,512); + transpose4_ooff(x64+44,xtmpp+22,512); + transpose4_ooff(x64+46,xtmpp+23,512); + transpose4_ooff(x64+48,xtmpp+24,512); + transpose4_ooff(x64+50,xtmpp+25,512); + transpose4_ooff(x64+52,xtmpp+26,512); + transpose4_ooff(x64+54,xtmpp+27,512); + transpose4_ooff(x64+56,xtmpp+28,512); + transpose4_ooff(x64+58,xtmpp+29,512); + transpose4_ooff(x64+60,xtmpp+30,512); + transpose4_ooff(x64+62,xtmpp+31,512); + x64+=64; + xtmpp+=32; + } + + idft1024((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+256),1); + + + for (i=0; i<256; i++) { + ibfly2(ytmpp,ytmpp+256, + y128p,y128p+256, + tw2048_128p); + tw2048_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<32; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = 
mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ + +void dft2048(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[256],*xtmpp,*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[256],*tw2048_256p=(simd256_q15_t *)tw2048,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + + xtmpp = xtmp; + + for (i=0; i<4; i++) { + transpose4_ooff_simd256(x256 ,xtmpp,128); + transpose4_ooff_simd256(x256+2,xtmpp+1,128); + transpose4_ooff_simd256(x256+4,xtmpp+2,128); + transpose4_ooff_simd256(x256+6,xtmpp+3,128); + transpose4_ooff_simd256(x256+8,xtmpp+4,128); + transpose4_ooff_simd256(x256+10,xtmpp+5,128); + transpose4_ooff_simd256(x256+12,xtmpp+6,128); + transpose4_ooff_simd256(x256+14,xtmpp+7,128); + transpose4_ooff_simd256(x256+16,xtmpp+8,128); + transpose4_ooff_simd256(x256+18,xtmpp+9,128); + transpose4_ooff_simd256(x256+20,xtmpp+10,128); + transpose4_ooff_simd256(x256+22,xtmpp+11,128); + transpose4_ooff_simd256(x256+24,xtmpp+12,128); + 
transpose4_ooff_simd256(x256+26,xtmpp+13,128); + transpose4_ooff_simd256(x256+28,xtmpp+14,128); + transpose4_ooff_simd256(x256+30,xtmpp+15,128); + transpose4_ooff_simd256(x256+32,xtmpp+16,128); + transpose4_ooff_simd256(x256+34,xtmpp+17,128); + transpose4_ooff_simd256(x256+36,xtmpp+18,128); + transpose4_ooff_simd256(x256+38,xtmpp+19,128); + transpose4_ooff_simd256(x256+40,xtmpp+20,128); + transpose4_ooff_simd256(x256+42,xtmpp+21,128); + transpose4_ooff_simd256(x256+44,xtmpp+22,128); + transpose4_ooff_simd256(x256+46,xtmpp+23,128); + transpose4_ooff_simd256(x256+48,xtmpp+24,128); + transpose4_ooff_simd256(x256+50,xtmpp+25,128); + transpose4_ooff_simd256(x256+52,xtmpp+26,128); + transpose4_ooff_simd256(x256+54,xtmpp+27,128); + transpose4_ooff_simd256(x256+56,xtmpp+28,128); + transpose4_ooff_simd256(x256+58,xtmpp+29,128); + transpose4_ooff_simd256(x256+60,xtmpp+30,128); + transpose4_ooff_simd256(x256+62,xtmpp+31,128); + x256+=64; + xtmpp+=32; + } + + dft1024((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + + + for (i=0; i<128; i++) { + bfly2_256(ytmpp,ytmpp+128, + y256p,y256p+128, + tw2048_256p); + tw2048_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + y256p = y256; + + for (i=0; i<16; i++) { + y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128); + y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128); + y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128); + y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128); + y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128); + y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128); + y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128); + y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128); + y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128); + y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128); + y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128); 
+ y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128); + y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128); + y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128); + y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128); + y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128); + y256p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +void idft2048(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[256],*xtmpp,*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[256],*tw2048_256p=(simd256_q15_t *)tw2048,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<4; i++) { + transpose4_ooff_simd256(x256 ,xtmpp,128); + transpose4_ooff_simd256(x256+2,xtmpp+1,128); + transpose4_ooff_simd256(x256+4,xtmpp+2,128); + transpose4_ooff_simd256(x256+6,xtmpp+3,128); + transpose4_ooff_simd256(x256+8,xtmpp+4,128); + transpose4_ooff_simd256(x256+10,xtmpp+5,128); + transpose4_ooff_simd256(x256+12,xtmpp+6,128); + transpose4_ooff_simd256(x256+14,xtmpp+7,128); + transpose4_ooff_simd256(x256+16,xtmpp+8,128); + transpose4_ooff_simd256(x256+18,xtmpp+9,128); + transpose4_ooff_simd256(x256+20,xtmpp+10,128); + transpose4_ooff_simd256(x256+22,xtmpp+11,128); + transpose4_ooff_simd256(x256+24,xtmpp+12,128); + transpose4_ooff_simd256(x256+26,xtmpp+13,128); + transpose4_ooff_simd256(x256+28,xtmpp+14,128); + transpose4_ooff_simd256(x256+30,xtmpp+15,128); + transpose4_ooff_simd256(x256+32,xtmpp+16,128); + transpose4_ooff_simd256(x256+34,xtmpp+17,128); + transpose4_ooff_simd256(x256+36,xtmpp+18,128); + transpose4_ooff_simd256(x256+38,xtmpp+19,128); + transpose4_ooff_simd256(x256+40,xtmpp+20,128); + transpose4_ooff_simd256(x256+42,xtmpp+21,128); + transpose4_ooff_simd256(x256+44,xtmpp+22,128); + transpose4_ooff_simd256(x256+46,xtmpp+23,128); + 
transpose4_ooff_simd256(x256+48,xtmpp+24,128); + transpose4_ooff_simd256(x256+50,xtmpp+25,128); + transpose4_ooff_simd256(x256+52,xtmpp+26,128); + transpose4_ooff_simd256(x256+54,xtmpp+27,128); + transpose4_ooff_simd256(x256+56,xtmpp+28,128); + transpose4_ooff_simd256(x256+58,xtmpp+29,128); + transpose4_ooff_simd256(x256+60,xtmpp+30,128); + transpose4_ooff_simd256(x256+62,xtmpp+31,128); + x256+=64; + xtmpp+=32; + } + + idft1024((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + + + for (i=0; i<128; i++) { + ibfly2_256(ytmpp,ytmpp+128, + y256p,y256p+128, + tw2048_256p); + tw2048_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + y256p = y256; + + for (i=0; i<16; i++) { + y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128); + y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128); + y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128); + y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128); + y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128); + y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128); + y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128); + y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128); + y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128); + y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128); + y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128); + y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128); + y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128); + y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128); + y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128); + y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128); + y256p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +#endif + + + +int16_t tw4096[3*2*1024]; + +#ifndef __AVX2__ +void dft4096(int16_t *x,int16_t *y,unsigned char scale) +{ + + 
simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<1024; i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,256); + } + + + dft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1); + dft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1); + dft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1); + + for (i=0; i<256; i++) { + bfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768, + y128p,y128p+256,y128p+512,y128p+768, + tw4096_128p,tw4096_128p+256,tw4096_128p+512); + tw4096_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<64; i++) { + y128[0] = shiftright_int16(y128[0],1); + y128[1] = shiftright_int16(y128[1],1); + y128[2] = shiftright_int16(y128[2],1); + y128[3] = shiftright_int16(y128[3],1); + y128[4] = shiftright_int16(y128[4],1); + y128[5] = shiftright_int16(y128[5],1); + y128[6] = shiftright_int16(y128[6],1); + y128[7] = shiftright_int16(y128[7],1); + y128[8] = shiftright_int16(y128[8],1); + y128[9] = shiftright_int16(y128[9],1); + y128[10] = shiftright_int16(y128[10],1); + y128[11] = shiftright_int16(y128[11],1); + y128[12] = shiftright_int16(y128[12],1); + y128[13] = shiftright_int16(y128[13],1); + y128[14] = shiftright_int16(y128[14],1); + y128[15] = shiftright_int16(y128[15],1); + + y128+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + + + +void idft4096(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t xtmp[1024],ytmp[1024],*tw4096_128p=(simd_q15_t *)tw4096,*x128=(simd_q15_t *)x,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<1024; i+=4,j++) { + transpose16_ooff(x128+i,xtmp+j,256); + } + + + idft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1); + idft1024((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1); + 
idft1024((int16_t*)(xtmp+768),(int16_t*)(ytmp+768),1); + + for (i=0; i<256; i++) { + ibfly4(ytmpp,ytmpp+256,ytmpp+512,ytmpp+768, + y128p,y128p+256,y128p+512,y128p+768, + tw4096_128p,tw4096_128p+256,tw4096_128p+512); + tw4096_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<64; i++) { + y128[0] = shiftright_int16(y128[0],scale); + y128[1] = shiftright_int16(y128[1],scale); + y128[2] = shiftright_int16(y128[2],scale); + y128[3] = shiftright_int16(y128[3],scale); + y128[4] = shiftright_int16(y128[4],scale); + y128[5] = shiftright_int16(y128[5],scale); + y128[6] = shiftright_int16(y128[6],scale); + y128[7] = shiftright_int16(y128[7],scale); + y128[8] = shiftright_int16(y128[8],scale); + y128[9] = shiftright_int16(y128[9],scale); + y128[10] = shiftright_int16(y128[10],scale); + y128[11] = shiftright_int16(y128[11],scale); + y128[12] = shiftright_int16(y128[12],scale); + y128[13] = shiftright_int16(y128[13],scale); + y128[14] = shiftright_int16(y128[14],scale); + y128[15] = shiftright_int16(y128[15],scale); + + y128+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +#else //__AVX2__ +void dft4096(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[512],ytmp[512],*tw4096_256p=(simd256_q15_t *)tw4096,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<512; i+=4,j++) { + transpose16_ooff_simd256(x256+i,xtmp+j,128); + } + + + dft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1); + dft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + dft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1); + dft1024((int16_t*)(xtmp+384),(int16_t*)(ytmp+384),1); + + for (i=0; i<128; i++) { + bfly4_256(ytmpp,ytmpp+128,ytmpp+256,ytmpp+384, + y256p,y256p+128,y256p+256,y256p+384, + tw4096_256p,tw4096_256p+128,tw4096_256p+256); + tw4096_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<32; i++) { + y256[0] = shiftright_int16_simd256(y256[0],1); + y256[1] = 
shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +void idft4096(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[512],ytmp[512],*tw4096_256p=(simd256_q15_t *)tw4096,*x256=(simd256_q15_t *)x,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i,j; + + for (i=0,j=0; i<512; i+=4,j++) { + transpose16_ooff_simd256(x256+i,xtmp+j,128); + } + + + idft1024((int16_t*)(xtmp),(int16_t*)(ytmp),1); + idft1024((int16_t*)(xtmp+128),(int16_t*)(ytmp+128),1); + idft1024((int16_t*)(xtmp+256),(int16_t*)(ytmp+256),1); + idft1024((int16_t*)(xtmp+384),(int16_t*)(ytmp+384),1); + + for (i=0; i<128; i++) { + ibfly4_256(ytmpp,ytmpp+128,ytmpp+256,ytmpp+384, + y256p,y256p+128,y256p+256,y256p+384, + tw4096_256p,tw4096_256p+128,tw4096_256p+256); + tw4096_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + + for (i=0; i<32; i++) { + y256[0] = shiftright_int16_simd256(y256[0],1); + y256[1] = shiftright_int16_simd256(y256[1],1); + y256[2] = shiftright_int16_simd256(y256[2],1); + y256[3] = shiftright_int16_simd256(y256[3],1); + y256[4] = shiftright_int16_simd256(y256[4],1); + y256[5] = shiftright_int16_simd256(y256[5],1); + y256[6] = shiftright_int16_simd256(y256[6],1); + y256[7] = 
shiftright_int16_simd256(y256[7],1); + y256[8] = shiftright_int16_simd256(y256[8],1); + y256[9] = shiftright_int16_simd256(y256[9],1); + y256[10] = shiftright_int16_simd256(y256[10],1); + y256[11] = shiftright_int16_simd256(y256[11],1); + y256[12] = shiftright_int16_simd256(y256[12],1); + y256[13] = shiftright_int16_simd256(y256[13],1); + y256[14] = shiftright_int16_simd256(y256[14],1); + y256[15] = shiftright_int16_simd256(y256[15],1); + + y256+=16; + } + + } + + _mm_empty(); + _m_empty(); + +} + +#endif //__AVX2__ + + +int16_t tw8192[2*4096] __attribute__((aligned(32))); + +#ifndef __AVX2__ +void dft8192(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[4096],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[2048],*tw8192_128p=(simd_q15_t *)tw8192,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; /* ytmp holds both 4096-point sub-transform outputs: dft4096 writes at ytmp and ytmp+1024, and the bfly2 loop reads ytmpp and ytmpp+1024 for i<1024, so 2 x 1024 vectors are required (same size as idft8192) */ + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<64; i++) { + transpose4_ooff(x64 ,xtmpp,2048); + transpose4_ooff(x64+2,xtmpp+1,2048); + transpose4_ooff(x64+4,xtmpp+2,2048); + transpose4_ooff(x64+6,xtmpp+3,2048); + transpose4_ooff(x64+8,xtmpp+4,2048); + transpose4_ooff(x64+10,xtmpp+5,2048); + transpose4_ooff(x64+12,xtmpp+6,2048); + transpose4_ooff(x64+14,xtmpp+7,2048); + transpose4_ooff(x64+16,xtmpp+8,2048); + transpose4_ooff(x64+18,xtmpp+9,2048); + transpose4_ooff(x64+20,xtmpp+10,2048); + transpose4_ooff(x64+22,xtmpp+11,2048); + transpose4_ooff(x64+24,xtmpp+12,2048); + transpose4_ooff(x64+26,xtmpp+13,2048); + transpose4_ooff(x64+28,xtmpp+14,2048); + transpose4_ooff(x64+30,xtmpp+15,2048); + transpose4_ooff(x64+32,xtmpp+16,2048); + transpose4_ooff(x64+34,xtmpp+17,2048); + transpose4_ooff(x64+36,xtmpp+18,2048); + transpose4_ooff(x64+38,xtmpp+19,2048); + transpose4_ooff(x64+40,xtmpp+20,2048); + transpose4_ooff(x64+42,xtmpp+21,2048); + transpose4_ooff(x64+44,xtmpp+22,2048); + transpose4_ooff(x64+46,xtmpp+23,2048); + transpose4_ooff(x64+48,xtmpp+24,2048); +
transpose4_ooff(x64+50,xtmpp+25,2048); + transpose4_ooff(x64+52,xtmpp+26,2048); + transpose4_ooff(x64+54,xtmpp+27,2048); + transpose4_ooff(x64+56,xtmpp+28,2048); + transpose4_ooff(x64+58,xtmpp+29,2048); + transpose4_ooff(x64+60,xtmpp+30,2048); + transpose4_ooff(x64+62,xtmpp+31,2048); + x64+=64; + xtmpp+=32; + } + + dft4096((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1); + + + for (i=0; i<1024; i++) { + bfly2(ytmpp,ytmpp+1024, + y128p,y128p+1024, + tw8192_128p); + tw8192_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<128; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +void idft8192(int16_t *x,int16_t *y,unsigned char scale) +{ + + simdshort_q15_t xtmp[4096],*xtmpp,*x64 = (simdshort_q15_t *)x; + simd_q15_t ytmp[2048],*tw8192_128p=(simd_q15_t *)tw8192,*y128=(simd_q15_t *)y,*y128p=(simd_q15_t *)y; + simd_q15_t *ytmpp = &ytmp[0]; + int i; + simd_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + 
for (i=0; i<64; i++) { + transpose4_ooff(x64 ,xtmpp,2048); + transpose4_ooff(x64+2,xtmpp+1,2048); + transpose4_ooff(x64+4,xtmpp+2,2048); + transpose4_ooff(x64+6,xtmpp+3,2048); + transpose4_ooff(x64+8,xtmpp+4,2048); + transpose4_ooff(x64+10,xtmpp+5,2048); + transpose4_ooff(x64+12,xtmpp+6,2048); + transpose4_ooff(x64+14,xtmpp+7,2048); + transpose4_ooff(x64+16,xtmpp+8,2048); + transpose4_ooff(x64+18,xtmpp+9,2048); + transpose4_ooff(x64+20,xtmpp+10,2048); + transpose4_ooff(x64+22,xtmpp+11,2048); + transpose4_ooff(x64+24,xtmpp+12,2048); + transpose4_ooff(x64+26,xtmpp+13,2048); + transpose4_ooff(x64+28,xtmpp+14,2048); + transpose4_ooff(x64+30,xtmpp+15,2048); + transpose4_ooff(x64+32,xtmpp+16,2048); + transpose4_ooff(x64+34,xtmpp+17,2048); + transpose4_ooff(x64+36,xtmpp+18,2048); + transpose4_ooff(x64+38,xtmpp+19,2048); + transpose4_ooff(x64+40,xtmpp+20,2048); + transpose4_ooff(x64+42,xtmpp+21,2048); + transpose4_ooff(x64+44,xtmpp+22,2048); + transpose4_ooff(x64+46,xtmpp+23,2048); + transpose4_ooff(x64+48,xtmpp+24,2048); + transpose4_ooff(x64+50,xtmpp+25,2048); + transpose4_ooff(x64+52,xtmpp+26,2048); + transpose4_ooff(x64+54,xtmpp+27,2048); + transpose4_ooff(x64+56,xtmpp+28,2048); + transpose4_ooff(x64+58,xtmpp+29,2048); + transpose4_ooff(x64+60,xtmpp+30,2048); + transpose4_ooff(x64+62,xtmpp+31,2048); + x64+=64; + xtmpp+=32; + } + + idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft4096((int16_t*)(xtmp+2048),(int16_t*)(ytmp+1024),1); + + + for (i=0; i<1024; i++) { + ibfly2(ytmpp,ytmpp+1024, + y128p,y128p+1024, + tw8192_128p); + tw8192_128p++; + y128p++; + ytmpp++; + } + + if (scale>0) { + y128p = y128; + + for (i=0; i<128; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT2_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT2_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT2_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT2_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT2_Q15_128); + y128p[5] = 
mulhi_int16(y128p[5],ONE_OVER_SQRT2_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT2_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT2_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT2_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT2_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT2_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT2_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT2_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT2_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT2_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT2_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +#else // __AVX2__ +void dft8192(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[1024],*xtmpp,*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[1024],*tw8192_256p=(simd256_q15_t *)tw8192,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<16; i++) { + transpose4_ooff_simd256(x256 ,xtmpp,512); + transpose4_ooff_simd256(x256+2,xtmpp+1,512); + transpose4_ooff_simd256(x256+4,xtmpp+2,512); + transpose4_ooff_simd256(x256+6,xtmpp+3,512); + transpose4_ooff_simd256(x256+8,xtmpp+4,512); + transpose4_ooff_simd256(x256+10,xtmpp+5,512); + transpose4_ooff_simd256(x256+12,xtmpp+6,512); + transpose4_ooff_simd256(x256+14,xtmpp+7,512); + transpose4_ooff_simd256(x256+16,xtmpp+8,512); + transpose4_ooff_simd256(x256+18,xtmpp+9,512); + transpose4_ooff_simd256(x256+20,xtmpp+10,512); + transpose4_ooff_simd256(x256+22,xtmpp+11,512); + transpose4_ooff_simd256(x256+24,xtmpp+12,512); + transpose4_ooff_simd256(x256+26,xtmpp+13,512); + transpose4_ooff_simd256(x256+28,xtmpp+14,512); + transpose4_ooff_simd256(x256+30,xtmpp+15,512); + transpose4_ooff_simd256(x256+32,xtmpp+16,512); + 
transpose4_ooff_simd256(x256+34,xtmpp+17,512); + transpose4_ooff_simd256(x256+36,xtmpp+18,512); + transpose4_ooff_simd256(x256+38,xtmpp+19,512); + transpose4_ooff_simd256(x256+40,xtmpp+20,512); + transpose4_ooff_simd256(x256+42,xtmpp+21,512); + transpose4_ooff_simd256(x256+44,xtmpp+22,512); + transpose4_ooff_simd256(x256+46,xtmpp+23,512); + transpose4_ooff_simd256(x256+48,xtmpp+24,512); + transpose4_ooff_simd256(x256+50,xtmpp+25,512); + transpose4_ooff_simd256(x256+52,xtmpp+26,512); + transpose4_ooff_simd256(x256+54,xtmpp+27,512); + transpose4_ooff_simd256(x256+56,xtmpp+28,512); + transpose4_ooff_simd256(x256+58,xtmpp+29,512); + transpose4_ooff_simd256(x256+60,xtmpp+30,512); + transpose4_ooff_simd256(x256+62,xtmpp+31,512); + x256+=64; + xtmpp+=32; + } + + dft4096((int16_t*)(xtmp),(int16_t*)ytmp,1); + dft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1); + + + for (i=0; i<512; i++) { + bfly2_256(ytmpp,ytmpp+512, + y256p,y256p+512, + tw8192_256p); + tw8192_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + y256p = y256; + + for (i=0; i<64; i++) { + y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128); + y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128); + y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128); + y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128); + y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128); + y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128); + y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128); + y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128); + y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128); + y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128); + y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128); + y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128); + y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128); + y256p[13] = 
mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128); + y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128); + y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128); + y256p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +void idft8192(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd256_q15_t xtmp[1024],*xtmpp,*x256 = (simd256_q15_t *)x; + simd256_q15_t ytmp[1024],*tw8192_256p=(simd256_q15_t *)tw8192,*y256=(simd256_q15_t *)y,*y256p=(simd256_q15_t *)y; + simd256_q15_t *ytmpp = &ytmp[0]; + int i; + simd256_q15_t ONE_OVER_SQRT2_Q15_128 = set1_int16_simd256(ONE_OVER_SQRT2_Q15); + + xtmpp = xtmp; + + for (i=0; i<16; i++) { + transpose4_ooff_simd256(x256 ,xtmpp,512); + transpose4_ooff_simd256(x256+2,xtmpp+1,512); + transpose4_ooff_simd256(x256+4,xtmpp+2,512); + transpose4_ooff_simd256(x256+6,xtmpp+3,512); + transpose4_ooff_simd256(x256+8,xtmpp+4,512); + transpose4_ooff_simd256(x256+10,xtmpp+5,512); + transpose4_ooff_simd256(x256+12,xtmpp+6,512); + transpose4_ooff_simd256(x256+14,xtmpp+7,512); + transpose4_ooff_simd256(x256+16,xtmpp+8,512); + transpose4_ooff_simd256(x256+18,xtmpp+9,512); + transpose4_ooff_simd256(x256+20,xtmpp+10,512); + transpose4_ooff_simd256(x256+22,xtmpp+11,512); + transpose4_ooff_simd256(x256+24,xtmpp+12,512); + transpose4_ooff_simd256(x256+26,xtmpp+13,512); + transpose4_ooff_simd256(x256+28,xtmpp+14,512); + transpose4_ooff_simd256(x256+30,xtmpp+15,512); + transpose4_ooff_simd256(x256+32,xtmpp+16,512); + transpose4_ooff_simd256(x256+34,xtmpp+17,512); + transpose4_ooff_simd256(x256+36,xtmpp+18,512); + transpose4_ooff_simd256(x256+38,xtmpp+19,512); + transpose4_ooff_simd256(x256+40,xtmpp+20,512); + transpose4_ooff_simd256(x256+42,xtmpp+21,512); + transpose4_ooff_simd256(x256+44,xtmpp+22,512); + transpose4_ooff_simd256(x256+46,xtmpp+23,512); + transpose4_ooff_simd256(x256+48,xtmpp+24,512); + transpose4_ooff_simd256(x256+50,xtmpp+25,512); + transpose4_ooff_simd256(x256+52,xtmpp+26,512); + 
transpose4_ooff_simd256(x256+54,xtmpp+27,512); + transpose4_ooff_simd256(x256+56,xtmpp+28,512); + transpose4_ooff_simd256(x256+58,xtmpp+29,512); + transpose4_ooff_simd256(x256+60,xtmpp+30,512); + transpose4_ooff_simd256(x256+62,xtmpp+31,512); + x256+=64; + xtmpp+=32; + } + + idft4096((int16_t*)(xtmp),(int16_t*)ytmp,1); + idft4096((int16_t*)(xtmp+512),(int16_t*)(ytmp+512),1); + + + for (i=0; i<512; i++) { + ibfly2_256(ytmpp,ytmpp+512, + y256p,y256p+512, + tw8192_256p); + tw8192_256p++; + y256p++; + ytmpp++; + } + + if (scale>0) { + y256p = y256; + + for (i=0; i<64; i++) { + y256p[0] = mulhi_int16_simd256(y256p[0],ONE_OVER_SQRT2_Q15_128); + y256p[1] = mulhi_int16_simd256(y256p[1],ONE_OVER_SQRT2_Q15_128); + y256p[2] = mulhi_int16_simd256(y256p[2],ONE_OVER_SQRT2_Q15_128); + y256p[3] = mulhi_int16_simd256(y256p[3],ONE_OVER_SQRT2_Q15_128); + y256p[4] = mulhi_int16_simd256(y256p[4],ONE_OVER_SQRT2_Q15_128); + y256p[5] = mulhi_int16_simd256(y256p[5],ONE_OVER_SQRT2_Q15_128); + y256p[6] = mulhi_int16_simd256(y256p[6],ONE_OVER_SQRT2_Q15_128); + y256p[7] = mulhi_int16_simd256(y256p[7],ONE_OVER_SQRT2_Q15_128); + y256p[8] = mulhi_int16_simd256(y256p[8],ONE_OVER_SQRT2_Q15_128); + y256p[9] = mulhi_int16_simd256(y256p[9],ONE_OVER_SQRT2_Q15_128); + y256p[10] = mulhi_int16_simd256(y256p[10],ONE_OVER_SQRT2_Q15_128); + y256p[11] = mulhi_int16_simd256(y256p[11],ONE_OVER_SQRT2_Q15_128); + y256p[12] = mulhi_int16_simd256(y256p[12],ONE_OVER_SQRT2_Q15_128); + y256p[13] = mulhi_int16_simd256(y256p[13],ONE_OVER_SQRT2_Q15_128); + y256p[14] = mulhi_int16_simd256(y256p[14],ONE_OVER_SQRT2_Q15_128); + y256p[15] = mulhi_int16_simd256(y256p[15],ONE_OVER_SQRT2_Q15_128); + y256p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + + +#endif + +int16_t twa1536[1024],twb1536[1024]; + +// 512 x 3 +void idft1536(int16_t *input, int16_t *output, unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][512 ]__attribute__((aligned(32))); + uint32_t tmpo[3][512] __attribute__((aligned(32))); + simd_q15_t 
*y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<512; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + idft512((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + idft512((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + idft512((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + for (i=0,i2=0; i<1024; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+1024+i),(simd_q15_t*)(output+2048+i), + (simd_q15_t*)(twa1536+i),(simd_q15_t*)(twb1536+i)); + } + + + if (scale==1) { + for (i=0; i<24; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + +void dft1536(int16_t *input, int16_t *output, unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][512] __attribute__((aligned(32))); + uint32_t tmpo[3][512] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + 
simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<512; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft512((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + dft512((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + dft512((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + /* + for (i=1; i<512; i++) { + tmpo[0][i] = tmpo[0][i<<1]; + tmpo[1][i] = tmpo[1][i<<1]; + tmpo[2][i] = tmpo[2][i<<1]; + }*/ + if (LOG_DUMPFLAG(DEBUG_DFT)) { /* each tmpo row is declared with 512 entries, so dump 512 per row; the previous 2048 ran past the array */ + LOG_M("dft1536out0.m","o0",tmpo[0],512,1,1); + LOG_M("dft1536out1.m","o1",tmpo[1],512,1,1); + LOG_M("dft1536out2.m","o2",tmpo[2],512,1,1); + } + for (i=0,i2=0; i<1024; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+1024+i),(simd_q15_t*)(output+2048+i), + (simd_q15_t*)(twa1536+i),(simd_q15_t*)(twb1536+i)); + } + + if (scale==1) { + for (i=0; i<24; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + +
_mm_empty(); + _m_empty(); + +} + +int16_t twa3072[2048] __attribute__((aligned(32))); +int16_t twb3072[2048] __attribute__((aligned(32))); +// 1024 x 3 +void dft3072(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][1024] __attribute__((aligned(32))); + uint32_t tmpo[3][1024] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<1024; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft1024((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + dft1024((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + dft1024((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + for (i=0,i2=0; i<2048; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+2048+i),(simd_q15_t*)(output+4096+i), + (simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i)); + } + + if (scale==1) { + for (i=0; i<48; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + 
y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); +} + +void idft3072(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][1024]__attribute__((aligned(32))); + uint32_t tmpo[3][1024] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<1024; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + idft1024((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + idft1024((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + idft1024((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + for (i=0,i2=0; i<2048; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+2048+i),(simd_q15_t*)(output+4096+i), + (simd_q15_t*)(twa3072+i),(simd_q15_t*)(twb3072+i)); + } + + + if (scale==1) { + for (i=0; i<48; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = 
mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); +} + + +int16_t twa6144[4096] __attribute__((aligned(32))); +int16_t twb6144[4096] __attribute__((aligned(32))); + +void idft6144(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][2048] __attribute__((aligned(32))); + uint32_t tmpo[3][2048] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<2048; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + idft2048((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + idft2048((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + idft2048((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("idft6144in.m","in",input,6144,1,1); + LOG_M("idft6144out0.m","o0",tmpo[0],2048,1,1); + LOG_M("idft6144out1.m","o1",tmpo[1],2048,1,1); + LOG_M("idft6144out2.m","o2",tmpo[2],2048,1,1); + } + + for (i=0,i2=0; i<4096; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+4096+i),(simd_q15_t*)(output+8192+i), + (simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i)); + } + + + if (scale==1) { + for (i=0; i<96; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = 
mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + + _mm_empty(); + _m_empty(); + +} + + +void dft6144(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][2048] __attribute__((aligned(32))); + uint32_t tmpo[3][2048] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<2048; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft2048((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + dft2048((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + dft2048((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + /* + for (i=1; i<2048; i++) { + tmpo[0][i] = tmpo[0][i<<1]; + tmpo[1][i] = tmpo[1][i<<1]; + tmpo[2][i] = tmpo[2][i<<1]; + }*/ + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("ft6144out0.m","o0",tmpo[0],2048,1,1); + LOG_M("ft6144out1.m","o1",tmpo[1],2048,1,1); + LOG_M("ft6144out2.m","o2",tmpo[2],2048,1,1); + } + for (i=0,i2=0; i<4096; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+4096+i),(simd_q15_t*)(output+8192+i), + (simd_q15_t*)(twa6144+i),(simd_q15_t*)(twb6144+i)); + } + + if (scale==1) { + for (i=0; i<96; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = 
mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); + +} + +int16_t twa9216[6144] __attribute__((aligned(32))); +int16_t twb9216[6144] __attribute__((aligned(32))); +// 3072 x 3 +void dft9216(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +void idft9216(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +int16_t twa12288[8192] __attribute__((aligned(32))); +int16_t twb12288[8192] __attribute__((aligned(32))); +// 4096 x 3 +void dft12288(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][4096] __attribute__((aligned(32))); + uint32_t tmpo[3][4096] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<4096; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft4096((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),scale); + dft4096((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),scale); + dft4096((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),scale); + /* + for (i=1; i<4096; i++) { + tmpo[0][i] = tmpo[0][i<<1]; + tmpo[1][i] = tmpo[1][i<<1]; + tmpo[2][i] = tmpo[2][i<<1]; + }*/ + if (LOG_DUMPFLAG(DEBUG_DFT)) { + 
LOG_M("dft12288out0.m","o0",tmpo[0],4096,1,1); + LOG_M("dft12288out1.m","o1",tmpo[1],4096,1,1); + LOG_M("dft12288out2.m","o2",tmpo[2],4096,1,1); + } + for (i=0,i2=0; i<8192; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+8192+i),(simd_q15_t*)(output+16384+i), + (simd_q15_t*)(twa12288+i),(simd_q15_t*)(twb12288+i)); + } + + if (scale==1) { + for (i=0; i<192; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); + +} + +void idft12288(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][4096] __attribute__((aligned(32))); + uint32_t tmpo[3][4096] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<4096; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + + + 
idft4096((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),scale); + idft4096((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),scale); + idft4096((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),scale); + + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("idft12288in.m","in",input,12288,1,1); + LOG_M("idft12288out0.m","o0",tmpo[0],4096,1,1); + LOG_M("idft12288out1.m","o1",tmpo[1],4096,1,1); + LOG_M("idft12288out2.m","o2",tmpo[2],4096,1,1); + } + + for (i=0,i2=0; i<8192; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+8192+i),(simd_q15_t*)(output+16384+i), + (simd_q15_t*)(twa12288+i),(simd_q15_t*)(twb12288+i)); + } + + if (scale==1) { + for (i=0; i<192; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("idft12288out.m","out",output,6144,1,1); + } +} + +int16_t twa18432[12288] __attribute__((aligned(32))); +int16_t twb18432[12288] __attribute__((aligned(32))); +// 6144 x 3 +void dft18432(int16_t *input, int16_t 
*output,unsigned char scale) { + + int i,i2,j; + uint32_t tmp[3][6144] __attribute__((aligned(32))); + uint32_t tmpo[3][6144] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<6144; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),scale); + dft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),scale); + dft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),scale); + + for (i=0,i2=0; i<12288; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+12288+i),(simd_q15_t*)(output+24576+i), + (simd_q15_t*)(twa18432+i),(simd_q15_t*)(twb18432+i)); + } + if (scale==1) { + for (i=0; i<288; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); +} + +void idft18432(int16_t *input, int16_t *output,unsigned char 
scale) { + + int i,i2,j; + uint32_t tmp[3][6144] __attribute__((aligned(32))); + uint32_t tmpo[3][6144] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<6144; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + idft6144((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),scale); + idft6144((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),scale); + idft6144((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),scale); + + for (i=0,i2=0; i<12288; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+12288+i),(simd_q15_t*)(output+24576+i), + (simd_q15_t*)(twa18432+i),(simd_q15_t*)(twb18432+i)); + } + if (scale==1) { + for (i=0; i<288; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); +} + + +int16_t twa24576[16384] __attribute__((aligned(32))); +int16_t twb24576[16384] 
__attribute__((aligned(32))); +// 8192 x 3 +void dft24576(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][8192] __attribute__((aligned(32))); + uint32_t tmpo[3][8192] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<8192; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + dft8192((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + dft8192((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + dft8192((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + /* + for (i=1; i<8192; i++) { + tmpo[0][i] = tmpo[0][i<<1]; + tmpo[1][i] = tmpo[1][i<<1]; + tmpo[2][i] = tmpo[2][i<<1]; + }*/ + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("dft24576out0.m","o0",tmpo[0],8192,1,1); + LOG_M("dft24576out1.m","o1",tmpo[1],8192,1,1); + LOG_M("dft24576out2.m","o2",tmpo[2],8192,1,1); + } + for (i=0,i2=0; i<16384; i+=8,i2+=4) { + bfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),(simd_q15_t*)(&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+16384+i),(simd_q15_t*)(output+32768+i), + (simd_q15_t*)(twa24576+i),(simd_q15_t*)(twb24576+i)); + } + + + if (scale==1) { + for (i=0; i<384; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = 
mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("out.m","out",output,24576,1,1); + } +} + +void idft24576(int16_t *input, int16_t *output,unsigned char scale) +{ + int i,i2,j; + uint32_t tmp[3][8192] __attribute__((aligned(32))); + uint32_t tmpo[3][8192] __attribute__((aligned(32))); + simd_q15_t *y128p=(simd_q15_t*)output; + simd_q15_t ONE_OVER_SQRT3_Q15_128 = set1_int16(ONE_OVER_SQRT3_Q15); + + for (i=0,j=0; i<8192; i++) { + tmp[0][i] = ((uint32_t *)input)[j++]; + tmp[1][i] = ((uint32_t *)input)[j++]; + tmp[2][i] = ((uint32_t *)input)[j++]; + } + + idft8192((int16_t*)(tmp[0]),(int16_t*)(tmpo[0]),1); + idft8192((int16_t*)(tmp[1]),(int16_t*)(tmpo[1]),1); + idft8192((int16_t*)(tmp[2]),(int16_t*)(tmpo[2]),1); + + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("idft24576in.m","in",input,24576,1,1); + LOG_M("idft24576out0.m","o0",tmpo[0],8192,1,1); + LOG_M("idft24576out1.m","o1",tmpo[1],8192,1,1); + LOG_M("idft24576out2.m","o2",tmpo[2],8192,1,1); + } + + for (i=0,i2=0; i<16384; i+=8,i2+=4) { + ibfly3((simd_q15_t*)(&tmpo[0][i2]),(simd_q15_t*)(&tmpo[1][i2]),((simd_q15_t*)&tmpo[2][i2]), + (simd_q15_t*)(output+i),(simd_q15_t*)(output+16384+i),(simd_q15_t*)(output+32768+i), + (simd_q15_t*)(twa24576+i),(simd_q15_t*)(twb24576+i)); + } + if (scale==1) { + for (i=0; i<384; i++) { + y128p[0] = mulhi_int16(y128p[0],ONE_OVER_SQRT3_Q15_128); + y128p[1] = mulhi_int16(y128p[1],ONE_OVER_SQRT3_Q15_128); + y128p[2] = mulhi_int16(y128p[2],ONE_OVER_SQRT3_Q15_128); + y128p[3] = mulhi_int16(y128p[3],ONE_OVER_SQRT3_Q15_128); + y128p[4] = mulhi_int16(y128p[4],ONE_OVER_SQRT3_Q15_128); + y128p[5] = mulhi_int16(y128p[5],ONE_OVER_SQRT3_Q15_128); + y128p[6] = 
mulhi_int16(y128p[6],ONE_OVER_SQRT3_Q15_128); + y128p[7] = mulhi_int16(y128p[7],ONE_OVER_SQRT3_Q15_128); + y128p[8] = mulhi_int16(y128p[8],ONE_OVER_SQRT3_Q15_128); + y128p[9] = mulhi_int16(y128p[9],ONE_OVER_SQRT3_Q15_128); + y128p[10] = mulhi_int16(y128p[10],ONE_OVER_SQRT3_Q15_128); + y128p[11] = mulhi_int16(y128p[11],ONE_OVER_SQRT3_Q15_128); + y128p[12] = mulhi_int16(y128p[12],ONE_OVER_SQRT3_Q15_128); + y128p[13] = mulhi_int16(y128p[13],ONE_OVER_SQRT3_Q15_128); + y128p[14] = mulhi_int16(y128p[14],ONE_OVER_SQRT3_Q15_128); + y128p[15] = mulhi_int16(y128p[15],ONE_OVER_SQRT3_Q15_128); + y128p+=16; + } + } + _mm_empty(); + _m_empty(); + + if (LOG_DUMPFLAG(DEBUG_DFT)) { + LOG_M("idft24576out.m","out",output,24576,1,1); + } +} + +int16_t twa36864[24576] __attribute__((aligned(32))); +int16_t twb36884[24576] __attribute__((aligned(32))); +// 12288 x 3 +void dft36864(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} +void idft36864(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +int16_t twa49152[32768] __attribute__((aligned(32))); +int16_t twb49152[32768] __attribute__((aligned(32))); +// 16384 x 3 +void dft49152(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +void idft49152(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +int16_t twa73728[49152] __attribute__((aligned(32))); +int16_t twb73728[49152] __attribute__((aligned(32))); +// 24576 x 3 +void dft73728(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + +void idft73728(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + + +int16_t twa98304[49152] __attribute__((aligned(32))); +int16_t twb98304[49152] __attribute__((aligned(32))); +// 32768 x 3 +void dft98304(int16_t *input, int16_t *output,uint8_t scale) { + + 
AssertFatal(1==0,"Need to do this ..\n"); +} + +void idft98304(int16_t *input, int16_t *output,uint8_t scale) { + + AssertFatal(1==0,"Need to do this ..\n"); +} + + +/// THIS SECTION IS FOR ALL PUSCH DFTS (i.e. radix 2^a * 3^b * 4^c * 5^d) +/// They use twiddles for 4-way parallel DFTS (i.e. 4 DFTS with interleaved input/output) + +static int16_t W1_12s[8]__attribute__((aligned(32))) = {28377,-16383,28377,-16383,28377,-16383,28377,-16383}; +static int16_t W2_12s[8]__attribute__((aligned(32))) = {16383,-28377,16383,-28377,16383,-28377,16383,-28377}; +static int16_t W3_12s[8]__attribute__((aligned(32))) = {0,-32767,0,-32767,0,-32767,0,-32767}; +static int16_t W4_12s[8]__attribute__((aligned(32))) = {-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377}; +static int16_t W6_12s[8]__attribute__((aligned(32))) = {-32767,0,-32767,0,-32767,0,-32767,0}; + +simd_q15_t *W1_12=(simd_q15_t *)W1_12s; +simd_q15_t *W2_12=(simd_q15_t *)W2_12s; +simd_q15_t *W3_12=(simd_q15_t *)W3_12s; +simd_q15_t *W4_12=(simd_q15_t *)W4_12s; +simd_q15_t *W6_12=(simd_q15_t *)W6_12s; + + +static simd_q15_t norm128; + +static inline void dft12f(simd_q15_t *x0, + simd_q15_t *x1, + simd_q15_t *x2, + simd_q15_t *x3, + simd_q15_t *x4, + simd_q15_t *x5, + simd_q15_t *x6, + simd_q15_t *x7, + simd_q15_t *x8, + simd_q15_t *x9, + simd_q15_t *x10, + simd_q15_t *x11, + simd_q15_t *y0, + simd_q15_t *y1, + simd_q15_t *y2, + simd_q15_t *y3, + simd_q15_t *y4, + simd_q15_t *y5, + simd_q15_t *y6, + simd_q15_t *y7, + simd_q15_t *y8, + simd_q15_t *y9, + simd_q15_t *y10, + simd_q15_t *y11) __attribute__((always_inline)); + +static inline void dft12f(simd_q15_t *x0, + simd_q15_t *x1, + simd_q15_t *x2, + simd_q15_t *x3, + simd_q15_t *x4, + simd_q15_t *x5, + simd_q15_t *x6, + simd_q15_t *x7, + simd_q15_t *x8, + simd_q15_t *x9, + simd_q15_t *x10, + simd_q15_t *x11, + simd_q15_t *y0, + simd_q15_t *y1, + simd_q15_t *y2, + simd_q15_t *y3, + simd_q15_t *y4, + simd_q15_t *y5, + simd_q15_t *y6, + simd_q15_t *y7, + simd_q15_t 
*y8, + simd_q15_t *y9, + simd_q15_t *y10, + simd_q15_t *y11) +{ + + + simd_q15_t tmp_dft12[12]; + + simd_q15_t *tmp_dft12_ptr = &tmp_dft12[0]; + + // msg("dft12\n"); + + bfly4_tw1(x0, + x3, + x6, + x9, + tmp_dft12_ptr, + tmp_dft12_ptr+3, + tmp_dft12_ptr+6, + tmp_dft12_ptr+9); + + + bfly4_tw1(x1, + x4, + x7, + x10, + tmp_dft12_ptr+1, + tmp_dft12_ptr+4, + tmp_dft12_ptr+7, + tmp_dft12_ptr+10); + + + bfly4_tw1(x2, + x5, + x8, + x11, + tmp_dft12_ptr+2, + tmp_dft12_ptr+5, + tmp_dft12_ptr+8, + tmp_dft12_ptr+11); + + // k2=0; + bfly3_tw1(tmp_dft12_ptr, + tmp_dft12_ptr+1, + tmp_dft12_ptr+2, + y0, + y4, + y8); + + + + // k2=1; + bfly3(tmp_dft12_ptr+3, + tmp_dft12_ptr+4, + tmp_dft12_ptr+5, + y1, + y5, + y9, + W1_12, + W2_12); + + + + // k2=2; + bfly3(tmp_dft12_ptr+6, + tmp_dft12_ptr+7, + tmp_dft12_ptr+8, + y2, + y6, + y10, + W2_12, + W4_12); + + // k2=3; + bfly3(tmp_dft12_ptr+9, + tmp_dft12_ptr+10, + tmp_dft12_ptr+11, + y3, + y7, + y11, + W3_12, + W6_12); + +} + + + + +void dft12(int16_t *x,int16_t *y ,unsigned char scale_flag) +{ + + simd_q15_t *x128 = (simd_q15_t *)x,*y128 = (simd_q15_t *)y; + dft12f(&x128[0], + &x128[1], + &x128[2], + &x128[3], + &x128[4], + &x128[5], + &x128[6], + &x128[7], + &x128[8], + &x128[9], + &x128[10], + &x128[11], + &y128[0], + &y128[1], + &y128[2], + &y128[3], + &y128[4], + &y128[5], + &y128[6], + &y128[7], + &y128[8], + &y128[9], + &y128[10], + &y128[11]); + + _mm_empty(); + _m_empty(); + +} + +#ifdef __AVX2__ + +static int16_t W1_12s_256[16]__attribute__((aligned(32))) = {28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383,28377,-16383}; +static int16_t W2_12s_256[16]__attribute__((aligned(32))) = {16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377,16383,-28377}; +static int16_t W3_12s_256[16]__attribute__((aligned(32))) = {0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767}; +static int16_t W4_12s_256[16]__attribute__((aligned(32))) = 
{-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377,-16383,-28377}; +static int16_t W6_12s_256[16]__attribute__((aligned(32))) = {-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0,-32767,0}; + +simd256_q15_t *W1_12_256=(simd256_q15_t *)W1_12s_256; +simd256_q15_t *W2_12_256=(simd256_q15_t *)W2_12s_256; +simd256_q15_t *W3_12_256=(simd256_q15_t *)W3_12s_256; +simd256_q15_t *W4_12_256=(simd256_q15_t *)W4_12s_256; +simd256_q15_t *W6_12_256=(simd256_q15_t *)W6_12s_256; + + + +static inline void dft12f_simd256(simd256_q15_t *x0, + simd256_q15_t *x1, + simd256_q15_t *x2, + simd256_q15_t *x3, + simd256_q15_t *x4, + simd256_q15_t *x5, + simd256_q15_t *x6, + simd256_q15_t *x7, + simd256_q15_t *x8, + simd256_q15_t *x9, + simd256_q15_t *x10, + simd256_q15_t *x11, + simd256_q15_t *y0, + simd256_q15_t *y1, + simd256_q15_t *y2, + simd256_q15_t *y3, + simd256_q15_t *y4, + simd256_q15_t *y5, + simd256_q15_t *y6, + simd256_q15_t *y7, + simd256_q15_t *y8, + simd256_q15_t *y9, + simd256_q15_t *y10, + simd256_q15_t *y11) __attribute__((always_inline)); + +static inline void dft12f_simd256(simd256_q15_t *x0, + simd256_q15_t *x1, + simd256_q15_t *x2, + simd256_q15_t *x3, + simd256_q15_t *x4, + simd256_q15_t *x5, + simd256_q15_t *x6, + simd256_q15_t *x7, + simd256_q15_t *x8, + simd256_q15_t *x9, + simd256_q15_t *x10, + simd256_q15_t *x11, + simd256_q15_t *y0, + simd256_q15_t *y1, + simd256_q15_t *y2, + simd256_q15_t *y3, + simd256_q15_t *y4, + simd256_q15_t *y5, + simd256_q15_t *y6, + simd256_q15_t *y7, + simd256_q15_t *y8, + simd256_q15_t *y9, + simd256_q15_t *y10, + simd256_q15_t *y11) +{ + + + simd256_q15_t tmp_dft12[12]; + + simd256_q15_t *tmp_dft12_ptr = &tmp_dft12[0]; + + // msg("dft12\n"); + + bfly4_tw1_256(x0, + x3, + x6, + x9, + tmp_dft12_ptr, + tmp_dft12_ptr+3, + tmp_dft12_ptr+6, + tmp_dft12_ptr+9); + + + bfly4_tw1_256(x1, + x4, + x7, + x10, + tmp_dft12_ptr+1, + tmp_dft12_ptr+4, + tmp_dft12_ptr+7, + tmp_dft12_ptr+10); + 
+ + bfly4_tw1_256(x2, + x5, + x8, + x11, + tmp_dft12_ptr+2, + tmp_dft12_ptr+5, + tmp_dft12_ptr+8, + tmp_dft12_ptr+11); + + // k2=0; + bfly3_tw1_256(tmp_dft12_ptr, + tmp_dft12_ptr+1, + tmp_dft12_ptr+2, + y0, + y4, + y8); + + + + // k2=1; + bfly3_256(tmp_dft12_ptr+3, + tmp_dft12_ptr+4, + tmp_dft12_ptr+5, + y1, + y5, + y9, + W1_12_256, + W2_12_256); + + + + // k2=2; + bfly3_256(tmp_dft12_ptr+6, + tmp_dft12_ptr+7, + tmp_dft12_ptr+8, + y2, + y6, + y10, + W2_12_256, + W4_12_256); + + // k2=3; + bfly3_256(tmp_dft12_ptr+9, + tmp_dft12_ptr+10, + tmp_dft12_ptr+11, + y3, + y7, + y11, + W3_12_256, + W6_12_256); + +} + + + + +void dft12_simd256(int16_t *x,int16_t *y) +{ + + simd256_q15_t *x256 = (simd256_q15_t *)x,*y256 = (simd256_q15_t *)y; + dft12f_simd256(&x256[0], + &x256[1], + &x256[2], + &x256[3], + &x256[4], + &x256[5], + &x256[6], + &x256[7], + &x256[8], + &x256[9], + &x256[10], + &x256[11], + &y256[0], + &y256[1], + &y256[2], + &y256[3], + &y256[4], + &y256[5], + &y256[6], + &y256[7], + &y256[8], + &y256[9], + &y256[10], + &y256[11]); + + _mm_empty(); + _m_empty(); + +} + +#endif + +static int16_t tw24[88]__attribute__((aligned(32))); + +void dft24(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *tw128=(simd_q15_t *)&tw24[0]; + simd_q15_t ytmp128[24];//=&ytmp128array[0]; + int i,j,k; + + // msg("dft24\n"); + dft12f(x128, + x128+2, + x128+4, + x128+6, + x128+8, + x128+10, + x128+12, + x128+14, + x128+16, + x128+18, + x128+20, + x128+22, + ytmp128, + ytmp128+2, + ytmp128+4, + ytmp128+6, + ytmp128+8, + ytmp128+10, + ytmp128+12, + ytmp128+14, + ytmp128+16, + ytmp128+18, + ytmp128+20, + ytmp128+22); + // msg("dft24b\n"); + + dft12f(x128+1, + x128+3, + x128+5, + x128+7, + x128+9, + x128+11, + x128+13, + x128+15, + x128+17, + x128+19, + x128+21, + x128+23, + ytmp128+1, + ytmp128+3, + ytmp128+5, + ytmp128+7, + ytmp128+9, + ytmp128+11, + ytmp128+13, + ytmp128+15, + ytmp128+17, + 
ytmp128+19, + ytmp128+21, + ytmp128+23); + + // msg("dft24c\n"); + + bfly2_tw1(ytmp128, + ytmp128+1, + y128, + y128+12); + + // msg("dft24d\n"); + + for (i=2,j=1,k=0; i<24; i+=2,j++,k++) { + + bfly2(ytmp128+i, + ytmp128+i+1, + y128+j, + y128+j+12, + tw128+k); + // msg("dft24e\n"); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[1]); + + for (i=0; i<24; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa36[88]__attribute__((aligned(32))); +static int16_t twb36[88]__attribute__((aligned(32))); + +void dft36(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa36[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb36[0]; + simd_q15_t ytmp128[36];//&ytmp128array[0]; + + + int i,j,k; + + dft12f(x128, + x128+3, + x128+6, + x128+9, + x128+12, + x128+15, + x128+18, + x128+21, + x128+24, + x128+27, + x128+30, + x128+33, + ytmp128, + ytmp128+3, + ytmp128+6, + ytmp128+9, + ytmp128+12, + ytmp128+15, + ytmp128+18, + ytmp128+21, + ytmp128+24, + ytmp128+27, + ytmp128+30, + ytmp128+33); + + dft12f(x128+1, + x128+4, + x128+7, + x128+10, + x128+13, + x128+16, + x128+19, + x128+22, + x128+25, + x128+28, + x128+31, + x128+34, + ytmp128+1, + ytmp128+4, + ytmp128+7, + ytmp128+10, + ytmp128+13, + ytmp128+16, + ytmp128+19, + ytmp128+22, + ytmp128+25, + ytmp128+28, + ytmp128+31, + ytmp128+34); + + dft12f(x128+2, + x128+5, + x128+8, + x128+11, + x128+14, + x128+17, + x128+20, + x128+23, + x128+26, + x128+29, + x128+32, + x128+35, + ytmp128+2, + ytmp128+5, + ytmp128+8, + ytmp128+11, + ytmp128+14, + ytmp128+17, + ytmp128+20, + ytmp128+23, + ytmp128+26, + ytmp128+29, + ytmp128+32, + ytmp128+35); + + + bfly3_tw1(ytmp128, + ytmp128+1, + ytmp128+2, + y128, + y128+12, + y128+24); + + for (i=3,j=1,k=0; i<36; i+=3,j++,k++) { + + bfly3(ytmp128+i, + ytmp128+i+1, + ytmp128+i+2, + y128+j, + y128+j+12, + y128+j+24, + 
twa128+k, + twb128+k); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[2]); + + for (i=0; i<36; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa48[88]__attribute__((aligned(32))); +static int16_t twb48[88]__attribute__((aligned(32))); +static int16_t twc48[88]__attribute__((aligned(32))); + +void dft48(int16_t *x, int16_t *y,unsigned char scale_flag) +{ + + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa48[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb48[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc48[0]; + simd_q15_t ytmp128[48];//=&ytmp128array[0]; + int i,j,k; + + + dft12f(x128, + x128+4, + x128+8, + x128+12, + x128+16, + x128+20, + x128+24, + x128+28, + x128+32, + x128+36, + x128+40, + x128+44, + ytmp128, + ytmp128+4, + ytmp128+8, + ytmp128+12, + ytmp128+16, + ytmp128+20, + ytmp128+24, + ytmp128+28, + ytmp128+32, + ytmp128+36, + ytmp128+40, + ytmp128+44); + + + dft12f(x128+1, + x128+5, + x128+9, + x128+13, + x128+17, + x128+21, + x128+25, + x128+29, + x128+33, + x128+37, + x128+41, + x128+45, + ytmp128+1, + ytmp128+5, + ytmp128+9, + ytmp128+13, + ytmp128+17, + ytmp128+21, + ytmp128+25, + ytmp128+29, + ytmp128+33, + ytmp128+37, + ytmp128+41, + ytmp128+45); + + + dft12f(x128+2, + x128+6, + x128+10, + x128+14, + x128+18, + x128+22, + x128+26, + x128+30, + x128+34, + x128+38, + x128+42, + x128+46, + ytmp128+2, + ytmp128+6, + ytmp128+10, + ytmp128+14, + ytmp128+18, + ytmp128+22, + ytmp128+26, + ytmp128+30, + ytmp128+34, + ytmp128+38, + ytmp128+42, + ytmp128+46); + + + dft12f(x128+3, + x128+7, + x128+11, + x128+15, + x128+19, + x128+23, + x128+27, + x128+31, + x128+35, + x128+39, + x128+43, + x128+47, + ytmp128+3, + ytmp128+7, + ytmp128+11, + ytmp128+15, + ytmp128+19, + ytmp128+23, + ytmp128+27, + ytmp128+31, + ytmp128+35, + ytmp128+39, + ytmp128+43, + ytmp128+47); + + + + bfly4_tw1(ytmp128, + ytmp128+1, + ytmp128+2, + 
ytmp128+3, + y128, + y128+12, + y128+24, + y128+36); + + + + for (i=4,j=1,k=0; i<48; i+=4,j++,k++) { + + bfly4(ytmp128+i, + ytmp128+i+1, + ytmp128+i+2, + ytmp128+i+3, + y128+j, + y128+j+12, + y128+j+24, + y128+j+36, + twa128+k, + twb128+k, + twc128+k); + + } + + if (scale_flag == 1) { + norm128 = set1_int16(dft_norm_table[3]); + + for (i=0; i<48; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa60[88]__attribute__((aligned(32))); +static int16_t twb60[88]__attribute__((aligned(32))); +static int16_t twc60[88]__attribute__((aligned(32))); +static int16_t twd60[88]__attribute__((aligned(32))); + +void dft60(int16_t *x,int16_t *y,unsigned char scale) +{ + + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa60[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb60[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc60[0]; + simd_q15_t *twd128=(simd_q15_t *)&twd60[0]; + simd_q15_t ytmp128[60];//=&ytmp128array[0]; + int i,j,k; + + dft12f(x128, + x128+5, + x128+10, + x128+15, + x128+20, + x128+25, + x128+30, + x128+35, + x128+40, + x128+45, + x128+50, + x128+55, + ytmp128, + ytmp128+5, + ytmp128+10, + ytmp128+15, + ytmp128+20, + ytmp128+25, + ytmp128+30, + ytmp128+35, + ytmp128+40, + ytmp128+45, + ytmp128+50, + ytmp128+55); + + dft12f(x128+1, + x128+6, + x128+11, + x128+16, + x128+21, + x128+26, + x128+31, + x128+36, + x128+41, + x128+46, + x128+51, + x128+56, + ytmp128+1, + ytmp128+6, + ytmp128+11, + ytmp128+16, + ytmp128+21, + ytmp128+26, + ytmp128+31, + ytmp128+36, + ytmp128+41, + ytmp128+46, + ytmp128+51, + ytmp128+56); + + dft12f(x128+2, + x128+7, + x128+12, + x128+17, + x128+22, + x128+27, + x128+32, + x128+37, + x128+42, + x128+47, + x128+52, + x128+57, + ytmp128+2, + ytmp128+7, + ytmp128+12, + ytmp128+17, + ytmp128+22, + ytmp128+27, + ytmp128+32, + ytmp128+37, + ytmp128+42, + ytmp128+47, + ytmp128+52, + ytmp128+57); + + dft12f(x128+3, + x128+8, + x128+13, + 
x128+18, + x128+23, + x128+28, + x128+33, + x128+38, + x128+43, + x128+48, + x128+53, + x128+58, + ytmp128+3, + ytmp128+8, + ytmp128+13, + ytmp128+18, + ytmp128+23, + ytmp128+28, + ytmp128+33, + ytmp128+38, + ytmp128+43, + ytmp128+48, + ytmp128+53, + ytmp128+58); + + dft12f(x128+4, + x128+9, + x128+14, + x128+19, + x128+24, + x128+29, + x128+34, + x128+39, + x128+44, + x128+49, + x128+54, + x128+59, + ytmp128+4, + ytmp128+9, + ytmp128+14, + ytmp128+19, + ytmp128+24, + ytmp128+29, + ytmp128+34, + ytmp128+39, + ytmp128+44, + ytmp128+49, + ytmp128+54, + ytmp128+59); + + bfly5_tw1(ytmp128, + ytmp128+1, + ytmp128+2, + ytmp128+3, + ytmp128+4, + y128, + y128+12, + y128+24, + y128+36, + y128+48); + + for (i=5,j=1,k=0; i<60; i+=5,j++,k++) { + + bfly5(ytmp128+i, + ytmp128+i+1, + ytmp128+i+2, + ytmp128+i+3, + ytmp128+i+4, + y128+j, + y128+j+12, + y128+j+24, + y128+j+36, + y128+j+48, + twa128+k, + twb128+k, + twc128+k, + twd128+k); + } + + if (scale == 1) { + norm128 = set1_int16(dft_norm_table[4]); + + for (i=0; i<60; i++) { + y128[i] = mulhi_int16(y128[i],norm128); +// printf("y[%d] = (%d,%d)\n",i,((int16_t*)&y128[i])[0],((int16_t*)&y128[i])[1]); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t tw72[280]__attribute__((aligned(32))); + +void dft72(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *tw128=(simd_q15_t *)&tw72[0]; + simd_q15_t x2128[72];// = (simd_q15_t *)&x2128array[0]; + + simd_q15_t ytmp128[72];//=&ytmp128array2[0]; + + for (i=0,j=0; i<36; i++,j+=2) { + x2128[i] = x128[j]; // even inputs + x2128[i+36] = x128[j+1]; // odd inputs + } + + dft36((int16_t *)x2128,(int16_t *)ytmp128,1); + dft36((int16_t *)(x2128+36),(int16_t *)(ytmp128+36),1); + + bfly2_tw1(ytmp128,ytmp128+36,y128,y128+36); + + for (i=1,j=0; i<36; i++,j++) { + bfly2(ytmp128+i, + ytmp128+36+i, + y128+i, + y128+36+i, + tw128+j); + } + + if (scale_flag==1) { + norm128 = 
set1_int16(dft_norm_table[5]); + + for (i=0; i<72; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t tw96[376]__attribute__((aligned(32))); + +void dft96(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *tw128=(simd_q15_t *)&tw96[0]; + simd_q15_t x2128[96];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[96];//=&ytmp128array2[0]; + + + for (i=0,j=0; i<48; i++,j+=2) { + x2128[i] = x128[j]; + x2128[i+48] = x128[j+1]; + } + + dft48((int16_t *)x2128,(int16_t *)ytmp128,0); + dft48((int16_t *)(x2128+48),(int16_t *)(ytmp128+48),0); + + + bfly2_tw1(ytmp128,ytmp128+48,y128,y128+48); + + for (i=1,j=0; i<48; i++,j++) { + bfly2(ytmp128+i, + ytmp128+48+i, + y128+i, + y128+48+i, + tw128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[6]); + + for (i=0; i<96; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa108[280]__attribute__((aligned(32))); +static int16_t twb108[280]__attribute__((aligned(32))); + +void dft108(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa108[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb108[0]; + simd_q15_t x2128[108];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[108];//=&ytmp128array2[0]; + + + for (i=0,j=0; i<36; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+36] = x128[j+1]; + x2128[i+72] = x128[j+2]; + } + + dft36((int16_t *)x2128,(int16_t *)ytmp128,0); + dft36((int16_t *)(x2128+36),(int16_t *)(ytmp128+36),0); + dft36((int16_t *)(x2128+72),(int16_t *)(ytmp128+72),0); + + bfly3_tw1(ytmp128,ytmp128+36,ytmp128+72,y128,y128+36,y128+72); + + for (i=1,j=0; i<36; i++,j++) { + bfly3(ytmp128+i, + ytmp128+36+i, + ytmp128+72+i, + y128+i, + y128+36+i, + y128+72+i, + twa128+j, + 
twb128+j); + + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[7]); + + for (i=0; i<108; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t tw120[472]__attribute__((aligned(32))); +void dft120(int16_t *x,int16_t *y, unsigned char scale_flag) +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *tw128=(simd_q15_t *)&tw120[0]; + simd_q15_t x2128[120];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[120];//=&ytmp128array2[0]; + + for (i=0,j=0; i<60; i++,j+=2) { + x2128[i] = x128[j]; + x2128[i+60] = x128[j+1]; + } + + dft60((int16_t *)x2128,(int16_t *)ytmp128,0); + dft60((int16_t *)(x2128+60),(int16_t *)(ytmp128+60),0); + + + bfly2_tw1(ytmp128,ytmp128+60,y128,y128+60); + + for (i=1,j=0; i<60; i++,j++) { + bfly2(ytmp128+i, + ytmp128+60+i, + y128+i, + y128+60+i, + tw128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[8]); + + for (i=0; i<120; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa144[376]__attribute__((aligned(32))); +static int16_t twb144[376]__attribute__((aligned(32))); + +void dft144(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa144[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb144[0]; + simd_q15_t x2128[144];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[144];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<48; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+48] = x128[j+1]; + x2128[i+96] = x128[j+2]; + } + + dft48((int16_t *)x2128,(int16_t *)ytmp128,1); + dft48((int16_t *)(x2128+48),(int16_t *)(ytmp128+48),1); + dft48((int16_t *)(x2128+96),(int16_t *)(ytmp128+96),1); + + bfly3_tw1(ytmp128,ytmp128+48,ytmp128+96,y128,y128+48,y128+96); + + for (i=1,j=0; i<48; i++,j++) { + bfly3(ytmp128+i, + ytmp128+48+i, + ytmp128+96+i, 
+ y128+i, + y128+48+i, + y128+96+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[9]); + + for (i=0; i<144; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa180[472]__attribute__((aligned(32))); +static int16_t twb180[472]__attribute__((aligned(32))); + +void dft180(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa180[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb180[0]; + simd_q15_t x2128[180];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[180];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<60; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+60] = x128[j+1]; + x2128[i+120] = x128[j+2]; + } + + dft60((int16_t *)x2128,(int16_t *)ytmp128,1); + dft60((int16_t *)(x2128+60),(int16_t *)(ytmp128+60),1); + dft60((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1); + + bfly3_tw1(ytmp128,ytmp128+60,ytmp128+120,y128,y128+60,y128+120); + + for (i=1,j=0; i<60; i++,j++) { + bfly3(ytmp128+i, + ytmp128+60+i, + ytmp128+120+i, + y128+i, + y128+60+i, + y128+120+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[10]); + + for (i=0; i<180; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa192[376]__attribute__((aligned(32))); +static int16_t twb192[376]__attribute__((aligned(32))); +static int16_t twc192[376]__attribute__((aligned(32))); + +void dft192(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa192[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb192[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc192[0]; + simd_q15_t x2128[192];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[192];//=&ytmp128array2[0]; + + + + for 
(i=0,j=0; i<48; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+48] = x128[j+1]; + x2128[i+96] = x128[j+2]; + x2128[i+144] = x128[j+3]; + } + + dft48((int16_t *)x2128,(int16_t *)ytmp128,1); + dft48((int16_t *)(x2128+48),(int16_t *)(ytmp128+48),1); + dft48((int16_t *)(x2128+96),(int16_t *)(ytmp128+96),1); + dft48((int16_t *)(x2128+144),(int16_t *)(ytmp128+144),1); + + bfly4_tw1(ytmp128,ytmp128+48,ytmp128+96,ytmp128+144,y128,y128+48,y128+96,y128+144); + + for (i=1,j=0; i<48; i++,j++) { + bfly4(ytmp128+i, + ytmp128+48+i, + ytmp128+96+i, + ytmp128+144+i, + y128+i, + y128+48+i, + y128+96+i, + y128+144+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[11]); + + for (i=0; i<192; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa216[568]__attribute__((aligned(32))); +static int16_t twb216[568]__attribute__((aligned(32))); + +void dft216(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa216[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb216[0]; + simd_q15_t x2128[216];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[216];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<72; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+72] = x128[j+1]; + x2128[i+144] = x128[j+2]; + } + + dft72((int16_t *)x2128,(int16_t *)ytmp128,1); + dft72((int16_t *)(x2128+72),(int16_t *)(ytmp128+72),1); + dft72((int16_t *)(x2128+144),(int16_t *)(ytmp128+144),1); + + bfly3_tw1(ytmp128,ytmp128+72,ytmp128+144,y128,y128+72,y128+144); + + for (i=1,j=0; i<72; i++,j++) { + bfly3(ytmp128+i, + ytmp128+72+i, + ytmp128+144+i, + y128+i, + y128+72+i, + y128+144+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[12]); + + for (i=0; i<216; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static 
int16_t twa240[472]__attribute__((aligned(32))); +static int16_t twb240[472]__attribute__((aligned(32))); +static int16_t twc240[472]__attribute__((aligned(32))); + +void dft240(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa240[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb240[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc240[0]; + simd_q15_t x2128[240];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[240];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<60; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+60] = x128[j+1]; + x2128[i+120] = x128[j+2]; + x2128[i+180] = x128[j+3]; + } + + dft60((int16_t *)x2128,(int16_t *)ytmp128,1); + dft60((int16_t *)(x2128+60),(int16_t *)(ytmp128+60),1); + dft60((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1); + dft60((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1); + + bfly4_tw1(ytmp128,ytmp128+60,ytmp128+120,ytmp128+180,y128,y128+60,y128+120,y128+180); + + for (i=1,j=0; i<60; i++,j++) { + bfly4(ytmp128+i, + ytmp128+60+i, + ytmp128+120+i, + ytmp128+180+i, + y128+i, + y128+60+i, + y128+120+i, + y128+180+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[13]); + + for (i=0; i<240; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa288[760]__attribute__((aligned(32))); +static int16_t twb288[760]__attribute__((aligned(32))); + +void dft288(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa288[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb288[0]; + simd_q15_t x2128[288];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[288];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<96; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+96] = x128[j+1]; + x2128[i+192] = x128[j+2]; + } + 
+ dft96((int16_t *)x2128,(int16_t *)ytmp128,1); + dft96((int16_t *)(x2128+96),(int16_t *)(ytmp128+96),1); + dft96((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1); + + bfly3_tw1(ytmp128,ytmp128+96,ytmp128+192,y128,y128+96,y128+192); + + for (i=1,j=0; i<96; i++,j++) { + bfly3(ytmp128+i, + ytmp128+96+i, + ytmp128+192+i, + y128+i, + y128+96+i, + y128+192+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<288; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa300[472]__attribute__((aligned(32))); +static int16_t twb300[472]__attribute__((aligned(32))); +static int16_t twc300[472]__attribute__((aligned(32))); +static int16_t twd300[472]__attribute__((aligned(32))); + +void dft300(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa300[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb300[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc300[0]; + simd_q15_t *twd128=(simd_q15_t *)&twd300[0]; + simd_q15_t x2128[300];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[300];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<60; i++,j+=5) { + x2128[i] = x128[j]; + x2128[i+60] = x128[j+1]; + x2128[i+120] = x128[j+2]; + x2128[i+180] = x128[j+3]; + x2128[i+240] = x128[j+4]; + } + + dft60((int16_t *)x2128,(int16_t *)ytmp128,1); + dft60((int16_t *)(x2128+60),(int16_t *)(ytmp128+60),1); + dft60((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1); + dft60((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1); + dft60((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1); + + bfly5_tw1(ytmp128,ytmp128+60,ytmp128+120,ytmp128+180,ytmp128+240,y128,y128+60,y128+120,y128+180,y128+240); + + for (i=1,j=0; i<60; i++,j++) { + bfly5(ytmp128+i, + ytmp128+60+i, + ytmp128+120+i, + ytmp128+180+i, + ytmp128+240+i, + y128+i, + y128+60+i, + y128+120+i, + y128+180+i, + 
y128+240+i, + twa128+j, + twb128+j, + twc128+j, + twd128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[15]); + + for (i=0; i<300; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +static int16_t twa324[107*2*4]; +static int16_t twb324[107*2*4]; + +void dft324(int16_t *x,int16_t *y,unsigned char scale_flag) // 108 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa324[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb324[0]; + simd_q15_t x2128[324];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[324];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<108; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+108] = x128[j+1]; + x2128[i+216] = x128[j+2]; + } + + dft108((int16_t *)x2128,(int16_t *)ytmp128,1); + dft108((int16_t *)(x2128+108),(int16_t *)(ytmp128+108),1); + dft108((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1); + + bfly3_tw1(ytmp128,ytmp128+108,ytmp128+216,y128,y128+108,y128+216); + + for (i=1,j=0; i<108; i++,j++) { + bfly3(ytmp128+i, + ytmp128+108+i, + ytmp128+216+i, + y128+i, + y128+108+i, + y128+216+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<324; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa360[119*2*4]; +static int16_t twb360[119*2*4]; + +void dft360(int16_t *x,int16_t *y,unsigned char scale_flag) // 120 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa360[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb360[0]; + simd_q15_t x2128[360];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[360];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<120; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+120] = x128[j+1]; + x2128[i+240] = x128[j+2]; + } + + dft120((int16_t *)x2128,(int16_t *)ytmp128,1); + 
dft120((int16_t *)(x2128+120),(int16_t *)(ytmp128+120),1); + dft120((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1); + + bfly3_tw1(ytmp128,ytmp128+120,ytmp128+240,y128,y128+120,y128+240); + + for (i=1,j=0; i<120; i++,j++) { + bfly3(ytmp128+i, + ytmp128+120+i, + ytmp128+240+i, + y128+i, + y128+120+i, + y128+240+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<360; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa384[95*2*4]; +static int16_t twb384[95*2*4]; +static int16_t twc384[95*2*4]; + +void dft384(int16_t *x,int16_t *y,unsigned char scale_flag) // 96 x 4 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa384[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb384[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc384[0]; + simd_q15_t x2128[384];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[384];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<96; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+96] = x128[j+1]; + x2128[i+192] = x128[j+2]; + x2128[i+288] = x128[j+3]; + } + + dft96((int16_t *)x2128,(int16_t *)ytmp128,1); + dft96((int16_t *)(x2128+96),(int16_t *)(ytmp128+96),1); + dft96((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1); + dft96((int16_t *)(x2128+288),(int16_t *)(ytmp128+288),1); + + bfly4_tw1(ytmp128,ytmp128+96,ytmp128+192,ytmp128+288,y128,y128+96,y128+192,y128+288); + + for (i=1,j=0; i<96; i++,j++) { + bfly4(ytmp128+i, + ytmp128+96+i, + ytmp128+192+i, + ytmp128+288+i, + y128+i, + y128+96+i, + y128+192+i, + y128+288+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<384; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa432[107*2*4]; +static int16_t twb432[107*2*4]; +static int16_t twc432[107*2*4]; + 
+void dft432(int16_t *x,int16_t *y,unsigned char scale_flag) // 108 x 4 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa432[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb432[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc432[0]; + simd_q15_t x2128[432];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[432];//=&ytmp128array2[0]; + + + for (i=0,j=0; i<108; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+108] = x128[j+1]; + x2128[i+216] = x128[j+2]; + x2128[i+324] = x128[j+3]; + } + + dft108((int16_t *)x2128,(int16_t *)ytmp128,1); + dft108((int16_t *)(x2128+108),(int16_t *)(ytmp128+108),1); + dft108((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1); + dft108((int16_t *)(x2128+324),(int16_t *)(ytmp128+324),1); + + bfly4_tw1(ytmp128,ytmp128+108,ytmp128+216,ytmp128+324,y128,y128+108,y128+216,y128+324); + + for (i=1,j=0; i<108; i++,j++) { + bfly4(ytmp128+i, + ytmp128+108+i, + ytmp128+216+i, + ytmp128+324+i, + y128+i, + y128+108+i, + y128+216+i, + y128+324+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<432; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; +static int16_t twa480[119*2*4]; +static int16_t twb480[119*2*4]; +static int16_t twc480[119*2*4]; + +void dft480(int16_t *x,int16_t *y,unsigned char scale_flag) // 120 x 4 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa480[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb480[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc480[0]; + simd_q15_t x2128[480];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[480];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<120; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+120] = x128[j+1]; + x2128[i+240] = x128[j+2]; + x2128[i+360] = x128[j+3]; + } + + dft120((int16_t *)x2128,(int16_t *)ytmp128,1); + dft120((int16_t 
*)(x2128+120),(int16_t *)(ytmp128+120),1); + dft120((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1); + dft120((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1); + + bfly4_tw1(ytmp128,ytmp128+120,ytmp128+240,ytmp128+360,y128,y128+120,y128+240,y128+360); + + for (i=1,j=0; i<120; i++,j++) { + bfly4(ytmp128+i, + ytmp128+120+i, + ytmp128+240+i, + ytmp128+360+i, + y128+i, + y128+120+i, + y128+240+i, + y128+360+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<480; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + + +static int16_t twa540[179*2*4]; +static int16_t twb540[179*2*4]; + +void dft540(int16_t *x,int16_t *y,unsigned char scale_flag) // 180 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa540[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb540[0]; + simd_q15_t x2128[540];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[540];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<180; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+180] = x128[j+1]; + x2128[i+360] = x128[j+2]; + } + + dft180((int16_t *)x2128,(int16_t *)ytmp128,1); + dft180((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1); + dft180((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1); + + bfly3_tw1(ytmp128,ytmp128+180,ytmp128+360,y128,y128+180,y128+360); + + for (i=1,j=0; i<180; i++,j++) { + bfly3(ytmp128+i, + ytmp128+180+i, + ytmp128+360+i, + y128+i, + y128+180+i, + y128+360+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<540; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa576[191*2*4]; +static int16_t twb576[191*2*4]; + +void dft576(int16_t *x,int16_t *y,unsigned char scale_flag) // 192 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t 
*y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa576[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb576[0]; + simd_q15_t x2128[576];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[576];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<192; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+192] = x128[j+1]; + x2128[i+384] = x128[j+2]; + } + + + dft192((int16_t *)x2128,(int16_t *)ytmp128,1); + dft192((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1); + dft192((int16_t *)(x2128+384),(int16_t *)(ytmp128+384),1); + + bfly3_tw1(ytmp128,ytmp128+192,ytmp128+384,y128,y128+192,y128+384); + + for (i=1,j=0; i<192; i++,j++) { + bfly3(ytmp128+i, + ytmp128+192+i, + ytmp128+384+i, + y128+i, + y128+192+i, + y128+384+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<576; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); +}; + + +static int16_t twa600[299*2*4]; + +void dft600(int16_t *x,int16_t *y,unsigned char scale_flag) // 300 x 2 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *tw128=(simd_q15_t *)&twa600[0]; + simd_q15_t x2128[600];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[600];//=&ytmp128array2[0]; + + + for (i=0,j=0; i<300; i++,j+=2) { + x2128[i] = x128[j]; + x2128[i+300] = x128[j+1]; + } + + dft300((int16_t *)x2128,(int16_t *)ytmp128,1); + dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1); + + + bfly2_tw1(ytmp128,ytmp128+300,y128,y128+300); + + for (i=1,j=0; i<300; i++,j++) { + bfly2(ytmp128+i, + ytmp128+300+i, + y128+i, + y128+300+i, + tw128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(ONE_OVER_SQRT2_Q15); + + for (i=0; i<600; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); +}; + + +static int16_t twa648[215*2*4]; +static int16_t twb648[215*2*4]; + +void dft648(int16_t *x,int16_t *y,unsigned char scale_flag) // 216 x 3 +{ + int i,j; + 
simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa648[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb648[0]; + simd_q15_t x2128[648];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[648];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<216; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+216] = x128[j+1]; + x2128[i+432] = x128[j+2]; + } + + dft216((int16_t *)x2128,(int16_t *)ytmp128,1); + dft216((int16_t *)(x2128+216),(int16_t *)(ytmp128+216),1); + dft216((int16_t *)(x2128+432),(int16_t *)(ytmp128+432),1); + + bfly3_tw1(ytmp128,ytmp128+216,ytmp128+432,y128,y128+216,y128+432); + + for (i=1,j=0; i<216; i++,j++) { + bfly3(ytmp128+i, + ytmp128+216+i, + ytmp128+432+i, + y128+i, + y128+216+i, + y128+432+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<648; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + + +static int16_t twa720[179*2*4]; +static int16_t twb720[179*2*4]; +static int16_t twc720[179*2*4]; + + +void dft720(int16_t *x,int16_t *y,unsigned char scale_flag) // 180 x 4 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa720[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb720[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc720[0]; + simd_q15_t x2128[720];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[720];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<180; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+180] = x128[j+1]; + x2128[i+360] = x128[j+2]; + x2128[i+540] = x128[j+3]; + } + + dft180((int16_t *)x2128,(int16_t *)ytmp128,1); + dft180((int16_t *)(x2128+180),(int16_t *)(ytmp128+180),1); + dft180((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1); + dft180((int16_t *)(x2128+540),(int16_t *)(ytmp128+540),1); + + bfly4_tw1(ytmp128,ytmp128+180,ytmp128+360,ytmp128+540,y128,y128+180,y128+360,y128+540); + + for (i=1,j=0; i<180; 
i++,j++) { + bfly4(ytmp128+i, + ytmp128+180+i, + ytmp128+360+i, + ytmp128+540+i, + y128+i, + y128+180+i, + y128+360+i, + y128+540+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<720; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa768[191*2*4]; +static int16_t twb768[191*2*4]; +static int16_t twc768[191*2*4]; + +void dft768(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4; + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa768[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb768[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc768[0]; + simd_q15_t x2128[768];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[768];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<192; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+192] = x128[j+1]; + x2128[i+384] = x128[j+2]; + x2128[i+576] = x128[j+3]; + } + + dft192((int16_t *)x2128,(int16_t *)ytmp128,1); + dft192((int16_t *)(x2128+192),(int16_t *)(ytmp128+192),1); + dft192((int16_t *)(x2128+384),(int16_t *)(ytmp128+384),1); + dft192((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1); + + bfly4_tw1(ytmp128,ytmp128+192,ytmp128+384,ytmp128+576,y128,y128+192,y128+384,y128+576); + + for (i=1,j=0; i<192; i++,j++) { + bfly4(ytmp128+i, + ytmp128+192+i, + ytmp128+384+i, + ytmp128+576+i, + y128+i, + y128+192+i, + y128+384+i, + y128+576+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<768; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + + +} + + +static int16_t twa864[287*2*4]; +static int16_t twb864[287*2*4]; + +void dft864(int16_t *x,int16_t *y,unsigned char scale_flag) // 288 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t 
*twa128=(simd_q15_t *)&twa864[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb864[0]; + simd_q15_t x2128[864];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[864];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<288; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+288] = x128[j+1]; + x2128[i+576] = x128[j+2]; + } + + dft288((int16_t *)x2128,(int16_t *)ytmp128,1); + dft288((int16_t *)(x2128+288),(int16_t *)(ytmp128+288),1); + dft288((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1); + + bfly3_tw1(ytmp128,ytmp128+288,ytmp128+576,y128,y128+288,y128+576); + + for (i=1,j=0; i<288; i++,j++) { + bfly3(ytmp128+i, + ytmp128+288+i, + ytmp128+576+i, + y128+i, + y128+288+i, + y128+576+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<864; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa900[299*2*4]; +static int16_t twb900[299*2*4]; + +void dft900(int16_t *x,int16_t *y,unsigned char scale_flag) // 300 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa900[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb900[0]; + simd_q15_t x2128[900];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[900];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<300; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+300] = x128[j+1]; + x2128[i+600] = x128[j+2]; + } + + dft300((int16_t *)x2128,(int16_t *)ytmp128,1); + dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1); + dft300((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1); + + bfly3_tw1(ytmp128,ytmp128+300,ytmp128+600,y128,y128+300,y128+600); + + for (i=1,j=0; i<300; i++,j++) { + bfly3(ytmp128+i, + ytmp128+300+i, + ytmp128+600+i, + y128+i, + y128+300+i, + y128+600+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<900; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + 
_mm_empty(); + _m_empty(); + +}; + + +static int16_t twa960[239*2*4]; +static int16_t twb960[239*2*4]; +static int16_t twc960[239*2*4]; + + +void dft960(int16_t *x,int16_t *y,unsigned char scale_flag) // 240 x 4 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa960[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb960[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc960[0]; + simd_q15_t x2128[960];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[960];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<240; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+240] = x128[j+1]; + x2128[i+480] = x128[j+2]; + x2128[i+720] = x128[j+3]; + } + + dft240((int16_t *)x2128,(int16_t *)ytmp128,1); + dft240((int16_t *)(x2128+240),(int16_t *)(ytmp128+240),1); + dft240((int16_t *)(x2128+480),(int16_t *)(ytmp128+480),1); + dft240((int16_t *)(x2128+720),(int16_t *)(ytmp128+720),1); + + bfly4_tw1(ytmp128,ytmp128+240,ytmp128+480,ytmp128+720,y128,y128+240,y128+480,y128+720); + + for (i=1,j=0; i<240; i++,j++) { + bfly4(ytmp128+i, + ytmp128+240+i, + ytmp128+480+i, + ytmp128+720+i, + y128+i, + y128+240+i, + y128+480+i, + y128+720+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<960; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + + +static int16_t twa972[323*2*4]; +static int16_t twb972[323*2*4]; + +void dft972(int16_t *x,int16_t *y,unsigned char scale_flag) // 324 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa972[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb972[0]; + simd_q15_t x2128[972];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[972];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<324; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+324] = x128[j+1]; + x2128[i+648] = x128[j+2]; + } + + dft324((int16_t 
*)x2128,(int16_t *)ytmp128,1); + dft324((int16_t *)(x2128+324),(int16_t *)(ytmp128+324),1); + dft324((int16_t *)(x2128+648),(int16_t *)(ytmp128+648),1); + + bfly3_tw1(ytmp128,ytmp128+324,ytmp128+648,y128,y128+324,y128+648); + + for (i=1,j=0; i<324; i++,j++) { + bfly3(ytmp128+i, + ytmp128+324+i, + ytmp128+648+i, + y128+i, + y128+324+i, + y128+648+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<972; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa1080[359*2*4]; +static int16_t twb1080[359*2*4]; + +void dft1080(int16_t *x,int16_t *y,unsigned char scale_flag) // 360 x 3 +{ + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa1080[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb1080[0]; + simd_q15_t x2128[1080];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[1080];//=&ytmp128array3[0]; + + + + for (i=0,j=0; i<360; i++,j+=3) { + x2128[i] = x128[j]; + x2128[i+360] = x128[j+1]; + x2128[i+720] = x128[j+2]; + } + + dft360((int16_t *)x2128,(int16_t *)ytmp128,1); + dft360((int16_t *)(x2128+360),(int16_t *)(ytmp128+360),1); + dft360((int16_t *)(x2128+720),(int16_t *)(ytmp128+720),1); + + bfly3_tw1(ytmp128,ytmp128+360,ytmp128+720,y128,y128+360,y128+720); + + for (i=1,j=0; i<360; i++,j++) { + bfly3(ytmp128+i, + ytmp128+360+i, + ytmp128+720+i, + y128+i, + y128+360+i, + y128+720+i, + twa128+j, + twb128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(dft_norm_table[14]); + + for (i=0; i<1080; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +}; + +static int16_t twa1152[287*2*4]; +static int16_t twb1152[287*2*4]; +static int16_t twc1152[287*2*4]; + +void dft1152(int16_t *x,int16_t *y,unsigned char scale_flag) // 288 x 4 +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t 
*twa128=(simd_q15_t *)&twa1152[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb1152[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc1152[0]; + simd_q15_t x2128[1152];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[1152];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<288; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+288] = x128[j+1]; + x2128[i+576] = x128[j+2]; + x2128[i+864] = x128[j+3]; + } + + dft288((int16_t *)x2128,(int16_t *)ytmp128,1); + dft288((int16_t *)(x2128+288),(int16_t *)(ytmp128+288),1); + dft288((int16_t *)(x2128+576),(int16_t *)(ytmp128+576),1); + dft288((int16_t *)(x2128+864),(int16_t *)(ytmp128+864),1); + + bfly4_tw1(ytmp128,ytmp128+288,ytmp128+576,ytmp128+864,y128,y128+288,y128+576,y128+864); + + for (i=1,j=0; i<288; i++,j++) { + bfly4(ytmp128+i, + ytmp128+288+i, + ytmp128+576+i, + ytmp128+864+i, + y128+i, + y128+288+i, + y128+576+i, + y128+864+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + + for (i=0; i<1152; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); +}; + +int16_t twa1200[4784]; +int16_t twb1200[4784]; +int16_t twc1200[4784]; + +void dft1200(int16_t *x,int16_t *y,unsigned char scale_flag) +{ + + int i,j; + simd_q15_t *x128=(simd_q15_t *)x; + simd_q15_t *y128=(simd_q15_t *)y; + simd_q15_t *twa128=(simd_q15_t *)&twa1200[0]; + simd_q15_t *twb128=(simd_q15_t *)&twb1200[0]; + simd_q15_t *twc128=(simd_q15_t *)&twc1200[0]; + simd_q15_t x2128[1200];// = (simd_q15_t *)&x2128array[0]; + simd_q15_t ytmp128[1200];//=&ytmp128array2[0]; + + + + for (i=0,j=0; i<300; i++,j+=4) { + x2128[i] = x128[j]; + x2128[i+300] = x128[j+1]; + x2128[i+600] = x128[j+2]; + x2128[i+900] = x128[j+3]; + } + + dft300((int16_t *)x2128,(int16_t *)ytmp128,1); + dft300((int16_t *)(x2128+300),(int16_t *)(ytmp128+300),1); + dft300((int16_t *)(x2128+600),(int16_t *)(ytmp128+600),1); + dft300((int16_t *)(x2128+900),(int16_t *)(ytmp128+900),1); + + 
bfly4_tw1(ytmp128,ytmp128+300,ytmp128+600,ytmp128+900,y128,y128+300,y128+600,y128+900); + + for (i=1,j=0; i<300; i++,j++) { + bfly4(ytmp128+i, + ytmp128+300+i, + ytmp128+600+i, + ytmp128+900+i, + y128+i, + y128+300+i, + y128+600+i, + y128+900+i, + twa128+j, + twb128+j, + twc128+j); + } + + if (scale_flag==1) { + norm128 = set1_int16(16384);//dft_norm_table[13]); + for (i=0; i<1200; i++) { + y128[i] = mulhi_int16(y128[i],norm128); + } + } + + _mm_empty(); + _m_empty(); + +} + +void init_rad4(int N,int16_t *tw) { + + int16_t *twa = tw; + int16_t *twb = twa+(N/2); + int16_t *twc = twb+(N/2); + int i; + + for (i=0;i<(N/4);i++) { + *twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; + *twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++; + *twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++; + *twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++; + *twc = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); twc++; + *twc = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); twc++; + } +} +void init_rad4_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc) { + + int i,j; + + for (i=1;i<(N/4);i++) { + twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N)); + twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); + twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); + twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); + twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); + twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); + for (j=1;j<4;j++) { + ((int32_t*)twa)[j]=((int32_t*)twa)[0]; + ((int32_t*)twb)[j]=((int32_t*)twb)[0]; + ((int32_t*)twc)[j]=((int32_t*)twc)[0]; + } + twa+=8; + twb+=8; + twc+=8; + } +} + +void init_rad2(int N,int16_t *tw) { + + int16_t *twa = tw; + int i; + + for (i=0;i<(N>>1);i++) { + *twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; + *twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++; + } +} + +void init_rad2_rep(int N,int16_t *twa) { + + int i,j; + + for (i=1;i<(N/2);i++) { + twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N)); + twa[1] = 
-(int16_t)round(32767.0*sin(2*M_PI*i/N)); + for (j=1;j<4;j++) { + ((int32_t*)twa)[j]=((int32_t*)twa)[0]; + } + twa+=8; + } +} + +void init_rad3(int N,int16_t *twa,int16_t *twb) { + + int i; + + for (i=0;i<(N/3);i++) { + *twa = (int16_t)round(32767.0*cos(2*M_PI*i/N)); twa++; + *twa = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); twa++; + *twb = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); twb++; + *twb = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); twb++; + } +} + +void init_rad3_rep(int N,int16_t *twa,int16_t *twb) { + + int i,j; + + for (i=1;i<(N/3);i++) { + twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N)); + twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); + twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); + twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); + for (j=1;j<4;j++) { + ((int32_t*)twa)[j]=((int32_t*)twa)[0]; + ((int32_t*)twb)[j]=((int32_t*)twb)[0]; + } + twa+=8; + twb+=8; + } +} + +void init_rad5_rep(int N,int16_t *twa,int16_t *twb,int16_t *twc,int16_t *twd) { + + int i,j; + + for (i=1;i<(N/5);i++) { + twa[0] = (int16_t)round(32767.0*cos(2*M_PI*i/N)); + twa[1] = -(int16_t)round(32767.0*sin(2*M_PI*i/N)); + twb[0] = (int16_t)round(32767.0*cos(2*M_PI*2*i/N)); + twb[1] = -(int16_t)round(32767.0*sin(2*M_PI*2*i/N)); + twc[0] = (int16_t)round(32767.0*cos(2*M_PI*3*i/N)); + twc[1] = -(int16_t)round(32767.0*sin(2*M_PI*3*i/N)); + twd[0] = (int16_t)round(32767.0*cos(2*M_PI*4*i/N)); + twd[1] = -(int16_t)round(32767.0*sin(2*M_PI*4*i/N)); + for (j=1;j<4;j++) { + ((int32_t*)twa)[j]=((int32_t*)twa)[0]; + ((int32_t*)twb)[j]=((int32_t*)twb)[0]; + ((int32_t*)twc)[j]=((int32_t*)twc)[0]; + ((int32_t*)twd)[j]=((int32_t*)twd)[0]; + } + twa+=8; + twb+=8; + twc+=8; + twd+=8; + } +} +/*----------------------------------------------------------------*/ +/* dft library entry points: */ + +int dfts_autoinit(void) +{ + init_rad4(1024,tw1024); + init_rad2(2048,tw2048); + init_rad4(4096,tw4096); + init_rad2(8192,tw8192); + + init_rad3(1536,twa1536,twb1536); + 
init_rad3(3072,twa3072,twb3072); + init_rad3(6144,twa6144,twb6144); + init_rad3(12288,twa12288,twb12288); + init_rad3(18432,twa18432,twb18432); + init_rad3(24576,twa24576,twb24576); + + init_rad2_rep(24,tw24); + init_rad3_rep(36,twa36,twb36); + init_rad4_rep(48,twa48,twb48,twc48); + init_rad5_rep(60,twa60,twb60,twc60,twd60); + init_rad2_rep(72,tw72); + init_rad2_rep(96,tw96); + init_rad3_rep(108,twa108,twb108); + init_rad2_rep(120,tw120); + init_rad3_rep(144,twa144,twb144); + init_rad3_rep(180,twa180,twb180); + init_rad4_rep(192,twa192,twb192,twc192); + init_rad3_rep(216,twa216,twb216); + init_rad4_rep(240,twa240,twb240,twc240); + init_rad3_rep(288,twa288,twb288); + init_rad5_rep(300,twa300,twb300,twc300,twd300); + init_rad3_rep(324,twa324,twb324); + init_rad3_rep(360,twa360,twb360); + init_rad4_rep(384,twa384,twb384,twc384); + init_rad4_rep(432,twa432,twb432,twc432); + init_rad4_rep(480,twa480,twb480,twc480); + init_rad3_rep(540,twa540,twb540); + init_rad3_rep(576,twa576,twb576); + init_rad2_rep(600,twa600); + init_rad3_rep(648,twa648,twb648); + init_rad4_rep(720,twa720,twb720,twc720); + init_rad4_rep(768,twa768,twb768,twc768); + init_rad3_rep(864,twa864,twb864); + init_rad3_rep(900,twa900,twb900); + init_rad4_rep(960,twa960,twb960,twc960); + init_rad3_rep(972,twa972,twb972); + init_rad3_rep(1080,twa1080,twb1080); + init_rad4_rep(1152,twa1152,twb1152,twc1152); + init_rad4_rep(1200,twa1200,twb1200,twc1200); + return 0; +} + + + + +void dft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){ + AssertFatal((sizeidx>=0 && sizeidx<(int)DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx); + dft_ftab[sizeidx](sigF,sig,scale_flag); +}; + +void idft(uint8_t sizeidx, int16_t *sigF,int16_t *sig,unsigned char scale_flag){ + AssertFatal((sizeidx>=0 && sizeidx<(int)IDFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx); + idft_ftab[sizeidx](sigF,sig,scale_flag); +}; + 
/*---------------------------------------------------------------------------------------*/

#ifdef MR_MAIN
#include <string.h>
#include <stdio.h>

#define LOG_M write_output

/*
 * Dump an array to a text file in an Octave/Matlab-style "name = [ ... ];" form.
 *
 * fname   output file path
 * vname   variable name written in the "vname = [" header
 * data    array to dump; element type is selected by format
 * length  number of (real or complex) elements
 * dec     decimation step between written elements
 * format  0: real int16     1: complex int16   2: real int32   3: complex int32
 *         4: real int8      5: complex int8    6: real int64   7: real double
 *         8: complex double 9: real uint8
 *         10: complex int16, comma separated on one row (appended, no header/footer)
 *         11: real int16, comma separated on one row (appended, no header/footer)
 *         12: single uint8 value (appended, no header/footer)
 *         13: complex int16, appended, neither header nor footer
 *         14: complex int16, appended, footer only
 *         15: complex int16, file truncated, header only
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 */
int write_output(const char *fname,const char *vname,void *data,int length,int dec,char format)
{
  /* formats 10..14 append to the file and never write the "vname = [" header */
  const int headerless = (format >= 10 && format <= 14);
  /* formats 10..13 and 15 never write the closing "];" */
  const int footerless = (format >= 10 && format <= 13) || format == 15;
  FILE *fp;
  int i;

  printf("Writing %d elements of type %d to %s\n",length,format,fname);

  fp = fopen(fname, headerless ? "a+" : "w+");

  if (fp == NULL) {
    printf("[OPENAIR][FILE OUTPUT] Cannot open file %s\n",fname);
    return(-1);
  }

  if (!headerless)
    fprintf(fp,"%s = [",vname);

  switch (format) {
    case 0:   // real 16-bit
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%d\n",((short *)data)[i]);
      }

      break;

    case 1:  // complex 16-bit
    case 13:
    case 14:
    case 15:
      for (i=0; i<length<<1; i+=(2*dec)) {
        fprintf(fp,"%d + j*(%d)\n",((short *)data)[i],((short *)data)[i+1]);
      }

      break;

    case 2:  // real 32-bit
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%d\n",((int *)data)[i]);
      }

      break;

    case 3: // complex 32-bit
      for (i=0; i<length<<1; i+=(2*dec)) {
        fprintf(fp,"%d + j*(%d)\n",((int *)data)[i],((int *)data)[i+1]);
      }

      break;

    case 4: // real 8-bit
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%d\n",((char *)data)[i]);
      }

      break;

    case 5: // complex 8-bit
      for (i=0; i<length<<1; i+=(2*dec)) {
        fprintf(fp,"%d + j*(%d)\n",((char *)data)[i],((char *)data)[i+1]);
      }

      break;

    case 6:  // real 64-bit
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%lld\n",((long long *)data)[i]);
      }

      break;

    case 7: // real double
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%g\n",((double *)data)[i]);
      }

      break;

    case 8: // complex double
      for (i=0; i<length<<1; i+=2*dec) {
        fprintf(fp,"%g + j*(%g)\n",((double *)data)[i], ((double *)data)[i+1]);
      }

      break;

    case 9: // real unsigned 8-bit
      for (i=0; i<length; i+=dec) {
        fprintf(fp,"%d\n",((unsigned char *)data)[i]);
      }

      break;

    case 10 : // 16-bit complex, one row: "\n" before the first, ";" after the last
      for (i=0; i<length<<1; i+=(2*dec)) {
        if ((i < 2*(length-1)) && (i > 0))
          fprintf(fp,"%d + j*(%d),",((short *)data)[i],((short *)data)[i+1]);
        else if (i == 2*(length-1))
          fprintf(fp,"%d + j*(%d);",((short *)data)[i],((short *)data)[i+1]);
        else if (i == 0)
          fprintf(fp,"\n%d + j*(%d),",((short *)data)[i],((short *)data)[i+1]);
      }

      break;

    case 11 : // 16-bit real (channel magnitudes), one row
      for (i=0; i<length; i+=dec) {
        if ((i < (length-1)) && (i > 0))
          fprintf(fp,"%d,",((short *)data)[i]);
        else if (i == (length-1))
          fprintf(fp,"%d;",((short *)data)[i]);
        else if (i == 0)
          fprintf(fp,"\n%d,",((short *)data)[i]);
      }

      printf("\n erennnnnnnnnnnnnnn: length :%d",length);
      break;

    case 12 : // log2_maxh, real unsigned 8 bit
      /* NOTE(review): this reads the first byte of the pointer variable itself
       * (&data), not of the pointed-to buffer; presumably callers pass the value
       * cast to a pointer - confirm before changing. */
      fprintf(fp,"%d \n",((unsigned char *)&data)[0]);
      break;

  }

  if (!footerless)
    fprintf(fp,"];\n");

  fclose(fp);
  return(0);
}

/* Signature shared by the sized DFT/IDFT kernels benchmarked by main(). */
typedef void (*dft_kernel_t)(int16_t *x,int16_t *y,unsigned char scale_flag);

/*
 * One benchmark case: an N-point kernel plus the two int16_t index ranges of the
 * input buffer that are filled with random +/-364 values.
 */
typedef struct {
  int          size;      /* N: complex samples zeroed before filling, and dumped */
  dft_kernel_t kernel;
  int          lo_start;  /* first filled range is [lo_start, lo_end) */
  int          lo_end;
  int          hi_start;  /* second filled range is [hi_start, hi_end) */
  int          hi_end;
} dft_case_t;

/* Return +364 or -364 depending on the lowest bit of the next taus() draw. */
static int16_t random_tone(void)
{
  return ((taus() & 1)==0) ? 364 : -364;
}

/*
 * Zero the first n_complex complex int16 samples of x, then fill the int16_t
 * index ranges [lo_start,lo_end) and [hi_start,hi_end) with random_tone().
 */
static void fill_band_edges(int16_t *x,int n_complex,int lo_start,int lo_end,int hi_start,int hi_end)
{
  int i;

  memset((void *)x,0,n_complex*sizeof(int32_t));

  for (i=lo_start; i<lo_end; i++)
    x[i] = random_tone();

  for (i=hi_start; i<hi_end; i++)
    x[i] = random_tone();
}

/* Reset ts, then time `trials` calls of kernel(x,y,1) with start_meas/stop_meas. */
static void time_kernel(time_stats_t *ts,dft_kernel_t kernel,int16_t *x,int16_t *y,int trials)
{
  int i;

  reset_meas(ts);

  for (i=0; i<trials; i++) {
    start_meas(ts);
    kernel(x,y,1);
    stop_meas(ts);
  }
}

/* Print the first 8 int16_t of vector v as "a,b,c,d,e,f,g,h,". */
static void print_8_shorts(const int16_t *v)
{
  printf("%d,%d,%d,%d,%d,%d,%d,%d,",v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]);
}

/*
 * Stand-alone benchmark (built only with MR_MAIN): initialises the twiddle
 * tables, then times each DFT/IDFT kernel on pseudo-random input and dumps the
 * input/output vectors to x<N>.m / y<N>.m via LOG_M.
 */
int main(int argc, char**argv)
{
  time_stats_t ts;
  /* x2 is needed on every build: it receives the dft4096() round-trip output */
#ifdef __AVX2__
  simd256_q15_t x[4096],x2[4096],y[4096];
#else
  simd_q15_t x[8192],x2[8192],y[8192];
#endif
  int16_t *xs=(int16_t *)x;
  int16_t *ys=(int16_t *)y;
  int i;
  char fname[32],vname[32];

  /* Large transforms, in execution order. 1536, 3072 and 6144 are run twice
   * with different fill ranges; the later run overwrites the earlier dump. */
  const dft_case_t cases[] = {
    {   128, idft128,   2,    72, 256-72,           256 },
    {   256, idft256,   2,   144, 512-144,          512 },
    {   512, idft512,   2,   302, 1024-300,        1024 },
    {  1024, idft1024,  2,   602, 2*724,           2048 },
    {  1536, idft1536,  2,  1202, 2*(1536-600),    3072 },
    {  2048, dft2048,   2,  1202, 2*(2048-600),    4096 },
    /* NR 80Mhz, 217 PRB, 3/4 sampling */
    {  3072, idft3072,  2,  2506, 2*(3072-1252),   6144 },
    {  4096, idft4096,  0,  2400, 2*(4096-1200),   8192 },
    /* NR 160Mhz, 434 PRB, 3/4 sampling */
    {  6144, idft6144,  2,  5010, 2*(6144-2504),  12288 },
    {  8192, idft8192,  2,  4802, 2*(8192-2400),  16384 },
    {  1536, idft1536,  2,  1202, 2*(1536-600),    3072 },
    /* upper bound was 3072, which made the second fill loop a no-op */
    {  3072, idft3072,  2,  1202, 2*(3072-600),    6144 },
    {  6144, idft6144,  2,  4802, 2*(6144-2400),  12288 },
    { 12288, idft12288, 2,  9602, 2*(12288-4800), 24576 },
    { 18432, idft18432, 2, 14402, 2*(18432-7200), 36864 },
    /* NOTE(review): by analogy with the other rows the start would be
     * 2*(24576-9600); kept as found - confirm the intended range. */
    { 24576, idft24576, 2, 19202, 2*(24576-19200), 49152 },
  };
  const int n_cases = (int)(sizeof(cases)/sizeof(cases[0]));

  const int dftsizes[33]={24,36,48,60,72,96,108,120,144,180,192,216,240,288,300,324,360,384,432,480,540,576,600,648,720,768,864,900,960,972,1080,1152,1200};
  const dft_kernel_t small_dft[33] = {dft24,dft36,dft48,dft60,dft72,dft96,dft108,dft120,dft144,dft180,dft192,dft216,dft240,dft288,dft300,dft324,dft360,dft384,dft432,dft480,dft540,dft576,dft600,dft648,dft720,dft768,dft864,dft900,dft960,dft972,dft1080,dft1152,dft1200};

  (void)argc;
  (void)argv;

  dfts_autoinit();

  set_taus_seed(0);
  opp_enabled = 1;

  for (i=0;i<300;i++) {
#if defined(__x86_64__) || defined(__i386__)
#ifndef __AVX2__
    x[i] = _mm_set1_epi32(taus());
    x[i] = _mm_srai_epi16(x[i],4);
#else
    x[i] = _mm256_set1_epi32(taus());
    x[i] = _mm256_srai_epi16(x[i],4);
#endif
#elif defined(__arm__)
    x[i] = (int16x8_t)vdupq_n_s32(taus());
    x[i] = vshrq_n_s16(x[i],4);
#endif
  }

  /* 16-point: run once and print input/output */
  for (i=0;i<32;i++) {
    xs[i] = (int16_t)((taus()&0xffff))>>5;
  }
  memset((void*)&y[0],0,16*4);
  idft16(xs,ys);
  printf("\n\n16-point\n");
  printf("X: ");
  for (i=0;i<4;i++)
    print_8_shorts((const int16_t *)&x[i]);
  printf("\nY:");

  for (i=0;i<4;i++)
    print_8_shorts((const int16_t *)&y[i]);
  printf("\n");

  memset((void*)&x[0],0,2048*4);

  for (i=0; i<2048; i+=4) {
    xs[i<<1] = 1024;
    xs[1+(i<<1)] = 0;
    xs[2+(i<<1)] = 0;
    xs[3+(i<<1)] = 1024;
    xs[4+(i<<1)] = -1024;
    xs[5+(i<<1)] = 0;
    xs[6+(i<<1)] = 0;
    xs[7+(i<<1)] = -1024;
  }

  /* 64-point: print one result, warm up, then time (result is not printed) */
  fill_band_edges(xs,64,2,36,128-36,128);
  idft64(xs,ys,1);

  printf("64-point\n");
  printf("X: ");
  for (i=0;i<8;i++)
    print_shorts256("",((int16_t *)x)+(i*16));

  printf("\nY:");

  for (i=0;i<8;i++)
    print_shorts256("",((int16_t *)y)+(i*16));
  printf("\n");

  idft64(xs,ys,1);
  idft64(xs,ys,1);
  idft64(xs,ys,1);
  time_kernel(&ts,idft64,xs,ys,10000000);

  for (int c=0; c<n_cases; c++) {
    const dft_case_t *tc = &cases[c];

    fill_band_edges(xs,tc->size,tc->lo_start,tc->lo_end,tc->hi_start,tc->hi_end);
    time_kernel(&ts,tc->kernel,xs,ys,10000);

    printf("\n\n%d-point(%f cycles)\n",tc->size,(double)ts.diff/(double)ts.trials);
    snprintf(fname,sizeof(fname),"y%d.m",tc->size);
    snprintf(vname,sizeof(vname),"y%d",tc->size);
    LOG_M(fname,vname,y,tc->size,1,1);
    snprintf(fname,sizeof(fname),"x%d.m",tc->size);
    snprintf(vname,sizeof(vname),"x%d",tc->size);
    LOG_M(fname,vname,x,tc->size,1,1);

    if (tc->size == 4096) {
      /* round trip: forward transform of the IDFT output */
      dft4096((int16_t *)y,(int16_t *)x2,1);
      LOG_M("x4096_2.m","x4096_2",x2,4096,1,1);
    }
  }

  for (int n=0;n<33;n++) {
    // 4xN-point DFT
    memset((void*)x,0,dftsizes[n]*8*sizeof(int16_t));
    for (i=0;i<dftsizes[n]*8;i+=8) {
      xs[i]   = random_tone();
      xs[i+1] = random_tone();
    }

    time_kernel(&ts,small_dft[n],xs,ys,10000);

    printf("\n\n4x%d-point(%f cycles)\n",dftsizes[n],(double)ts.diff/(double)ts.trials);
    snprintf(fname,sizeof(fname),"y%d.m",dftsizes[n]);
    snprintf(vname,sizeof(vname),"y%d",dftsizes[n]);
    LOG_M(fname,vname,y,dftsizes[n]*4,1,1);
    snprintf(fname,sizeof(fname),"x%d.m",dftsizes[n]);
    snprintf(vname,sizeof(vname),"x%d",dftsizes[n]);
    LOG_M(fname,vname,x,dftsizes[n]*4,1,1);
  }


  return(0);
}


#endif
a/openair2/LAYER2/rlc_v2/TODO b/openair2/LAYER2/rlc_v2/TODO new file mode 100644 index 0000000000000000000000000000000000000000..0778d4320b888ac2cf9b695f0e3129863656fa2a --- /dev/null +++ b/openair2/LAYER2/rlc_v2/TODO @@ -0,0 +1,18 @@ +RLC AM +====== + +- 36.322 5.4 Re-establishment procedure + when possible, reassemble RLC SDUs from any byte segments of AMD PDUs + with SN < VR(MR) in the receiving side, remove RLC headers when doing + so and deliver all reassembled RLC SDUs to upper layer in ascending order + of the RLC SN, if not delivered before; + +- 36.322 5.2.3 Status reporting + delay triggering the STATUS report until x < VR(MS) or x >= VR(MR) + +- 36.322 5.1.3.2.3 Actions when a RLC data PDU is placed in the reception + buffer + [...] and in-sequence byte segments of the AMD PDU with SN = VR(R) [...] + +- use SOstart/SOend in NACK reporting, do not NACK full PDU if + parts of it have been received diff --git a/openair2/LAYER2/rlc_v2/asn1_utils.c b/openair2/LAYER2/rlc_v2/asn1_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..46f7d90da57d2cb7d15cee8c60614a49a832e955 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/asn1_utils.c @@ -0,0 +1,129 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include "rlc.h" + /* The functions of this file are table lookups: each returns the table entry at index v and, when v is outside the table, logs "fatal" with file/line/function and terminates the process with exit(1). NOTE(review): presumably v is the ASN.1 enumerated index of the matching RLC configuration field - confirm against the callers and 3GPP TS 36.331. */ +int decode_t_reordering(int v) /* returns tab[v] for v in 0..31; units are presumably ms - TODO confirm */ +{ + static int tab[32] = { + 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, + 90, 95, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 1600 + }; + + if (v < 0 || v > 31) { /* index outside the 32-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} + +int decode_t_status_prohibit(int v) /* returns tab[v] for v in 0..61; units are presumably ms - TODO confirm */ +{ + static int tab[62] = { + 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, + 95, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165, + 170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, + 245, 250, 300, 350, 400, 450, 500, 800, 1000, 1200, 1600, 2000, 2400 + }; + + if (v < 0 || v > 61) { /* index outside the 62-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} + +int decode_t_poll_retransmit(int v) /* returns tab[v] for v in 0..58; units are presumably ms - TODO confirm */ +{ + static int tab[59] = { + 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, + 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 165, 170, + 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, 245, + 250, 300, 350, 400, 450, 500, 800, 1000, 2000, 4000 + }; + + if (v < 0 || v > 58) { /* index outside the 59-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} + +int decode_poll_pdu(int v) /* returns tab[v] for v in 0..7; the last entry is -1, used as the 'infinity' marker */ +{ + static int tab[8] = { + 4, 8, 16, 32, 64, 128, 256, -1 /* -1 means infinity */ + }; + + if (v < 0 || v > 7) { /* index outside the 8-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} + +int decode_poll_byte(int v) /* returns tab[v] * 1024 for v in 0..14, except that the -1 'infinity' entry is returned unscaled */ +{ + static int tab[15] = { + 25, 50, 75, 100, 125, 
250, 375, 500, 750, 1000, 1250, 1500, 2000, 3000, + -1 /* -1 means infinity */ + }; + + if (v < 0 || v > 14) { /* index outside the 15-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + if (tab[v] == -1) return -1; /* keep the 'infinity' marker as is, it must not be scaled */ + return tab[v] * 1024; /* NOTE(review): presumably converts kilobytes to bytes - confirm against 3GPP TS 36.331 PollByte */ +} + +int decode_max_retx_threshold(int v) /* returns tab[v] for v in 0..7 */ +{ + static int tab[8] = { + 1, 2, 3, 4, 6, 8, 16, 32 + }; + + if (v < 0 || v > 7) { /* index outside the 8-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} + +int decode_sn_field_length(int v) /* returns 5 for v == 0 and 10 for v == 1 */ +{ + static int tab[2] = { + 5, 10 + }; + + if (v < 0 || v > 1) { /* index outside the 2-entry table */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + return tab[v]; +} diff --git a/openair2/LAYER2/rlc_v2/asn1_utils.h b/openair2/LAYER2/rlc_v2/asn1_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..61394c9c6991ccdc32722bfb039bfdac82a741ae --- /dev/null +++ b/openair2/LAYER2/rlc_v2/asn1_utils.h @@ -0,0 +1,33 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#ifndef _ASN1_UTILS_H_ +#define _ASN1_UTILS_H_ + /* Table-lookup decoders implemented in asn1_utils.c. Each one takes an index v and returns the matching table entry; an index outside the table logs a fatal error and terminates the process with exit(1), so these functions never return an error code. NOTE(review): presumably v is the ASN.1 enumerated value of the RLC configuration field of the same name - confirm against the callers. */ +int decode_t_reordering(int v); /* valid v: 0..31 */ +int decode_t_status_prohibit(int v); /* valid v: 0..61 */ +int decode_t_poll_retransmit(int v); /* valid v: 0..58 */ +int decode_poll_pdu(int v); /* valid v: 0..7; returns -1 for 'infinity' */ +int decode_poll_byte(int v); /* valid v: 0..14; table value times 1024, or -1 for 'infinity' */ +int decode_max_retx_threshold(int v); /* valid v: 0..7 */ +int decode_sn_field_length(int v); /* valid v: 0..1; returns 5 or 10 */ + +#endif /* _ASN1_UTILS_H_ */ diff --git a/openair2/LAYER2/rlc_v2/rlc_entity.c b/openair2/LAYER2/rlc_v2/rlc_entity.c new file mode 100644 index 0000000000000000000000000000000000000000..d774e2b7e17788f71a0edc178295f1a682488469 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_entity.c @@ -0,0 +1,144 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include "rlc_entity.h" + +#include <stdlib.h> + +#include "rlc_entity_am.h" +#include "rlc_entity_um.h" + +#include "LOG/log.h" + /* new_rlc_entity_am: allocate a zero-filled rlc_entity_am_t, install the rlc_entity_am_* functions as its operations, store the caller's three callbacks (with their opaque data pointers) and the configuration values, and return the object as a rlc_entity_t pointer. It never returns NULL: if calloc fails it logs "out of memory" and calls exit(1). The configuration values are stored as passed, no range check is done here. */ +rlc_entity_t *new_rlc_entity_am( + int rx_maxsize, + int tx_maxsize, + void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity, + char *buf, int size), + void *deliver_sdu_data, + void (*sdu_successful_delivery)(void *sdu_successful_delivery_data, + struct rlc_entity_t *entity, + int sdu_id), + void *sdu_successful_delivery_data, + void (*max_retx_reached)(void *max_retx_reached_data, + struct rlc_entity_t *entity), + void *max_retx_reached_data, + int t_reordering, + int t_status_prohibit, + int t_poll_retransmit, + int poll_pdu, + int poll_byte, + int max_retx_threshold) +{ + rlc_entity_am_t *ret; + + ret = calloc(1, sizeof(rlc_entity_am_t)); /* calloc zero-fills: every field not assigned below starts as 0/NULL */ + if (ret == NULL) { + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + /* operations of the common interface, AM flavour */ + ret->common.recv_pdu = rlc_entity_am_recv_pdu; + ret->common.buffer_status = rlc_entity_am_buffer_status; + ret->common.generate_pdu = rlc_entity_am_generate_pdu; + + ret->common.recv_sdu = rlc_entity_am_recv_sdu; + + ret->common.set_time = rlc_entity_am_set_time; + + ret->common.discard_sdu = rlc_entity_am_discard_sdu; + + ret->common.reestablishment = rlc_entity_am_reestablishment; + + ret->common.delete = rlc_entity_am_delete; + + /* callbacks supplied by the caller, each stored with its opaque data pointer */ + ret->common.deliver_sdu = deliver_sdu; + ret->common.deliver_sdu_data = deliver_sdu_data; + + ret->common.sdu_successful_delivery = sdu_successful_delivery; + ret->common.sdu_successful_delivery_data = sdu_successful_delivery_data; + + ret->common.max_retx_reached = max_retx_reached; + ret->common.max_retx_reached_data = max_retx_reached_data; + + /* configuration, copied unchanged */ + ret->rx_maxsize = rx_maxsize; + ret->tx_maxsize = tx_maxsize; + ret->t_reordering = 
t_reordering; + ret->t_status_prohibit = t_status_prohibit; + ret->t_poll_retransmit = t_poll_retransmit; + ret->poll_pdu = poll_pdu; + ret->poll_byte = poll_byte; + ret->max_retx_threshold = max_retx_threshold; + + return (rlc_entity_t *)ret; /* NOTE(review): the cast assumes 'common' is the first member of rlc_entity_am_t - confirm in rlc_entity_am.h */ +} + /* new_rlc_entity_um: allocate a zero-filled rlc_entity_um_t, install the rlc_entity_um_* functions as its operations, store the deliver_sdu callback and the configuration values, and return the object as a rlc_entity_t pointer. sn_field_length must be 5 or 10. It never returns NULL: an allocation failure or any other sn_field_length is logged and ends the process with exit(1). sdu_successful_delivery and max_retx_reached are not set here, so they keep the NULL value given by calloc. */ +rlc_entity_t *new_rlc_entity_um( + int rx_maxsize, + int tx_maxsize, + void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity, + char *buf, int size), + void *deliver_sdu_data, + int t_reordering, + int sn_field_length) +{ + rlc_entity_um_t *ret; + + ret = calloc(1, sizeof(rlc_entity_um_t)); /* calloc zero-fills: every field not assigned below starts as 0/NULL */ + if (ret == NULL) { + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + /* operations of the common interface, UM flavour */ + ret->common.recv_pdu = rlc_entity_um_recv_pdu; + ret->common.buffer_status = rlc_entity_um_buffer_status; + ret->common.generate_pdu = rlc_entity_um_generate_pdu; + + ret->common.recv_sdu = rlc_entity_um_recv_sdu; + + ret->common.set_time = rlc_entity_um_set_time; + + ret->common.discard_sdu = rlc_entity_um_discard_sdu; + + ret->common.reestablishment = rlc_entity_um_reestablishment; + + ret->common.delete = rlc_entity_um_delete; + + ret->common.deliver_sdu = deliver_sdu; + ret->common.deliver_sdu_data = deliver_sdu_data; + + ret->sn_field_length = sn_field_length; + ret->rx_maxsize = rx_maxsize; + ret->tx_maxsize = tx_maxsize; + ret->t_reordering = t_reordering; + + /* the sequence number space is 2^sn_field_length: 32 for 5 bits, 1024 for 10 bits */ + if (sn_field_length == 5) + ret->sn_modulus = 32; + else if (sn_field_length == 10) + ret->sn_modulus = 1024; + else { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + ret->window_size = ret->sn_modulus / 2; /* window is half of the sequence number space */ + + return (rlc_entity_t *)ret; /* NOTE(review): the cast assumes 'common' is the first member of rlc_entity_um_t - confirm in rlc_entity_um.h */ +} diff --git a/openair2/LAYER2/rlc_v2/rlc_entity.h b/openair2/LAYER2/rlc_v2/rlc_entity.h new file mode 100644 index 0000000000000000000000000000000000000000..c9b35204f03e92d305dc0bba1b40e4d36bd8964e --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_entity.h @@ -0,0 +1,97 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#ifndef _RLC_ENTITY_H_ +#define _RLC_ENTITY_H_ + +#include <stdint.h> + +#define SDU_MAX 16000 /* maximum PDCP SDU size is 8188, let's take more */ + /* Value returned by the buffer_status operation of rlc_entity_t. NOTE(review): presumably the sizes waiting for a STATUS PDU, for new data and for retransmissions - confirm in the AM/UM implementations. */ +typedef struct { + int status_size; + int tx_size; + int retx_size; +} rlc_entity_buffer_status_t; + /* Common interface of an RLC entity: a table of function pointers plus the user callbacks. new_rlc_entity_am() and new_rlc_entity_um() fill the first group with the AM or UM implementation; the second group is supplied by the creator of the entity, each callback being stored next to the opaque data pointer it takes as first argument. */ +typedef struct rlc_entity_t { + /* functions provided by the RLC module */ + void (*recv_pdu)(struct rlc_entity_t *entity, char *buffer, int size); + rlc_entity_buffer_status_t (*buffer_status)( + struct rlc_entity_t *entity, int maxsize); + int (*generate_pdu)(struct rlc_entity_t *entity, char *buffer, int size); + + void (*recv_sdu)(struct rlc_entity_t *entity, char *buffer, int size, + int sdu_id); /* NOTE(review): sdu_id looks like an upper layer identifier, reported back through sdu_successful_delivery - confirm */ + + void (*set_time)(struct rlc_entity_t *entity, uint64_t now); + + void (*discard_sdu)(struct rlc_entity_t *entity, int sdu_id); + + void (*reestablishment)(struct rlc_entity_t *entity); + + void (*delete)(struct rlc_entity_t *entity); /* 'delete' is a valid identifier in C but a keyword in C++ */ + + /* callbacks provided to the RLC module */ + void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity, + char 
*buf, int size); + void *deliver_sdu_data; + + /* the next two callbacks are set by new_rlc_entity_am() only; new_rlc_entity_um() leaves them NULL */ + void (*sdu_successful_delivery)(void *sdu_successful_delivery_data, + struct rlc_entity_t *entity, + int sdu_id); + void *sdu_successful_delivery_data; + + void (*max_retx_reached)(void *max_retx_reached_data, + struct rlc_entity_t *entity); + void *max_retx_reached_data; +} rlc_entity_t; + /* Create an AM entity. Never returns NULL: the implementation in rlc_entity.c calls exit(1) when the allocation fails. The configuration values are stored as passed. */ +rlc_entity_t *new_rlc_entity_am( + int rx_maxsize, + int tx_maxsize, + void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity, + char *buf, int size), + void *deliver_sdu_data, + void (*sdu_successful_delivery)(void *sdu_successful_delivery_data, + struct rlc_entity_t *entity, + int sdu_id), + void *sdu_successful_delivery_data, + void (*max_retx_reached)(void *max_retx_reached_data, + struct rlc_entity_t *entity), + void *max_retx_reached_data, + int t_reordering, + int t_status_prohibit, + int t_poll_retransmit, + int poll_pdu, + int poll_byte, + int max_retx_threshold); + /* Create a UM entity. sn_field_length must be 5 or 10; the implementation in rlc_entity.c calls exit(1) for any other value and when the allocation fails, so NULL is never returned. */ +rlc_entity_t *new_rlc_entity_um( + int rx_maxsize, + int tx_maxsize, + void (*deliver_sdu)(void *deliver_sdu_data, struct rlc_entity_t *entity, + char *buf, int size), + void *deliver_sdu_data, + int t_reordering, + int sn_field_length); + +#endif /* _RLC_ENTITY_H_ */ diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_am.c b/openair2/LAYER2/rlc_v2/rlc_entity_am.c new file mode 100644 index 0000000000000000000000000000000000000000..b4f3d2f47c86508d628edd0c5468a4ac96269004 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_entity_am.c @@ -0,0 +1,1700 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include "rlc_entity_am.h" +#include "rlc_pdu.h" + +#include <stdlib.h> +#include <string.h> + +#include "LOG/log.h" + +/*************************************************************************/ +/* PDU RX functions */ +/*************************************************************************/ + +static int modulus_rx(rlc_entity_am_t *entity, int a) +{ + /* as per 36.322 7.1, modulus base is vr(r) and modulus is 1024 for rx */ + int r = a - entity->vr_r; + if (r < 0) r += 1024; + return r; +} + +/* used in both RX and TX processing */ +static int modulus_tx(rlc_entity_am_t *entity, int a) +{ + /* as per 36.322 7.1, modulus base is vt(a) and modulus is 1024 for tx */ + int r = a - entity->vt_a; + if (r < 0) r += 1024; + return r; +} + +static int sn_in_recv_window(void *_entity, int sn) +{ + rlc_entity_am_t *entity = _entity; + int mod_sn = modulus_rx(entity, sn); + /* we simplify vr(r)<=sn<vr(mr). 
base is vr(r) and vr(mr) = vr(r) + 512 */ + return mod_sn < 512; +} + +static int sn_compare_rx(void *_entity, int a, int b) +{ + rlc_entity_am_t *entity = _entity; + return modulus_rx(entity, a) - modulus_rx(entity, b); +} + +/* used in both RX and TX processing */ +static int sn_compare_tx(void *_entity, int a, int b) +{ + rlc_entity_am_t *entity = _entity; + return modulus_tx(entity, a) - modulus_tx(entity, b); +} + +static int segment_already_received(rlc_entity_am_t *entity, + int sn, int so, int data_size) +{ + /* TODO: optimize */ + rlc_rx_pdu_segment_t *l = entity->rx_list; + + while (l != NULL) { + if (l->sn == sn && l->so <= so && + l->so + l->size - l->data_offset >= so + data_size) + return 1; + l = l->next; + } + + return 0; +} + +static int rlc_am_segment_full(rlc_entity_am_t *entity, int sn) +{ + rlc_rx_pdu_segment_t *l = entity->rx_list; + int last_byte; + int new_last_byte; + + last_byte = -1; + while (l != NULL) { + if (l->sn == sn) + break; + l = l->next; + } + while (l != NULL && l->sn == sn) { + if (l->so > last_byte + 1) + return 0; + if (l->is_last) + return 1; + new_last_byte = l->so + l->size - l->data_offset - 1; + if (new_last_byte > last_byte) + last_byte = new_last_byte; + l = l->next; + } + return 0; +} + +/* return 1 if the new segment has some data to consume, 0 if not */ +static int rlc_am_reassemble_next_segment(rlc_am_reassemble_t *r) +{ + int rf; + int sn; + + r->sdu_offset = r->start->data_offset; + + rlc_pdu_decoder_init(&r->dec, r->start->data, r->start->size); + + rlc_pdu_decoder_get_bits(&r->dec, 1); /* dc */ + rf = rlc_pdu_decoder_get_bits(&r->dec, 1); + rlc_pdu_decoder_get_bits(&r->dec, 1); /* p */ + r->fi = rlc_pdu_decoder_get_bits(&r->dec, 2); + r->e = rlc_pdu_decoder_get_bits(&r->dec, 1); + sn = rlc_pdu_decoder_get_bits(&r->dec, 10); + if (rf) { + rlc_pdu_decoder_get_bits(&r->dec, 1); /* lsf */ + r->so = rlc_pdu_decoder_get_bits(&r->dec, 15); + } else { + r->so = 0; + } + + if (r->e) { + r->e = 
rlc_pdu_decoder_get_bits(&r->dec, 1); + r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11); + } else + r->sdu_len = r->start->size - r->sdu_offset; + + /* new sn: read starts from PDU byte 0 */ + if (sn != r->sn) { + r->pdu_byte = 0; + r->sn = sn; + } + + r->data_pos = r->start->data_offset + r->pdu_byte - r->so; + + /* TODO: remove this check, it is useless, data has been validated before */ + if (r->pdu_byte < r->so) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + /* if pdu_byte is not in [so .. so+len-1] then all bytes from this segment + * have already been consumed + */ + if (r->pdu_byte >= r->so + r->start->size - r->start->data_offset) + return 0; + + /* go to correct SDU */ + while (r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset) + r->sdu_len) { + r->sdu_offset += r->sdu_len; + if (r->e) { + r->e = rlc_pdu_decoder_get_bits(&r->dec, 1); + r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11); + } else { + r->sdu_len = r->start->size - r->sdu_offset; + } + } + + return 1; +} + +static void rlc_am_reassemble(rlc_entity_am_t *entity) +{ + rlc_am_reassemble_t *r = &entity->reassemble; + + while (r->start != NULL) { + if (r->sdu_pos >= SDU_MAX) { + /* TODO: proper error handling (discard PDUs with current sn from + * reassembly queue? something else?) + */ + LOG_E(RLC, "%s:%d:%s: bad RLC PDU\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + r->sdu[r->sdu_pos] = r->start->data[r->data_pos]; + r->sdu_pos++; + r->data_pos++; + r->pdu_byte++; + if (r->data_pos == r->sdu_offset + r->sdu_len) { + /* all bytes of SDU are consumed, check if SDU is fully there. 
+ * It is if the data pointer is not at the end of the PDU segment + * or if 'fi' & 1 == 0 + */ + if (r->data_pos != r->start->size || + (r->fi & 1) == 0) { + /* SDU is full - deliver to higher layer */ + entity->common.deliver_sdu(entity->common.deliver_sdu_data, + (rlc_entity_t *)entity, + r->sdu, r->sdu_pos); + r->sdu_pos = 0; + } + if (r->data_pos != r->start->size) { + /* not at the end, process next SDU */ + r->sdu_offset += r->sdu_len; + if (r->e) { + r->e = rlc_pdu_decoder_get_bits(&r->dec, 1); + r->sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11); + } else + r->sdu_len = r->start->size - r->sdu_offset; + } else { + /* all bytes are consumend, go to next segment not already fully + * processed, if any + */ + do { + rlc_rx_pdu_segment_t *e = r->start; + entity->rx_size -= e->size; + r->start = r->start->next; + rlc_rx_free_pdu_segment(e); + } while (r->start != NULL && !rlc_am_reassemble_next_segment(r)); + } + } + } +} + +static void rlc_am_reception_actions(rlc_entity_am_t *entity, + rlc_rx_pdu_segment_t *pdu_segment) +{ + int x = pdu_segment->sn; + int vr_ms; + int vr_r; + + if (modulus_rx(entity, x) >= modulus_rx(entity, entity->vr_h)) + entity->vr_h = (x + 1) % 1024; + + vr_ms = entity->vr_ms; + while (rlc_am_segment_full(entity, vr_ms)) + vr_ms = (vr_ms + 1) % 1024; + entity->vr_ms = vr_ms; + + if (x == entity->vr_r) { + vr_r = entity->vr_r; + while (rlc_am_segment_full(entity, vr_r)) { + /* move segments with sn=vr(r) from rx list to end of reassembly list */ + while (entity->rx_list != NULL && entity->rx_list->sn == vr_r) { + rlc_rx_pdu_segment_t *e = entity->rx_list; + entity->rx_list = e->next; + e->next = NULL; + if (entity->reassemble.start == NULL) { + entity->reassemble.start = e; + /* the list was empty, we need to init decoder */ + entity->reassemble.sn = -1; + if (!rlc_am_reassemble_next_segment(&entity->reassemble)) { + /* TODO: proper error recovery (or remove the test, it should not happen) */ + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, 
__LINE__, __FUNCTION__); + exit(1); + } + } else { + entity->reassemble.end->next = e; + } + entity->reassemble.end = e; + } + + /* update vr_r */ + vr_r = (vr_r + 1) % 1024; + } + entity->vr_r = vr_r; + } + + rlc_am_reassemble(entity); + + if (entity->t_reordering_start) { + int vr_x = entity->vr_x; + if (vr_x < entity->vr_r) vr_x += 1024; + if (vr_x == entity->vr_r || vr_x > entity->vr_r + 512) + entity->t_reordering_start = 0; + } + + if (entity->t_reordering_start == 0) { + if (sn_compare_rx(entity, entity->vr_h, entity->vr_r) > 0) { + entity->t_reordering_start = entity->t_current; + entity->vr_x = entity->vr_h; + } + } +} + +static void process_received_ack(rlc_entity_am_t *entity, int sn) +{ + rlc_tx_pdu_segment_t head; + rlc_tx_pdu_segment_t *cur; + rlc_tx_pdu_segment_t *prev; + + /* put all PDUs from wait and retransmit lists with SN < 'sn' to ack_list */ + + /* process wait list */ + head.next = entity->wait_list; + prev = &head; + cur = entity->wait_list; + while (cur != NULL) { + if (sn_compare_tx(entity, cur->sn, sn) < 0) { + /* remove from wait list */ + prev->next = cur->next; + /* put the PDU in the ack list */ + entity->ack_list = rlc_tx_pdu_list_add(sn_compare_tx, entity, + entity->ack_list, cur); + cur = prev->next; + } else { + prev = cur; + cur = cur->next; + } + } + entity->wait_list = head.next; + + /* process retransmit list */ + head.next = entity->retransmit_list; + prev = &head; + cur = entity->retransmit_list; + while (cur != NULL) { + if (sn_compare_tx(entity, cur->sn, sn) < 0) { + /* dec. 
retx_count in case we put this segment back in retransmit list + * in 'process_received_nack' + */ + cur->retx_count--; + /* remove from retransmit list */ + prev->next = cur->next; + /* put the PDU in the ack list */ + entity->ack_list = rlc_tx_pdu_list_add(sn_compare_tx, entity, + entity->ack_list, cur); + cur = prev->next; + } else { + prev = cur; + cur = cur->next; + } + } + entity->retransmit_list = head.next; + +} + +static void consider_retransmission(rlc_entity_am_t *entity, + rlc_tx_pdu_segment_t *cur) +{ + cur->retx_count++; + + /* let's report max RETX reached for all retx_count >= max_retx_threshold + * (specs say to report if retx_count == max_retx_threshold). + * Upper layers should react (radio link failure), so no big deal actually. + */ + if (cur->retx_count >= entity->max_retx_threshold) { + entity->common.max_retx_reached(entity->common.max_retx_reached_data, + (rlc_entity_t *)entity); + } + + /* let's put in retransmit list even if we are over max_retx_threshold. + * upper layers should deal with this condition, internally it's better + * for the RLC code to keep going with this segment (we only remove + * a segment that was ACKed) + */ + entity->retransmit_list = rlc_tx_pdu_list_add(sn_compare_tx, entity, + entity->retransmit_list, cur); +} + +static int so_overlap(int s1, int e1, int s2, int e2) +{ + if (s1 < s2) { + if (e1 == -1 || e1 >= s2) + return 1; + return 0; + } + if (e2 == -1 || s1 <= e2) + return 1; + return 0; +} + +static void process_received_nack(rlc_entity_am_t *entity, int sn, + int so_start, int so_end) +{ + /* put all PDU segments with SN == 'sn' and with an overlapping so start/end + * to the retransmit list + * source lists are ack list and wait list. + * Not sure if we should consider wait list, isn't the other end supposed + * to only NACK SNs lower than the ACK SN sent in the status PDU, in which + * case all potential PDU segments should all be in ack list when calling + * the current function? 
in doubt let's accept anything and thus process + * also wait list. + */ + rlc_tx_pdu_segment_t head; + rlc_tx_pdu_segment_t *cur; + rlc_tx_pdu_segment_t *prev; + + /* check that VT(A) <= sn < VT(S) */ + if (!(sn_compare_tx(entity, entity->vt_a, sn) <= 0 && + sn_compare_tx(entity, sn, entity->vt_s) < 0)) + return; + + /* process wait list */ + head.next = entity->wait_list; + prev = &head; + cur = entity->wait_list; + while (cur != NULL) { + if (cur->sn == sn && + so_overlap(so_start, so_end, cur->so, cur->so + cur->data_size - 1)) { + /* remove from wait list */ + prev->next = cur->next; + /* consider the PDU segment for retransmission */ + consider_retransmission(entity, cur); + cur = prev->next; + } else { + prev = cur; + cur = cur->next; + } + } + entity->wait_list = head.next; + + /* process ack list */ + head.next = entity->ack_list; + prev = &head; + cur = entity->ack_list; + while (cur != NULL) { + if (cur->sn == sn && + so_overlap(so_start, so_end, cur->so, cur->so + cur->data_size - 1)) { + /* remove from ack list */ + prev->next = cur->next; + /* consider the PDU segment for retransmission */ + consider_retransmission(entity, cur); + cur = prev->next; + } else { + prev = cur; + cur = cur->next; + } + } + entity->ack_list = head.next; +} + +int tx_pdu_in_ack_list_full(rlc_tx_pdu_segment_t *pdu) +{ + int sn = pdu->sn; + int last_byte = -1; + int new_last_byte; + int is_last_seen = 0; + + while (pdu != NULL && pdu->sn == sn) { + if (pdu->so > last_byte + 1) return 0; + if (pdu->is_last) + is_last_seen = 1; + new_last_byte = pdu->so + pdu->data_size - 1; + if (new_last_byte > last_byte) + last_byte = new_last_byte; + pdu = pdu->next; + } + + return is_last_seen == 1; +} + +int tx_pdu_in_ack_list_size(rlc_tx_pdu_segment_t *pdu) +{ + int sn = pdu->sn; + int ret = 0; + + while (pdu != NULL && pdu->sn == sn) { + ret += pdu->data_size; + pdu = pdu->next; + } + + return ret; +} + +void ack_sdu_bytes(rlc_sdu_t *start, int start_byte, int sdu_size) +{ + rlc_sdu_t 
*cur = start; + int remaining_size = sdu_size; + + while (remaining_size) { + int cursize = cur->size - start_byte; + if (cursize > remaining_size) + cursize = remaining_size; + cur->acked_bytes += cursize; + remaining_size -= cursize; + /* start_byte is only meaningful for the 1st SDU, then it is 0 */ + start_byte = 0; + cur = cur->next; + } +} + +rlc_tx_pdu_segment_t *tx_list_remove_sn(rlc_tx_pdu_segment_t *list, int sn) +{ + rlc_tx_pdu_segment_t head; + rlc_tx_pdu_segment_t *cur; + rlc_tx_pdu_segment_t *prev; + + head.next = list; + cur = list; + prev = &head; + + while (cur != NULL) { + if (cur->sn == sn) { + prev->next = cur->next; + rlc_tx_free_pdu(cur); + cur = prev->next; + } else { + prev = cur; + cur = cur->next; + } + } + + return head.next; +} + +void cleanup_sdu_list(rlc_entity_am_t *entity) +{ + rlc_sdu_t head; + rlc_sdu_t *cur; + rlc_sdu_t *prev; + + /* remove fully acked SDUs, indicate successful delivery to upper layer */ + head.next = entity->tx_list; + cur = entity->tx_list; + prev = &head; + + while (cur != NULL) { + if (cur->acked_bytes == cur->size) { + prev->next = cur->next; + entity->tx_size -= cur->size; + entity->common.sdu_successful_delivery( + entity->common.sdu_successful_delivery_data, + (rlc_entity_t *)entity, cur->upper_layer_id); + rlc_free_sdu(cur); + entity->tx_end = prev; + cur = prev->next; + } else { + entity->tx_end = cur; + prev = cur; + cur = cur->next; + } + } + + entity->tx_list = head.next; + + /* if tx_end == head then it means that the list is now empty */ + if (entity->tx_end == &head) + entity->tx_end = NULL; +} + +static void finalize_ack_nack_processing(rlc_entity_am_t *entity) +{ + int sn; + rlc_tx_pdu_segment_t *cur = entity->ack_list; + int pdu_size; + + if (cur == NULL) + return; + + /* Remove full PDUs and ack the SDU bytes they cover. Start from SN == VT(A) + * and process increasing SNs until end of list or missing ACK or PDU not + * fully ACKed. 
+ */ + while (cur != NULL && cur->sn == entity->vt_a && + tx_pdu_in_ack_list_full(cur)) { + sn = cur->sn; + entity->vt_a = (entity->vt_a + 1) % 1024; + pdu_size = tx_pdu_in_ack_list_size(cur); + ack_sdu_bytes(cur->start_sdu, cur->sdu_start_byte, pdu_size); + while (cur != NULL && cur->sn == sn) + cur = cur->next; + entity->ack_list = tx_list_remove_sn(entity->ack_list, sn); + } + + cleanup_sdu_list(entity); +} + +void rlc_entity_am_recv_pdu(rlc_entity_t *_entity, char *buffer, int size) +{ +#define R(d) do { if (rlc_pdu_decoder_in_error(&d)) goto err; } while (0) + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + rlc_pdu_decoder_t decoder; + rlc_pdu_decoder_t data_decoder; + rlc_pdu_decoder_t control_decoder; + + int dc; + int rf; + int p = 0; + int fi; + int e; + int sn; + int lsf; + int so; + + int cpt; + int e1; + int e2; + int ack_sn; + int nack_sn; + int so_start; + int so_end; + int control_e1; + int control_e2; + + int data_e; + int data_li; + + int packet_count; + int data_size; + int data_start; + int indicated_data_size; + + rlc_rx_pdu_segment_t *pdu_segment; + + rlc_pdu_decoder_init(&decoder, buffer, size); + dc = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + if (dc == 0) goto control; + + /* data PDU */ + rf = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + p = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + fi = rlc_pdu_decoder_get_bits(&decoder, 2); R(decoder); + e = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder); + + /* dicard PDU if rx buffer is full */ + if (entity->rx_size + size > entity->rx_maxsize) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, RX buffer full\n", + __FILE__, __LINE__, __FUNCTION__); + goto discard; + } + + if (!sn_in_recv_window(entity, sn)) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, sn out of window (sn %d vr_r %d)\n", + __FILE__, __LINE__, __FUNCTION__, + sn, entity->vr_r); + goto discard; + } + + if (rf) { + lsf = 
rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + so = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder); + } else { + lsf = 1; + so = 0; + } + + packet_count = 1; + + /* go to start of data */ + indicated_data_size = 0; + data_decoder = decoder; + data_e = e; + while (data_e) { + data_e = rlc_pdu_decoder_get_bits(&data_decoder, 1); R(data_decoder); + data_li = rlc_pdu_decoder_get_bits(&data_decoder, 11); R(data_decoder); + if (data_li == 0) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, li == 0\n", + __FILE__, __LINE__, __FUNCTION__); + goto discard; + } + indicated_data_size += data_li; + packet_count++; + } + rlc_pdu_decoder_align(&data_decoder); + + data_start = data_decoder.byte; + data_size = size - data_start; + + if (data_size <= 0) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, wrong data size (sum of LI %d data size %d)\n", + __FILE__, __LINE__, __FUNCTION__, + indicated_data_size, data_size); + goto discard; + } + if (indicated_data_size >= data_size) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, bad LIs (sum of LI %d data size %d)\n", + __FILE__, __LINE__, __FUNCTION__, + indicated_data_size, data_size); + goto discard; + } + + /* discard segment if all the bytes of the segment are already there */ + if (segment_already_received(entity, sn, so, data_size)) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, already received\n", + __FILE__, __LINE__, __FUNCTION__); + goto discard; + } + + char *fi_str[] = { + "first byte: YES last byte: YES", + "first byte: YES last byte: NO", + "first byte: NO last byte: YES", + "first byte: NO last byte: NO", + }; + + LOG_D(RLC, "found %d packets, data size %d data start %d [fi %d %s] (sn %d) (p %d)\n", + packet_count, data_size, data_decoder.byte, fi, fi_str[fi], sn, p); + + /* put in pdu reception list */ + entity->rx_size += size; + pdu_segment = rlc_rx_new_pdu_segment(sn, so, size, lsf, buffer, data_start); + entity->rx_list = rlc_rx_pdu_segment_list_add(sn_compare_rx, entity, + entity->rx_list, pdu_segment); + 
+ /* do reception actions (36.322 5.1.3.2.3) */ + rlc_am_reception_actions(entity, pdu_segment); + + if (p) { + /* 36.322 5.2.3 says status triggering should be delayed + * until x < VR(MS) or x >= VR(MR). This is not clear (what + * is x then? we keep the same?). So let's trigger no matter what. + */ + int vr_mr = (entity->vr_r + 512) % 1024; + entity->status_triggered = 1; + if (!(sn_compare_rx(entity, sn, entity->vr_ms) < 0 || + sn_compare_rx(entity, sn, vr_mr) >= 0)) { + LOG_D(RLC, "%s:%d:%s: warning: STATUS trigger should be delayed, according to specs\n", + __FILE__, __LINE__, __FUNCTION__); + } + } + + return; + +control: + cpt = rlc_pdu_decoder_get_bits(&decoder, 3); R(decoder); + if (cpt != 0) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, CPT not 0 (%d)\n", + __FILE__, __LINE__, __FUNCTION__, cpt); + goto discard; + } + ack_sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder); + e1 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + + /* let's try to parse the control PDU once to check consistency */ + control_decoder = decoder; + control_e1 = e1; + while (control_e1) { + rlc_pdu_decoder_get_bits(&control_decoder, 10); R(control_decoder); /* NACK_SN */ + control_e1 = rlc_pdu_decoder_get_bits(&control_decoder, 1); R(control_decoder); + control_e2 = rlc_pdu_decoder_get_bits(&control_decoder, 1); R(control_decoder); + if (control_e2) { + rlc_pdu_decoder_get_bits(&control_decoder, 15); R(control_decoder); /* SOstart */ + rlc_pdu_decoder_get_bits(&control_decoder, 15); R(control_decoder); /* SOend */ + } + } + + /* 36.322 5.2.2.2 says to stop t_poll_retransmit if a ACK or NACK is + * received for the SN 'poll_sn' + */ + if (sn_compare_tx(entity, entity->poll_sn, ack_sn) < 0) + entity->t_poll_retransmit_start = 0; + + /* at this point, accept the PDU even if the actual values + * may be incorrect (eg. 
if so_start > so_end) + */ + process_received_ack(entity, ack_sn); + + while (e1) { + nack_sn = rlc_pdu_decoder_get_bits(&decoder, 10); R(decoder); + e1 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + e2 = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + if (e2) { + so_start = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder); + so_end = rlc_pdu_decoder_get_bits(&decoder, 15); R(decoder); + if (so_end < so_start) { + LOG_W(RLC, "%s:%d:%s: warning, bad so start/end, NACK the whole PDU (sn %d)\n", + __FILE__, __LINE__, __FUNCTION__, nack_sn); + so_start = 0; + so_end = -1; + } + /* special value 0x7fff indicates 'all bytes to the end' */ + if (so_end == 0x7fff) + so_end = -1; + } else { + so_start = 0; + so_end = -1; + } + process_received_nack(entity, nack_sn, so_start, so_end); + + /* 36.322 5.2.2.2 says to stop t_poll_retransmit if a ACK or NACK is + * received for the SN 'poll_sn' + */ + if (entity->poll_sn == nack_sn) + entity->t_poll_retransmit_start = 0; + } + + finalize_ack_nack_processing(entity); + + return; + +err: + LOG_W(RLC, "%s:%d:%s: error decoding PDU, discarding\n", __FILE__, __LINE__, __FUNCTION__); + goto discard; + +discard: + if (p) + entity->status_triggered = 1; + +#undef R +} + +/*************************************************************************/ +/* TX functions */ +/*************************************************************************/ + +static int pdu_size(rlc_entity_am_t *entity, rlc_tx_pdu_segment_t *pdu) +{ + int header_size; + int sdu_count; + int data_size; + int li_bits; + rlc_sdu_t *sdu; + + header_size = 2; + if (pdu->is_segment) + header_size += 2; + + data_size = pdu->data_size; + + sdu = pdu->start_sdu; + + sdu_count = 1; + data_size -= sdu->size - pdu->sdu_start_byte; + sdu = sdu->next; + + while (data_size > 0) { + sdu_count++; + data_size -= sdu->size; + sdu = sdu->next; + } + + li_bits = 12 * (sdu_count - 1); + header_size += (li_bits + 7) / 8; + + return header_size + pdu->data_size; +} + +static int 
header_size(int sdu_count) +{ + int bits = 16 + 12 * (sdu_count - 1); + /* padding if we have to */ + return (bits + 7) / 8; +} + +typedef struct { + int sdu_count; + int data_size; + int header_size; +} tx_pdu_size_t; + +static tx_pdu_size_t compute_new_pdu_size(rlc_entity_am_t *entity, int maxsize) +{ + tx_pdu_size_t ret; + int sdu_count; + int sdu_size; + int pdu_data_size; + rlc_sdu_t *sdu; + + int vt_ms = (entity->vt_a + 512) % 1024; + + ret.sdu_count = 0; + ret.data_size = 0; + ret.header_size = 0; + + /* sn out of window? nothing to do */ + if (!(sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 && + sn_compare_tx(entity, entity->vt_s, vt_ms) < 0)) + return ret; + + /* TX PDU - let's make the biggest PDU we can with the SDUs we have */ + sdu_count = 0; + pdu_data_size = 0; + sdu = entity->tx_list; + while (sdu != NULL) { + /* include SDU only if it has not been fully included in PDUs already */ + if (sdu->next_byte != sdu->size) { + int new_header_size = header_size(sdu_count + 1); + /* if we cannot put new header + at least 1 byte of data then over */ + if (new_header_size + pdu_data_size + 1 > maxsize) + break; + sdu_count++; + /* only include the bytes of this SDU not included in PDUs already */ + sdu_size = sdu->size - sdu->next_byte; + /* don't feed more than 'maxsize' bytes */ + if (new_header_size + pdu_data_size + sdu_size > maxsize) + sdu_size = maxsize - new_header_size - pdu_data_size; + pdu_data_size += sdu_size; + /* if we put more than 2^11-1 bytes then the LI field cannot be used, + * so this is the last SDU we can put + */ + if (sdu_size > 2047) + break; + } + sdu = sdu->next; + } + + if (sdu_count) { + ret.sdu_count = sdu_count; + ret.data_size = pdu_data_size; + ret.header_size = header_size(sdu_count); + } + + return ret; +} + +static int status_size(rlc_entity_am_t *entity, int maxsize) +{ + /* let's count bits */ + int bits = 15; /* minimum size is 15 (header+ack_sn+e1) */ + int sn; + + maxsize *= 8; + + if (bits > maxsize) { + 
LOG_W(RLC, "%s:%d:%s: warning: cannot generate status PDU, not enough room\n", + __FILE__, __LINE__, __FUNCTION__); + return 0; + } + + /* each NACK adds 12 bits */ + sn = entity->vr_r; + while (bits + 12 <= maxsize && sn_compare_rx(entity, sn, entity->vr_ms) < 0) { + if (!(rlc_am_segment_full(entity, sn))) + bits += 12; + sn = (sn + 1) % 1024; + } + + return (bits + 7) / 8; +} + +static int generate_status(rlc_entity_am_t *entity, char *buffer, int size) +{ + /* let's count bits */ + int bits = 15; /* minimum size is 15 (header+ack_sn+e1) */ + int sn; + rlc_pdu_encoder_t encoder; + int has_nack = 0; + int ack; + + rlc_pdu_encoder_init(&encoder, buffer, size); + + size *= 8; + + if (bits > size) { + LOG_W(RLC, "%s:%d:%s: warning: cannot generate status PDU, not enough room\n", + __FILE__, __LINE__, __FUNCTION__); + return 0; + } + + /* header */ + rlc_pdu_encoder_put_bits(&encoder, 0, 1); /* D/C */ + rlc_pdu_encoder_put_bits(&encoder, 0, 3); /* CPT */ + + /* reserve room for ACK (it will be set after putting the NACKs) */ + rlc_pdu_encoder_put_bits(&encoder, 0, 10); + + /* at this point, ACK is VR(R) */ + ack = entity->vr_r; + + /* each NACK adds 12 bits */ + sn = entity->vr_r; + while (bits + 12 <= size && sn_compare_rx(entity, sn, entity->vr_ms) < 0) { + if (!(rlc_am_segment_full(entity, sn))) { + /* put previous e1 (is 1) */ + rlc_pdu_encoder_put_bits(&encoder, 1, 1); + /* if previous was NACK, put previous e2 (0, we don't do 'so' thing) */ + if (has_nack) + rlc_pdu_encoder_put_bits(&encoder, 0, 1); + /* put NACKed sn */ + rlc_pdu_encoder_put_bits(&encoder, sn, 10); + has_nack = 1; + bits += 12; + } else { + /* this sn is full and we put all NACKs before it, use it for ACK */ + ack = (sn + 1) % 1024; + } + sn = (sn + 1) % 1024; + } + + /* go to highest full sn+1 for ACK, VR(MS) is the limit */ + while (sn_compare_rx(entity, sn, entity->vr_ms) < 0 && + rlc_am_segment_full(entity, sn)) { + ack = (sn + 1) % 1024; + sn = (sn + 1) % 1024; + } + + /* at this point, if 
last put was NACK then put 2 bits else put 1 bit */ + if (has_nack) + rlc_pdu_encoder_put_bits(&encoder, 0, 2); + else + rlc_pdu_encoder_put_bits(&encoder, 0, 1); + + rlc_pdu_encoder_align(&encoder); + + /* let's put the ACK */ + buffer[0] |= ack >> 6; + buffer[1] |= (ack & 0x3f) << 2; + + /* reset the trigger */ + entity->status_triggered = 0; + + /* start t_status_prohibit */ + entity->t_status_prohibit_start = entity->t_current; + + return encoder.byte; +} + +int transmission_buffer_empty(rlc_entity_am_t *entity) +{ + rlc_sdu_t *sdu; + + /* is transmission buffer empty? */ + sdu = entity->tx_list; + while (sdu != NULL) { + if (sdu->next_byte != sdu->size) + return 0; + sdu = sdu->next; + } + return 1; +} + +int check_poll_after_pdu_assembly(rlc_entity_am_t *entity) +{ + int retransmission_buffer_empty; + int window_stalling; + int vt_ms; + + /* is retransmission buffer empty? */ + if (entity->retransmit_list == NULL) + retransmission_buffer_empty = 1; + else + retransmission_buffer_empty = 0; + + /* is window stalling? 
*/ + vt_ms = (entity->vt_a + 512) % 1024; + if (!(sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 && + sn_compare_tx(entity, entity->vt_s, vt_ms) < 0)) + window_stalling = 1; + else + window_stalling = 0; + + return (transmission_buffer_empty(entity) && retransmission_buffer_empty) || + window_stalling; +} + +void include_poll(rlc_entity_am_t *entity, char *buffer) +{ + /* set the P bit to 1 */ + buffer[0] |= 0x20; + + entity->pdu_without_poll = 0; + entity->byte_without_poll = 0; + + /* set POLL_SN to VT(S) - 1 */ + entity->poll_sn = (entity->vt_s + 1023) % 1024; + + /* start t_poll_retransmit */ + entity->t_poll_retransmit_start = entity->t_current; +} + +static int serialize_pdu(rlc_entity_am_t *entity, char *buffer, int bufsize, + rlc_tx_pdu_segment_t *pdu, int p) +{ + int first_sdu_full; + int last_sdu_full; + int sdu_next_byte; + rlc_sdu_t *sdu; + int i; + int cursize; + rlc_pdu_encoder_t encoder; + int fi; + int e; + int li; + char *out; + int outpos; + int sdu_count; + int header_size; + int sdu_start_byte; + + first_sdu_full = pdu->sdu_start_byte == 0; + + /* is last SDU full? 
(and also compute sdu_count) */ + last_sdu_full = 1; + sdu = pdu->start_sdu; + sdu_next_byte = pdu->sdu_start_byte; + cursize = 0; + sdu_count = 0; + while (cursize != pdu->data_size) { + int sdu_size = sdu->size - sdu_next_byte; + sdu_count++; + if (cursize + sdu_size > pdu->data_size) { + last_sdu_full = 0; + break; + } + cursize += sdu_size; + sdu = sdu->next; + sdu_next_byte = 0; + } + + /* generate header */ + rlc_pdu_encoder_init(&encoder, buffer, bufsize); + + rlc_pdu_encoder_put_bits(&encoder, 1, 1); /* D/C: 1 = data */ + rlc_pdu_encoder_put_bits(&encoder, pdu->is_segment, 1); /* RF */ + rlc_pdu_encoder_put_bits(&encoder, 0, 1); /* P: reserve, set later */ + + fi = 0; + if (!first_sdu_full) + fi |= 0x02; + if (!last_sdu_full) + fi |= 0x01; + rlc_pdu_encoder_put_bits(&encoder, fi, 2); /* FI */ + + /* to understand the logic for Es and LIs: + * If we have: + * 1 SDU: E=0 + * + * 2 SDUs: E=1 + * then: E=0 LI(sdu[0]) + * + * 3 SDUs: E=1 + * then: E=1 LI(sdu[0]) + * then: E=0 LI(sdu[1]) + * + * 4 SDUs: E=1 + * then: E=1 LI(sdu[0]) + * then: E=1 LI(sdu[1]) + * then: E=0 LI(sdu[2]) + */ + if (sdu_count >= 2) + e = 1; + else + e = 0; + rlc_pdu_encoder_put_bits(&encoder, e, 1); /* E */ + + rlc_pdu_encoder_put_bits(&encoder, pdu->sn, 10); /* SN */ + + if (pdu->is_segment) { + rlc_pdu_encoder_put_bits(&encoder, pdu->is_last, 1); /* LSF */ + rlc_pdu_encoder_put_bits(&encoder, pdu->so, 15); /* SO */ + } + + /* put LIs */ + sdu = pdu->start_sdu; + /* first SDU */ + li = sdu->size - pdu->sdu_start_byte; + /* put E+LI only if at least 2 SDUs */ + if (sdu_count >= 2) { + /* E is 1 if at least 3 SDUs */ + if (sdu_count >= 3) + e = 1; + else + e = 0; + rlc_pdu_encoder_put_bits(&encoder, e, 1); /* E */ + rlc_pdu_encoder_put_bits(&encoder, li, 11); /* LI */ + } + /* next SDUs, but not the last (no LI for the last) */ + sdu = sdu->next; + for (i = 2; i < sdu_count; i++, sdu = sdu->next) { + if (i != sdu_count - 1) + e = 1; + else + e = 0; + li = sdu->size; + 
rlc_pdu_encoder_put_bits(&encoder, e, 1); /* E */ + rlc_pdu_encoder_put_bits(&encoder, li, 11); /* LI */ + } + + rlc_pdu_encoder_align(&encoder); + + header_size = encoder.byte; + + /* generate data */ + out = buffer + header_size; + sdu = pdu->start_sdu; + sdu_start_byte = pdu->sdu_start_byte; + outpos = 0; + for (i = 0; i < sdu_count; i++, sdu = sdu->next) { + li = sdu->size - sdu_start_byte; + if (outpos + li >= pdu->data_size) + li = pdu->data_size - outpos; + memcpy(out+outpos, sdu->data + sdu_start_byte, li); + outpos += li; + sdu_start_byte = 0; + } + + if (p) + include_poll(entity, buffer); + + return header_size + pdu->data_size; +} + +static int generate_tx_pdu(rlc_entity_am_t *entity, char *buffer, int bufsize) +{ + int vt_ms; + tx_pdu_size_t pdu_size; + rlc_sdu_t *sdu; + int i; + int cursize; + int p; + rlc_tx_pdu_segment_t *pdu; + + /* sn out of window? do nothing */ + vt_ms = (entity->vt_a + 512) % 1024; + if (!(sn_compare_tx(entity, entity->vt_s, entity->vt_a) >= 0 && + sn_compare_tx(entity, entity->vt_s, vt_ms) < 0)) + return 0; + + pdu_size = compute_new_pdu_size(entity, bufsize); + if (pdu_size.sdu_count == 0) + return 0; + + pdu = rlc_tx_new_pdu(); + + pdu->sn = entity->vt_s; + entity->vt_s = (entity->vt_s + 1) % 1024; + + /* go to first SDU (skip those already fully processed) */ + sdu = entity->tx_list; + while (sdu->next_byte == sdu->size) + sdu = sdu->next; + + pdu->start_sdu = sdu; + + pdu->sdu_start_byte = sdu->next_byte; + + pdu->so = 0; + pdu->is_segment = 0; + pdu->is_last = 1; + /* to conform to specs' logic, put -1 (specs say "for 1st retransmission + * put 0 otherwise increase", let's put -1 and always increase when the + * segment goes to retransmit list) + */ + pdu->retx_count = -1; + + /* reserve SDU bytes */ + cursize = 0; + for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) { + int sdu_size = sdu->size - sdu->next_byte; + if (cursize + sdu_size > pdu_size.data_size) + sdu_size = pdu_size.data_size - cursize; + 
sdu->next_byte += sdu_size; + cursize += sdu_size; + } + + pdu->data_size = cursize; + + /* put PDU at the end of the wait list */ + entity->wait_list = rlc_tx_pdu_list_append(entity->wait_list, pdu); + + /* polling actions for a new PDU */ + entity->pdu_without_poll++; + entity->byte_without_poll += pdu_size.data_size; + if ((entity->poll_pdu != -1 && + entity->pdu_without_poll >= entity->poll_pdu) || + (entity->poll_byte != -1 && + entity->byte_without_poll >= entity->poll_byte)) + p = 1; + else + p = check_poll_after_pdu_assembly(entity); + + if (entity->force_poll) { + p = 1; + entity->force_poll = 0; + } + + return serialize_pdu(entity, buffer, bufsize, pdu, p); +} + +static void resegment(rlc_tx_pdu_segment_t *pdu, int size) +{ + rlc_tx_pdu_segment_t *new_pdu; + rlc_sdu_t *sdu; + int sdu_count; + int pdu_header_size; + int pdu_data_size; + int sdu_pos; + int sdu_bytes_to_take; + + /* PDU segment too big, cut in two parts so that first part fits into + * size bytes (including header) + */ + sdu = pdu->start_sdu; + pdu_data_size = 0; + sdu_pos = pdu->sdu_start_byte; + sdu_count = 0; + while (1) { + /* can we put a new header and at least one byte of data? 
*/ + /* header has 2 more bytes for SO */ + pdu_header_size = 2 + header_size(sdu_count + 1); + if (pdu_header_size + pdu_data_size + 1 > size) { + /* no we can't, stop here */ + break; + } + /* yes we can, go ahead */ + sdu_count++; + sdu_bytes_to_take = sdu->size - sdu_pos; + if (pdu_header_size + pdu_data_size + sdu_bytes_to_take > size) { + sdu_bytes_to_take = size - (pdu_header_size + pdu_data_size); + } + sdu_pos += sdu_bytes_to_take; + if (sdu_pos == sdu->size) { + sdu = sdu->next; + sdu_pos = 0; + } + pdu_data_size += sdu_bytes_to_take; + } + + new_pdu = rlc_tx_new_pdu(); + pdu->is_segment = 1; + *new_pdu = *pdu; + + new_pdu->so = pdu->so + pdu_data_size; + new_pdu->data_size = pdu->data_size - pdu_data_size; + new_pdu->start_sdu = sdu; + new_pdu->sdu_start_byte = sdu_pos; + + pdu->is_last = 0; + pdu->data_size = pdu_data_size; + pdu->next = new_pdu; +} + /* Serialize the head of the retransmit list into 'buffer'. + * The caller must ensure the retransmit list is not empty: the head is + * used without a NULL check. + * If the PDU needs more than 'size' bytes it is cut by resegment() first; + * with less than 5 bytes of room nothing is generated and 0 is returned. + * The transmitted PDU is moved from the retransmit list to the wait list. + * Returns the value returned by serialize_pdu(). + */ +static int generate_retx_pdu(rlc_entity_am_t *entity, char *buffer, int size) +{ + rlc_tx_pdu_segment_t *pdu; + int orig_size; + int p; + + pdu = entity->retransmit_list; + orig_size = pdu_size(entity, pdu); + + if (orig_size > size) { + /* we can't resegment if size is less than 5 + * (4 bytes for header, 1 byte for data) + */ + if (size < 5) + return 0; + resegment(pdu, size); + } + + /* remove from retransmit list and put in wait list (after a resegment, + * pdu->next is the remaining part, which becomes the new head of the + * retransmit list) + */ + entity->retransmit_list = pdu->next; + entity->wait_list = rlc_tx_pdu_list_add(sn_compare_tx, entity, + entity->wait_list, pdu); + + p = check_poll_after_pdu_assembly(entity); + + if (entity->force_poll) { + p = 1; + entity->force_poll = 0; + } + + return serialize_pdu(entity, buffer, orig_size, pdu, p); +} + /* Tell if a STATUS PDU has to be sent: a report was triggered and + * t_status_prohibit is either not running (start time is 0) or has been + * running for more than 't_status_prohibit'. + */ +static int status_to_report(rlc_entity_am_t *entity) +{ + return entity->status_triggered && + (entity->t_status_prohibit_start == 0 || + entity->t_current - entity->t_status_prohibit_start > + entity->t_status_prohibit); +} + /* Size in bytes of the retransmission PDU that can be generated when + * 'maxsize' bytes are available: 0 if the retransmit list is empty or if + * there is not enough room, the size of the head PDU if it fits, + * 'maxsize' otherwise. + */ +static int retx_pdu_size(rlc_entity_am_t *entity, int maxsize) +{ + int size; + + if (entity->retransmit_list == NULL) + return 0; + + 
size = pdu_size(entity, entity->retransmit_list); + if (size <= maxsize) + return size; + + /* we can segment head of retransmit list if maxsize is large enough + * to hold a PDU segment with at least 1 data byte (so 5 bytes: 4 bytes + * header + 1 byte data) + */ + if (maxsize < 5) + return 0; + + /* a later segmentation of the head of retransmit list will generate a PDU + * of maximum size 'maxsize' (can be less) + */ + return maxsize; +} + /* Report the sizes of the status PDU, of a new TX PDU and of a + * retransmission PDU, each one computed for 'maxsize' available bytes. + */ +rlc_entity_buffer_status_t rlc_entity_am_buffer_status( + rlc_entity_t *_entity, int maxsize) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + rlc_entity_buffer_status_t ret; + tx_pdu_size_t tx_size; + + /* status PDU, if we have to */ + if (status_to_report(entity)) + ret.status_size = status_size(entity, maxsize); + else + ret.status_size = 0; + + /* TX PDU */ + /* todo: if an SDU has size >2047 in the tx list then processing + * stops and computed size will not be accurate. Change the computation + * to be more accurate (if needed).
+ */ + tx_size = compute_new_pdu_size(entity, maxsize); + ret.tx_size = tx_size.data_size + tx_size.header_size; + + /* reTX PDU */ + /* todo: report size of all available data, not just first PDU */ + ret.retx_size = retx_pdu_size(entity, maxsize); + + return ret; +} + +int rlc_entity_am_generate_pdu(rlc_entity_t *_entity, char *buffer, int size) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + int ret; + + if (status_to_report(entity)) { + ret = generate_status(entity, buffer, size); + if (ret != 0) + return ret; + } + + if (entity->retransmit_list != NULL) { + ret = generate_retx_pdu(entity, buffer, size); + if (ret != 0) + return ret; + } + + return generate_tx_pdu(entity, buffer, size); +} + +/*************************************************************************/ +/* SDU RX functions */ +/*************************************************************************/ + +void rlc_entity_am_recv_sdu(rlc_entity_t *_entity, char *buffer, int size, + int sdu_id) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + rlc_sdu_t *sdu; + + if (size > SDU_MAX) { + LOG_E(RLC, "%s:%d:%s: fatal: SDU size too big (%d bytes)\n", + __FILE__, __LINE__, __FUNCTION__, size); + exit(1); + } + + if (entity->tx_size + size > entity->tx_maxsize) { + LOG_D(RLC, "%s:%d:%s: warning: SDU rejected, SDU buffer full\n", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + entity->tx_size += size; + + sdu = rlc_new_sdu(buffer, size, sdu_id); + rlc_sdu_list_add(&entity->tx_list, &entity->tx_end, sdu); +} + +/*************************************************************************/ +/* time/timers */ +/*************************************************************************/ + /* Handle the expiry of t_poll_retransmit (36.322 5.2.2.3). + * Does nothing if the timer is not running or has not expired yet. + * Otherwise: stop the timer, force a poll in the next generated PDU and, + * if check_poll_after_pdu_assembly() holds, hand all the segments of the + * PDU with SN = VT(S)-1 (or, if it is not in the wait list, of the PDU at + * the head of the wait list) to consider_retransmission(). + */ +static void check_t_poll_retransmit(rlc_entity_am_t *entity) +{ + rlc_tx_pdu_segment_t head; + rlc_tx_pdu_segment_t *cur; + rlc_tx_pdu_segment_t *prev; + int sn; + + /* 36.322 5.2.2.3 */ + /* did t_poll_retransmit expire? 
*/ + if (entity->t_poll_retransmit_start == 0 || + entity->t_current <= entity->t_poll_retransmit_start + + entity->t_poll_retransmit) + return; + + /* stop timer */ + entity->t_poll_retransmit_start = 0; + + /* 36.322 5.2.2.3 says: + * + * - include a poll in a RLC data PDU as described in section 5.2.2.1 + * + * That does not seem to be conditional. So we forcefully will send + * a poll as soon as we generate a PDU. + * Hopefully this interpretation is correct. In the worst case we generate + * more polling than necessary, but it's not a big deal. When + * 't_poll_retransmit' expires it means we didn't receive a status report, + * meaning a bad radio link, so things are quite bad at this point and + * asking again for a poll won't hurt much more. + */ + entity->force_poll = 1; + + LOG_D(RLC, "%s:%d:%s: warning: t_poll_retransmit expired\n", + __FILE__, __LINE__, __FUNCTION__); + + /* do we meet conditions of 36.322 5.2.2.3? */ + if (!check_poll_after_pdu_assembly(entity)) + return; + + /* nothing is waiting for an acknowledgment (for example the window is + * stalled with everything in the retransmit list): there is no PDU to + * consider for retransmission, and the fallback below would dereference + * a NULL wait list + */ + if (entity->wait_list == NULL) + return; + + /* search wait list for PDU with SN = VT(S)-1 */ + sn = (entity->vt_s + 1023) % 1024; + + /* 'head' is a dummy element so that 'prev' is always valid, even when + * the element to remove is the first of the wait list + */ + head.next = entity->wait_list; + cur = entity->wait_list; + prev = &head; + + while (cur != NULL) { + if (cur->sn == sn) + break; + prev = cur; + cur = cur->next; + } + + /* PDU with SN = VT(S)-1 not found?, take the head of wait list + * (not NULL, checked above) + */ + if (cur == NULL) { + cur = entity->wait_list; + prev = &head; + sn = cur->sn; + } + + /* 36.322 says "PDU", not "PDU segment", so let's retransmit all + * PDU segments with this SN + */ + while (cur != NULL && cur->sn == sn) { + prev->next = cur->next; + entity->wait_list = head.next; + /* put in retransmit list */ + consider_retransmission(entity, cur); + cur = prev->next; + } +} + +static void check_t_reordering(rlc_entity_am_t *entity) +{ + int sn; + + /* is t_reordering running and if yes has it expired? 
*/ + if (entity->t_reordering_start == 0 || + entity->t_current <= entity->t_reordering_start + entity->t_reordering) + return; + + /* stop timer */ + entity->t_reordering_start = 0; + + LOG_D(RLC, "%s:%d:%s: t_reordering expired\n", __FILE__, __LINE__, __FUNCTION__); + + /* update VR(MS) to first SN >= VR(X) for which not all PDU segments + * have been received + */ + sn = entity->vr_x; + while (rlc_am_segment_full(entity, sn)) + sn = (sn + 1) % 1024; + entity->vr_ms = sn; + + if (sn_compare_rx(entity, entity->vr_h, entity->vr_ms) > 0) { + entity->t_reordering_start = entity->t_current; + entity->vr_x = entity->vr_h; + } + + /* trigger STATUS report */ + entity->status_triggered = 1; +} + +void rlc_entity_am_set_time(rlc_entity_t *_entity, uint64_t now) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + + entity->t_current = now; + + check_t_poll_retransmit(entity); + + check_t_reordering(entity); + + /* t_status_prohibit is handled by generate_status */ +} + +/*************************************************************************/ +/* discard/re-establishment/delete */ +/*************************************************************************/ + +void rlc_entity_am_discard_sdu(rlc_entity_t *_entity, int sdu_id) +{ + /* implements 36.322 5.3 */ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + rlc_sdu_t head; + rlc_sdu_t *cur; + rlc_sdu_t *prev; + + head.next = entity->tx_list; + cur = entity->tx_list; + prev = &head; + + while (cur != NULL && cur->upper_layer_id != sdu_id) { + prev = cur; + cur = cur->next; + } + + /* if sdu_id not found or some bytes have already been 'PDU-ized' + * then do nothing + */ + if (cur == NULL || cur->next_byte != 0) + return; + + /* remove SDU from tx_list */ + prev->next = cur->next; + entity->tx_list = head.next; + if (entity->tx_end == cur) { + if (prev != &head) + entity->tx_end = prev; + else + entity->tx_end = NULL; + } + + rlc_free_sdu(cur); +} + +static void free_pdu_segment_list(rlc_tx_pdu_segment_t *l) 
+{ + rlc_tx_pdu_segment_t *cur; + + while (l != NULL) { + cur = l; + l = l->next; + rlc_tx_free_pdu(cur); + } +} + +static void clear_entity(rlc_entity_am_t *entity) +{ + rlc_rx_pdu_segment_t *cur_rx; + rlc_sdu_t *cur_tx; + + entity->vr_r = 0; + entity->vr_x = 0; + entity->vr_ms = 0; + entity->vr_h = 0; + + entity->status_triggered = 0; + + entity->vt_a = 0; + entity->vt_s = 0; + entity->poll_sn = 0; + entity->pdu_without_poll = 0; + entity->byte_without_poll = 0; + entity->force_poll = 0; + + entity->t_current = 0; + + entity->t_reordering_start = 0; + entity->t_status_prohibit_start = 0; + entity->t_poll_retransmit_start = 0; + + cur_rx = entity->rx_list; + while (cur_rx != NULL) { + rlc_rx_pdu_segment_t *p = cur_rx; + cur_rx = cur_rx->next; + rlc_rx_free_pdu_segment(p); + } + entity->rx_list = NULL; + entity->rx_size = 0; + + memset(&entity->reassemble, 0, sizeof(rlc_am_reassemble_t)); + + cur_tx = entity->tx_list; + while (cur_tx != NULL) { + rlc_sdu_t *p = cur_tx; + cur_tx = cur_tx->next; + rlc_free_sdu(p); + } + entity->tx_list = NULL; + entity->tx_end = NULL; + entity->tx_size = 0; + + free_pdu_segment_list(entity->wait_list); + free_pdu_segment_list(entity->retransmit_list); + free_pdu_segment_list(entity->ack_list); + entity->wait_list = NULL; + entity->retransmit_list = NULL; + entity->ack_list = NULL; +} + +void rlc_entity_am_reestablishment(rlc_entity_t *_entity) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + + /* 36.322 5.4 says to deliver SDUs if possible. + * Let's not do that, it makes the code simpler. + * TODO: change this behavior if wanted/needed. 
+ */ + + clear_entity(entity); +} + +void rlc_entity_am_delete(rlc_entity_t *_entity) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + clear_entity(entity); + free(entity); +} diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_am.h b/openair2/LAYER2/rlc_v2/rlc_entity_am.h new file mode 100644 index 0000000000000000000000000000000000000000..0437f17ad8e63e97c9a9cca6e92a5c85a73fb604 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_entity_am.h @@ -0,0 +1,285 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#ifndef _RLC_ENTITY_AM_H_ +#define _RLC_ENTITY_AM_H_ + +#include <stdint.h> + +#include "rlc_entity.h" +#include "rlc_pdu.h" +#include "rlc_sdu.h" + +/* + * Here comes some documentation to understand the reassembly + * logic in the code and the fields in the structure rlc_am_reassemble_t. + * + * Inside RLC, we deal with SDUs, PDUs and PDU segments. + * SDUs are packets coming from upper layer. + * A PDU is made of a header and a payload. 
+ * In the payload there are SDUs. + * First SDU and last SDU in a PDU may be incomplete. + * PDU segments exist in case of retransmissions when the MAC + * layer asks for less data than previously, in which case + * only part of the previous PDU is sent. + * + * This is PDU data (just bytes): + * --------------------------------------------------------- + * | PDU data | + * --------------------------------------------------------- + * It contains SDUs, like: + * --------------------------------------------------------- + * | SDU 1 | SDU 2 | [...] | SDU n | + * --------------------------------------------------------- + * SDU 1 may be only the end of an SDU from which previous bytes were + * transmitted in previous PDUs. + * SDU n may be only the start of an SDU, that is more bytes from + * this SDU may be sent in successive PDUs. + * + * At front of the PDU data, we have a header: + * --------------- --------------------------------------------------------- + * | PDU header | | SDU 1 | SDU 2 | [...] | SDU n | + * --------------- --------------------------------------------------------- + * PDU header describes PDU data (most notably lengths). + * + * A PDU segment is a part of a PDU. For example, from this PDU data: + * --------------------------------------------------------- + * | SDU 1 | SDU 2 | [...] | SDU n | + * --------------------------------------------------------- + * We can extract the following PDU segment (data part only): + * ---------------------- + * | PDU segment data | + * ---------------------- + * This PDU segment would contain the end of SDU 2 above and some SDUs up to, + * let's say SDU x (x is 5 below). + * + * In front of a transmitted PDU segment, we have a header, + * containing the important variable 'so' (segment offset) that gives + * the index of the first byte of the segment in the original PDU. + * -------------- ---------------------- + * | seg. 
header| | PDU segment data | + * -------------- ---------------------- + * + * Let's now explain the data structure rlc_am_reassemble_t. + * + * In the structure rlc_am_reassemble_t, the fields fi, e, sn and so + * come from the PDU segment header and their semantics are those + * of the RLC specs. + * + * The currently processed PDU segment is stored in 'start'. + * We have 'start->s->data_offset' and 'start->s->size'. + * start->s->data_offset is the index of the start of the data in the + * PDU segment. That is, if the header is of length 3 bytes + * then start->s->data_offset is 3. + * start->s->size is the total length of the PDU segment, + * including header. + * The size of actual data bytes in the PDU segment is thus + * start->s->size - start->s->data_offset. + * + * The field sdu_len is the length of the current SDU being + * processed. + * + * The field sdu_offset is the starting point of the + * current SDU being processed (starting from beginning + * of PDU segment, including header). + * + * The field data_pos is the current read pointer. 0 points to + * the beginning of the PDU segment (including header). + * + * The field pdu_byte points to the current byte in the original + * PDU (not the PDU segment). It starts at 0 when we start + * processing a new PDU (when a new 'sn' is seen) and always + * increases after each byte processed. This is the variable + * that is used to know if the next PDU segment will be used + * or not and if yes, starting from which data byte (see + * function rlc_am_reassemble_next_segment). + * + * 'so' is important and points to the byte in the original PDU + * that is the first byte of the PDU segment. + * + * For example, let's take this PDU segment data from above: + * ---------------------- + * | PDU segment data | + * ---------------------- + * Let's say it is decomposed as: + * ---------------------- + * |222|33|4444|55555555| + * ---------------------- + * It contains SDUs 2, 3, 4, and 5.
+ * SDU 2 is 3 bytes, SDU 3 is 2 bytes, SDU 4 is 4 bytes, SDU 5 is 8 bytes. + * + * Let's suppose that the original PDU starts with: + * ---------------- + * |1111111|222222| + * ---------------- + * + * (In this example, in the PDU segment, SDU 2 is not full, + * we only have its end.) + * + * Then 'so' is 13 (SDU 1 is 7 bytes, head of SDU 2 is 6 bytes). + * + * Let's continue with our PDU segment data. + * Let's say we are currently processing SDU 4. + * Let's say the read pointer (variable 'data_pos') is there: + * ---------------------- + * |222|33|4444|55555555| + * ---------------------- + * ^ + * read pointer (data_pos) + * + * Then: + * - sdu_len is 4 + * - sdu_offset is 5 + [PDU segment header length] + * (it points to the beginning of SDU 4, starting + * from the head of the PDU segment, that is + * 3 bytes for SDU 2, 2 bytes for SDU 3, and the + * PDU segment header length) + * - start->s->data_offset is [PDU segment header length] + * - pdu_byte is 20 + * (13 bytes from beginning of original PDU, + * 3 bytes for SDU 2, 2 bytes for SDU 3, then 2 bytes for SDU 4) + * - data_pos = read pointer = 7 + [PDU segment header length] + * + * To finish this description, in the code, a PDU is simply + * seen as a PDU segment with 'so' = 0 (and is_last == 1 (lsf in the specs), + * but this variable is not used by the reassembly logic). + * + * And for [PDU segment header length] we use start->s->data_offset. + * + * To recap, here is an illustration of the various variables + * and what starting point they use. In the figures, the start + * of the variable name is aligned to the byte it refers to. + * + is used to show the starting point. + * + * Let's put the PDU segment back into the original PDU. + * And let's show the values for when the read pointer + * is on the second byte of SDU 4 (as above).
/* State of the AM reassembly process (turning received PDU segments back
 * into SDUs). See the long explanation above for the meaning of each
 * position variable and the starting point it is relative to.
 */
typedef struct {
  rlc_rx_pdu_segment_t *start;      /* start of list */
  rlc_rx_pdu_segment_t *end;        /* end of list (last element) */
  int                  pos;         /* byte to get from current buffer */
  char                 sdu[SDU_MAX]; /* sdu is reassembled here */
  int                  sdu_pos;     /* next byte to put in sdu */

  /* decoder of current PDU */
  rlc_pdu_decoder_t dec;
  /* fi, e, sn and so come from the PDU segment header; their semantics is
   * the one of the RLC specs. 'so' is the byte of the original PDU that is
   * the first byte of the PDU segment (0 for a full PDU).
   */
  int fi;
  int e;
  int sn;
  int so;
  /* length of the SDU currently being processed */
  int sdu_len;
  /* start of the SDU currently being processed, counted from the
   * beginning of the PDU segment (header included)
   */
  int sdu_offset;
  /* current read pointer, 0 is the beginning of the PDU segment
   * (header included)
   */
  int data_pos;
  /* current byte in the original PDU (not the PDU segment); starts at 0
   * for each new 'sn' and increases with each byte processed
   */
  int pdu_byte;
} rlc_am_reassemble_t;

/* An RLC AM entity: configuration, RX/TX state variables, timers and the
 * lists used for reception, reassembly, transmission and retransmission.
 */
typedef struct {
  rlc_entity_t common;

  /* configuration */
  int t_reordering;
  int t_status_prohibit;
  int t_poll_retransmit;
  int poll_pdu;           /* -1 means infinity */
  int poll_byte;          /* -1 means infinity */
  int max_retx_threshold;

  /* runtime rx */
  /* NOTE(review): presumably the VR(R), VR(X), VR(MS) and VR(H) state
   * variables of 36.322 - confirm against the .c file
   */
  int vr_r;
  int vr_x;
  int vr_ms;
  int vr_h;

  int status_triggered;

  /* runtime tx */
  /* NOTE(review): presumably VT(A), VT(S) and POLL_SN of 36.322 plus the
   * PDU/byte counters used for polling - confirm against the .c file
   */
  int vt_a;
  int vt_s;
  int poll_sn;
  int pdu_without_poll;
  int byte_without_poll;
  int force_poll;

  /* set to the latest known time by the user of the module. Unit: ms */
  uint64_t t_current;

  /* timers (stores the TTI of activation, 0 means not active) */
  uint64_t t_reordering_start;
  uint64_t t_status_prohibit_start;
  uint64_t t_poll_retransmit_start;

  /* rx management */
  rlc_rx_pdu_segment_t *rx_list;
  int                  rx_size;
  int                  rx_maxsize;

  /* reassembly management */
  rlc_am_reassemble_t    reassemble;

  /* tx management */
  rlc_sdu_t *tx_list;
  rlc_sdu_t *tx_end;
  int       tx_size;
  int       tx_maxsize;

  rlc_tx_pdu_segment_t *wait_list;
  rlc_tx_pdu_segment_t *retransmit_list;

  rlc_tx_pdu_segment_t *ack_list;
} rlc_entity_am_t;
+ * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include "rlc_entity_um.h" +#include "rlc_pdu.h" + +#include <stdlib.h> +#include <string.h> + +#include "LOG/log.h" + +/*************************************************************************/ +/* PDU RX functions */ +/*************************************************************************/ + +static int modulus_rx(rlc_entity_um_t *entity, int a) +{ + /* as per 36.322 7.1, modulus base is vr(uh)-window_size and modulus is + * 2^sn_field_length (which is 'sn_modulus' in rlc_entity_um_t) + */ + int r = a - (entity->vr_uh - entity->window_size); + if (r < 0) r += entity->sn_modulus; + return r % entity->sn_modulus; +} + +static int sn_compare_rx(void *_entity, int a, int b) +{ + rlc_entity_um_t *entity = _entity; + return modulus_rx(entity, a) - modulus_rx(entity, b); +} + +static int sn_in_recv_window(void *_entity, int sn) +{ + rlc_entity_um_t *entity = _entity; + int mod_sn = modulus_rx(entity, sn); + /* we simplify (VR(UH) - UM_Window_Size) <= SN < VR(UH), base is + * (VR(UH) - UM_Window_Size) and VR(UH) = base + window_size + */ + return mod_sn < entity->window_size; +} + +/* return 1 if a PDU with SN == 'sn' is in the rx list, 0 otherwise */ +static int rlc_um_pdu_received(rlc_entity_um_t *entity, int sn) +{ + rlc_rx_pdu_segment_t *cur = entity->rx_list; + while (cur != NULL) { + if (cur->sn == sn) + return 1; + cur = cur->next; + } + return 
0; +} + +static int less_than_vr_ur(rlc_entity_um_t *entity, int sn) +{ + return sn_compare_rx(entity, sn, entity->vr_ur) < 0; +} + +static int outside_of_reordering_window(rlc_entity_um_t *entity, int sn) +{ + return !sn_in_recv_window(entity, sn); +} + +static int less_than_vr_uh(rlc_entity_um_t *entity, int sn) +{ + return sn_compare_rx(entity, sn, entity->vr_uh) < 0; +} + +static void rlc_um_reassemble_pdu(rlc_entity_um_t *entity, + rlc_rx_pdu_segment_t *pdu) +{ + rlc_um_reassemble_t *r = &entity->reassemble; + + int fi; + int e; + int sn; + int data_pos; + int sdu_len; + int sdu_offset; + + sdu_offset = pdu->data_offset; + + rlc_pdu_decoder_init(&r->dec, pdu->data, pdu->size); + + if (entity->sn_field_length == 10) + rlc_pdu_decoder_get_bits(&r->dec, 3); + + fi = rlc_pdu_decoder_get_bits(&r->dec, 2); + e = rlc_pdu_decoder_get_bits(&r->dec, 1); + sn = rlc_pdu_decoder_get_bits(&r->dec, entity->sn_field_length); + + if (e) { + e = rlc_pdu_decoder_get_bits(&r->dec, 1); + sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11); + } else + sdu_len = pdu->size - sdu_offset; + + /* discard current SDU being reassembled if bad SN or bad FI */ + if (sn != (r->sn + 1) % entity->sn_modulus || + !(fi & 0x02)) { + if (r->sdu_pos) + LOG_D(RLC, "%s:%d:%s: warning: discard partially reassembled SDU\n", + __FILE__, __LINE__, __FUNCTION__); + r->sdu_pos = 0; + } + + /* if the head of the SDU is missing, still process the PDU + * but remember to discard the reassembled SDU later on (the + * head has not been received). + * The head is missing if sdu_pos == 0 and fi says the PDU does not + * start an SDU. + */ + if (r->sdu_pos == 0 && (fi & 0x02)) + r->sdu_head_missing = 1; + + r->sn = sn; + data_pos = pdu->data_offset; + + while (1) { + if (r->sdu_pos >= SDU_MAX) { + /* TODO: proper error handling (discard PDUs with current sn from + * reassembly queue? something else?) 
+ */ + LOG_E(RLC, "%s:%d:%s: bad RLC PDU\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + r->sdu[r->sdu_pos] = pdu->data[data_pos]; + r->sdu_pos++; + data_pos++; + if (data_pos == sdu_offset + sdu_len) { + /* all bytes of SDU are consumed, check if SDU is fully there. + * It is if the data pointer is not at the end of the PDU segment + * or if 'fi' & 1 == 0 + */ + if (data_pos != pdu->size || (fi & 1) == 0) { + /* time to discard the SDU if we didn't receive the head */ + if (r->sdu_head_missing) { + LOG_D(RLC, "%s:%d:%s: warning: discard SDU, head not received\n", + __FILE__, __LINE__, __FUNCTION__); + r->sdu_head_missing = 0; + } else { + /* SDU is full - deliver to higher layer */ + entity->common.deliver_sdu(entity->common.deliver_sdu_data, + (rlc_entity_t *)entity, + r->sdu, r->sdu_pos); + } + r->sdu_pos = 0; + } + /* done with PDU? */ + if (data_pos == pdu->size) + break; + /* not at the end of PDU, process next SDU */ + sdu_offset += sdu_len; + if (e) { + e = rlc_pdu_decoder_get_bits(&r->dec, 1); + sdu_len = rlc_pdu_decoder_get_bits(&r->dec, 11); + } else + sdu_len = pdu->size - sdu_offset; + } + } +} + +static void rlc_um_reassemble(rlc_entity_um_t *entity, + int (*check_sn)(rlc_entity_um_t *entity, int sn)) +{ + rlc_rx_pdu_segment_t *cur; + + /* process all PDUs from head of rx list until all is processed or + * the SN is not valid anymore with respect to 'check_sn' + */ + while (entity->rx_list != NULL && check_sn(entity, entity->rx_list->sn)) { + cur = entity->rx_list; + rlc_um_reassemble_pdu(entity, cur); + entity->rx_size -= cur->size; + entity->rx_list = cur->next; + rlc_rx_free_pdu_segment(cur); + } +} + +static void rlc_um_reception_actions(rlc_entity_um_t *entity, + rlc_rx_pdu_segment_t *pdu_segment) +{ + if (!sn_in_recv_window(entity, pdu_segment->sn)) { + entity->vr_uh = (pdu_segment->sn + 1) % entity->sn_modulus; + rlc_um_reassemble(entity, outside_of_reordering_window); + if (!sn_in_recv_window(entity, entity->vr_ur)) + entity->vr_ur = 
(entity->vr_uh - entity->window_size + + entity->sn_modulus) % entity->sn_modulus; + } + + if (rlc_um_pdu_received(entity, entity->vr_ur)) { + do { + entity->vr_ur = (entity->vr_ur + 1) % entity->sn_modulus; + } while (rlc_um_pdu_received(entity, entity->vr_ur)); + rlc_um_reassemble(entity, less_than_vr_ur); + } + + if (entity->t_reordering_start) { + if (sn_compare_rx(entity, entity->vr_ux, entity->vr_ur) <= 0 || + (!sn_in_recv_window(entity, entity->vr_ux) && + entity->vr_ux != entity->vr_uh)) + entity->t_reordering_start = 0; + } + + if (entity->t_reordering_start == 0) { + if (sn_compare_rx(entity, entity->vr_uh, entity->vr_ur) > 0) { + entity->t_reordering_start = entity->t_current; + entity->vr_ux = entity->vr_uh; + } + } +} + +void rlc_entity_um_recv_pdu(rlc_entity_t *_entity, char *buffer, int size) +{ +#define R(d) do { if (rlc_pdu_decoder_in_error(&d)) goto err; } while (0) + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + rlc_pdu_decoder_t decoder; + rlc_pdu_decoder_t data_decoder; + + int e; + int sn; + + int data_e; + int data_li; + + int packet_count; + int data_size; + int data_start; + int indicated_data_size; + + rlc_rx_pdu_segment_t *pdu_segment; + + rlc_pdu_decoder_init(&decoder, buffer, size); + + if (entity->sn_field_length == 10) { + rlc_pdu_decoder_get_bits(&decoder, 3); R(decoder); /* R1 */ + } + + rlc_pdu_decoder_get_bits(&decoder, 2); R(decoder); /* FI */ + e = rlc_pdu_decoder_get_bits(&decoder, 1); R(decoder); + sn = rlc_pdu_decoder_get_bits(&decoder, entity->sn_field_length); R(decoder); + + /* dicard PDU if rx buffer is full */ + if (entity->rx_size + size > entity->rx_maxsize) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, RX buffer full\n", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + /* discard according to 36.322 5.1.2.2.2 */ + if ((sn_compare_rx(entity, entity->vr_ur, sn) < 0 && + sn_compare_rx(entity, sn, entity->vr_uh) < 0 && + rlc_um_pdu_received(entity, sn)) || + (sn_compare_rx(entity, entity->vr_uh - 
entity->window_size, sn) <= 0 && + sn_compare_rx(entity, sn, entity->vr_ur) < 0)) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU (sn %d vr(ur) %d vr(uh) %d)\n", + __FILE__, __LINE__, __FUNCTION__, + sn, entity->vr_ur, entity->vr_uh); + return; + } + + packet_count = 1; + + /* go to start of data */ + indicated_data_size = 0; + data_decoder = decoder; + data_e = e; + while (data_e) { + data_e = rlc_pdu_decoder_get_bits(&data_decoder, 1); R(data_decoder); + data_li = rlc_pdu_decoder_get_bits(&data_decoder, 11); R(data_decoder); + if (data_li == 0) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, li == 0\n", + __FILE__, __LINE__, __FUNCTION__); + return; + } + indicated_data_size += data_li; + packet_count++; + } + rlc_pdu_decoder_align(&data_decoder); + + data_start = data_decoder.byte; + data_size = size - data_start; + + if (data_size <= 0) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, wrong data size (sum of LI %d data size %d)\n", + __FILE__, __LINE__, __FUNCTION__, + indicated_data_size, data_size); + return; + } + if (indicated_data_size >= data_size) { + LOG_D(RLC, "%s:%d:%s: warning: discard PDU, bad LIs (sum of LI %d data size %d)\n", + __FILE__, __LINE__, __FUNCTION__, + indicated_data_size, data_size); + return; + } + + /* put in pdu reception list */ + entity->rx_size += size; + pdu_segment = rlc_rx_new_pdu_segment(sn, 0, size, 1, buffer, data_start); + entity->rx_list = rlc_rx_pdu_segment_list_add(sn_compare_rx, entity, + entity->rx_list, pdu_segment); + + /* do reception actions (36.322 5.1.2.2.3) */ + rlc_um_reception_actions(entity, pdu_segment); + + return; + +err: + LOG_D(RLC, "%s:%d:%s: error decoding PDU, discarding\n", __FILE__, __LINE__, __FUNCTION__); + +#undef R +} + +/*************************************************************************/ +/* TX functions */ +/*************************************************************************/ + +typedef struct { + int sdu_count; + int data_size; + int header_size; + int last_sdu_is_full; + int 
/* Return the size in bytes of the header of a UM PDU containing
 * 'sdu_count' SDUs, for the given SN field length.
 * The fixed part is 16 bits when sn_field_length is 10, 8 bits otherwise.
 * Each SDU but the last adds 12 bits (E + LI). The total is rounded up
 * to a full byte (padding).
 */
static int header_size(int sn_field_length, int sdu_count)
{
  int fixed_bits = (sn_field_length == 10) ? 16 : 8;
  int extension_bits = 12 * (sdu_count - 1);
  int total_bits = fixed_bits + extension_bits;

  /* round up to a whole number of bytes */
  return (total_bits + 7) / 8;
}
/* Generate one UM PDU of at most 'size' bytes into 'buffer', using the
 * SDUs waiting in the tx list (as many as tx_pdu_size() says can fit).
 *
 * Returns the number of bytes written (header + data), or 0 if no PDU
 * can be generated (tx_pdu_size() reports no SDU).
 *
 * Side effects: advances 'next_byte' of the SDUs put in the PDU, removes
 * from the tx list (and frees) the SDUs that are fully sent, decreases
 * 'tx_size' accordingly and increments VT(US) modulo 'sn_modulus'.
 */
int rlc_entity_um_generate_pdu(rlc_entity_t *_entity, char *buffer, int size)
{
  rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity;
  tx_pdu_size_t pdu_size;
  rlc_sdu_t *sdu;
  int i;
  int cursize;
  int first_sdu_full;
  int last_sdu_full;
  rlc_pdu_encoder_t encoder;
  int fi;
  int e;
  int li;
  char *out;
  int outpos;
  int first_sdu_start_byte;
  int sdu_start_byte;

  pdu_size = tx_pdu_size(entity, size);
  if (pdu_size.sdu_count == 0)
    return 0;

  sdu = entity->tx_list;

  /* remember where the first SDU starts in this PDU: 'next_byte' is
   * modified just below but the value is needed for FI and for the copy
   */
  first_sdu_start_byte = sdu->next_byte;

  /* reserve SDU bytes */
  cursize = 0;
  for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
    int sdu_size = sdu->size - sdu->next_byte;
    /* the last SDU may only partially fit */
    if (cursize + sdu_size > pdu_size.data_size)
      sdu_size = pdu_size.data_size - cursize;
    sdu->next_byte += sdu_size;
    cursize += sdu_size;
  }

  first_sdu_full = first_sdu_start_byte == 0;
  last_sdu_full = pdu_size.last_sdu_is_full;

  /* generate header */
  rlc_pdu_encoder_init(&encoder, buffer, size);

  if (entity->sn_field_length == 10)
    rlc_pdu_encoder_put_bits(&encoder, 0, 3);                /* R1 */

  /* FI: bit 0x02 is set if the PDU does not start at the first byte of
   * an SDU, bit 0x01 is set if it does not end at the last byte of an SDU
   */
  fi = 0;
  if (!first_sdu_full)
    fi |= 0x02;
  if (!last_sdu_full)
    fi |= 0x01;
  rlc_pdu_encoder_put_bits(&encoder, fi, 2);                 /* FI */

  /* see the AM code to understand the logic for Es and LIs */
  if (pdu_size.sdu_count >= 2)
    e = 1;
  else
    e = 0;
  rlc_pdu_encoder_put_bits(&encoder, e, 1);                  /* E */

  if (entity->sn_field_length == 10)
    rlc_pdu_encoder_put_bits(&encoder, entity->vt_us, 10);   /* SN */
  else
    rlc_pdu_encoder_put_bits(&encoder, entity->vt_us, 5);    /* SN */

  /* put LIs */
  sdu = entity->tx_list;
  /* first SDU */
  li = pdu_size.first_sdu_length;
  /* put E+LI only if at least 2 SDUs */
  if (pdu_size.sdu_count >= 2) {
    /* E is 1 if at least 3 SDUs */
    if (pdu_size.sdu_count >= 3)
      e = 1;
    else
      e = 0;
    rlc_pdu_encoder_put_bits(&encoder, e, 1);                /* E */
    rlc_pdu_encoder_put_bits(&encoder, li, 11);              /* LI */
  }
  /* next SDUs, but not the last (no LI for the last) */
  sdu = sdu->next;
  for (i = 2; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
    if (i != pdu_size.sdu_count - 1)
      e = 1;
    else
      e = 0;
    /* the whole SDU size is used as LI here
     * NOTE(review): this assumes SDUs in the middle of a PDU are always
     * sent in full (next_byte was 0 before this PDU) - confirm
     */
    li = sdu->size;
    rlc_pdu_encoder_put_bits(&encoder, e, 1);                /* E */
    rlc_pdu_encoder_put_bits(&encoder, li, 11);              /* LI */
  }

  rlc_pdu_encoder_align(&encoder);

  /* generate data */
  out = buffer + pdu_size.header_size;
  sdu = entity->tx_list;
  sdu_start_byte = first_sdu_start_byte;
  outpos = 0;
  for (i = 0; i < pdu_size.sdu_count; i++, sdu = sdu->next) {
    li = sdu->size - sdu_start_byte;
    /* do not copy more than the data size computed for this PDU */
    if (outpos + li >= pdu_size.data_size)
      li = pdu_size.data_size - outpos;
    memcpy(out+outpos, sdu->data + sdu_start_byte, li);
    outpos += li;
    /* only the first SDU can start in the middle */
    sdu_start_byte = 0;
  }

  /* cleanup sdu list */
  while (entity->tx_list != NULL &&
         entity->tx_list->size == entity->tx_list->next_byte) {
    rlc_sdu_t *c = entity->tx_list;
    /* release SDU bytes */
    entity->tx_size -= c->size;
    entity->tx_list = c->next;
    rlc_free_sdu(c);
  }
  if (entity->tx_list == NULL)
    entity->tx_end = NULL;

  /* update VT(US) */
  entity->vt_us = (entity->vt_us + 1) % entity->sn_modulus;

  return pdu_size.header_size + pdu_size.data_size;
}
rejected, SDU buffer full\n", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + entity->tx_size += size; + + sdu = rlc_new_sdu(buffer, size, sdu_id); + rlc_sdu_list_add(&entity->tx_list, &entity->tx_end, sdu); +} + +/*************************************************************************/ +/* time/timers */ +/*************************************************************************/ + +static void check_t_reordering(rlc_entity_um_t *entity) +{ + int sn; + + /* is t_reordering running and if yes has it expired? */ + if (entity->t_reordering_start == 0 || + entity->t_current <= entity->t_reordering_start + entity->t_reordering) + return; + + /* stop timer */ + entity->t_reordering_start = 0; + + LOG_D(RLC, "%s:%d:%s: t_reordering expired\n", __FILE__, __LINE__, __FUNCTION__); + + /* update VR(UR) to first SN >= VR(UX) of PDU not received + */ + sn = entity->vr_ux; + while (rlc_um_pdu_received(entity, sn)) + sn = (sn + 1) % entity->sn_modulus; + entity->vr_ur = sn; + + rlc_um_reassemble(entity, less_than_vr_ur); + + if (sn_compare_rx(entity, entity->vr_uh, entity->vr_ur) > 0) { + entity->t_reordering_start = entity->t_current; + entity->vr_ux = entity->vr_uh; + } +} + +void rlc_entity_um_set_time(rlc_entity_t *_entity, uint64_t now) +{ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + + entity->t_current = now; + + check_t_reordering(entity); +} + +/*************************************************************************/ +/* discard/re-establishment/delete */ +/*************************************************************************/ + +void rlc_entity_um_discard_sdu(rlc_entity_t *_entity, int sdu_id) +{ + /* implements 36.322 5.3 */ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + rlc_sdu_t head; + rlc_sdu_t *cur; + rlc_sdu_t *prev; + + head.next = entity->tx_list; + cur = entity->tx_list; + prev = &head; + + while (cur != NULL && cur->upper_layer_id != sdu_id) { + prev = cur; + cur = cur->next; + } + + /* if sdu_id not found or some bytes 
have already been 'PDU-ized' + * then do nothing + */ + if (cur == NULL || cur->next_byte != 0) + return; + + /* remove SDU from tx_list */ + prev->next = cur->next; + entity->tx_list = head.next; + if (entity->tx_end == cur) { + if (prev != &head) + entity->tx_end = prev; + else + entity->tx_end = NULL; + } + + rlc_free_sdu(cur); +} + +static void clear_entity(rlc_entity_um_t *entity) +{ + rlc_rx_pdu_segment_t *cur_rx; + rlc_sdu_t *cur_tx; + + entity->vr_ur = 0; + entity->vr_ux = 0; + entity->vr_uh = 0; + + entity->vt_us = 0; + + entity->t_current = 0; + + entity->t_reordering_start = 0; + + cur_rx = entity->rx_list; + while (cur_rx != NULL) { + rlc_rx_pdu_segment_t *p = cur_rx; + cur_rx = cur_rx->next; + rlc_rx_free_pdu_segment(p); + } + entity->rx_list = NULL; + entity->rx_size = 0; + + memset(&entity->reassemble, 0, sizeof(rlc_um_reassemble_t)); + + cur_tx = entity->tx_list; + while (cur_tx != NULL) { + rlc_sdu_t *p = cur_tx; + cur_tx = cur_tx->next; + rlc_free_sdu(p); + } + entity->tx_list = NULL; + entity->tx_end = NULL; + entity->tx_size = 0; +} + +void rlc_entity_um_reestablishment(rlc_entity_t *_entity) +{ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + + rlc_um_reassemble(entity, less_than_vr_uh); + + clear_entity(entity); +} + +void rlc_entity_um_delete(rlc_entity_t *_entity) +{ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + clear_entity(entity); + free(entity); +} diff --git a/openair2/LAYER2/rlc_v2/rlc_entity_um.h b/openair2/LAYER2/rlc_v2/rlc_entity_um.h new file mode 100644 index 0000000000000000000000000000000000000000..02c5141a7a6613536728e2b81c75ca1b21b1db1f --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_entity_um.h @@ -0,0 +1,90 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/* State of the UM reassembly process (turning received PDUs back into
 * SDUs).
 */
typedef struct {
  char sdu[SDU_MAX];       /* sdu is reassembled here */
  int  sdu_pos;            /* next byte to put in sdu */

  /* decoder of current PDU */
  rlc_pdu_decoder_t dec;
  /* SN of the last PDU given to reassembly; the next PDU is expected to
   * have SN == (sn + 1) % sn_modulus, otherwise the SDU being
   * reassembled is discarded
   */
  int sn;

  /* set to 1 when the PDU being processed continues an SDU whose head
   * was not received; the SDU is then discarded instead of delivered
   */
  int sdu_head_missing;
} rlc_um_reassemble_t;

/* An RLC UM entity: configuration, RX/TX state variables, timer and the
 * lists used for reception, reassembly and transmission.
 */
typedef struct {
  rlc_entity_t common;

  /* configuration */
  int t_reordering;           /* compared with t_current, so same unit */
  int sn_field_length;

  int sn_modulus;             /* 1024 for sn_field_length == 10, 32 for 5 */
  int window_size;            /* 512 for sn_field_length == 10, 16 for 5 */

  /* runtime rx */
  /* VR(UR), VR(UX) and VR(UH) in the comments of the .c file */
  int vr_ur;
  int vr_ux;
  int vr_uh;

  /* runtime tx */
  int vt_us;                  /* SN of the next PDU to generate */

  /* set to the latest known time by the user of the module. Unit: ms */
  uint64_t t_current;

  /* timers (stores the TTI of activation, 0 means not active) */
  uint64_t t_reordering_start;

  /* rx management */
  rlc_rx_pdu_segment_t *rx_list;
  int                  rx_size;     /* bytes of the PDUs in rx_list */
  int                  rx_maxsize;  /* PDUs are discarded above this */

  /* reassembly management */
  rlc_um_reassemble_t reassemble;

  /* tx management */
  rlc_sdu_t *tx_list;
  rlc_sdu_t *tx_end;
  int       tx_size;     /* bytes of the SDUs accepted in tx_list */
  int       tx_maxsize;  /* SDUs are rejected above this */
} rlc_entity_um_t;
+ * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/* from openair */ +#include "rlc.h" +#include "pdcp.h" + +/* from new rlc module */ +#include "asn1_utils.h" +#include "rlc_ue_manager.h" +#include "rlc_entity.h" + +#include <stdint.h> + +static rlc_ue_manager_t *rlc_ue_manager; + +/* TODO: handle time a bit more properly */ +static uint64_t rlc_current_time; +static int rlc_current_time_last_frame; +static int rlc_current_time_last_subframe; + +void mac_rlc_data_ind ( + const module_id_t module_idP, + const rnti_t rntiP, + const eNB_index_t eNB_index, + const frame_t frameP, + const eNB_flag_t enb_flagP, + const MBMS_flag_t MBMS_flagP, + const logical_chan_id_t channel_idP, + char *buffer_pP, + const tb_size_t tb_sizeP, + num_tb_t num_tbP, + crc_t *crcs_pP) +{ + rlc_ue_t *ue; + rlc_entity_t *rb; + int rnti; + int channel_id; + + if (enb_flagP == 1 && module_idP != 0) { + LOG_E(RLC, "%s:%d:%s: fatal, module_id must be 0 for eNB\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + if (/*module_idP != 0 ||*/ eNB_index != 0 /*|| enb_flagP != 1 || MBMS_flagP != 0*/) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + if (enb_flagP) + T(T_ENB_RLC_MAC_UL, T_INT(module_idP), T_INT(rntiP), + T_INT(channel_idP), T_INT(tb_sizeP)); + + /* TODO: better handle mbms, maybe we should not change rnti here */ + if (!enb_flagP && MBMS_flagP) { + rnti = 0xfffd; + /* TODO: 
handle channel_id properly */ + if (channel_idP != 5) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + channel_id = 7; + } else { + rnti = rntiP; + channel_id = channel_idP; + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rnti); + + switch (channel_id) { + case 1 ... 2: rb = ue->srb[channel_id - 1]; break; + case 3 ... 7: rb = ue->drb[channel_id - 3]; break; + default: rb = NULL; break; + } + + if (rb != NULL) { + rb->set_time(rb, rlc_current_time); + rb->recv_pdu(rb, buffer_pP, tb_sizeP); + } else { + LOG_E(RLC, "%s:%d:%s: fatal: no RB found (rnti %d channel ID %d)\n", + __FILE__, __LINE__, __FUNCTION__, rnti, channel_id); + exit(1); + } + + rlc_manager_unlock(rlc_ue_manager); +} + +tbs_size_t mac_rlc_data_req( + const module_id_t module_idP, + const rnti_t rntiP, + const eNB_index_t eNB_index, + const frame_t frameP, + const eNB_flag_t enb_flagP, + const MBMS_flag_t MBMS_flagP, + const logical_chan_id_t channel_idP, + const tb_size_t tb_sizeP, + char *buffer_pP, + const uint32_t sourceL2Id, + const uint32_t destinationL2Id + ) +{ + int ret; + rlc_ue_t *ue; + rlc_entity_t *rb; + int maxsize; + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rntiP); + + switch (channel_idP) { + case 1 ... 2: rb = ue->srb[channel_idP - 1]; break; + case 3 ... 
7: rb = ue->drb[channel_idP - 3]; break; + default: rb = NULL; break; + } + + if (MBMS_flagP == MBMS_FLAG_YES) { + if (channel_idP >= 1 && channel_idP <= 5) + rb = ue->drb[channel_idP - 1]; + else + rb = NULL; + } + + + if (rb != NULL) { + rb->set_time(rb, rlc_current_time); + maxsize = tb_sizeP; + ret = rb->generate_pdu(rb, buffer_pP, maxsize); + } else { + LOG_E(RLC, "%s:%d:%s: fatal: data req for unknown RB\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + ret = 0; + } + + rlc_manager_unlock(rlc_ue_manager); + + if (enb_flagP) + T(T_ENB_RLC_MAC_DL, T_INT(module_idP), T_INT(rntiP), + T_INT(channel_idP), T_INT(ret)); + + return ret; +} + +mac_rlc_status_resp_t mac_rlc_status_ind( + const module_id_t module_idP, + const rnti_t rntiP, + const eNB_index_t eNB_index, + const frame_t frameP, + const sub_frame_t subframeP, + const eNB_flag_t enb_flagP, + const MBMS_flag_t MBMS_flagP, + const logical_chan_id_t channel_idP, + const uint32_t sourceL2Id, + const uint32_t destinationL2Id + ) +{ + rlc_ue_t *ue; + mac_rlc_status_resp_t ret; + rlc_entity_t *rb; + + /* TODO: handle time a bit more properly */ + if (rlc_current_time_last_frame != frameP || + rlc_current_time_last_subframe != subframeP) { + rlc_current_time++; + rlc_current_time_last_frame = frameP; + rlc_current_time_last_subframe = subframeP; + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rntiP); + + switch (channel_idP) { + case 1 ... 2: rb = ue->srb[channel_idP - 1]; break; + case 3 ... 7: rb = ue->drb[channel_idP - 3]; break; + default: rb = NULL; break; + } + + if (MBMS_flagP == MBMS_FLAG_YES) { + if (channel_idP >= 1 && channel_idP <= 5) + rb = ue->drb[channel_idP - 1]; + else + rb = NULL; + } + + if (rb != NULL) { + rlc_entity_buffer_status_t buf_stat; + rb->set_time(rb, rlc_current_time); + /* 36.321 deals with BSR values up to 3000000 bytes, after what it + * reports '> 3000000' (table 6.1.3.1-2). Passing 4000000 is thus + * more than enough. 
+ */ + buf_stat = rb->buffer_status(rb, 4000000); + ret.bytes_in_buffer = buf_stat.status_size + + buf_stat.retx_size + + buf_stat.tx_size; + } else { + ret.bytes_in_buffer = 0; + } + + rlc_manager_unlock(rlc_ue_manager); + + ret.pdus_in_buffer = 0; + /* TODO: creation time may be important (unit: frame, as it seems) */ + ret.head_sdu_creation_time = 0; + ret.head_sdu_remaining_size_to_send = 0; + ret.head_sdu_is_segmented = 0; + return ret; +} + +rlc_buffer_occupancy_t mac_rlc_get_buffer_occupancy_ind( + const module_id_t module_idP, + const rnti_t rntiP, + const eNB_index_t eNB_index, + const frame_t frameP, + const sub_frame_t subframeP, + const eNB_flag_t enb_flagP, + const logical_chan_id_t channel_idP) +{ + rlc_ue_t *ue; + rlc_buffer_occupancy_t ret; + rlc_entity_t *rb; + + if (enb_flagP) { + LOG_E(RLC, "Tx mac_rlc_get_buffer_occupancy_ind function is not implemented for eNB LcId=%u\n", channel_idP); + exit(1); + } + + /* TODO: handle time a bit more properly */ + if (rlc_current_time_last_frame != frameP || + rlc_current_time_last_subframe != subframeP) { + rlc_current_time++; + rlc_current_time_last_frame = frameP; + rlc_current_time_last_subframe = subframeP; + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rntiP); + + switch (channel_idP) { + case 1 ... 2: rb = ue->srb[channel_idP - 1]; break; + case 3 ... 7: rb = ue->drb[channel_idP - 3]; break; + default: rb = NULL; break; + } + + if (rb != NULL) { + rlc_entity_buffer_status_t buf_stat; + rb->set_time(rb, rlc_current_time); + /* 36.321 deals with BSR values up to 3000000 bytes, after what it + * reports '> 3000000' (table 6.1.3.1-2). Passing 4000000 is thus + * more than enough. 
+ */ + buf_stat = rb->buffer_status(rb, 4000000); + ret = buf_stat.status_size + + buf_stat.retx_size + + buf_stat.tx_size; + } else { + ret = 0; + } + + rlc_manager_unlock(rlc_ue_manager); + + return ret; +} + +int oai_emulation; + +rlc_op_status_t rlc_data_req (const protocol_ctxt_t *const ctxt_pP, + const srb_flag_t srb_flagP, + const MBMS_flag_t MBMS_flagP, + const rb_id_t rb_idP, + const mui_t muiP, + confirm_t confirmP, + sdu_size_t sdu_sizeP, + mem_block_t *sdu_pP, + const uint32_t *const sourceL2Id, + const uint32_t *const destinationL2Id + ) +{ + int rnti = ctxt_pP->rnti; + rlc_ue_t *ue; + rlc_entity_t *rb; + + if (MBMS_flagP == MBMS_FLAG_YES) + rnti = 0xfffd; + + LOG_D(RLC, "%s rnti %d srb_flag %d rb_id %d mui %d confirm %d sdu_size %d MBMS_flag %d\n", + __FUNCTION__, rnti, srb_flagP, (int)rb_idP, muiP, confirmP, sdu_sizeP, + MBMS_flagP); + + if (ctxt_pP->enb_flag) + T(T_ENB_RLC_DL, T_INT(ctxt_pP->module_id), + T_INT(ctxt_pP->rnti), T_INT(rb_idP), T_INT(sdu_sizeP)); + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rnti); + + rb = NULL; + + if (srb_flagP) { + if (rb_idP >= 1 && rb_idP <= 2) + rb = ue->srb[rb_idP - 1]; + } else { + if (rb_idP >= 1 && rb_idP <= 5) + rb = ue->drb[rb_idP - 1]; + } + + if( MBMS_flagP == MBMS_FLAG_YES) { + if (rb_idP >= 1 && rb_idP <= 5) + rb = ue->drb[rb_idP - 1]; + } + + if (rb != NULL) { + rb->set_time(rb, rlc_current_time); + rb->recv_sdu(rb, (char *)sdu_pP->data, sdu_sizeP, muiP); + } else { + LOG_E(RLC, "%s:%d:%s: fatal: SDU sent to unknown RB\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + rlc_manager_unlock(rlc_ue_manager); + + free_mem_block(sdu_pP, __func__); + + return RLC_OP_STATUS_OK; +} + +int rlc_module_init(int enb_flag) +{ + static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + static int inited = 0; + + if (pthread_mutex_lock(&lock)) abort(); + + if (inited) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + inited = 1; + + 
rlc_ue_manager = new_rlc_ue_manager(enb_flag); + + if (pthread_mutex_unlock(&lock)) abort(); + + return 0; +} + +void rlc_util_print_hex_octets(comp_name_t componentP, unsigned char *dataP, const signed long sizeP) +{ +} + +#include "common/ran_context.h" +extern RAN_CONTEXT_t RC; + +static void deliver_sdu(void *_ue, rlc_entity_t *entity, char *buf, int size) +{ + rlc_ue_t *ue = _ue; + int is_srb; + int rb_id; + protocol_ctxt_t ctx; + mem_block_t *memblock; + int i; + int is_enb; + int is_mbms; + + /* TODO: be sure it's fine to check rnti for MBMS */ + is_mbms = ue->rnti == 0xfffd; + + /* is it SRB? */ + for (i = 0; i < 2; i++) { + if (entity == ue->srb[i]) { + is_srb = 1; + rb_id = i+1; + goto rb_found; + } + } + + /* maybe DRB? */ + for (i = 0; i < 5; i++) { + if (entity == ue->drb[i]) { + is_srb = 0; + rb_id = i+1; + goto rb_found; + } + } + + LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n", + __FILE__, __LINE__, __FUNCTION__, ue->rnti); + exit(1); + +rb_found: + LOG_D(RLC, "%s:%d:%s: delivering SDU (rnti %d is_srb %d rb_id %d) size %d", + __FILE__, __LINE__, __FUNCTION__, ue->rnti, is_srb, rb_id, size); + + memblock = get_free_mem_block(size, __func__); + if (memblock == NULL) { + LOG_E(RLC, "%s:%d:%s: ERROR: get_free_mem_block failed\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + memcpy(memblock->data, buf, size); + + /* unused fields? */ + ctx.instance = ue->module_id; + ctx.frame = 0; + ctx.subframe = 0; + ctx.eNB_index = 0; + ctx.configured = 1; + ctx.brOption = 0; + + /* used fields? 
*/ + ctx.module_id = ue->module_id; + ctx.rnti = ue->rnti; + + is_enb = rlc_manager_get_enb_flag(rlc_ue_manager); + ctx.enb_flag = is_enb; + + if (is_enb) { + T(T_ENB_RLC_UL, + T_INT(0 /*ctxt_pP->module_id*/), + T_INT(ue->rnti), T_INT(rb_id), T_INT(size)); + + const ngran_node_t type = RC.rrc[0 /*ctxt_pP->module_id*/]->node_type; + AssertFatal(type != ngran_eNB_CU && type != ngran_ng_eNB_CU && type != ngran_gNB_CU, + "Can't be CU, bad node type %d\n", type); + + if (NODE_IS_DU(type) && is_srb == 1) { + MessageDef *msg = itti_alloc_new_message(TASK_RLC_ENB, F1AP_UL_RRC_MESSAGE); + F1AP_UL_RRC_MESSAGE(msg).rnti = ue->rnti; + F1AP_UL_RRC_MESSAGE(msg).srb_id = rb_id; + F1AP_UL_RRC_MESSAGE(msg).rrc_container = (unsigned char *)buf; + F1AP_UL_RRC_MESSAGE(msg).rrc_container_length = size; + itti_send_msg_to_task(TASK_DU_F1, ENB_MODULE_ID_TO_INSTANCE(0 /*ctxt_pP->module_id*/), msg); + return; + } + } + + if (!get_pdcp_data_ind_func()(&ctx, is_srb, is_mbms, rb_id, size, memblock, NULL, NULL)) { + LOG_E(RLC, "%s:%d:%s: ERROR: pdcp_data_ind failed (is_srb %d rb_id %d rnti %d)\n", + __FILE__, __LINE__, __FUNCTION__, + is_srb, rb_id, ue->rnti); + /* what to do in case of failure? for the moment: nothing */ + } +} + +static void successful_delivery(void *_ue, rlc_entity_t *entity, int sdu_id) +{ + rlc_ue_t *ue = _ue; + int i; + int is_srb; + int rb_id; + MessageDef *msg; + int is_enb; + + /* is it SRB? */ + for (i = 0; i < 2; i++) { + if (entity == ue->srb[i]) { + is_srb = 1; + rb_id = i+1; + goto rb_found; + } + } + + /* maybe DRB? */ + for (i = 0; i < 5; i++) { + if (entity == ue->drb[i]) { + is_srb = 0; + rb_id = i+1; + goto rb_found; + } + } + + LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n", + __FILE__, __LINE__, __FUNCTION__, ue->rnti); + exit(1); + +rb_found: + LOG_D(RLC, "sdu %d was successfully delivered on %s %d\n", + sdu_id, + is_srb ? "SRB" : "DRB", + rb_id); + + /* TODO: do something for DRBs? 
*/ + if (is_srb == 0) + return; + + is_enb = rlc_manager_get_enb_flag(rlc_ue_manager); + if (!is_enb) + return; + + msg = itti_alloc_new_message(TASK_RLC_ENB, RLC_SDU_INDICATION); + RLC_SDU_INDICATION(msg).rnti = ue->rnti; + RLC_SDU_INDICATION(msg).is_successful = 1; + RLC_SDU_INDICATION(msg).srb_id = rb_id; + RLC_SDU_INDICATION(msg).message_id = sdu_id; + /* TODO: accept more than 1 instance? here we send to instance id 0 */ + itti_send_msg_to_task(TASK_RRC_ENB, 0, msg); +} + +static void max_retx_reached(void *_ue, rlc_entity_t *entity) +{ + rlc_ue_t *ue = _ue; + int i; + int is_srb; + int rb_id; + MessageDef *msg; + int is_enb; + + /* is it SRB? */ + for (i = 0; i < 2; i++) { + if (entity == ue->srb[i]) { + is_srb = 1; + rb_id = i+1; + goto rb_found; + } + } + + /* maybe DRB? */ + for (i = 0; i < 5; i++) { + if (entity == ue->drb[i]) { + is_srb = 0; + rb_id = i+1; + goto rb_found; + } + } + + LOG_E(RLC, "%s:%d:%s: fatal, no RB found for ue %d\n", + __FILE__, __LINE__, __FUNCTION__, ue->rnti); + exit(1); + +rb_found: + LOG_D(RLC, "max RETX reached on %s %d\n", + is_srb ? "SRB" : "DRB", + rb_id); + + /* TODO: do something for DRBs? */ + if (is_srb == 0) + return; + + is_enb = rlc_manager_get_enb_flag(rlc_ue_manager); + if (!is_enb) + return; + + msg = itti_alloc_new_message(TASK_RLC_ENB, RLC_SDU_INDICATION); + RLC_SDU_INDICATION(msg).rnti = ue->rnti; + RLC_SDU_INDICATION(msg).is_successful = 0; + RLC_SDU_INDICATION(msg).srb_id = rb_id; + RLC_SDU_INDICATION(msg).message_id = -1; + /* TODO: accept more than 1 instance? 
here we send to instance id 0 */ + itti_send_msg_to_task(TASK_RRC_ENB, 0, msg); +} + +static void add_srb(int rnti, int module_id, struct LTE_SRB_ToAddMod *s) +{ + rlc_entity_t *rlc_am; + rlc_ue_t *ue; + + struct LTE_SRB_ToAddMod__rlc_Config *r = s->rlc_Config; + struct LTE_SRB_ToAddMod__logicalChannelConfig *l = s->logicalChannelConfig; + int srb_id = s->srb_Identity; + int logical_channel_group; + + int t_reordering; + int t_status_prohibit; + int t_poll_retransmit; + int poll_pdu; + int poll_byte; + int max_retx_threshold; + + if (srb_id != 1 && srb_id != 2) { + LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n", + __FILE__, __LINE__, __FUNCTION__, srb_id); + exit(1); + } + + switch (l->present) { + case LTE_SRB_ToAddMod__logicalChannelConfig_PR_explicitValue: + logical_channel_group = *l->choice.explicitValue.ul_SpecificParameters->logicalChannelGroup; + break; + case LTE_SRB_ToAddMod__logicalChannelConfig_PR_defaultValue: + /* default value from 36.331 9.2.1 */ + logical_channel_group = 0; + break; + default: + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + /* TODO: accept other values? 
*/ + if (logical_channel_group != 0) { + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + switch (r->present) { + case LTE_SRB_ToAddMod__rlc_Config_PR_explicitValue: { + struct LTE_RLC_Config__am *am; + if (r->choice.explicitValue.present != LTE_RLC_Config_PR_am) { + LOG_E(RLC, "%s:%d:%s: fatal error, must be RLC AM\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + am = &r->choice.explicitValue.choice.am; + t_reordering = decode_t_reordering(am->dl_AM_RLC.t_Reordering); + t_status_prohibit = decode_t_status_prohibit(am->dl_AM_RLC.t_StatusProhibit); + t_poll_retransmit = decode_t_poll_retransmit(am->ul_AM_RLC.t_PollRetransmit); + poll_pdu = decode_poll_pdu(am->ul_AM_RLC.pollPDU); + poll_byte = decode_poll_byte(am->ul_AM_RLC.pollByte); + max_retx_threshold = decode_max_retx_threshold(am->ul_AM_RLC.maxRetxThreshold); + break; + } + case LTE_SRB_ToAddMod__rlc_Config_PR_defaultValue: + /* default values from 36.331 9.2.1 */ + t_reordering = 35; + t_status_prohibit = 0; + t_poll_retransmit = 45; + poll_pdu = -1; + poll_byte = -1; + max_retx_threshold = 4; + break; + default: + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rnti); + ue->module_id = module_id; + if (ue->srb[srb_id-1] != NULL) { + LOG_D(RLC, "%s:%d:%s: warning SRB %d already exist for ue %d, do nothing\n", + __FILE__, __LINE__, __FUNCTION__, srb_id, rnti); + } else { + rlc_am = new_rlc_entity_am(100000, + 100000, + deliver_sdu, ue, + successful_delivery, ue, + max_retx_reached, ue, + t_reordering, t_status_prohibit, + t_poll_retransmit, + poll_pdu, poll_byte, max_retx_threshold); + rlc_ue_add_srb_rlc_entity(ue, srb_id, rlc_am); + + LOG_D(RLC, "%s:%d:%s: added srb %d to ue %d\n", + __FILE__, __LINE__, __FUNCTION__, srb_id, rnti); + } + rlc_manager_unlock(rlc_ue_manager); +} + +static void add_drb_am(int rnti, int module_id, struct 
LTE_DRB_ToAddMod *s) +{ + rlc_entity_t *rlc_am; + rlc_ue_t *ue; + + struct LTE_RLC_Config *r = s->rlc_Config; + struct LTE_LogicalChannelConfig *l = s->logicalChannelConfig; + int drb_id = s->drb_Identity; + int channel_id = *s->logicalChannelIdentity; + int logical_channel_group; + + int t_reordering; + int t_status_prohibit; + int t_poll_retransmit; + int poll_pdu; + int poll_byte; + int max_retx_threshold; + + if (!(drb_id >= 1 && drb_id <= 5)) { + LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n", + __FILE__, __LINE__, __FUNCTION__, drb_id); + exit(1); + } + + if (channel_id != drb_id + 2) { + LOG_E(RLC, "%s:%d:%s: todo, remove this limitation\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + logical_channel_group = *l->ul_SpecificParameters->logicalChannelGroup; + + /* TODO: accept other values? */ + if (logical_channel_group != 1) { + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + switch (r->present) { + case LTE_RLC_Config_PR_am: { + struct LTE_RLC_Config__am *am; + am = &r->choice.am; + t_reordering = decode_t_reordering(am->dl_AM_RLC.t_Reordering); + t_status_prohibit = decode_t_status_prohibit(am->dl_AM_RLC.t_StatusProhibit); + t_poll_retransmit = decode_t_poll_retransmit(am->ul_AM_RLC.t_PollRetransmit); + poll_pdu = decode_poll_pdu(am->ul_AM_RLC.pollPDU); + poll_byte = decode_poll_byte(am->ul_AM_RLC.pollByte); + max_retx_threshold = decode_max_retx_threshold(am->ul_AM_RLC.maxRetxThreshold); + break; + } + default: + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rnti); + ue->module_id = module_id; + if (ue->drb[drb_id-1] != NULL) { + LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n", + __FILE__, __LINE__, __FUNCTION__, drb_id, rnti); + } else { + rlc_am = new_rlc_entity_am(1000000, + 1000000, + deliver_sdu, ue, + successful_delivery, ue, + max_retx_reached, 
ue, + t_reordering, t_status_prohibit, + t_poll_retransmit, + poll_pdu, poll_byte, max_retx_threshold); + rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_am); + + LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n", + __FILE__, __LINE__, __FUNCTION__, drb_id, rnti); + } + rlc_manager_unlock(rlc_ue_manager); +} + +static void add_drb_um(int rnti, int module_id, struct LTE_DRB_ToAddMod *s) +{ + rlc_entity_t *rlc_um; + rlc_ue_t *ue; + + struct LTE_RLC_Config *r = s->rlc_Config; + struct LTE_LogicalChannelConfig *l = s->logicalChannelConfig; + int drb_id = s->drb_Identity; + int channel_id = *s->logicalChannelIdentity; + int logical_channel_group; + + int t_reordering; + int sn_field_length; + + if (!(drb_id >= 1 && drb_id <= 5)) { + LOG_E(RLC, "%s:%d:%s: fatal, bad srb id %d\n", + __FILE__, __LINE__, __FUNCTION__, drb_id); + exit(1); + } + + if (channel_id != drb_id + 2) { + LOG_E(RLC, "%s:%d:%s: todo, remove this limitation\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + logical_channel_group = *l->ul_SpecificParameters->logicalChannelGroup; + + /* TODO: accept other values? 
*/ + if (logical_channel_group != 1) { + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + switch (r->present) { + case LTE_RLC_Config_PR_um_Bi_Directional: { + struct LTE_RLC_Config__um_Bi_Directional *um; + um = &r->choice.um_Bi_Directional; + t_reordering = decode_t_reordering(um->dl_UM_RLC.t_Reordering); + if (um->dl_UM_RLC.sn_FieldLength != um->ul_UM_RLC.sn_FieldLength) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + sn_field_length = decode_sn_field_length(um->dl_UM_RLC.sn_FieldLength); + break; + } + default: + LOG_E(RLC, "%s:%d:%s: fatal error\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, rnti); + ue->module_id = module_id; + if (ue->drb[drb_id-1] != NULL) { + LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n", + __FILE__, __LINE__, __FUNCTION__, drb_id, rnti); + } else { + rlc_um = new_rlc_entity_um(1000000, + 1000000, + deliver_sdu, ue, + t_reordering, + sn_field_length); + rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_um); + + LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n", + __FILE__, __LINE__, __FUNCTION__, drb_id, rnti); + } + rlc_manager_unlock(rlc_ue_manager); +} + +static void add_drb(int rnti, int module_id, struct LTE_DRB_ToAddMod *s) +{ + switch (s->rlc_Config->present) { + case LTE_RLC_Config_PR_am: + add_drb_am(rnti, module_id, s); + break; + case LTE_RLC_Config_PR_um_Bi_Directional: + add_drb_um(rnti, module_id, s); + break; + default: + LOG_E(RLC, "%s:%d:%s: fatal: unhandled DRB type\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } +} + +rlc_op_status_t rrc_rlc_config_asn1_req (const protocol_ctxt_t * const ctxt_pP, + const LTE_SRB_ToAddModList_t * const srb2add_listP, + const LTE_DRB_ToAddModList_t * const drb2add_listP, + const LTE_DRB_ToReleaseList_t * const drb2release_listP, + const LTE_PMCH_InfoList_r9_t * const pmch_InfoList_r9_pP, + const 
uint32_t sourceL2Id, + const uint32_t destinationL2Id + ) +{ + int rnti = ctxt_pP->rnti; + int module_id = ctxt_pP->module_id; + int i; + int j; + + if (ctxt_pP->enb_flag == 1 && + (ctxt_pP->module_id != 0 || ctxt_pP->instance != 0)) { + LOG_E(RLC, "%s: module_id != 0 or instance != 0 not handled for eNB\n", + __FUNCTION__); + exit(1); + } + + if (0 /*|| + ctxt_pP->instance != 0 || ctxt_pP->eNB_index != 0 || + ctxt_pP->configured != 1 || ctxt_pP->brOption != 0 */) { + LOG_E(RLC, "%s: ctxt_pP not handled (%d %d %d %d %d %d)\n", __FUNCTION__, + ctxt_pP->enb_flag , ctxt_pP->module_id, ctxt_pP->instance, + ctxt_pP->eNB_index, ctxt_pP->configured, ctxt_pP->brOption); + exit(1); + } + + if (pmch_InfoList_r9_pP != NULL) { + int mbms_rnti = 0xfffd; + LTE_MBMS_SessionInfoList_r9_t *mbms_SessionInfoList_r9_p = NULL; + LTE_MBMS_SessionInfo_r9_t *MBMS_SessionInfo_p = NULL; + mbms_session_id_t mbms_session_id; + mbms_service_id_t mbms_service_id; + rb_id_t drb_id = 0; + logical_chan_id_t lc_id = 0; + //LTE_DRB_Identity_t drb_id = 0; + //LTE_DRB_Identity_t* pdrb_id = NULL; + + for (i=0; i<pmch_InfoList_r9_pP->list.count; i++) { + mbms_SessionInfoList_r9_p = &(pmch_InfoList_r9_pP->list.array[i]->mbms_SessionInfoList_r9); + + for (j=0; j<mbms_SessionInfoList_r9_p->list.count; j++) { + MBMS_SessionInfo_p = mbms_SessionInfoList_r9_p->list.array[j]; + if (0/*MBMS_SessionInfo_p->sessionId_r9*/) + mbms_session_id = MBMS_SessionInfo_p->sessionId_r9->buf[0]; + else + mbms_session_id = MBMS_SessionInfo_p->logicalChannelIdentity_r9; + lc_id = mbms_session_id; + mbms_service_id = MBMS_SessionInfo_p->tmgi_r9.serviceId_r9.buf[2]; //serviceId is 3-octet string +// mbms_service_id = j; + +#if 0 + /* TODO: check if this code should stay there + * as it is both enb and ue cases do the same thing + */ + // can set the mch_id = i + if (ctxt_pP->enb_flag) { + drb_id = (mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id;//+ (LTE_maxDRB + 3) * MAX_MOBILES_PER_ENB; // 1 + } else { + drb_id = 
(mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id; // + (LTE_maxDRB + 3); // 15 + } +#endif + + drb_id = (mbms_service_id * LTE_maxSessionPerPMCH ) + mbms_session_id; + + LOG_I(RLC, PROTOCOL_CTXT_FMT" CONFIG REQ MBMS ASN1 LC ID %u RB ID %u SESSION ID %u SERVICE ID %u, mbms_rnti %x\n", + PROTOCOL_CTXT_ARGS(ctxt_pP), + lc_id, + (int)drb_id, + mbms_session_id, + mbms_service_id, + mbms_rnti + ); + + rlc_entity_t *rlc_um; + rlc_ue_t *ue; + + //drb_id = rb_id; + + rlc_manager_lock(rlc_ue_manager); + ue = rlc_manager_get_ue(rlc_ue_manager, mbms_rnti); + if (ue->drb[drb_id-1] != NULL) { + LOG_D(RLC, "%s:%d:%s: warning DRB %d already exist for ue %d, do nothing\n", + __FILE__, __LINE__, __FUNCTION__, (int)drb_id, mbms_rnti); + } else { + rlc_um = new_rlc_entity_um(1000000, + 1000000, + deliver_sdu, ue, + 0,//LTE_T_Reordering_ms0,//t_reordering, + 5//LTE_SN_FieldLength_size5//sn_field_length + ); + rlc_ue_add_drb_rlc_entity(ue, drb_id, rlc_um); + + LOG_D(RLC, "%s:%d:%s: added drb %d to ue %d\n", + __FILE__, __LINE__, __FUNCTION__, (int)drb_id, mbms_rnti); + } + rlc_manager_unlock(rlc_ue_manager); + + } + } + + } + + if (drb2release_listP != NULL) { + LOG_E(RLC, "%s:%d:%s: TODO\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + if (srb2add_listP != NULL) { + for (i = 0; i < srb2add_listP->list.count; i++) { + add_srb(rnti, module_id, srb2add_listP->list.array[i]); + } + } + + if (drb2add_listP != NULL) { + for (i = 0; i < drb2add_listP->list.count; i++) { + add_drb(rnti, module_id, drb2add_listP->list.array[i]); + } + } + + return RLC_OP_STATUS_OK; +} + +rlc_op_status_t rrc_rlc_config_req ( + const protocol_ctxt_t* const ctxt_pP, + const srb_flag_t srb_flagP, + const MBMS_flag_t mbms_flagP, + const config_action_t actionP, + const rb_id_t rb_idP, + const rlc_info_t rlc_infoP) +{ + rlc_ue_t *ue; + int i; + + if (mbms_flagP) { + LOG_E(RLC, "%s:%d:%s: todo (mbms not supported)\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + if (actionP != 
CONFIG_ACTION_REMOVE) { + LOG_E(RLC, "%s:%d:%s: todo (only CONFIG_ACTION_REMOVE supported)\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + if (ctxt_pP->module_id) { + LOG_E(RLC, "%s:%d:%s: todo (only module_id 0 supported)\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + if ((srb_flagP && !(rb_idP >= 1 && rb_idP <= 2)) || + (!srb_flagP && !(rb_idP >= 1 && rb_idP <= 5))) { + LOG_E(RLC, "%s:%d:%s: bad rb_id (%d) (is_srb %d)\n", __FILE__, __LINE__, __FUNCTION__, (int)rb_idP, srb_flagP); + exit(1); + } + rlc_manager_lock(rlc_ue_manager); + LOG_D(RLC, "%s:%d:%s: remove rb %d (is_srb %d) for UE %d\n", __FILE__, __LINE__, __FUNCTION__, (int)rb_idP, srb_flagP, ctxt_pP->rnti); + ue = rlc_manager_get_ue(rlc_ue_manager, ctxt_pP->rnti); + if (srb_flagP) { + if (ue->srb[rb_idP-1] != NULL) { + ue->srb[rb_idP-1]->delete(ue->srb[rb_idP-1]); + ue->srb[rb_idP-1] = NULL; + } else + LOG_W(RLC, "removing non allocated SRB %d, do nothing\n", (int)rb_idP); + } else { + if (ue->drb[rb_idP-1] != NULL) { + ue->drb[rb_idP-1]->delete(ue->drb[rb_idP-1]); + ue->drb[rb_idP-1] = NULL; + } else + LOG_W(RLC, "removing non allocated DRB %d, do nothing\n", (int)rb_idP); + } + /* remove UE if it has no more RB configured */ + for (i = 0; i < 2; i++) + if (ue->srb[i] != NULL) + break; + if (i == 2) { + for (i = 0; i < 5; i++) + if (ue->drb[i] != NULL) + break; + if (i == 5) + rlc_manager_remove_ue(rlc_ue_manager, ctxt_pP->rnti); + } + rlc_manager_unlock(rlc_ue_manager); + return RLC_OP_STATUS_OK; +} + +void rrc_rlc_register_rrc (rrc_data_ind_cb_t rrc_data_indP, rrc_data_conf_cb_t rrc_data_confP) +{ + /* nothing to do */ +} + +rlc_op_status_t rrc_rlc_remove_ue (const protocol_ctxt_t* const x) +{ + LOG_D(RLC, "%s:%d:%s: remove UE %d\n", __FILE__, __LINE__, __FUNCTION__, x->rnti); + rlc_manager_lock(rlc_ue_manager); + rlc_manager_remove_ue(rlc_ue_manager, x->rnti); + rlc_manager_unlock(rlc_ue_manager); + + return RLC_OP_STATUS_OK; +} diff --git a/openair2/LAYER2/rlc_v2/rlc_pdu.c 
b/openair2/LAYER2/rlc_v2/rlc_pdu.c
new file mode 100644
index 0000000000000000000000000000000000000000..c55e2d9c3c54bcd6b7415146f9688f8cc500699c
--- /dev/null
+++ b/openair2/LAYER2/rlc_v2/rlc_pdu.c
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The OpenAirInterface Software Alliance licenses this file to You under
+ * the OAI Public License, Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.openairinterface.org/?page_id=698
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ * contact@openairinterface.org
+ */
+
+#include "rlc_pdu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "LOG/log.h"
+
+/**************************************************************************/
+/* RX PDU segment and segment list */
+/**************************************************************************/
+
+/* Allocate an RX PDU segment holding its own copy of 'size' bytes of 'data'.
+ * The other arguments are stored as is; 'next' is set to NULL.
+ * The returned segment is released with rlc_rx_free_pdu_segment().
+ * The process exits if 'size' is negative or if memory cannot be allocated.
+ */
+rlc_rx_pdu_segment_t *rlc_rx_new_pdu_segment(int sn, int so, int size,
+    int is_last, char *data, int data_offset)
+{
+  rlc_rx_pdu_segment_t *ret;
+
+  if (size < 0) {
+    LOG_E(RLC, "%s:%d:%s: fatal, bad size %d\n",
+          __FILE__, __LINE__, __FUNCTION__, size);
+    exit(1);
+  }
+
+  ret = malloc(sizeof(rlc_rx_pdu_segment_t));
+  if (ret == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  ret->sn = sn;
+  ret->so = so;
+  ret->size = size;
+  ret->is_last = is_last;
+  ret->next = NULL;
+
+  /* malloc(0) may legally return NULL; always ask for at least one byte so
+   * that an empty segment is not reported as an out of memory condition
+   */
+  ret->data = malloc(size > 0 ? size : 1);
+  if (ret->data == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  if (size > 0)
+    memcpy(ret->data, data, size);
+
+  ret->data_offset = data_offset;
+
+  return ret;
+}
+
+/* Release a segment and its data buffer. NULL is accepted, as for free(). */
+void rlc_rx_free_pdu_segment(rlc_rx_pdu_segment_t *pdu_segment)
+{
+  if (pdu_segment == NULL)
+    return;
+
+  free(pdu_segment->data);
+  free(pdu_segment);
+}
+
+/* Insert 'pdu_segment' in the sorted list 'list' and return the new head.
+ * 'sn_compare' is called as sn_compare(sn_compare_data, a, b) and returns a
+ * value > 0 when 'a' has to be placed after 'b'. Segments with the same 'sn'
+ * are ordered by increasing 'so'. The new segment is inserted just before the
+ * first segment that must come after it, or at the end if there is none.
+ */
+rlc_rx_pdu_segment_t *rlc_rx_pdu_segment_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_rx_pdu_segment_t *list, rlc_rx_pdu_segment_t *pdu_segment)
+{
+  rlc_rx_pdu_segment_t head;
+  rlc_rx_pdu_segment_t *cur;
+  rlc_rx_pdu_segment_t *prev;
+
+  head.next = list;
+  cur = list;
+  prev = &head;
+
+  /* order is by 'sn', if 'sn' is the same then order is by 'so' */
+  while (cur != NULL) {
+    /* check if 'pdu_segment' is before 'cur' in the list */
+    if (sn_compare(sn_compare_data, cur->sn, pdu_segment->sn) > 0 ||
+        (cur->sn == pdu_segment->sn && cur->so > pdu_segment->so)) {
+      break;
+    }
+    prev = cur;
+    cur = cur->next;
+  }
+  prev->next = pdu_segment;
+  pdu_segment->next = cur;
+  return head.next;
+}
+
+/**************************************************************************/
+/* TX PDU management */
+/**************************************************************************/
+
+/* Allocate a zero-initialized TX PDU with 'retx_count' set to -1.
+ * The process exits if memory cannot be allocated.
+ */
+rlc_tx_pdu_segment_t *rlc_tx_new_pdu(void)
+{
+  rlc_tx_pdu_segment_t *ret = calloc(1, sizeof(rlc_tx_pdu_segment_t));
+  if (ret == NULL) {
+    LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+  ret->retx_count = -1;
+  return ret;
+}
+
+/* Release a TX PDU (only the structure itself is freed). */
+void rlc_tx_free_pdu(rlc_tx_pdu_segment_t *pdu)
+{
+  free(pdu);
+}
+
+/* Append 'pdu' at the end of 'list' (which may be NULL) and return the head
+ * of the resulting list. 'pdu->next' is not modified.
+ */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_append(rlc_tx_pdu_segment_t *list,
+    rlc_tx_pdu_segment_t *pdu)
+{
+  rlc_tx_pdu_segment_t head;
+  rlc_tx_pdu_segment_t *cur;
+
+  head.next = list;
+
+  cur = &head;
+  while (cur->next != NULL) {
+    cur = cur->next;
+  }
+  cur->next = pdu;
+
+  return head.next;
+}
+
+/* TX counterpart of rlc_rx_pdu_segment_list_add(): sorted insertion by 'sn'
+ * (compared with 'sn_compare'), then by 'so'. Returns the new head.
+ */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_tx_pdu_segment_t *list, rlc_tx_pdu_segment_t *pdu_segment)
+{
+  rlc_tx_pdu_segment_t head;
+  rlc_tx_pdu_segment_t *cur;
+  rlc_tx_pdu_segment_t *prev;
+
+  head.next = list;
+  cur = list;
+  prev = &head;
+
+  /* order is by 'sn', if 'sn' is the same then order is by 'so' */
+  while (cur != NULL) {
+    /* check if 'pdu_segment' is before 'cur' in the list */
+    if (sn_compare(sn_compare_data, cur->sn, pdu_segment->sn) > 0 ||
+        (cur->sn == pdu_segment->sn && cur->so > pdu_segment->so)) {
+      break;
+    }
+    prev = cur;
+    cur = cur->next;
+  }
+  prev->next = pdu_segment;
+  pdu_segment->next = cur;
+  return head.next;
+}
+
+/**************************************************************************/
+/* PDU decoder */
+/**************************************************************************/
+
+/* Make 'decoder' read bits from the first 'size' bytes of 'buffer'. */
+void rlc_pdu_decoder_init(rlc_pdu_decoder_t *decoder, char *buffer, int size)
+{
+  decoder->error = 0;
+  decoder->byte = 0;
+  decoder->bit = 0;
+  decoder->buffer = buffer;
+  decoder->size = size;
+}
+
+/* Return the next bit, most significant bit of each byte first.
+ * Past the end of the buffer the error flag is set and 0 is returned.
+ */
+static int get_bit(rlc_pdu_decoder_t *decoder)
+{
+  int ret;
+
+  if (decoder->byte >= decoder->size) {
+    decoder->error = 1;
+    return 0;
+  }
+
+  /* read the byte as unsigned so that the shift never sees a negative value */
+  ret = ((unsigned char)decoder->buffer[decoder->byte] >> (7 - decoder->bit)) & 1;
+
+  decoder->bit++;
+  if (decoder->bit == 8) {
+    decoder->bit = 0;
+    decoder->byte++;
+  }
+
+  return ret;
+}
+
+/* Read 'count' bits (at most 31, more is fatal) and return them as an
+ * integer, the first bit read being the most significant one.
+ * Return -1 if the end of the buffer is reached (the decoder is then in error).
+ */
+int rlc_pdu_decoder_get_bits(rlc_pdu_decoder_t *decoder, int count)
+{
+  int ret = 0;
+  int i;
+
+  if (count > 31) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  for (i = 0; i < count; i++) {
+    ret <<= 1;
+    ret |= get_bit(decoder);
+    if (decoder->error) return -1;
+  }
+
+  return ret;
+}
+
+/* Skip the remaining bits of the current byte, if it is partially read. */
+void rlc_pdu_decoder_align(rlc_pdu_decoder_t *decoder)
+{
+  if (decoder->bit) {
+    decoder->bit = 0;
+    decoder->byte++;
+  }
+}
+
+/**************************************************************************/
+/* PDU encoder */
+/**************************************************************************/
+
+/* Make 'encoder' write bits to 'buffer', which can hold 'size' bytes. */
+void rlc_pdu_encoder_init(rlc_pdu_encoder_t *encoder, char *buffer, int size)
+{
+  encoder->byte = 0;
+  encoder->bit = 0;
+  encoder->buffer = buffer;
+  encoder->size = size;
+}
+
+/* Append one bit (any non-zero 'bit' is a 1) to the current byte.
+ * Writing past the end of the buffer is fatal.
+ */
+static void put_bit(rlc_pdu_encoder_t *encoder, int bit)
+{
+  unsigned char *cur;
+
+  if (encoder->byte >= encoder->size) {
+    LOG_E(RLC, "%s:%d:%s: fatal, buffer full\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* shift in an unsigned byte: 'char' may be signed and left shifting a
+   * negative value is undefined behavior
+   */
+  cur = (unsigned char *)&encoder->buffer[encoder->byte];
+  *cur = (unsigned char)((*cur << 1) | (bit ? 1 : 0));
+
+  encoder->bit++;
+  if (encoder->bit == 8) {
+    encoder->bit = 0;
+    encoder->byte++;
+  }
+}
+
+/* Write the 'count' (at most 31, more is fatal) low bits of 'value', most
+ * significant one first. Nothing is written if 'count' is 0 or negative.
+ */
+void rlc_pdu_encoder_put_bits(rlc_pdu_encoder_t *encoder, int value, int count)
+{
+  int i;
+  int x;
+
+  if (count > 31) {
+    LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__);
+    exit(1);
+  }
+
+  /* nothing to write; '1 << (count - 1)' below would be undefined */
+  if (count <= 0)
+    return;
+
+  x = 1 << (count - 1);
+  for (i = 0; i < count; i++, x >>= 1)
+    put_bit(encoder, value & x);
+}
+
+/* Pad with 0 bits up to the next byte boundary. */
+void rlc_pdu_encoder_align(rlc_pdu_encoder_t *encoder)
+{
+  while (encoder->bit)
+    put_bit(encoder, 0);
+}
diff --git a/openair2/LAYER2/rlc_v2/rlc_pdu.h b/openair2/LAYER2/rlc_v2/rlc_pdu.h new file mode 100644 index 
0000000000000000000000000000000000000000..dbffe9f3cbff92fe706985af5bfcc5156b2f52b9 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_pdu.h @@ -0,0 +1,109 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *-------------------------------------------------------------------------------
+ * For more information about the OpenAirInterface (OAI) Software Alliance:
+ * contact@openairinterface.org
+ */
+
+#ifndef _RLC_PDU_H_
+#define _RLC_PDU_H_
+
+/**************************************************************************/
+/* RX PDU segment and segment list */
+/**************************************************************************/
+/* one received PDU segment, element of a singly linked list */
+typedef struct rlc_rx_pdu_segment_t {
+  int sn; /* first sort key used by rlc_rx_pdu_segment_list_add() */
+  int so; /* second sort key, used when 'sn' is equal */
+  int size; /* number of bytes copied into 'data' */
+  int is_last; /* stored as passed to rlc_rx_new_pdu_segment() */
+  char *data; /* heap copy made by rlc_rx_new_pdu_segment(), freed with the segment */
+  int data_offset; /* stored as passed to rlc_rx_new_pdu_segment() */
+  struct rlc_rx_pdu_segment_t *next; /* next element of the list, NULL at creation */
+} rlc_rx_pdu_segment_t;
+/* allocate a segment holding its own copy of 'size' bytes of 'data'; exits if allocation fails */
+rlc_rx_pdu_segment_t *rlc_rx_new_pdu_segment(int sn, int so, int size,
+    int is_last, char *data, int data_offset);
+/* free a segment together with its data buffer */
+void rlc_rx_free_pdu_segment(rlc_rx_pdu_segment_t *pdu_segment);
+/* sorted insertion: by 'sn' (sn_compare(sn_compare_data, a, b) > 0 means 'a' goes after 'b'), then by 'so'; returns the new head */
+rlc_rx_pdu_segment_t *rlc_rx_pdu_segment_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_rx_pdu_segment_t *list, rlc_rx_pdu_segment_t *pdu_segment);
+
+/**************************************************************************/
+/* TX PDU management */
+/**************************************************************************/
+/* one PDU (or PDU segment) on the transmit side, element of a singly linked list */
+typedef struct rlc_tx_pdu_segment_t {
+  int sn; /* first sort key used by rlc_tx_pdu_list_add() */
+  void *start_sdu; /* real type is rlc_sdu_t * */
+  int sdu_start_byte; /* starting byte in 'start_sdu' */
+  int so; /* starting byte of the segment in full PDU */
+  int data_size; /* number of data bytes (exclude header) */
+  int is_segment;
+  int is_last;
+  int retx_count; /* set to -1 by rlc_tx_new_pdu() */
+  struct rlc_tx_pdu_segment_t *next; /* next element of the list */
+} rlc_tx_pdu_segment_t;
+/* rlc_tx_new_pdu() returns a zeroed structure (except 'retx_count') and exits if allocation fails */
+rlc_tx_pdu_segment_t *rlc_tx_new_pdu(void);
+void rlc_tx_free_pdu(rlc_tx_pdu_segment_t *pdu); /* frees the structure only */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_append(rlc_tx_pdu_segment_t *list,
+    rlc_tx_pdu_segment_t *pdu); /* adds 'pdu' at the end, returns the head */
+rlc_tx_pdu_segment_t *rlc_tx_pdu_list_add(
+    int (*sn_compare)(void *, int, int), void *sn_compare_data,
+    rlc_tx_pdu_segment_t *list, rlc_tx_pdu_segment_t *pdu_segment); /* sorted insertion by 'sn' then 'so', returns the head */
+
+/**************************************************************************/ +/* PDU decoder */ +/**************************************************************************/ + +typedef struct { + int error; + int byte; /* next byte to decode */ + int bit; /* next bit in next byte to decode */ + char *buffer; + int size; +} rlc_pdu_decoder_t; + +void rlc_pdu_decoder_init(rlc_pdu_decoder_t *decoder, char *buffer, int size); + +#define rlc_pdu_decoder_in_error(d) ((d)->error == 1) + +int rlc_pdu_decoder_get_bits(rlc_pdu_decoder_t *decoder, int count); + +void rlc_pdu_decoder_align(rlc_pdu_decoder_t *decoder); + +/**************************************************************************/ +/* PDU encoder */ +/**************************************************************************/ + +typedef struct { + int byte; /* next byte to encode */ + int bit; /* next bit in next byte to encode */ + char *buffer; + int size; +} rlc_pdu_encoder_t; + +void rlc_pdu_encoder_init(rlc_pdu_encoder_t *encoder, char *buffer, int size); + +void rlc_pdu_encoder_put_bits(rlc_pdu_encoder_t *encoder, int value, int count); + +void rlc_pdu_encoder_align(rlc_pdu_encoder_t *encoder); + +#endif /* _RLC_PDU_H_ */ diff --git a/openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c b/openair2/LAYER2/rlc_v2/rlc_sdu.c similarity index 56% rename from openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c rename to openair2/LAYER2/rlc_v2/rlc_sdu.c index d26a52f157c0e051cbda07c296135c18b9e38247..16465a9ff13bede7314c4ee0c9eef757242944c5 100644 --- a/openair1/PHY/NR_TRANSPORT/nr_dci_tools_common.c +++ b/openair2/LAYER2/rlc_v2/rlc_sdu.c @@ -19,44 +19,50 @@ * contact@openairinterface.org */ -/*! 
\file PHY/NR_TRANSPORT/nr_dci_tools_common.c - * \brief - * \author - * \date 2018 - * \version 0.1 - * \company Eurecom - * \email: - * \note - * \warning - */ +#include "rlc_sdu.h" + +#include <stdlib.h> +#include <string.h> -#include "nr_dci.h" +#include "LOG/log.h" -//#define DEBUG_FILL_DCI +rlc_sdu_t *rlc_new_sdu(char *buffer, int size, int upper_layer_id) +{ + rlc_sdu_t *ret = calloc(1, sizeof(rlc_sdu_t)); + if (ret == NULL) + goto oom; -#include "nr_dlsch.h" + ret->upper_layer_id = upper_layer_id; + ret->data = malloc(size); + if (ret->data == NULL) + goto oom; -void get_coreset_rballoc(uint8_t *FreqDomainResource,int *n_rb,int *rb_offset) { + memcpy(ret->data, buffer, size); - uint8_t count=0, start=0, start_set=0; + ret->size = size; - uint64_t bitmap = (((uint64_t)FreqDomainResource[0])<<37)| - (((uint64_t)FreqDomainResource[1])<<29)| - (((uint64_t)FreqDomainResource[2])<<21)| - (((uint64_t)FreqDomainResource[3])<<13)| - (((uint64_t)FreqDomainResource[4])<<5)| - (((uint64_t)FreqDomainResource[5])>>3); - - for (int i=0; i<45; i++) - if ((bitmap>>(44-i))&1) { - count++; - if (!start_set) { - start = i; - start_set = 1; - } - } - *rb_offset = 6*start; - *n_rb = 6*count; + return ret; + +oom: + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); } +void rlc_free_sdu(rlc_sdu_t *sdu) +{ + free(sdu->data); + free(sdu); +} + +void rlc_sdu_list_add(rlc_sdu_t **list, rlc_sdu_t **end, rlc_sdu_t *sdu) +{ + if (*list == NULL) { + *list = sdu; + *end = sdu; + return; + } + + (*end)->next = sdu; + *end = sdu; +} diff --git a/openair2/LAYER2/rlc_v2/rlc_sdu.h b/openair2/LAYER2/rlc_v2/rlc_sdu.h new file mode 100644 index 0000000000000000000000000000000000000000..2c678956ee47a1286db4a2838a1ac96cc1129e72 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_sdu.h @@ -0,0 +1,39 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#ifndef _RLC_SDU_H_ +#define _RLC_SDU_H_ + +typedef struct rlc_sdu_t { + int upper_layer_id; + char *data; + int size; + /* next_byte indicates the starting byte to use to construct a new PDU */ + int next_byte; + int acked_bytes; + struct rlc_sdu_t *next; +} rlc_sdu_t; + +rlc_sdu_t *rlc_new_sdu(char *buffer, int size, int upper_layer_id); +void rlc_free_sdu(rlc_sdu_t *sdu); +void rlc_sdu_list_add(rlc_sdu_t **list, rlc_sdu_t **end, rlc_sdu_t *sdu); + +#endif /* _RLC_SDU_H_ */ diff --git a/openair2/LAYER2/rlc_v2/rlc_ue_manager.c b/openair2/LAYER2/rlc_v2/rlc_ue_manager.c new file mode 100644 index 0000000000000000000000000000000000000000..1fd366fda1523a73c35aa91d9127ea66e51c9ffd --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_ue_manager.c @@ -0,0 +1,190 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#include "rlc_ue_manager.h" + +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#include "LOG/log.h" + +typedef struct { + pthread_mutex_t lock; + rlc_ue_t **ue_list; + int ue_count; + int enb_flag; +} rlc_ue_manager_internal_t; + +rlc_ue_manager_t *new_rlc_ue_manager(int enb_flag) +{ + rlc_ue_manager_internal_t *ret; + + ret = calloc(1, sizeof(rlc_ue_manager_internal_t)); + if (ret == NULL) { + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + if (pthread_mutex_init(&ret->lock, NULL)) abort(); + ret->enb_flag = enb_flag; + + return ret; +} + +int rlc_manager_get_enb_flag(rlc_ue_manager_t *_m) +{ + rlc_ue_manager_internal_t *m = _m; + return m->enb_flag; +} + +void rlc_manager_lock(rlc_ue_manager_t *_m) +{ + rlc_ue_manager_internal_t *m = _m; + if (pthread_mutex_lock(&m->lock)) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } +} + +void rlc_manager_unlock(rlc_ue_manager_t *_m) +{ + rlc_ue_manager_internal_t *m = _m; + if (pthread_mutex_unlock(&m->lock)) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } +} + +/* must be 
called with lock acquired */ +rlc_ue_t *rlc_manager_get_ue(rlc_ue_manager_t *_m, int rnti) +{ + /* TODO: optimize */ + rlc_ue_manager_internal_t *m = _m; + int i; + + for (i = 0; i < m->ue_count; i++) + if (m->ue_list[i]->rnti == rnti) + return m->ue_list[i]; + + LOG_D(RLC, "%s:%d:%s: new UE %d\n", __FILE__, __LINE__, __FUNCTION__, rnti); + + m->ue_count++; + m->ue_list = realloc(m->ue_list, sizeof(rlc_ue_t *) * m->ue_count); + if (m->ue_list == NULL) { + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + m->ue_list[m->ue_count-1] = calloc(1, sizeof(rlc_ue_t)); + if (m->ue_list[m->ue_count-1] == NULL) { + LOG_E(RLC, "%s:%d:%s: out of memory\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + m->ue_list[m->ue_count-1]->rnti = rnti; + + return m->ue_list[m->ue_count-1]; +} + +/* must be called with lock acquired */ +void rlc_manager_remove_ue(rlc_ue_manager_t *_m, int rnti) +{ + rlc_ue_manager_internal_t *m = _m; + rlc_ue_t *ue; + int i; + int j; + + for (i = 0; i < m->ue_count; i++) + if (m->ue_list[i]->rnti == rnti) + break; + + if (i == m->ue_count) { + LOG_D(RLC, "%s:%d:%s: warning: ue %d not found\n", + __FILE__, __LINE__, __FUNCTION__, + rnti); + return; + } + + ue = m->ue_list[i]; + + for (j = 0; j < 2; j++) + if (ue->srb[j] != NULL) + ue->srb[j]->delete(ue->srb[j]); + + for (j = 0; j < 5; j++) + if (ue->drb[j] != NULL) + ue->drb[j]->delete(ue->drb[j]); + + free(ue); + + m->ue_count--; + if (m->ue_count == 0) { + free(m->ue_list); + m->ue_list = NULL; + return; + } + + memmove(&m->ue_list[i], &m->ue_list[i+1], + (m->ue_count - i) * sizeof(rlc_ue_t *)); + m->ue_list = realloc(m->ue_list, m->ue_count * sizeof(rlc_ue_t *)); + if (m->ue_list == NULL) { + LOG_E(RLC, "%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } +} + +/* must be called with lock acquired */ +void rlc_ue_add_srb_rlc_entity(rlc_ue_t *ue, int srb_id, rlc_entity_t *entity) +{ + if (srb_id < 1 || srb_id > 2) { + LOG_E(RLC, "%s:%d:%s: 
fatal, bad srb id\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + srb_id--; + + if (ue->srb[srb_id] != NULL) { + LOG_E(RLC, "%s:%d:%s: fatal, srb already present\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + ue->srb[srb_id] = entity; +} + +/* must be called with lock acquired */ +void rlc_ue_add_drb_rlc_entity(rlc_ue_t *ue, int drb_id, rlc_entity_t *entity) +{ + if (drb_id < 1 || drb_id > 5) { + LOG_E(RLC, "%s:%d:%s: fatal, bad drb id\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + drb_id--; + + if (ue->drb[drb_id] != NULL) { + LOG_E(RLC, "%s:%d:%s: fatal, drb already present\n", + __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + ue->drb[drb_id] = entity; +} diff --git a/openair2/LAYER2/rlc_v2/rlc_ue_manager.h b/openair2/LAYER2/rlc_v2/rlc_ue_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..81bb0d0fa1ef767a5d2721872e945738fcf2ff40 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/rlc_ue_manager.h @@ -0,0 +1,57 @@ +/* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +#ifndef _RLC_UE_MANAGER_H_ +#define _RLC_UE_MANAGER_H_ + +#include "rlc_entity.h" + +typedef void rlc_ue_manager_t; + +typedef struct rlc_ue_t { + int rnti; + int module_id; /* necessary for the L2 simulator - not clean, to revise */ + rlc_entity_t *srb[2]; + rlc_entity_t *drb[5]; +} rlc_ue_t; + +/***********************************************************************/ +/* manager functions */ +/***********************************************************************/ + +rlc_ue_manager_t *new_rlc_ue_manager(int enb_flag); + +int rlc_manager_get_enb_flag(rlc_ue_manager_t *m); + +void rlc_manager_lock(rlc_ue_manager_t *m); +void rlc_manager_unlock(rlc_ue_manager_t *m); + +rlc_ue_t *rlc_manager_get_ue(rlc_ue_manager_t *m, int rnti); +void rlc_manager_remove_ue(rlc_ue_manager_t *m, int rnti); + +/***********************************************************************/ +/* ue functions */ +/***********************************************************************/ + +void rlc_ue_add_srb_rlc_entity(rlc_ue_t *ue, int srb_id, rlc_entity_t *entity); +void rlc_ue_add_drb_rlc_entity(rlc_ue_t *ue, int drb_id, rlc_entity_t *entity); + +#endif /* _RLC_UE_MANAGER_H_ */ diff --git a/openair2/LAYER2/rlc_v2/tests/LOG/log.h b/openair2/LAYER2/rlc_v2/tests/LOG/log.h new file mode 100644 index 0000000000000000000000000000000000000000..5c9fcd643cfca036cc81eca221f4a5e818aee685 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/LOG/log.h @@ -0,0 +1,10 @@ +#ifndef _LOG_H_ +#define _LOG_H_ + +#include <stdio.h> + +#define LOG_E(x, ...) printf(__VA_ARGS__) +#define LOG_D(x, ...) printf(__VA_ARGS__) +#define LOG_W(x, ...) 
printf(__VA_ARGS__) + +#endif /* _LOG_H_ */ diff --git a/openair2/LAYER2/rlc_v2/tests/Makefile b/openair2/LAYER2/rlc_v2/tests/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..14bb186d4c2cf78d1405f1afa9ab218e7461b6e3 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/Makefile @@ -0,0 +1,32 @@ +CC=gcc +CFLAGS=-Wall -g --coverage -I. + +LIB=rlc_entity.o rlc_entity_am.o rlc_entity_um.o rlc_pdu.o rlc_sdu.o + +tests: + @./run_tests.sh + +all: clean_run $(TEST).run + +%.run: $(TEST).bin + #valgrind ./$(TEST).bin > $(TEST).run_pre 2> $(TEST).valgrind + ./$(TEST).bin > $(TEST).run_pre + grep ^TEST $(TEST).run_pre > $(TEST).run + gunzip -c $(TEST).txt.gz > $(TEST).txt + diff -q $(TEST).txt $(TEST).run + +$(TEST).bin: $(TEST).o $(LIB) + $(CC) $(CFLAGS) -o $@ $^ + +%.o: ../%.c + $(CC) $(CFLAGS) -I.. -c -o $@ $< + +$(TEST).o: test.c + $(CC) $(CFLAGS) -c -o $@ $< -DTEST='"$(TEST).h"' + +clean_run: + rm -f $(TEST).run $(TEST).bin $(TEST).o + +clean: + rm -f *.o *.bin *.run *.run_pre *.gcov *.gcda *.gcno test*.txt a.out \ + *.valgrind diff --git a/openair2/LAYER2/rlc_v2/tests/README b/openair2/LAYER2/rlc_v2/tests/README new file mode 100644 index 0000000000000000000000000000000000000000..db69cd4fa716be83bafe0422c601c7037268f2b4 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/README @@ -0,0 +1,38 @@ +To run tests, simply type: make + +Each test is made of: + testXX.h definition of the test + testXX.txt.gz compressed expected output of the test + +At runtime, we generate: + testXX.run actual output of the test + +test.c is the test driving program. + +Only the output lines of the test program starting with "TEST" are +stored into testXX.txt and testXX.run. + +A test is considered a success if testXX.txt and testXX.run are equal. + +Only failed tests are reported. + +How to define a new test? + +1 - get the ID + + Look in the file run_tests.sh, the variable test_count gives you + the number of existing tests. The ID of your test has to be + test_count + 1. 
+ +2 - create files + + Create the file test<ID>.h containing the test, then generate test<ID>.run + by running 'make all TEST=test<ID>' and copy test<ID>.run to test<ID>.txt. + Then compress this file (gzip -9 test<ID>.txt). Be sure that the output + is correct, of course. + + For the file names, replace <ID> by the actual number of the test. + For example, if your test ID is 47, then name the files test47.h and + test47.txt. And run 'make all TEST=test47' to generate test47.run. + +The available instructions for a test are described at the top of test.c. diff --git a/openair2/LAYER2/rlc_v2/tests/make_pdu.c b/openair2/LAYER2/rlc_v2/tests/make_pdu.c new file mode 100644 index 0000000000000000000000000000000000000000..057cc3e36db2e06958969d9d79dc474ea9a9b7bf --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/make_pdu.c @@ -0,0 +1,29 @@ +/* gcc -Wall make_pdu.c -I.. ../rlc_pdu.c */ + +#include "rlc_pdu.h" +#include <stdio.h> + +int main(void) +{ + char out[100]; + rlc_pdu_encoder_t e; + int i; + + rlc_pdu_encoder_init(&e, out, 100); + + rlc_pdu_encoder_put_bits(&e, 0, 1); // D/C + rlc_pdu_encoder_put_bits(&e, 0, 3); // CPT + rlc_pdu_encoder_put_bits(&e, 0, 10); // ack_sn + rlc_pdu_encoder_put_bits(&e, 1, 1); // e1 + rlc_pdu_encoder_put_bits(&e, 10, 10); // nack_sn + rlc_pdu_encoder_put_bits(&e, 0, 1); // e1 + rlc_pdu_encoder_put_bits(&e, 0, 1); // e2 + + rlc_pdu_encoder_align(&e); + + for (i = 0; i < e.byte; i++) printf(" %2.2x", (unsigned char)e.buffer[i]); + + printf("\n"); + + return 0; +} diff --git a/openair2/LAYER2/rlc_v2/tests/run_tests.sh b/openair2/LAYER2/rlc_v2/tests/run_tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..72feff00363bf3e917a112b2cbbe76bd2b38dec9 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/run_tests.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +test_count=45 + +for i in `seq $test_count` +do + make all TEST=test$i >/dev/null 2>/dev/null + if [ $? 
!= 0 ] + then + echo TEST $i FAILURE + fi +done diff --git a/openair2/LAYER2/rlc_v2/tests/test.c b/openair2/LAYER2/rlc_v2/tests/test.c new file mode 100644 index 0000000000000000000000000000000000000000..734e85f1f56cc38abe6226d6e6865aadf0522d03 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test.c @@ -0,0 +1,433 @@ +#include "../rlc_entity.h" +#include "../rlc_entity_am.h" +#include "../rlc_entity_um.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <sys/wait.h> +#include <unistd.h> + +/* + * ENB_AM <rx_maxsize> <tx_maxsize> <t_reordering> <t_status_prohibit> + * <t_poll_retransmit> <poll_pdu> <poll_byte> <max_retx_threshold> + * create the eNB RLC AM entity with given parameters + * + * UE_AM <rx_maxsize> <tx_maxsize> <t_reordering> <t_status_prohibit> + * <t_poll_retransmit> <poll_pdu> <poll_byte> <max_retx_threshold> + * create the UE RLC AM entity with given parameters + * + * ENB_UM <rx_maxsize> <tx_maxsize> <t_reordering> <sn_field_length> + * create the eNB RLC UM entity with given parameters + * + * UE_UM <rx_maxsize> <tx_maxsize> <t_reordering> <sn_field_length> + * create the UE RLC UM entity with given parameters + * + * TIME <time> + * following actions to be performed at time <time> + * <time> starts at 1 + * You must end your test definition with a line 'TIME, -1'. + * + * ENB_SDU <id> <size> + * send an SDU to eNB with id <id> and size <size> + * the SDU is [00 01 ... ff 00 01 ...] + * (ie. 
start byte is 00 then we increment for each byte, loop if needed) + * + * UE_SDU <id> <size> + * same as ENB_SDU but the SDU is sent to the UE + * + * ENB_PDU <size> <'size' bytes> + * send a custom PDU from eNB to UE (eNB does not see this PDU at all) + * + * UE_PDU <size> <'size' bytes> + * send a custom PDU from UE to eNB (UE does not see this PDU at all) + * + * ENB_PDU_SIZE <size> + * set 'enb_pdu_size' + * + * UE_PDU_SIZE <size> + * set 'ue_pdu_size' + * + * ENB_RECV_FAILS <fails> + * set the 'enb_recv_fails' flag to <fails> + * (1: recv will fail, 0: recv will succeed) + * + * UE_RECV_FAILS <fails> + * same as ENB_RECV_FAILS but for 'ue_recv_fails' + * + * MUST_FAIL + * to be used as first command after the first TIME to indicate + * that the test must fail (ie. exit with non zero, crash not allowed) + * + * ENB_BUFFER_STATUS + * call buffer_status for eNB and print result + * + * UE_BUFFER_STATUS + * call buffer_status for UE and print result + * + * ENB_DISCARD_SDU <sdu ID> + * discards given SDU + * + * UE_DISCARD_SDU <sdu ID> + * discards given SDU + * + * RE_ESTABLISH + * re-establish both eNB and UE + */ + +enum action { + ENB_AM, UE_AM, + ENB_UM, UE_UM, + TIME, ENB_SDU, UE_SDU, ENB_PDU, UE_PDU, + ENB_PDU_SIZE, UE_PDU_SIZE, + ENB_RECV_FAILS, UE_RECV_FAILS, + MUST_FAIL, + ENB_BUFFER_STATUS, UE_BUFFER_STATUS, + ENB_DISCARD_SDU, UE_DISCARD_SDU, + RE_ESTABLISH +}; + +int test[] = { +/* TEST is defined at compilation time */ +#include TEST +}; + +void deliver_sdu_enb_am(void *deliver_sdu_data, struct rlc_entity_t *_entity, + char *buf, int size) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: ENB: %"PRIu64": deliver SDU size %d [", + entity->t_current, size); + for (int i = 0; i < size; i++) printf(" %2.2x", (unsigned char)buf[i]); + printf("]\n"); +} + +void deliver_sdu_enb_um(void *deliver_sdu_data, struct rlc_entity_t *_entity, + char *buf, int size) +{ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + printf("TEST: 
ENB: %"PRIu64": deliver SDU size %d [", + entity->t_current, size); + for (int i = 0; i < size; i++) printf(" %2.2x", (unsigned char)buf[i]); + printf("]\n"); +} + +void successful_delivery_enb(void *successful_delivery_data, + rlc_entity_t *_entity, int sdu_id) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: ENB: %"PRIu64": SDU %d was successfully delivered.\n", + entity->t_current, sdu_id); +} + +void max_retx_reached_enb(void *max_retx_reached_data, rlc_entity_t *_entity) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: ENB: %"PRIu64": max RETX reached! radio link failure!\n", + entity->t_current); + exit(1); +} + +void deliver_sdu_ue_am(void *deliver_sdu_data, struct rlc_entity_t *_entity, + char *buf, int size) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: UE: %"PRIu64": deliver SDU size %d [", + entity->t_current, size); + for (int i = 0; i < size; i++) printf(" %2.2x", (unsigned char)buf[i]); + printf("]\n"); +} + +void deliver_sdu_ue_um(void *deliver_sdu_data, struct rlc_entity_t *_entity, + char *buf, int size) +{ + rlc_entity_um_t *entity = (rlc_entity_um_t *)_entity; + printf("TEST: UE: %"PRIu64": deliver SDU size %d [", + entity->t_current, size); + for (int i = 0; i < size; i++) printf(" %2.2x", (unsigned char)buf[i]); + printf("]\n"); +} + +void successful_delivery_ue(void *successful_delivery_data, + rlc_entity_t *_entity, int sdu_id) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: UE: %"PRIu64": SDU %d was successfully delivered.\n", + entity->t_current, sdu_id); +} + +void max_retx_reached_ue(void *max_retx_reached_data, rlc_entity_t *_entity) +{ + rlc_entity_am_t *entity = (rlc_entity_am_t *)_entity; + printf("TEST: UE: %"PRIu64", max RETX reached! 
radio link failure!\n", + entity->t_current); + exit(1); +} + +int test_main(void) +{ + rlc_entity_t *enb = NULL; + rlc_entity_t *ue = NULL; + int i; + int k; + char *sdu; + char *pdu; + rlc_entity_buffer_status_t buffer_status; + int enb_do_buffer_status = 0; + int ue_do_buffer_status = 0; + int size; + int pos; + int next_byte_enb = 0; + int next_byte_ue = 0; + int enb_recv_fails = 0; + int ue_recv_fails = 0; + int enb_pdu_size = 1000; + int ue_pdu_size = 1000; + + sdu = malloc(16001); + pdu = malloc(3000); + if (sdu == NULL || pdu == NULL) { + printf("out of memory\n"); + exit(1); + } + + for (i = 0; i < 16001; i++) + sdu[i] = i & 255; + + pos = 0; + if (test[pos] != TIME) { + printf("%s:%d:%s: fatal\n", __FILE__, __LINE__, __FUNCTION__); + exit(1); + } + + for (i = 1; i < 1000; i++) { + if (i == test[pos+1]) { + pos += 2; + while (test[pos] != TIME) + switch (test[pos]) { + default: printf("fatal: unknown action\n"); exit(1); + case ENB_AM: + enb = new_rlc_entity_am(test[pos+1], test[pos+2], + deliver_sdu_enb_am, NULL, + successful_delivery_enb, NULL, + max_retx_reached_enb, NULL, + test[pos+3], test[pos+4], test[pos+5], + test[pos+6], test[pos+7], test[pos+8]); + pos += 9; + break; + case UE_AM: + ue = new_rlc_entity_am(test[pos+1], test[pos+2], + deliver_sdu_ue_am, NULL, + successful_delivery_ue, NULL, + max_retx_reached_ue, NULL, + test[pos+3], test[pos+4], test[pos+5], + test[pos+6], test[pos+7], test[pos+8]); + pos += 9; + break; + case ENB_UM: + enb = new_rlc_entity_um(test[pos+1], test[pos+2], + deliver_sdu_enb_um, NULL, + test[pos+3], test[pos+4]); + pos += 5; + break; + case UE_UM: + ue = new_rlc_entity_um(test[pos+1], test[pos+2], + deliver_sdu_ue_um, NULL, + test[pos+3], test[pos+4]); + pos += 5; + break; + case ENB_SDU: + for (k = 0; k < test[pos+2]; k++, next_byte_enb++) + sdu[k] = next_byte_enb; + printf("TEST: ENB: %d: recv_sdu (id %d): size %d: [", + i, test[pos+1], test[pos+2]); + for (k = 0; k < test[pos+2]; k++) + printf(" %2.2x", (unsigned 
char)sdu[k]); + printf("]\n"); + enb->recv_sdu(enb, sdu, test[pos+2], test[pos+1]); + pos += 3; + break; + case UE_SDU: + for (k = 0; k < test[pos+2]; k++, next_byte_ue++) + sdu[k] = next_byte_ue; + printf("TEST: UE: %d: recv_sdu (id %d): size %d: [", + i, test[pos+1], test[pos+2]); + for (k = 0; k < test[pos+2]; k++) + printf(" %2.2x", (unsigned char)sdu[k]); + printf("]\n"); + ue->recv_sdu(ue, sdu, test[pos+2], test[pos+1]); + pos += 3; + break; + case ENB_PDU: + for (k = 0; k < test[pos+1]; k++) + pdu[k] = test[pos+2+k]; + printf("TEST: ENB: %d: custom PDU: size %d: [", i, test[pos+1]); + for (k = 0; k < test[pos+1]; k++) printf(" %2.2x", (unsigned char)pdu[k]); + printf("]\n"); + if (!ue_recv_fails) + ue->recv_pdu(ue, pdu, test[pos+1]); + pos += 2 + test[pos+1]; + break; + case UE_PDU: + for (k = 0; k < test[pos+1]; k++) + pdu[k] = test[pos+2+k]; + printf("TEST: UE: %d: custom PDU: size %d: [", i, test[pos+1]); + for (k = 0; k < test[pos+1]; k++) printf(" %2.2x", (unsigned char)pdu[k]); + printf("]\n"); + if (!enb_recv_fails) + enb->recv_pdu(enb, pdu, test[pos+1]); + pos += 2 + test[pos+1]; + break; + case ENB_PDU_SIZE: + enb_pdu_size = test[pos+1]; + pos += 2; + break; + case UE_PDU_SIZE: + ue_pdu_size = test[pos+1]; + pos += 2; + break; + case ENB_RECV_FAILS: + enb_recv_fails = test[pos+1]; + pos += 2; + break; + case UE_RECV_FAILS: + ue_recv_fails = test[pos+1]; + pos += 2; + break; + case MUST_FAIL: + /* do nothing, only used by caller */ + pos++; + break; + case ENB_BUFFER_STATUS: + enb_do_buffer_status = 1; + pos++; + break; + case UE_BUFFER_STATUS: + ue_do_buffer_status = 1; + pos++; + break; + case ENB_DISCARD_SDU: + printf("TEST: ENB: %d: discard SDU %d\n", i, test[pos+1]); + enb->discard_sdu(enb, test[pos+1]); + pos += 2; + break; + case UE_DISCARD_SDU: + printf("TEST: UE: %d: discard SDU %d\n", i, test[pos+1]); + ue->discard_sdu(ue, test[pos+1]); + pos += 2; + break; + case RE_ESTABLISH: + printf("TEST: %d: re-establish eNB and UE\n", i); + 
enb->reestablishment(enb); + ue->reestablishment(ue); + pos++; + break; + } + } + + enb->set_time(enb, i); + ue->set_time(ue, i); + + if (enb_do_buffer_status) { + enb_do_buffer_status = 0; + buffer_status = enb->buffer_status(enb, enb_pdu_size); + printf("TEST: ENB: %d: buffer_status: status_size %d tx_size %d retx_size %d\n", + i, + buffer_status.status_size, + buffer_status.tx_size, + buffer_status.retx_size); + } + + size = enb->generate_pdu(enb, pdu, enb_pdu_size); + if (size) { + printf("TEST: ENB: %d: generate_pdu: size %d: [", i, size); + for (k = 0; k < size; k++) printf(" %2.2x", (unsigned char)pdu[k]); + printf("]\n"); + if (!ue_recv_fails) + ue->recv_pdu(ue, pdu, size); + } + + if (ue_do_buffer_status) { + ue_do_buffer_status = 0; + buffer_status = ue->buffer_status(ue, ue_pdu_size); + printf("TEST: UE: %d: buffer_status: status_size %d tx_size %d retx_size %d\n", + i, + buffer_status.status_size, + buffer_status.tx_size, + buffer_status.retx_size); + } + + size = ue->generate_pdu(ue, pdu, ue_pdu_size); + if (size) { + printf("TEST: UE: %d: generate_pdu: size %d: [", i, size); + for (k = 0; k < size; k++) printf(" %2.2x", (unsigned char)pdu[k]); + printf("]\n"); + if (!enb_recv_fails) + enb->recv_pdu(enb, pdu, size); + } + } + + enb->delete(enb); + ue->delete(ue); + + free(sdu); + free(pdu); + + return 0; +} + +void usage(void) +{ + printf("options:\n"); + printf(" -no-fork\n"); + printf(" don't fork (to ease debugging with gdb)\n"); + exit(0); +} + +int main(int n, char **v) +{ + int must_fail = 0; + int son; + int status; + int i; + int no_fork = 0; + + for (i = 1; i < n; i++) { + if (!strcmp(v[i], "-no-fork")) { no_fork = 1; continue; } + usage(); + } + + if (test[2] == MUST_FAIL) + must_fail = 1; + + if (no_fork) return test_main(); + + son = fork(); + if (son == -1) { + perror("fork"); + return 1; + } + + if (son == 0) + return test_main(); + + if (wait(&status) == -1) { + perror("wait"); + return 1; + } + + /* child must quit properly */ + if 
(!WIFEXITED(status)) + return 1; + + /* child must fail if expected to */ + if (must_fail && WEXITSTATUS(status) == 0) + return 1; + + /* child must not fail if not expected to */ + if (!must_fail && WEXITSTATUS(status)) + return 1; + + return 0; +} diff --git a/openair2/LAYER2/rlc_v2/tests/test1.h b/openair2/LAYER2/rlc_v2/tests/test1.h new file mode 100644 index 0000000000000000000000000000000000000000..c7744da55c28ed7012cadc71d698777351843b7f --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test1.h @@ -0,0 +1,14 @@ +/* + * basic am test: + * at time 1, eNB receives an SDU of 10 bytes + * at time 10, UE receives an SDU of 5 bytes + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + UE_BUFFER_STATUS, +TIME, 10, + UE_SDU, 0, 5, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test1.txt.gz b/openair2/LAYER2/rlc_v2/tests/test1.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..1c6661e9ea1c43c854ecf24cdac718215bbd1f22 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test1.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test10.h b/openair2/LAYER2/rlc_v2/tests/test10.h new file mode 100644 index 0000000000000000000000000000000000000000..c7aca15eb058d7371963f8f29f68e398fa7e1d0b --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test10.h @@ -0,0 +1,23 @@ +/* + * rlc am test resegmentation of PDU segment with several SDUs + * eNB sends 3 SDUs [1..10] [11.20] [21..30], not received + * eNB retx with smaller PDUs, not received + * eNB retx with still smaller PDUs, not received + * then reception on, all passes + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, +TIME, 2, + ENB_PDU_SIZE, 25, +TIME, 48, + ENB_PDU_SIZE, 15, +TIME, 100, + UE_RECV_FAILS, 0, + ENB_RECV_FAILS, 0, +TIME, -1 diff --git 
a/openair2/LAYER2/rlc_v2/tests/test10.txt.gz b/openair2/LAYER2/rlc_v2/tests/test10.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..68fd3fa0ba7ec7fad990101439087dcdf55693b4 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test10.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test11.h b/openair2/LAYER2/rlc_v2/tests/test11.h new file mode 100644 index 0000000000000000000000000000000000000000..5801689aea498b2b350967df97de389eaa3481c8 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test11.h @@ -0,0 +1,37 @@ +/* + * rlc am test function rlc_am_reassemble_next_segment + * in r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset) + * + r->sdu_len + * when case 'if (r->e)' is false + * eNB sends 3 SDUs [1..10] [11.20] [21..30], not received + * eNB retx with smaller PDUs, not received + * eNB retx with still smaller PDUs, not received + * then UE reception on + * then custom PDUs, first a small part of head of original PDU + * then a bigger part, covering the first part + * so that the beginning of this part triggers the 'while' + * then eNB reception on, all passes + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, +TIME, 2, + ENB_PDU_SIZE, 25, +TIME, 48, + ENB_PDU_SIZE, 15, +TIME, 95, + ENB_BUFFER_STATUS, +TIME, 99, + UE_RECV_FAILS, 0, + ENB_PDU, 14, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + ENB_PDU, 25, 0xec, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, +TIME, 100, + ENB_RECV_FAILS, 0, +TIME, 134, + UE_BUFFER_STATUS, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test11.txt.gz b/openair2/LAYER2/rlc_v2/tests/test11.txt.gz new file mode 100644 index 
0000000000000000000000000000000000000000..ea435a666025ab5d90ca2992b91dd94e1551a654 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test11.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test12.h b/openair2/LAYER2/rlc_v2/tests/test12.h new file mode 100644 index 0000000000000000000000000000000000000000..0387f0aa7f380b65224a4f71f97de78548bb5c59 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test12.h @@ -0,0 +1,34 @@ +/* + * rlc am test function rlc_am_reassemble_next_segment + * in r->pdu_byte >= r->so + (r->sdu_offset - r->start->data_offset) + * + r->sdu_len + * when case 'if (r->e)' is true + * eNB sends 4 SDUs [1..5] [6..10] [11..20] [21..30], not received + * eNB retx with smaller PDUs, not received + * eNB retx with still smaller PDUs, not received + * then UE reception on + * then custom PDUs, first a small part of head of original PDU + * then a bigger part, covering the first part + * so that the beginning of this part triggers the 'while' + * then eNB reception on, all passes + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 5, + ENB_SDU, 1, 5, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, +TIME, 2, + ENB_PDU_SIZE, 25, +TIME, 48, + ENB_PDU_SIZE, 15, +TIME, 99, + UE_RECV_FAILS, 0, + ENB_PDU, 15, 0xec, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + ENB_PDU, 25, 0xec, 0x00, 0x00, 0x00, 0x80, 0x50, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, +TIME, 100, + ENB_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test12.txt.gz b/openair2/LAYER2/rlc_v2/tests/test12.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..988d7ae644c008cef2f34adce448d00a3a7ce4e1 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test12.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test13.h 
b/openair2/LAYER2/rlc_v2/tests/test13.h new file mode 100644 index 0000000000000000000000000000000000000000..a57bd43a946e3c5fdd3d38a482ba6bdbcee85318 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test13.h @@ -0,0 +1,30 @@ +/* + * rlc am test function process_received_ack with something in + * the retransmit_list to put in the ack_list + * eNB sends 4 PDUs, not received + * eNB retransmits 4th PDU, received, ACKed with NACKs for PDU 1, 2, 3 + * UE receives custom PDU for 1, 2, 3, 4 (they are not sent by eNB) + * (4 resent to have the P bit set) + * UE sends ACK for all, eNB puts from retransmit_list to ack_list + * + * Maybe not very realistic (custom PDUs). + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_PDU_SIZE, 12, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, +TIME, 10, + UE_RECV_FAILS, 0, + ENB_RECV_FAILS, 0, +TIME, 87, + ENB_PDU, 12, 0x80, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + ENB_PDU, 12, 0x80, 0x01, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, + ENB_PDU, 12, 0x80, 0x02, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, + ENB_PDU, 12, 0xa0, 0x03, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test13.txt.gz b/openair2/LAYER2/rlc_v2/tests/test13.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..55a26712db1f9d1efb68b8a66a275d20b4867beb Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test13.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test14.h b/openair2/LAYER2/rlc_v2/tests/test14.h new file mode 100644 index 0000000000000000000000000000000000000000..0a3a50179614faf31f9e6c3fc1e473fd75204c05 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test14.h @@ -0,0 +1,12 @@ +/* + * rlc am test max_retx_reached + * eNB sends PDU, never received + */ +TIME, 1, + MUST_FAIL, + 
ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test14.txt.gz b/openair2/LAYER2/rlc_v2/tests/test14.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..93aa5de81ea42ae69511552066c75ccf11febbea Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test14.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test15.h b/openair2/LAYER2/rlc_v2/tests/test15.h new file mode 100644 index 0000000000000000000000000000000000000000..4adf93f81c045c26e8e6d3fab5013f7fc5071514 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test15.h @@ -0,0 +1,42 @@ +/* + * rlc am test so_overlap + * eNB sends PDU, not received + * then PDU is segmented in 3 parts, part 1 & 3 not received, + * then we generate a fake control PDU from UE to eNB that + * contains NACK with so_start/so_end being inside part 2. + * + * code to generate fake control PDU: + * rlc_pdu_encoder_init(&e, out, 100); + * rlc_pdu_encoder_put_bits(&e, 0, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 3); // CPT + * rlc_pdu_encoder_put_bits(&e, 2, 10); // ack_sn + * rlc_pdu_encoder_put_bits(&e, 1, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 1, 10); // nack_sn + * rlc_pdu_encoder_put_bits(&e, 0, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 1, 1); // e2 + * rlc_pdu_encoder_put_bits(&e, 14, 15); // so_start + * rlc_pdu_encoder_put_bits(&e, 16, 15); // so_end + * rlc_pdu_encoder_align(&e); + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 8, + ENB_RECV_FAILS, 1, +TIME, 2, + UE_RECV_FAILS, 1, + ENB_SDU, 1, 30, +TIME, 20, + ENB_PDU_SIZE, 14, +TIME, 48, + UE_RECV_FAILS, 0, +TIME, 49, + UE_RECV_FAILS, 1, +TIME, 50, + UE_RECV_FAILS, 0, +TIME, 60, + ENB_RECV_FAILS, 0, + UE_PDU, 8, 0x00, 0x0a, 0x00, 0xa0, 0x03, 0x80, 0x08, 0x00, +TIME, 70, + UE_RECV_FAILS, 0, +TIME, -1 diff --git 
a/openair2/LAYER2/rlc_v2/tests/test15.txt.gz b/openair2/LAYER2/rlc_v2/tests/test15.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..f6f25dac9857198c69c7a5c05bd468b9458d65f9 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test15.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test16.h b/openair2/LAYER2/rlc_v2/tests/test16.h new file mode 100644 index 0000000000000000000000000000000000000000..862cecf344bdcea3978fd055b4c62242702c7bb6 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test16.h @@ -0,0 +1,48 @@ +/* + * rlc am test process_received_nack + * Same events as for test15 except the fake control PDU + * does not ACK anything (ack_sn = 0) so that PDUs in the + * wait_list are not transferred into the ack_list and + * we cover the case: + * } else { + * prev = cur; + * cur = cur->next; + * } + * for the wait_list case. + * + * code to generate fake control PDU: + * rlc_pdu_encoder_init(&e, out, 100); + * rlc_pdu_encoder_put_bits(&e, 0, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 3); // CPT + * rlc_pdu_encoder_put_bits(&e, 0, 10); // ack_sn + * rlc_pdu_encoder_put_bits(&e, 1, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 1, 10); // nack_sn + * rlc_pdu_encoder_put_bits(&e, 0, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 1, 1); // e2 + * rlc_pdu_encoder_put_bits(&e, 14, 15); // so_start + * rlc_pdu_encoder_put_bits(&e, 16, 15); // so_end + * rlc_pdu_encoder_align(&e); + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 8, + ENB_RECV_FAILS, 1, +TIME, 2, + UE_RECV_FAILS, 1, + ENB_SDU, 1, 30, +TIME, 20, + ENB_PDU_SIZE, 14, +TIME, 48, + UE_RECV_FAILS, 0, +TIME, 49, + UE_RECV_FAILS, 1, +TIME, 50, + UE_RECV_FAILS, 0, +TIME, 60, + ENB_RECV_FAILS, 0, + UE_PDU, 8, 0x00, 0x02, 0x00, 0xa0, 0x03, 0x80, 0x08, 0x00, +TIME, 70, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test16.txt.gz b/openair2/LAYER2/rlc_v2/tests/test16.txt.gz new file 
mode 100644 index 0000000000000000000000000000000000000000..61f36c292ec8ec46edaa3de7d77b235d78322a06 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test16.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test17.h b/openair2/LAYER2/rlc_v2/tests/test17.h new file mode 100644 index 0000000000000000000000000000000000000000..a2e6c237de9b8302744bb022ad22aa81025a2639 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test17.h @@ -0,0 +1,30 @@ +/* + * rlc am test function process_received_nack + * case 'check that VT(A) <= sn < VT(S)' + * eNB sends PDU, not received, resends segmented + * we generate a fake control PDU containing nack_sn == 10, + * to fail the 'check ...' and cover the return. + * + * code to generate fake control PDU: + * rlc_pdu_encoder_init(&e, out, 100); + * rlc_pdu_encoder_put_bits(&e, 0, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 3); // CPT + * rlc_pdu_encoder_put_bits(&e, 0, 10); // ack_sn + * rlc_pdu_encoder_put_bits(&e, 1, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 10, 10); // nack_sn + * rlc_pdu_encoder_put_bits(&e, 0, 1); // e1 + * rlc_pdu_encoder_put_bits(&e, 0, 1); // e2 + * rlc_pdu_encoder_align(&e); + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 30, + ENB_RECV_FAILS, 1, +TIME, 20, + ENB_PDU_SIZE, 14, +TIME, 60, + ENB_RECV_FAILS, 0, + UE_PDU, 4, 0x00, 0x02, 0x05, 0x00, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test17.txt.gz b/openair2/LAYER2/rlc_v2/tests/test17.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..a35b5cecd18759c7c683afd4e83df2fc7ba38293 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test17.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test18.h b/openair2/LAYER2/rlc_v2/tests/test18.h new file mode 100644 index 0000000000000000000000000000000000000000..0ac25d5c915ad4db43c8bf93fcbaeafae9619f0e --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test18.h @@ -0,0 +1,10 @@ +/* + * 
test rlc am simulate rx pdu buffer full + * eNB sends too big PDU to UE, rejected because buffer full + */ +TIME, 1, + MUST_FAIL, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 10, 10, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test18.txt.gz b/openair2/LAYER2/rlc_v2/tests/test18.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..e119c2b018fcece7c4504135b4bf09c23d902590 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test18.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test19.h b/openair2/LAYER2/rlc_v2/tests/test19.h new file mode 100644 index 0000000000000000000000000000000000000000..f28e7609f451a9becdb7f5c4737681c4a69d501a --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test19.h @@ -0,0 +1,54 @@ +/* + * test rlc am bad PDU + * eNB sends custom PDUs to UE, all of them are wrong for one reason or another + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + /* data PDU, LI == 0 + * rlc_pdu_encoder_put_bits(&e, 1, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 1); // RF + * rlc_pdu_encoder_put_bits(&e, 0, 1); // P + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 10); // SN + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 11); // LI + */ + ENB_PDU, 4, 0x84, 0x00, 0x00, 0x00, + /* data PDU, no data + * rlc_pdu_encoder_put_bits(&e, 1, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 1); // RF + * rlc_pdu_encoder_put_bits(&e, 0, 1); // P + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 10); // SN + */ + ENB_PDU, 2, 0x80, 0x00, + /* data PDU, LI == 2 > data size == 1 + * rlc_pdu_encoder_put_bits(&e, 1, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 1); // RF + * rlc_pdu_encoder_put_bits(&e, 0, 1); // P + * 
rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 10); // SN + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 2, 11); // LI + * rlc_pdu_encoder_align(&e); + * rlc_pdu_encoder_put_bits(&e, 0, 8); // 1 byte of data + */ + ENB_PDU, 5, 0x84, 0x00, 0x00, 0x20, 0x00, + /* control PDU, CPT != 0 + * rlc_pdu_encoder_put_bits(&e, 0, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 2, 3); // CPT + */ + ENB_PDU, 1, 0x20, + /* data PDU, but only 1 byte + * rlc_pdu_encoder_put_bits(&e, 1, 1); // D/C + * rlc_pdu_encoder_put_bits(&e, 0, 1); // RF + * rlc_pdu_encoder_put_bits(&e, 0, 1); // P + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + */ + ENB_PDU, 1, 0x84, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test19.txt.gz b/openair2/LAYER2/rlc_v2/tests/test19.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..a3c034e7298d5298cd54493622afdfca7c51b9be Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test19.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test2.h b/openair2/LAYER2/rlc_v2/tests/test2.h new file mode 100644 index 0000000000000000000000000000000000000000..ba00920778b2821b5807cb9ecf4e5424df892df7 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test2.h @@ -0,0 +1,10 @@ +/* + * basic am test: + * at time 1, eNB receives an SDU of 16000 bytes + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 16000, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test2.txt.gz b/openair2/LAYER2/rlc_v2/tests/test2.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..9961ff3a1020fe5ecf83b49b11ede590b229de6d Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test2.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test20.h b/openair2/LAYER2/rlc_v2/tests/test20.h new file mode 100644 index 
0000000000000000000000000000000000000000..54f4bec720ab5c6b28123d372f541ddfbc88772d --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test20.h @@ -0,0 +1,28 @@ +/* + * rlc am test full tx window + * for that eNB sends a lot of small PDUs + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 513, + ENB_PDU_SIZE, 3, + ENB_RECV_FAILS, 1, + ENB_BUFFER_STATUS, +TIME, 511, + UE_BUFFER_STATUS, +TIME, 512, + UE_BUFFER_STATUS, +TIME, 513, + UE_BUFFER_STATUS, +TIME, 557, + ENB_BUFFER_STATUS, +TIME, 558, + ENB_BUFFER_STATUS, +TIME, 559, + ENB_BUFFER_STATUS, +TIME, 600, + ENB_BUFFER_STATUS, + ENB_RECV_FAILS, 0, +TIME, -1 + diff --git a/openair2/LAYER2/rlc_v2/tests/test20.txt.gz b/openair2/LAYER2/rlc_v2/tests/test20.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..5fedad91a452500def6a850e2da72d99f68346d0 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test20.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test21.h b/openair2/LAYER2/rlc_v2/tests/test21.h new file mode 100644 index 0000000000000000000000000000000000000000..ba2a2088e683df7682e62b8273ed044d5cfc1e31 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test21.h @@ -0,0 +1,18 @@ +/* + * rlc am test big SDU (size > 2047) + * first generate SDU with exactly 2047 bytes + * later on generate SDU with exactly 2048 bytes + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 20, + ENB_SDU, 1, 2047, + ENB_SDU, 2, 20, + ENB_PDU_SIZE, 2200, +TIME, 10, + ENB_SDU, 3, 20, + ENB_SDU, 4, 2048, + ENB_SDU, 5, 20, +TIME, -1 + diff --git a/openair2/LAYER2/rlc_v2/tests/test21.txt.gz b/openair2/LAYER2/rlc_v2/tests/test21.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..7fc8cbacdef75cc7a77684509989bc7d414e37d6 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test21.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test22.h 
b/openair2/LAYER2/rlc_v2/tests/test22.h new file mode 100644 index 0000000000000000000000000000000000000000..6e2e8cd410acd6e122fb39047d6438a2e33dfe85 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test22.h @@ -0,0 +1,25 @@ +/* + * am test: ask for retx with TX buffer too small + * then ask for status with buffer too small + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 100, + UE_RECV_FAILS, 1, +TIME, 47, + ENB_PDU_SIZE, 4, + ENB_BUFFER_STATUS, + UE_BUFFER_STATUS, +TIME, 48, + ENB_PDU_SIZE, 1000, + UE_PDU_SIZE, 1, + UE_BUFFER_STATUS, + UE_RECV_FAILS, 0, +TIME, 49, + UE_BUFFER_STATUS, +TIME, 50, + UE_PDU_SIZE, 1000, + UE_BUFFER_STATUS, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test22.txt.gz b/openair2/LAYER2/rlc_v2/tests/test22.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..cdc7f51a162aae7cff631abeb0324a088ab48907 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test22.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test23.h b/openair2/LAYER2/rlc_v2/tests/test23.h new file mode 100644 index 0000000000000000000000000000000000000000..5ad2d25b7defac794d7cfe9c71e3c440c2dd1070 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test23.h @@ -0,0 +1,9 @@ +/* + * am test: basic test with poll_byte == 1 + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, 1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, 1, 4, + ENB_SDU, 0, 30, + ENB_PDU_SIZE, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test23.txt.gz b/openair2/LAYER2/rlc_v2/tests/test23.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d66e6afa45fde0e6ebc6f9907c15b970f3a13d7 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test23.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test24.h b/openair2/LAYER2/rlc_v2/tests/test24.h new file mode 100644 index 0000000000000000000000000000000000000000..2393f7a95a8249b0c33e93dc36ca65b5996a342d --- 
/dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test24.h @@ -0,0 +1,9 @@ +/* + * am test: basic test with poll_pdu == 2 + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, 2, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, 2, -1, 4, + ENB_SDU, 0, 50, + ENB_PDU_SIZE, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test24.txt.gz b/openair2/LAYER2/rlc_v2/tests/test24.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c457987dcf297d3beb6a93f27aeddbac5fd58af Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test24.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test25.h b/openair2/LAYER2/rlc_v2/tests/test25.h new file mode 100644 index 0000000000000000000000000000000000000000..ddb584cdf64af9a5eb359512a4fd8927e2e235a3 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test25.h @@ -0,0 +1,8 @@ +/* + * am test: reject SDU because not enough room in rx buffer + */ +TIME, 1, + ENB_AM, 10, 10, 35, 0, 45, -1, -1, 4, + UE_AM, 10, 10, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 50, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test25.txt.gz b/openair2/LAYER2/rlc_v2/tests/test25.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..7ad895aaccc095103430cffdf324d3976077f0f2 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test25.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test26.h b/openair2/LAYER2/rlc_v2/tests/test26.h new file mode 100644 index 0000000000000000000000000000000000000000..95d8367247a6a10ae5e880a8d34e0973d9936a11 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test26.h @@ -0,0 +1,25 @@ +/* + * am test: test function check_t_poll_retransmit + * case 'PDU with SN = VT(S)-1 not found?' 
+ * eNB sends some PDUs, UE receives none + * then UE receives the first retransmitted PDU and nothing more + * until poll retransmit occurs again in the eNB to trigger the case + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + UE_RECV_FAILS, 1, +TIME, 2, + ENB_SDU, 1, 10, +TIME, 3, + ENB_SDU, 2, 10, +TIME, 4, + ENB_SDU, 3, 10, +TIME, 50, + UE_RECV_FAILS, 0, +TIME, 51, + UE_RECV_FAILS, 1, +TIME, 100, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test26.txt.gz b/openair2/LAYER2/rlc_v2/tests/test26.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..85f1af55f691179defd6ab24bf8d4c0960d986dc Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test26.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test27.h b/openair2/LAYER2/rlc_v2/tests/test27.h new file mode 100644 index 0000000000000000000000000000000000000000..224fd1218592c8cd5834ad7dccb736d295553e4e --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test27.h @@ -0,0 +1,17 @@ +/* + * am test: test function check_t_poll_retransmit + * case 'do we meet conditions of 36.322 5.2.2.3?' 
+ * eNB sends one PDU, UE does not receive + * just before calling check_t_poll_retransmit, eNB receives a new SDU + * for the function 'check_poll_after_pdu_assembly' to fail + * then UE receives everything eNB sends + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + UE_RECV_FAILS, 1, +TIME, 47, + ENB_SDU, 1, 10, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test27.txt.gz b/openair2/LAYER2/rlc_v2/tests/test27.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..15fc41defe11b0d13f2b044a3e3b02eab4c133ad Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test27.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test28.h b/openair2/LAYER2/rlc_v2/tests/test28.h new file mode 100644 index 0000000000000000000000000000000000000000..ac768f36523f6afa52f7459146ec981c58aea2e1 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test28.h @@ -0,0 +1,18 @@ +/* + * am test: test function check_t_reordering, + * case 'update VR(MS) to first SN >= VR(X) for which not + * all PDU segments have been received' + * eNB sends 3 PDUs, first not received, two others received + * later on, everything is received + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + UE_RECV_FAILS, 1, +TIME, 2, + UE_RECV_FAILS, 0, + ENB_SDU, 1, 10, +TIME, 3, + ENB_SDU, 2, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test28.txt.gz b/openair2/LAYER2/rlc_v2/tests/test28.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..760d1f2b84f0aa4849d987f157f258ab7d22b90b Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test28.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test29.h b/openair2/LAYER2/rlc_v2/tests/test29.h new file mode 100644 index 0000000000000000000000000000000000000000..61bb183641d1251b26afa17055a6bc9b8fd611a3 --- /dev/null +++ 
b/openair2/LAYER2/rlc_v2/tests/test29.h @@ -0,0 +1,21 @@ +/* + * am test: test function check_t_reordering, + * case 'VR(H) > VR(MS)' + * eNB sends 4 PDUs, only 1st and 3rd are received + * later on, everything is received + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + UE_RECV_FAILS, 1, +TIME, 2, + UE_RECV_FAILS, 0, + ENB_SDU, 1, 10, +TIME, 3, + UE_RECV_FAILS, 1, + ENB_SDU, 2, 10, +TIME, 4, + UE_RECV_FAILS, 0, + ENB_SDU, 3, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test29.txt.gz b/openair2/LAYER2/rlc_v2/tests/test29.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..265735edbceb54e2a54c0f2d7c171080262c95de Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test29.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test3.h b/openair2/LAYER2/rlc_v2/tests/test3.h new file mode 100644 index 0000000000000000000000000000000000000000..5a469d82e24a872af68c8e11c83414797acebc87 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test3.h @@ -0,0 +1,11 @@ +/* + * basic am test: + * at time 1, eNB receives an SDU of 16001 bytes + */ + +TIME, 1, + MUST_FAIL, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 16001, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test3.txt.gz b/openair2/LAYER2/rlc_v2/tests/test3.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..30a96e22781c5f1d3b4711f48fc337cef23a574e Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test3.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test30.h b/openair2/LAYER2/rlc_v2/tests/test30.h new file mode 100644 index 0000000000000000000000000000000000000000..feeee977fd371854098c482e867a4912f5bf3576 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test30.h @@ -0,0 +1,16 @@ +/* + * am test: test function generate_status + * enter the while loop 'go to highest full sn+1 for ACK' + * eNB sends 
several PDUs, only the last is received + * UE sends a status PDU of a chosen size that lets the code enter the while loop + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 70, + ENB_PDU_SIZE, 12, + UE_RECV_FAILS, 1, +TIME, 7, + UE_RECV_FAILS, 0, + UE_PDU_SIZE, 12, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test30.txt.gz b/openair2/LAYER2/rlc_v2/tests/test30.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..eeb856c3414ce973ac95ab4e3258f6e9aacd3ff1 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test30.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test31.h b/openair2/LAYER2/rlc_v2/tests/test31.h new file mode 100644 index 0000000000000000000000000000000000000000..a978c69b39a056233c332724f155a24912709015 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test31.h @@ -0,0 +1,10 @@ +/* + * um test: several SDUs in a PDU (field length 5 bits) + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_SDU, 0, 10, + ENB_SDU, 1, 20, + ENB_SDU, 2, 30, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test31.txt.gz b/openair2/LAYER2/rlc_v2/tests/test31.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..2c5e6fcc3415544b1ca81a258e81eef6d190311b Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test31.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test32.h b/openair2/LAYER2/rlc_v2/tests/test32.h new file mode 100644 index 0000000000000000000000000000000000000000..69d068cc836cd33d8541d76864d015bc9207ff99 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test32.h @@ -0,0 +1,10 @@ +/* + * um test: several SDUs in a PDU (field length 10 bits) + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 10, + UE_UM, 100000, 100000, 35, 10, + ENB_SDU, 0, 10, + ENB_SDU, 1, 20, + ENB_SDU, 2, 30, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test32.txt.gz b/openair2/LAYER2/rlc_v2/tests/test32.txt.gz new file 
mode 100644 index 0000000000000000000000000000000000000000..0b4633045337017eb15e520e995f9754478fa423 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test32.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test33.h b/openair2/LAYER2/rlc_v2/tests/test33.h new file mode 100644 index 0000000000000000000000000000000000000000..6e907db577f80fe0f565a7d7ed86cef11b8c4638 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test33.h @@ -0,0 +1,18 @@ +/* + * um test: test function rlc_um_reassemble_pdu, discard SDU + * case '!(fi & 0x02)' + * eNB sends 33 PDUs covering 1 SDU, only PDU 0 received (with SN=0 and FI=1) + * then eNB sends 1 PDU covering 1 SDU (so SN=1 and FI=0 for this one) + * received by UE + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_SDU, 0, 33, + ENB_PDU_SIZE, 2, +TIME, 2, + UE_RECV_FAILS, 1, +TIME, 34, + UE_RECV_FAILS, 0, + ENB_SDU, 1, 1, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test33.txt.gz b/openair2/LAYER2/rlc_v2/tests/test33.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..08cb366be415251d3c552da7b4f22615fa8f6138 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test33.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test34.h b/openair2/LAYER2/rlc_v2/tests/test34.h new file mode 100644 index 0000000000000000000000000000000000000000..da119a6047fa5fc03e274b62cb330cf7ce21e925 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test34.h @@ -0,0 +1,15 @@ +/* + * um test: trigger some cases in rlc_um_reception_actions + * eNB sends several PDUs, only the beginning PDUs and ending PDUs are + * received. Middle PDUs are not. 
+ */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_SDU, 0, 40, + ENB_PDU_SIZE, 2, +TIME, 2, + UE_RECV_FAILS, 1, +TIME, 8, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test34.txt.gz b/openair2/LAYER2/rlc_v2/tests/test34.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..aabbe570e56ea236be3853ee4f8f445dbde899fd Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test34.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test35.h b/openair2/LAYER2/rlc_v2/tests/test35.h new file mode 100644 index 0000000000000000000000000000000000000000..35ccec1a42a4b0e7fbcf05a6d6742ffb44efa02f --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test35.h @@ -0,0 +1,9 @@ +/* + * um: discard PDU because rx buffer full + * eNB sends a PDU too big + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 10, 10, 35, 5, + ENB_SDU, 0, 40, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test35.txt.gz b/openair2/LAYER2/rlc_v2/tests/test35.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..6581c390c73f05a34696e1effc3a7194e5f97f3c Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test35.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test36.h b/openair2/LAYER2/rlc_v2/tests/test36.h new file mode 100644 index 0000000000000000000000000000000000000000..0a49527a923350ae87bab862bcb8d094818c0f15 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test36.h @@ -0,0 +1,14 @@ +/* + * um: discard according to 36.322 5.1.2.2.2 + * eNB sends many PDUs. 1st is received, then not, then again. 
+ */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_SDU, 0, 33, + ENB_PDU_SIZE, 2, +TIME, 2, + UE_RECV_FAILS, 1, +TIME, 22, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test36.txt.gz b/openair2/LAYER2/rlc_v2/tests/test36.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ad38454f9ba8cbe02dbb137842d91cab52bbcca Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test36.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test37.h b/openair2/LAYER2/rlc_v2/tests/test37.h new file mode 100644 index 0000000000000000000000000000000000000000..b418e2c7151ac82ddae3c623b74feae3360e6502 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test37.h @@ -0,0 +1,37 @@ +/* + * um: some wrong PDUs + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + /* LI == 0 + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 5); // SN + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 11); // LI + */ + ENB_PDU, 3, 0x20, 0x00, 0x00, + /* no data + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 5); // SN + */ + ENB_PDU, 1, 0x00, + /* LI == 2 >= data_size == 1 + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 5); // SN + * rlc_pdu_encoder_put_bits(&e, 0, 1); // E + * rlc_pdu_encoder_put_bits(&e, 2, 11); // LI + * rlc_pdu_encoder_align(&e); + * rlc_pdu_encoder_put_bits(&e, 0, 8); // 1 byte of data + */ + ENB_PDU, 4, 0x20, 0x00, 0x20, 0x00, + /* PDU with E == 1 but has size 1 byte only (truncated PDU) + * rlc_pdu_encoder_put_bits(&e, 0, 2); // FI + * rlc_pdu_encoder_put_bits(&e, 1, 1); // E + * rlc_pdu_encoder_put_bits(&e, 0, 5); // SN + */ + ENB_PDU, 1, 0x20, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test37.txt.gz 
b/openair2/LAYER2/rlc_v2/tests/test37.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..2a1a837bf0329b2859fa9e71ac9a4bc460c55075 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test37.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test38.h b/openair2/LAYER2/rlc_v2/tests/test38.h new file mode 100644 index 0000000000000000000000000000000000000000..66a37207e0274ddf93abfd7064908ec4a4c4b32e --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test38.h @@ -0,0 +1,22 @@ +/* + * um: test some cases of functions tx_pdu_size and rlc_entity_um_generate_pdu + * eNB has too much data to fit in one PDU + * then later eNB wants to send an SDU of size > 2047 + * then later eNB sends several SDUs in one PDU + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_PDU_SIZE, 2050, + ENB_SDU, 0, 1500, + ENB_SDU, 1, 1500, + ENB_SDU, 2, 10, +TIME, 10, + ENB_SDU, 3, 2048, + ENB_SDU, 4, 10, +TIME, 20, + ENB_SDU, 5, 10, + ENB_SDU, 6, 10, + ENB_SDU, 7, 10, + ENB_SDU, 8, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test38.txt.gz b/openair2/LAYER2/rlc_v2/tests/test38.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..57e4ed270acc00bd861eeb824e24cc0a154c3a60 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test38.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test39.h b/openair2/LAYER2/rlc_v2/tests/test39.h new file mode 100644 index 0000000000000000000000000000000000000000..8c926b3745ff69d70dd75f0d421e763c3451283a --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test39.h @@ -0,0 +1,9 @@ +/* + * um: SDU too big + */ +TIME, 1, + MUST_FAIL, + ENB_UM, 10, 10, 35, 5, + UE_UM, 100, 100, 35, 5, + ENB_SDU, 0, 16001, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test39.txt.gz b/openair2/LAYER2/rlc_v2/tests/test39.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..c4f6501d596f474dd77abbce265bd7ab4e7c9cd4 Binary files /dev/null and 
b/openair2/LAYER2/rlc_v2/tests/test39.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test4.h b/openair2/LAYER2/rlc_v2/tests/test4.h new file mode 100644 index 0000000000000000000000000000000000000000..8801096de117e51e9a0a07ccc6bd4c22114ef905 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test4.h @@ -0,0 +1,13 @@ +/* + * basic um test: UE field length 5 bits + * at time 1, eNB receives an SDU of 10 bytes + * at time 10, UE receives an SDU of 5 bytes + */ + +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + ENB_SDU, 0, 10, +TIME, 10, + UE_SDU, 0, 5, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test4.txt.gz b/openair2/LAYER2/rlc_v2/tests/test4.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..4339005cd60ae367d0f4bd3bf919253bcad82241 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test4.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test40.h b/openair2/LAYER2/rlc_v2/tests/test40.h new file mode 100644 index 0000000000000000000000000000000000000000..478fe1af06536d88afe7c9c72abb4e91742119f7 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test40.h @@ -0,0 +1,9 @@ +/* + * um: not enough room in SDU list + */ +TIME, 1, + ENB_UM, 10, 10, 35, 5, + UE_UM, 100, 100, 35, 5, + ENB_SDU, 0, 20, + ENB_BUFFER_STATUS, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test40.txt.gz b/openair2/LAYER2/rlc_v2/tests/test40.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..38d4b31cdaa43d1bac222a9d92ea9a615201cae6 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test40.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test41.h b/openair2/LAYER2/rlc_v2/tests/test41.h new file mode 100644 index 0000000000000000000000000000000000000000..076d3e0d8c041f3310779967507157b91cea6eee --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test41.h @@ -0,0 +1,45 @@ +/* + * um: test function check_t_reordering + * eNB sends PDUs, UE receives some and some not + */ +TIME, 1, + 
ENB_UM, 10000, 10000, 35, 5, + UE_UM, 10000, 10000, 35, 5, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, + ENB_SDU, 4, 10, + ENB_SDU, 5, 10, + ENB_SDU, 6, 10, + ENB_SDU, 7, 10, + ENB_SDU, 8, 10, + ENB_SDU, 9, 10, + ENB_SDU, 10, 10, + ENB_SDU, 11, 10, + ENB_SDU, 12, 10, + ENB_SDU, 13, 10, + ENB_SDU, 14, 10, + ENB_SDU, 15, 10, + ENB_SDU, 16, 10, + ENB_SDU, 17, 10, + ENB_SDU, 18, 10, + ENB_SDU, 19, 10, + ENB_SDU, 20, 10, + ENB_SDU, 21, 10, + ENB_SDU, 22, 10, + ENB_SDU, 23, 10, + ENB_SDU, 24, 10, + ENB_SDU, 25, 10, + ENB_PDU_SIZE, 40, +TIME, 2, + UE_RECV_FAILS, 1, +TIME, 3, + UE_RECV_FAILS, 0, +TIME, 6, + UE_RECV_FAILS, 1, +TIME, 7, + UE_RECV_FAILS, 0, +TIME, 8, + UE_RECV_FAILS, 1, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test41.txt.gz b/openair2/LAYER2/rlc_v2/tests/test41.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b799ac084ca15f8c1914c93d71d2c25d6271e7d Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test41.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test42.h b/openair2/LAYER2/rlc_v2/tests/test42.h new file mode 100644 index 0000000000000000000000000000000000000000..66f27b9dac46468006efea07f7e691db53a45ee1 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test42.h @@ -0,0 +1,39 @@ +/* + * am test: test rlc_entity_am_discard_sdu + * eNB and UE get some SDU, later on some are discarded + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, + ENB_PDU_SIZE, 23, +TIME, 2, + ENB_DISCARD_SDU, 0, + ENB_DISCARD_SDU, 2, + ENB_DISCARD_SDU, 3, + ENB_DISCARD_SDU, 1, +TIME, 10, + UE_SDU, 0, 5, + UE_SDU, 1, 5, + UE_SDU, 2, 5, + UE_SDU, 3, 5, + UE_SDU, 4, 5, + UE_SDU, 5, 5, + UE_PDU_SIZE, 13, +TIME, 12, + UE_DISCARD_SDU, 3, + UE_DISCARD_SDU, 1, + UE_DISCARD_SDU, 0, + UE_DISCARD_SDU, 5, + UE_DISCARD_SDU, 4, + UE_DISCARD_SDU, 2, +TIME, 30, + UE_SDU, 6, 5, + 
UE_DISCARD_SDU, 6, +TIME, 31, + UE_SDU, 7, 8, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test42.txt.gz b/openair2/LAYER2/rlc_v2/tests/test42.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf9f45c88268e0a986a41c335480dded4f33abbd Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test42.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test43.h b/openair2/LAYER2/rlc_v2/tests/test43.h new file mode 100644 index 0000000000000000000000000000000000000000..e594437ae8869c8d4f08d975ccaf7ccf5591ee85 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test43.h @@ -0,0 +1,39 @@ +/* + * um test: test rlc_entity_um_discard_sdu + * eNB and UE get some SDU, later on some are discarded + */ + +TIME, 1, + ENB_UM, 100000, 100000, 35, 10, + UE_UM, 100000, 100000, 35, 10, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, + ENB_PDU_SIZE, 23, +TIME, 2, + ENB_DISCARD_SDU, 0, + ENB_DISCARD_SDU, 2, + ENB_DISCARD_SDU, 3, + ENB_DISCARD_SDU, 1, +TIME, 10, + UE_SDU, 0, 5, + UE_SDU, 1, 5, + UE_SDU, 2, 5, + UE_SDU, 3, 5, + UE_SDU, 4, 5, + UE_SDU, 5, 5, + UE_PDU_SIZE, 13, +TIME, 12, + UE_DISCARD_SDU, 3, + UE_DISCARD_SDU, 1, + UE_DISCARD_SDU, 0, + UE_DISCARD_SDU, 5, + UE_DISCARD_SDU, 4, + UE_DISCARD_SDU, 2, +TIME, 30, + UE_SDU, 6, 5, + UE_DISCARD_SDU, 6, +TIME, 31, + UE_SDU, 7, 8, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test43.txt.gz b/openair2/LAYER2/rlc_v2/tests/test43.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..3387b6530e11728fbd180554a216f20c2b4ef2f8 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test43.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test44.h b/openair2/LAYER2/rlc_v2/tests/test44.h new file mode 100644 index 0000000000000000000000000000000000000000..cc9873ac34b40f7c030a7bf16ea68860bd3bf808 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test44.h @@ -0,0 +1,20 @@ +/* + * am: test function rlc_entity_am_reestablishment + */ +TIME, 1, + ENB_AM, 
100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + RE_ESTABLISH, +TIME, 2, + ENB_SDU, 0, 10, + RE_ESTABLISH, +TIME, 3, + ENB_SDU, 0, 40, + ENB_PDU_SIZE, 14, + UE_RECV_FAILS, 1, +TIME, 4, + UE_RECV_FAILS, 0, +TIME, 10, + RE_ESTABLISH, +TIME, -1 + diff --git a/openair2/LAYER2/rlc_v2/tests/test44.txt.gz b/openair2/LAYER2/rlc_v2/tests/test44.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..bdad9e3fbc5ce1eb162b82c6ea82b0c8cf6fef86 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test44.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test45.h b/openair2/LAYER2/rlc_v2/tests/test45.h new file mode 100644 index 0000000000000000000000000000000000000000..c27fd8e2f0641bef9abda06882ba332a85bce506 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test45.h @@ -0,0 +1,30 @@ +/* + * um: test function rlc_entity_um_reestablishment + * and also the function clear_entity, case 'while (cur_rx != NULL)' + */ +TIME, 1, + ENB_UM, 100000, 100000, 35, 5, + UE_UM, 100000, 100000, 35, 5, + RE_ESTABLISH, +TIME, 2, + ENB_SDU, 0, 10, + RE_ESTABLISH, +TIME, 3, + ENB_SDU, 0, 10, + ENB_SDU, 0, 10, + ENB_SDU, 0, 10, + ENB_SDU, 0, 10, + ENB_PDU_SIZE, 14, +TIME, 5, + UE_RECV_FAILS, 1, +TIME, 6, + UE_RECV_FAILS, 0, +TIME, 10, + RE_ESTABLISH, +TIME, 998, + ENB_SDU, 0, 10, + ENB_SDU, 0, 10, + UE_RECV_FAILS, 1, +TIME, 999, + UE_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test45.txt.gz b/openair2/LAYER2/rlc_v2/tests/test45.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5e3e71d46e7f4a547b59fd5403557ac623e7d4f Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test45.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test5.h b/openair2/LAYER2/rlc_v2/tests/test5.h new file mode 100644 index 0000000000000000000000000000000000000000..3224817c264296f8491a877f309ea62074064615 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test5.h @@ -0,0 +1,13 @@ +/* + * basic um test: 
UE field length 10 bits + * at time 1, eNB receives an SDU of 10 bytes + * at time 10, UE receives an SDU of 5 bytes + */ + +TIME, 1, + ENB_UM, 100000, 100000, 35, 10, + UE_UM, 100000, 100000, 35, 10, + ENB_SDU, 0, 10, +TIME, 10, + UE_SDU, 0, 5, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test5.txt.gz b/openair2/LAYER2/rlc_v2/tests/test5.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..5a27d5260641878ac7e26cda1d77b8eeb7442154 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test5.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test6.h b/openair2/LAYER2/rlc_v2/tests/test6.h new file mode 100644 index 0000000000000000000000000000000000000000..2115c8a328af4f490353e978be4e77961bf93035 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test6.h @@ -0,0 +1,27 @@ +/* + * rlc am test function segment_already_received + * eNB sends SDU [1..900], not received + * eNB retx with smaller PDUs [1..600] [601..900] + * [1..600] is received but ACK/NACK not + * eNB retx with still smaller PDUs [1..400] [401..600] [601..900] + * all is received, ACKs/NACKs go through + * + * this test will fail if NACK mechanism uses SOstart/SOend + * (not implemented for the moment) + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 900, +TIME, 2, + ENB_PDU_SIZE, 600, + UE_RECV_FAILS, 0, +TIME, 48, + UE_RECV_FAILS, 1, + ENB_PDU_SIZE, 400, +TIME, 90, + UE_RECV_FAILS, 0, + ENB_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test6.txt.gz b/openair2/LAYER2/rlc_v2/tests/test6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..54870821a619725938e1ea529ff533d748d9c7db Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test6.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test7.h b/openair2/LAYER2/rlc_v2/tests/test7.h new file mode 100644 index 
0000000000000000000000000000000000000000..081227a400dcfebf40bfc63b85cb244e17de1d81 --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test7.h @@ -0,0 +1,26 @@ +/* + * rlc am test function rlc_am_segment_full + * eNB sends SDU [1..900], not received + * eNB retx with smaller PDUs [1..600] [601..900] + * nothing received + * eNB retx with still smaller PDUs [1..400] [401..600] [601..900] + * [401..600] received, ACK goes through + * link clean, all goes through + * + * this test will fail if NACK mechanism uses SOstart/SOend + * (not implemented for the moment) + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 900, +TIME, 2, + ENB_PDU_SIZE, 600, +TIME, 48, + ENB_PDU_SIZE, 400, +TIME, 95, + UE_RECV_FAILS, 0, + ENB_RECV_FAILS, 0, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test7.txt.gz b/openair2/LAYER2/rlc_v2/tests/test7.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..9976a6050779805882bbefb2dab98fa27fd789bc Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test7.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test8.h b/openair2/LAYER2/rlc_v2/tests/test8.h new file mode 100644 index 0000000000000000000000000000000000000000..aa7f5bed5be78d0979df6a12f245da7e8e38bdfb --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test8.h @@ -0,0 +1,19 @@ +/* + * basic am test: + * at time 1, eNB receives 10 SDUs of 10 bytes + */ + +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + ENB_SDU, 0, 10, + ENB_SDU, 1, 10, + ENB_SDU, 2, 10, + ENB_SDU, 3, 10, + ENB_SDU, 4, 10, + ENB_SDU, 5, 10, + ENB_SDU, 6, 10, + ENB_SDU, 7, 10, + ENB_SDU, 8, 10, + ENB_SDU, 9, 10, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test8.txt.gz b/openair2/LAYER2/rlc_v2/tests/test8.txt.gz new file mode 100644 index 
0000000000000000000000000000000000000000..c8016878635a3f971d3c63298ac49ddefe6835b7 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test8.txt.gz differ diff --git a/openair2/LAYER2/rlc_v2/tests/test9.h b/openair2/LAYER2/rlc_v2/tests/test9.h new file mode 100644 index 0000000000000000000000000000000000000000..88e23d94e95891923a49a486445c119f4590b85f --- /dev/null +++ b/openair2/LAYER2/rlc_v2/tests/test9.h @@ -0,0 +1,34 @@ +/* + * rlc am test function rlc_am_reassemble_next_segment + * case 'if pdu_byte is not in [so .. so+len-1]' + * eNB sends SDU [1..30], not received + * eNB retx with smaller PDUs [1..21] [22..30], not received + * eNB retx with still smaller PDUs [1..11] [12..21] [22..30], not received + * custom PDU [12..21] sent to UE, received + * custom PDU [1..21] sent to UE, received + * + * Not sure if in a real setup [12..21] is sent and then [1..21] is sent. + * In the current RLC implementation, this is impossible. If we send [12..21] + * it means [1..21] has been split and so we won't send it later on. + * Maybe with HARQ retransmissions in PHY/MAC in bad radio conditions? 
+ * + * this test will fail if NACK mechanism uses SOstart/SOend + * (not implemented for the moment) + */ +TIME, 1, + ENB_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_AM, 100000, 100000, 35, 0, 45, -1, -1, 4, + UE_RECV_FAILS, 1, + ENB_RECV_FAILS, 1, + ENB_SDU, 0, 30, +TIME, 2, + ENB_PDU_SIZE, 25, +TIME, 48, + ENB_PDU_SIZE, 15, +TIME, 100, + UE_RECV_FAILS, 0, + ENB_RECV_FAILS, 0, + ENB_PDU, 14, 0xd8, 0x00, 0x00, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, +TIME, 101, + ENB_PDU, 25, 0xe8, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, +TIME, -1 diff --git a/openair2/LAYER2/rlc_v2/tests/test9.txt.gz b/openair2/LAYER2/rlc_v2/tests/test9.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..cc6d934e708e1ddad7286e7c3eec33cd23c91f94 Binary files /dev/null and b/openair2/LAYER2/rlc_v2/tests/test9.txt.gz differ