diff --git a/ci-scripts/datalog_rt_stats.1x1.60.yaml b/ci-scripts/datalog_rt_stats.1x1.60.yaml index 140f3939d7f64da016ad206858127e1f69d38934..f2cd345737d6c35551b7c628be39f36893284607 100644 --- a/ci-scripts/datalog_rt_stats.1x1.60.yaml +++ b/ci-scripts/datalog_rt_stats.1x1.60.yaml @@ -10,7 +10,7 @@ Ref : feprx : 46.0 feptx_prec : 15.0 feptx_ofdm : 35.0 - feptx_total : 57.0 + feptx_total : 50.0 L1 Tx processing : 260.0 DLSCH encoding : 160.0 L1 Rx processing : 420.0 diff --git a/ci-scripts/datalog_rt_stats.default.yaml b/ci-scripts/datalog_rt_stats.default.yaml index 3e665f1f0b04120743247b12405fa46f92a86a57..1b83aed7837053c7d6de80c2fc858ce6e1ec54be 100644 --- a/ci-scripts/datalog_rt_stats.default.yaml +++ b/ci-scripts/datalog_rt_stats.default.yaml @@ -10,7 +10,7 @@ Ref : feprx : 43.0 feptx_prec : 13.0 feptx_ofdm : 33.0 - feptx_total : 55.0 + feptx_total : 50.0 L1 Tx processing : 200.0 DLSCH encoding : 100.0 L1 Rx processing : 330.0 diff --git a/ci-scripts/xml_files/t2_offload_dec_nr_ulsim.xml b/ci-scripts/xml_files/t2_offload_dec_nr_ulsim.xml index 808f7155b5460dc095615518e591e2c32b300e16..c959d99e1b3ec043d7fe034bacd91ea650ec6391 100644 --- a/ci-scripts/xml_files/t2_offload_dec_nr_ulsim.xml +++ b/ci-scripts/xml_files/t2_offload_dec_nr_ulsim.xml @@ -24,19 +24,36 @@ <htmlTabName>Test T2 Offload Decoder</htmlTabName> <htmlTabIcon>tasks</htmlTabIcon> <TestCaseRequestedList> + 010204 010111 010112 010121 010122 010131 010132 010211 010212 010221 010222 010231 010232 010311 010312 010321 010322 010331 010332 + 402010 </TestCaseRequestedList> <TestCaseExclusionList></TestCaseExclusionList> + <testCase id="010204"> + <class>Custom_Command</class> + <desc>Disable Sleep States</desc> + <node>caracal</node> + <command>sudo cpupower idle-set -D 0</command> + </testCase> + + <testCase id="402010"> + <class>Custom_Command</class> + <always_exec>true</always_exec> + <desc>Enable Sleep States</desc> + <node>caracal</node> + <command>sudo cpupower idle-set -E</command> + </testCase> + <testCase id="010111"> <class>Run_Physim</class> <desc>Run nr_ulsim with CPU: SNR = 30, MCS = 5, 106 PRBs, 1 layer</desc> <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r106 -R106 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r106 -R106 -C10 -P</physim_run_args> </testCase> <testCase id="010112"> @@ -45,7 +62,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>100</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010121"> @@ -54,7 +71,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r106 -R106 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r106 -R106 -C10 -P</physim_run_args> </testCase> <testCase id="010122"> @@ -63,7 +80,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>150</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010131"> @@ -72,7 +89,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>250</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r106 -R106 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r106 -R106 -C10 -P</physim_run_args> </testCase> <testCase id="010132"> @@ -81,7 +98,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>250</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r106 -R106 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010211"> @@ -90,7 +107,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r273 -R273 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r273 -R273 -C10 -P</physim_run_args> </testCase> <testCase id="010212"> @@ -99,7 +116,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>150</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010221"> @@ -108,7 +125,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r273 -R273 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r273 -R273 -C10 -P</physim_run_args> </testCase> <testCase id="010222"> @@ -117,7 +134,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>350</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010231"> @@ -126,7 +143,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r273 -R273 -C10 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r273 -R273 -C10 -P</physim_run_args> </testCase> <testCase id="010232"> @@ -135,7 +152,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>550</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r273 -R273 -C10 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010311"> @@ -144,7 +161,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="010312"> @@ -153,7 +170,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>250</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m5 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m5 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010321"> @@ -162,7 +179,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>600</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="010322"> @@ -171,7 +188,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>650</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m15 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m15 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="010331"> @@ -180,7 +197,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>650</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r273 -R273 -C10 -W2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="010332"> @@ -189,7 +206,7 @@ <always_exec>true</always_exec> <physim_test>nr_ulsim</physim_test> <physim_time_threshold>1100</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -m25 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -m25 -r273 -R273 -C10 -W2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> </testCaseList> diff --git a/ci-scripts/xml_files/t2_offload_enc_nr_dlsim.xml b/ci-scripts/xml_files/t2_offload_enc_nr_dlsim.xml index f1217ae737891e7797081123c1e9de98f85ed199..400399d3a1a11cc9b83a798a1656e3d8a43251bc 100644 --- a/ci-scripts/xml_files/t2_offload_enc_nr_dlsim.xml +++ b/ci-scripts/xml_files/t2_offload_enc_nr_dlsim.xml @@ -24,20 +24,37 @@ <htmlTabName>Test T2 Offload Encoder</htmlTabName> <htmlTabIcon>tasks</htmlTabIcon> <TestCaseRequestedList> + 102040 000111 000112 000121 000122 000131 000132 000211 000212 000221 000222 000231 000232 000311 000312 000321 000322 000331 000332 000411 000412 000421 000422 000431 000432 + 040201 </TestCaseRequestedList> <TestCaseExclusionList></TestCaseExclusionList> + <testCase id="102040"> + <class>Custom_Command</class> + <desc>Disable Sleep States</desc> + <node>caracal</node> + <command>sudo cpupower idle-set -D 0</command> + </testCase> + + <testCase id="040201"> + <class>Custom_Command</class> + <always_exec>true</always_exec> + <desc>Enable Sleep States</desc> + <node>caracal</node> + <command>sudo cpupower idle-set -E</command> + </testCase> + <testCase id="000111"> <class>Run_Physim</class> <desc>Run nr_dlsim with CPU: SNR = 30, MCS = 5, 106 PRBs, 1 layer</desc> <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>230</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000112"> @@ -46,7 +63,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>100</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000121"> @@ -55,7 +72,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000122"> @@ -64,7 +81,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>100</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000131"> @@ -73,7 +90,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>350</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b106 -R106 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000132"> @@ -82,7 +99,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>200</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b106 -R106 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000211"> @@ -91,7 +108,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000212"> @@ -100,7 +117,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>150</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000221"> @@ -109,7 +126,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>350</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000222"> @@ -118,7 +135,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>250</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000231"> @@ -127,7 +144,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X 8,9,10,11,12 -P</physim_run_args> </testCase> <testCase id="000232"> @@ -136,7 +153,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000311"> @@ -145,7 +162,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="000312"> @@ -154,7 +171,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>200</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000321"> @@ -163,7 +180,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>450</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="000322"> @@ -172,7 +189,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>500</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000331"> @@ -181,7 +198,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>500</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X 8,9,10,11,12 -x2 -z2 -y2 -P</physim_run_args> </testCase> <testCase id="000332"> @@ -190,7 +207,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>500</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -x2 -z2 -y2 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000411"> @@ -199,7 +216,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> </testCase> <testCase id="000412"> @@ -208,7 +225,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>200</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e5 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000421"> @@ -217,7 +234,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>400</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> </testCase> <testCase id="000422"> @@ -226,7 +243,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>300</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e15 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> <testCase id="000431"> @@ -235,7 +252,7 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>450</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X8,9,10,11,12 -x2 -z4 -y4 -P</physim_run_args> </testCase> <testCase id="000432"> @@ -244,6 +261,6 @@ <always_exec>true</always_exec> <physim_test>nr_dlsim</physim_test> <physim_time_threshold>450</physim_time_threshold> - <physim_run_args>-n100 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> + <physim_run_args>-n1000 -s30 -S30.2 -e25 -b273 -R273 -X4,5,6,7,8,9 -x2 -z4 -y4 -P --loader.ldpc.shlibversion _t2 --nrLDPC_coding_t2.dpdk_dev d8:00.0 --nrLDPC_coding_t2.dpdk_core_list 11-12</physim_run_args> </testCase> </testCaseList> diff --git a/cmake_targets/autotests/test_case_list.xml b/cmake_targets/autotests/test_case_list.xml index 2c6dc73bfa629ce700e4f78fb61e094a2063577c..32876634c06bb945232ff7895e8754cdecc5ee51 100755 --- a/cmake_targets/autotests/test_case_list.xml +++ b/cmake_targets/autotests/test_case_list.xml @@ -318,7 +318,8 @@ (Test25: Format 2 11-bit 2/273 PRB), (Test26: Format 2 12-bit 8/273 PRB), (Test27: Format 2 19-bit 8/273 PRB), - (Test28: Format 2 64-bit 16/273 PRB)</desc> + (Test28: Format 2 64-bit 16/273 PRB), + (Test29: Format 2 64-bit 16/273 PRB Delay 2us)</desc> <main_exec>nr_pucchsim</main_exec> <main_exec_args>-R 106 -i 1 -P 0 -b 1 -s-2 -n1000 -R 106 -i 1 -P 0 -b 2 -s-2 -n1000 @@ -347,8 +348,9 @@ -R 273 -z8 -i 1 -P 2 -b 11 -s6 -n1000 -R 273 -z8 -i 1 -P 2 -q8 -b 12 -s-3 -n1000 -R 273 -z8 -i 1 -P 2 -q8 -b 19 -s-3 -n1000 - -R 273 -z8 -i 1 -P 2 -q16 -b 64 -s-3 -n1000</main_exec_args> - <tags>test1 test2 test3 test4 test5 test6 test7 test8 test9 test10 test11 test12 test13 test14 test15 test16 test17 test18 test19 test20 test21 test22 test23 test24 test25 test26 test27 test28</tags> + -R 273 -z8 -i 1 -P 2 -q16 -b 64 -s-3 -n1000 + -R 273 -z8 -i 1 -P 2 -q16 -b 64 -s-3 -d 2 -n1000</main_exec_args> + <tags>test1 test2 test3 test4 test5 test6 test7 test8 test9 test10 test11 test12 test13 test14 test15 test16 test17 test18 test19 test20 test21 test22 test23 test24 test25 test26 test27 test28 test29</tags> <search_expr_true>PUCCH test OK</search_expr_true> <search_expr_false>segmentation fault|assertion|exiting|fatal</search_expr_false> <nruns>3</nruns> diff --git a/common/utils/LOG/log.c b/common/utils/LOG/log.c index 80244c0912c37caa26d5a87992b5f6642b5119a9..af6c30b858d96882436a18ce7077a5ad9d33523b 100644 --- a/common/utils/LOG/log.c +++ b/common/utils/LOG/log.c @@ -651,6 +651,10 @@ void log_dump(int component, wbuf=malloc((buffsize * 10) + 64 + MAX_LOG_TOTAL); break; + case LOG_DUMP_C16: + wbuf = malloc((buffsize * 10) + 64 + MAX_LOG_TOTAL); + break; + case LOG_DUMP_CHAR: default: wbuf=malloc((buffsize * 3 ) + 64 + MAX_LOG_TOTAL); @@ -669,6 +673,21 @@ void log_dump(int component, pos = pos + sprintf(wbuf+pos,"%04.4lf ", (double)((double *)buffer)[i]); break; + case LOG_DUMP_I16: { + int16_t *tmp = ((int16_t *)buffer) + i; + pos = pos + sprintf(wbuf + pos, "%d, ", *tmp); + } break; + + case LOG_DUMP_C16: { + int16_t *tmp = ((int16_t *)buffer) + i * 2; + pos = pos + sprintf(wbuf + pos, "(%d,%d), ", *tmp, *(tmp + 1)); + } break; + + case LOG_DUMP_C32: { + int32_t *tmp = ((int32_t *)buffer) + i * 2; + pos = pos + sprintf(wbuf + pos, "(%d,%d), ", *tmp, *(tmp + 1)); + } break; + case LOG_DUMP_CHAR: default: pos = pos + sprintf(wbuf+pos,"%02x ", (unsigned char)((unsigned char *)buffer)[i]); diff --git a/common/utils/LOG/log.h b/common/utils/LOG/log.h index 1add78c6f1029d74ae5545b50cf799202a8e8b17..f16ee296d478ac0186fb7e2d03a180b80720e190 100644 --- a/common/utils/LOG/log.h +++ b/common/utils/LOG/log.h @@ -335,6 +335,9 @@ int32_t write_file_matlab(const char *fname, const char *vname, void *data, int * @{*/ #define LOG_DUMP_CHAR 0 #define LOG_DUMP_DOUBLE 1 +#define LOG_DUMP_I16 2 +#define LOG_DUMP_C16 3 +#define LOG_DUMP_C32 4 // debugging macros #define LOG_F LOG_I /* because LOG_F was originaly to dump a message or buffer but is also used as a regular level...., to dump use LOG_DUMPMSG */ diff --git a/common/utils/T/tracer/hacks/dump_nack_signal.c b/common/utils/T/tracer/hacks/dump_nack_signal.c index f50e8a761a4807efe16a910feaed60ea37b8cb05..686425a4b93e3ff3e21a7b0256f1946ed30b5b40 100644 --- a/common/utils/T/tracer/hacks/dump_nack_signal.c +++ b/common/utils/T/tracer/hacks/dump_nack_signal.c @@ -4,7 +4,7 @@ #include "utils.h" #include "event.h" #include "database.h" -#include "config.h" +#include "configuration.h" #include "../T_defs.h" void usage(void) { diff --git a/common/utils/T/tracer/hacks/time_meas.c b/common/utils/T/tracer/hacks/time_meas.c index ae38dcd1772e6e28d39c13c207219455dfe09703..dccba130ad143226e70fb32f33012a00cfb7ac14 100644 --- a/common/utils/T/tracer/hacks/time_meas.c +++ b/common/utils/T/tracer/hacks/time_meas.c @@ -4,7 +4,7 @@ #include "utils.h" #include "event.h" #include "database.h" -#include "config.h" +#include "configuration.h" #include "../T_defs.h" void usage(void) diff --git a/common/utils/threadPool/task_ans.c b/common/utils/threadPool/task_ans.c index f7e876568f461551253ff62278bce35dcad9b92c..4f8fc265ab00de64d9da8f470f8248cda889496b 100644 --- a/common/utils/threadPool/task_ans.c +++ b/common/utils/threadPool/task_ans.c @@ -26,37 +26,50 @@ #include <stdint.h> #include <stdlib.h> #include <time.h> +#include "pthread_utils.h" +#include "errno.h" +#include <string.h> -void completed_task_ans(task_ans_t* task) -{ - DevAssert(task != NULL); - - int status = atomic_load_explicit(&task->status, memory_order_acquire); - AssertFatal(status == 0, "Task not expected to be finished here. Status = %d\n", status); +#define seminit(sem) \ + { \ + int ret = sem_init(&sem, 0, 0); \ + AssertFatal(ret == 0, "sem_init(): ret=%d, errno=%d (%s)\n", ret, errno, strerror(errno)); \ + } +#define sempost(sem) \ + { \ + int ret = sem_post(&sem); \ + AssertFatal(ret == 0, "sem_post(): ret=%d, errno=%d (%s)\n", ret, errno, strerror(errno)); \ + } +#define semwait(sem) \ + { \ + int ret = sem_wait(&sem); \ + AssertFatal(ret == 0, "sem_wait(): ret=%d, errno=%d (%s)\n", ret, errno, strerror(errno)); \ + } +#define semdestroy(sem) \ + { \ + int ret = sem_destroy(&sem); \ + AssertFatal(ret == 0, "sem_destroy(): ret=%d, errno=%d (%s)\n", ret, errno, strerror(errno)); \ + } - atomic_store_explicit(&task->status, 1, memory_order_release); +void init_task_ans(task_ans_t* ans, uint num_jobs) +{ + ans->counter = num_jobs; + seminit(ans->sem); } -void join_task_ans(task_ans_t* arr, size_t len) +void completed_many_task_ans(task_ans_t* ans, uint num_completed_jobs) { - DevAssert(len < INT_MAX); - DevAssert(arr != NULL); - - // Spin lock inspired by: - // The Art of Writing Efficient Programs: - // An advanced programmer's guide to efficient hardware utilization - // and compiler optimizations using C++ examples - const struct timespec ns = {0, 1}; - uint64_t i = 0; - int j = len - 1; - for (; j != -1; i++) { - for (; j != -1; --j) { - int const task_completed = 1; - if (atomic_load_explicit(&arr[j].status, memory_order_acquire) != task_completed) - break; - } - if (i % 8 == 0) { - nanosleep(&ns, NULL); - } + DevAssert(ans != NULL); + // Using atomic counter in contention scenario to avoid locking in producers + int num_jobs = atomic_fetch_sub_explicit(&ans->counter, num_completed_jobs, memory_order_relaxed); + if (num_jobs == num_completed_jobs) { + // Using semaphore to enable blocking call in join_task_ans + sempost(ans->sem); } } + +void join_task_ans(task_ans_t* ans) +{ + semwait(ans->sem); + semdestroy(ans->sem); +} diff --git a/common/utils/threadPool/task_ans.h b/common/utils/threadPool/task_ans.h index 4d1203555df868e462bb379530b1823e6e788780..2fa7d0fc83eae577c972658e4d415f3e1edeb836 100644 --- a/common/utils/threadPool/task_ans.h +++ b/common/utils/threadPool/task_ans.h @@ -21,7 +21,7 @@ #ifndef TASK_ANSWER_THREAD_POOL_H #define TASK_ANSWER_THREAD_POOL_H - +#include "pthread_utils.h" #ifdef __cplusplus extern "C" { #endif @@ -31,12 +31,15 @@ extern "C" { #include <stdatomic.h> #else #include <atomic> +#ifndef _Atomic #define _Atomic(X) std::atomic<X> +#endif #define _Alignas(X) alignas(X) #endif #include <stddef.h> #include <stdint.h> +#include <semaphore.h> #if defined(__i386__) || defined(__x86_64__) #define LEVEL1_DCACHE_LINESIZE 64 @@ -50,9 +53,17 @@ extern "C" { #error Unknown CPU architecture #endif +/** @brief + * A multi-producer - single-consumer synchronization mechanism built for efficiency under + * contention. + * + * @param sem semaphore to wait on + * @param counter atomic counter to keep track of the number of tasks completed. Atomic counter + * is used for efficiency under contention. + */ typedef struct { - // Avoid false sharing - _Alignas(LEVEL1_DCACHE_LINESIZE) _Atomic(int) status; + sem_t sem; + _Alignas(LEVEL1_DCACHE_LINESIZE) _Atomic(int) counter; } task_ans_t; typedef struct { @@ -62,10 +73,29 @@ typedef struct { task_ans_t* ans; } thread_info_tm_t; +/// @brief Initialize a task_ans_t struct +/// +/// @param ans task_ans_t struct +/// @param num_jobs number of tasks to wait for +void init_task_ans(task_ans_t* ans, unsigned int num_jobs); + +/// @brief Wait for all tasks to complete +/// @param ans task_ans_t struct +void join_task_ans(task_ans_t* arr); -void join_task_ans(task_ans_t* arr, size_t len); +/// @brief Mark a number of tasks as completed. +/// +/// @param ans task_ans_t struct +/// @param num_completed_jobs number of tasks to mark as completed +void completed_many_task_ans(task_ans_t* ans, uint num_completed_jobs); -void completed_task_ans(task_ans_t* task); +/// @brief Mark 1 tasks as completed. +/// +/// @param ans task_ans_t struct +static inline void completed_task_ans(task_ans_t* ans) +{ + completed_many_task_ans(ans, 1); +} #ifdef __cplusplus } diff --git a/common/utils/threadPool/test/test_thread-pool.c b/common/utils/threadPool/test/test_thread-pool.c index f1e3322ac97aad450eb1734acb184c85a39642f5..16d40bc552a0af2c26b0837731d514042fd6ad6a 100644 --- a/common/utils/threadPool/test/test_thread-pool.c +++ b/common/utils/threadPool/test/test_thread-pool.c @@ -114,18 +114,18 @@ int main() int nb_jobs = 4; for (int i = 0; i < 1000; i++) { int parall = nb_jobs; - task_ans_t task_ans[parall]; - memset(task_ans, 0, sizeof(task_ans)); + task_ans_t task_ans; + init_task_ans(&task_ans, parall); struct testData test_data[parall]; memset(test_data, 0, sizeof(test_data)); for (int j = 0; j < parall; j++) { task_t task = {.args = &test_data[j], .func = processing}; struct testData *x = (struct testData *)task.args; x->id = i; - x->task_ans = &task_ans[j]; + x->task_ans = &task_ans; pushTpool(&pool, task); } - join_task_ans(task_ans, parall); + join_task_ans(&task_ans); int sleepmax = 0; for (int j = 0; j < parall; j++) { if (test_data[j].sleepTime > sleepmax) { diff --git a/executables/nr-ue.c b/executables/nr-ue.c index 6da78819f5a6ee993a13e83ff5d7de5bf44213bb..e54af77ab6284424e1b24d1e522f13924b736fd5 100644 --- a/executables/nr-ue.c +++ b/executables/nr-ue.c @@ -642,8 +642,9 @@ static int UE_dl_preprocessing(PHY_VARS_NR_UE *UE, } } + bool dl_slot = false; if (proc->rx_slot_type == NR_DOWNLINK_SLOT || proc->rx_slot_type == NR_MIXED_SLOT) { - + dl_slot = true; if(UE->if_inst != NULL && UE->if_inst->dl_indication != NULL) { nr_downlink_indication_t dl_indication; nr_fill_dl_indication(&dl_indication, NULL, NULL, proc, UE, phy_data); @@ -656,7 +657,8 @@ static int UE_dl_preprocessing(PHY_VARS_NR_UE *UE, const int ack_nack_slot = (proc->nr_slot_rx + phy_data->dlsch[0].dlsch_config.k1_feedback) % UE->frame_parms.slots_per_frame; tx_wait_for_dlsch[ack_nack_slot]++; } - } else { + } + if (fp->frame_type == FDD || !dl_slot) { // good time to print statistics, we don't have to spend time to decode DCI if (proc->frame_rx % 128 == 0) { if (*stats_printed == false) { diff --git a/openair1/PHY/CODING/3gpplte_sse.c b/openair1/PHY/CODING/3gpplte_sse.c index c161682f7d43e6915f6ebecb0b1e20ddf6b859c3..79fd825a4b443ff31e2ded502fd412a39a1b8e87 100644 --- a/openair1/PHY/CODING/3gpplte_sse.c +++ b/openair1/PHY/CODING/3gpplte_sse.c @@ -349,8 +349,6 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, #ifdef DEBUG_TURBO_ENCODER printf("term: x0 %u, x1 %u, state1 %d\n",x[10],x[11],state1); #endif // DEBUG_TURBO_ENCODER - simde_mm_empty(); - simde_m_empty(); } void init_encoder_sse (void) { diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c index 77708f4489a7fc284c023f697760f3178ed13cee..57c53df1b1341c6cbd2f5d03761e629d8fd60640 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c @@ -1258,8 +1258,6 @@ unsigned char phy_threegpplte_turbo_decoder16avx2(int16_t *y, // fprintf(fdavx2,"crc %x, oldcrc %x\n",crc,oldcrc); - simde_mm_empty(); - simde_m_empty(); #ifdef DEBUG_LOGMAP fclose(fdavx2); diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c index fa1b55b45eee59d90e1094b58416b092f4fe970d..e6d1f87b4e70774bc7edd5573c8bc5a234e085aa 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c @@ -364,8 +364,6 @@ void compute_gamma(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity, simde_mm_extract_epi8(m11_128[k],15)); */ #endif - simde_mm_empty(); - simde_m_empty(); } #define L 40 @@ -1183,8 +1181,6 @@ void compute_alpha(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned sho break; } - simde_mm_empty(); - simde_m_empty(); } @@ -1747,8 +1743,6 @@ void compute_beta(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned shor break; } - simde_mm_empty(); - simde_m_empty(); } void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) { @@ -1910,8 +1904,6 @@ void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, ll } #endif - simde_mm_empty(); - simde_m_empty(); } diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c index c7f2c7663a3aff742658eae8c365cdd7af1d39b7..cae2093e9919081a425292599b5950739af7faab 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c @@ -1391,8 +1391,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fclose(fdsse4); #endif #if defined(__x86_64__) || defined(__i386__) - simde_mm_empty(); - simde_m_empty(); #endif if (iteration_cnt > max_iterations) set_abort(ab, true); diff --git a/openair1/PHY/CODING/TESTBENCH/pdcch_test.c b/openair1/PHY/CODING/TESTBENCH/pdcch_test.c index f4574bb0c523391ec3c960a2cbf34f6e0a423341..873e9e19bcfc8f93aa28269a6d1b7ef73bd191a3 100644 --- a/openair1/PHY/CODING/TESTBENCH/pdcch_test.c +++ b/openair1/PHY/CODING/TESTBENCH/pdcch_test.c @@ -246,7 +246,6 @@ int main(int argc, char *argv[]) set_taus_seed(0); ccodelte_init(); - ccodelte_init_inv(); phy_generate_viterbi_tables_lte(); lte_frame_parms = &(PHY_config->lte_frame_parms); diff --git a/openair1/PHY/CODING/ccoding_byte_lte.c b/openair1/PHY/CODING/ccoding_byte_lte.c index becdc590740a1e24721b22f0c67357afb560ee3d..4121cf2b3659c647eab82c7dc50662316084a489 100644 --- a/openair1/PHY/CODING/ccoding_byte_lte.c +++ b/openair1/PHY/CODING/ccoding_byte_lte.c @@ -32,9 +32,7 @@ static const unsigned short glte[] = {0133, 0171, 0165}; // {A,B} static const unsigned short glte_rev[] = {0155, 0117, 0127}; // {A,B} static const unsigned short gdab[] = {0133, 0171, 0145}; // {A,B} static const unsigned short gdab_rev[] = {0155, 0117, 0123}; // {A,B} -unsigned char ccodelte_table[128]; // for transmitter -unsigned char ccodelte_table_rev[128]; // for receiver - +static unsigned char ccodelte_table[128]; // for transmitter /************************************************************************* @@ -45,8 +43,6 @@ unsigned char ccodelte_table_rev[128]; // for receiver Trellis tail-biting is included here *************************************************************************/ - - void ccodelte_encode (int32_t numbits, uint8_t add_crc, @@ -238,7 +234,8 @@ void ccodelte_init(void) { } /* Input in LSB, followed by state in 6 MSBs */ -void ccodelte_init_inv(void) { +void ccodelte_init_inv(unsigned char ccodelte_table_rev[128]) +{ unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -279,7 +276,8 @@ void ccodedab_init(void) { } /* Input in LSB, followed by state in 6 MSBs */ -void ccodedab_init_inv(void) { +void ccodedab_init_inv(unsigned char ccodelte_table_rev[128]) +{ unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -299,7 +297,6 @@ void ccodedab_init_inv(void) { } } - /*****************************************************************/ /** Test program diff --git a/openair1/PHY/CODING/coding_defs.h b/openair1/PHY/CODING/coding_defs.h index 7da151fea939f93a239ba590374ff9911b6845e1..3c91892d8c883023230a6f6f3cdd945a45a991ae 100644 --- a/openair1/PHY/CODING/coding_defs.h +++ b/openair1/PHY/CODING/coding_defs.h @@ -304,17 +304,17 @@ void ccodelte_encode(int32_t numbits, uint8_t add_crc, uint8_t *inPtr, uint8_t * \brief This function initializes the generator polynomials for an LTE convolutional code.*/ void ccodelte_init(void); -/*!\fn void ccodelte_init_inv(void) +/*!\fn void ccodelte_init_inv(unsigned char ccodelte_table_rev[128]) \brief This function initializes the trellis structure for decoding an LTE convolutional code.*/ -void ccodelte_init_inv(void); +void ccodelte_init_inv(unsigned char ccodelte_table_rev[128]); /*!\fn void ccodelte_init(void) \brief This function initializes the generator polynomials for an DAB convolutional code (first 3 bits).*/ void ccodedab_init(void); -/*!\fn void ccodelte_init_inv(void) +/*!\fn void ccodelte_init_inv(unsigned char ccodelte_table_rev[128]) \brief This function initializes the trellis structure for decoding an DAB convolutional code (first 3 bits).*/ -void ccodedab_init_inv(void); +void ccodedab_init_inv(unsigned char ccodelte_table_rev[128]); /*!\fn void crcTableInit(void) \brief This function initializes the different crc tables.*/ diff --git a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_decoder.c b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_decoder.c index 616715269a4bc0567f8fdf4334707b87d798d78a..3d228da0089c441cae930af0a3c6d793856e056b 100644 --- a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_decoder.c +++ b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_decoder.c @@ -250,7 +250,7 @@ int nrLDPC_prepare_TB_decoding(nrLDPC_slot_decoding_parameters_t *nrLDPC_slot_de for (int r = 0; r < nrLDPC_TB_decoding_parameters->C; r++) { nrLDPC_decoding_parameters_t *rdata = &((nrLDPC_decoding_parameters_t *)t_info->buf)[t_info->len]; DevAssert(t_info->len < t_info->cap); - rdata->ans = &t_info->ans[t_info->len]; + rdata->ans = t_info->ans; t_info->len += 1; decParams.R = nrLDPC_TB_decoding_parameters->segments[r].R; @@ -309,19 +309,16 @@ int32_t nrLDPC_coding_decoder(nrLDPC_slot_decoding_parameters_t *nrLDPC_slot_dec nbSegments += nrLDPC_TB_decoding_parameters->C; } nrLDPC_decoding_parameters_t arr[nbSegments]; - task_ans_t ans[nbSegments]; - memset(ans, 0, nbSegments * sizeof(task_ans_t)); - thread_info_tm_t t_info = {.buf = (uint8_t *)arr, .len = 0, .cap = nbSegments, .ans = ans}; + task_ans_t ans; + init_task_ans(&ans, nbSegments); + thread_info_tm_t t_info = {.buf = (uint8_t *)arr, .len = 0, .cap = nbSegments, .ans = &ans}; - int nbDecode = 0; for (int pusch_id = 0; pusch_id < nrLDPC_slot_decoding_parameters->nb_TBs; pusch_id++) { - nbDecode += nrLDPC_prepare_TB_decoding(nrLDPC_slot_decoding_parameters, pusch_id, &t_info); + (void)nrLDPC_prepare_TB_decoding(nrLDPC_slot_decoding_parameters, pusch_id, &t_info); } - DevAssert(nbDecode == t_info.len); - - // Execute thread poool tasks - join_task_ans(t_info.ans, t_info.len); + // Execute thread pool tasks + join_task_ans(t_info.ans); for (int pusch_id = 0; pusch_id < nrLDPC_slot_decoding_parameters->nb_TBs; pusch_id++) { nrLDPC_TB_decoding_parameters_t *nrLDPC_TB_decoding_parameters = &nrLDPC_slot_decoding_parameters->TBs[pusch_id]; diff --git a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_encoder.c b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_encoder.c index 4d740de2cddc8f7e1a3079a2d1ee7c0d87fd4305..4a4e6beaafe07672368c86ada0199edacd64a667 100644 --- a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_encoder.c +++ b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_segment/nrLDPC_coding_segment_encoder.c @@ -189,7 +189,7 @@ static int nrLDPC_prepare_TB_encoding(nrLDPC_slot_encoding_parameters_t *nrLDPC_ for (int j = 0; j < n_seg; j++) { ldpc8blocks_args_t *perJobImpp = &((ldpc8blocks_args_t *)t_info->buf)[t_info->len]; DevAssert(t_info->len < t_info->cap); - impp.ans = &t_info->ans[t_info->len]; + impp.ans = t_info->ans; t_info->len += 1; impp.macro_num = j; @@ -211,19 +211,19 @@ int nrLDPC_coding_encoder(nrLDPC_slot_encoding_parameters_t *nrLDPC_slot_encodin nbTasks += n_seg; } ldpc8blocks_args_t arr[nbTasks]; - task_ans_t ans[nbTasks]; - memset(ans, 0, nbTasks * sizeof(task_ans_t)); - thread_info_tm_t t_info = {.buf = (uint8_t *)arr, .len = 0, .cap = nbTasks, .ans = ans}; + task_ans_t ans; + init_task_ans(&ans, nbTasks); + thread_info_tm_t t_info = {.buf = (uint8_t *)arr, .len = 0, .cap = nbTasks, .ans = &ans}; int nbEncode = 0; for (int dlsch_id = 0; dlsch_id < nrLDPC_slot_encoding_parameters->nb_TBs; dlsch_id++) { nbEncode += nrLDPC_prepare_TB_encoding(nrLDPC_slot_encoding_parameters, dlsch_id, &t_info); } - - DevAssert(nbEncode == t_info.len); - - // Execute thread poool tasks - join_task_ans(ans, nbEncode); + if (nbEncode < nbTasks) { + completed_many_task_ans(&ans, nbTasks - nbEncode); + } + // Execute thread pool tasks + join_task_ans(&ans); return 0; } diff --git a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_xdma/nrLDPC_coding_xdma.c b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_xdma/nrLDPC_coding_xdma.c index 793825f14f3e946972cdde130006833941099f3c..5df0bd4406f82b65edc2c91f8f2850c9065bf95c 100644 --- a/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_xdma/nrLDPC_coding_xdma.c +++ b/openair1/PHY/CODING/nrLDPC_coding/nrLDPC_coding_xdma/nrLDPC_coding_xdma.c @@ -278,7 +278,7 @@ int decoder_xdma(nrLDPC_TB_decoding_parameters_t *TB_params, int frame_rx, int s DevAssert(num_threads_prepare == t_info.len); // wait for the prepare jobs to complete - join_task_ans(t_info.ans, t_info.len); + join_task_ans(t_info.ans); for (uint32_t job = 0; job < num_threads_prepare; job++) { args_fpga_decode_prepare_t *args = &arr[job]; diff --git a/openair1/PHY/CODING/viterbi.c b/openair1/PHY/CODING/viterbi.c index 20242c89148459d8b23352a203ca18dc8dcfd184..4f8df56fc26611a465fda4b0374e5597959edaaa 100644 --- a/openair1/PHY/CODING/viterbi.c +++ b/openair1/PHY/CODING/viterbi.c @@ -314,7 +314,6 @@ void phy_viterbi_dot11_sse2(char *y,unsigned char *decoded_bytes,unsigned short } } - simde_mm_empty(); } #ifdef TEST_DEBUG diff --git a/openair1/PHY/CODING/viterbi_lte.c b/openair1/PHY/CODING/viterbi_lte.c index 4efab84f497037bd999447b3756a4bbfd5b7993c..b074e1c91448605e7e8d1c44bba912749610e3a9 100644 --- a/openair1/PHY/CODING/viterbi_lte.c +++ b/openair1/PHY/CODING/viterbi_lte.c @@ -40,8 +40,7 @@ #include "PHY/sse_intrin.h" - -extern uint8_t ccodelte_table[128],ccodelte_table_rev[128]; +#include "coding_defs.h" static int8_t m0_table[64*16*16*16] __attribute__ ((aligned(16))); static int8_t m1_table[64*16*16*16] __attribute__ ((aligned(16))); @@ -50,6 +49,8 @@ static int8_t m1_table[64*16*16*16] __attribute__ ((aligned(16))); // Set up Viterbi tables for SSE2 implementation void phy_generate_viterbi_tables_lte( void ) { + uint8_t ccodelte_table_rev[128]; + ccodelte_init_inv(ccodelte_table_rev); int8_t w[8],in0,in1,in2; uint8_t state,index0,index1; @@ -126,52 +127,29 @@ void print_shorts(simde__m128i x,char *s) { void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n) { simde__m128i TB[4 * 8192]; - simde__m128i *m0_ptr, *m1_ptr, *TB_ptr = &TB[0]; - - simde__m128i metrics0_15, metrics16_31, metrics32_47, metrics48_63, even0_30a, even0_30b, even32_62a, even32_62b, odd1_31a, - odd1_31b, odd33_63a, odd33_63b, TBeven0_30, TBeven32_62, TBodd1_31, TBodd33_63; - - simde__m128i min_state, min_state2; - - int8_t *in = y; - uint8_t prev_state0,maxm,s; - static uint8_t *TB_ptr2; - uint32_t table_offset; - uint8_t iter; - int16_t position; - - // set initial metrics - //debug_msg("Doing viterbi\n"); - - metrics0_15 = simde_mm_setzero_si128(); - metrics16_31 = simde_mm_setzero_si128(); - metrics32_47 = simde_mm_setzero_si128(); - metrics48_63 = simde_mm_setzero_si128(); - - for (iter=0; iter<2; iter++) { - in = y; - TB_ptr=&TB[0]; - - for (position=0; position<n; position++) { - + simde__m128i metrics0_15 = {0}, metrics16_31 = {0}, metrics32_47 = {0}, metrics48_63 = {0}; + for (int iter = 0; iter < 2; iter++) { + int8_t *in = y; + simde__m128i *TB_ptr = TB; + for (int position = 0; position < n; position++) { // get branch metric offsets for the 64 states - table_offset = (in[0]+8 + ((in[1]+8)<<4) + ((in[2]+8)<<8))<<6; + uint table_offset = (in[0] + 8 + ((in[1] + 8) << 4) + ((in[2] + 8) << 8)) << 6; - m0_ptr = (simde__m128i *)&m0_table[table_offset]; - m1_ptr = (simde__m128i *)&m1_table[table_offset]; + simde__m128i *m0_ptr = (simde__m128i *)&m0_table[table_offset]; + simde__m128i *m1_ptr = (simde__m128i *)&m1_table[table_offset]; // even states - even0_30a = simde_mm_adds_epu8(metrics0_15, m0_ptr[0]); - even32_62a = simde_mm_adds_epu8(metrics16_31, m0_ptr[1]); - even0_30b = simde_mm_adds_epu8(metrics32_47, m0_ptr[2]); - even32_62b = simde_mm_adds_epu8(metrics48_63, m0_ptr[3]); + simde__m128i even0_30a = simde_mm_adds_epu8(metrics0_15, m0_ptr[0]); + simde__m128i even32_62a = simde_mm_adds_epu8(metrics16_31, m0_ptr[1]); + simde__m128i even0_30b = simde_mm_adds_epu8(metrics32_47, m0_ptr[2]); + simde__m128i even32_62b = simde_mm_adds_epu8(metrics48_63, m0_ptr[3]); // odd states - odd1_31a = simde_mm_adds_epu8(metrics0_15, m1_ptr[0]); - odd33_63a = simde_mm_adds_epu8(metrics16_31, m1_ptr[1]); - odd1_31b = simde_mm_adds_epu8(metrics32_47, m1_ptr[2]); - odd33_63b = simde_mm_adds_epu8(metrics48_63, m1_ptr[3]); + simde__m128i odd1_31a = simde_mm_adds_epu8(metrics0_15, m1_ptr[0]); + simde__m128i odd33_63a = simde_mm_adds_epu8(metrics16_31, m1_ptr[1]); + simde__m128i odd1_31b = simde_mm_adds_epu8(metrics32_47, m1_ptr[2]); + simde__m128i odd33_63b = simde_mm_adds_epu8(metrics48_63, m1_ptr[3]); // select maxima @@ -182,33 +160,31 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n) // Traceback information - TBeven0_30 = simde_mm_cmpeq_epi8(even0_30a, even0_30b); - TBeven32_62 = simde_mm_cmpeq_epi8(even32_62a, even32_62b); - TBodd1_31 = simde_mm_cmpeq_epi8(odd1_31a, odd1_31b); - TBodd33_63 = simde_mm_cmpeq_epi8(odd33_63a, odd33_63b); + simde__m128i TBeven0_30 = simde_mm_cmpeq_epi8(even0_30a, even0_30b); + simde__m128i TBeven32_62 = simde_mm_cmpeq_epi8(even32_62a, even32_62b); + simde__m128i TBodd1_31 = simde_mm_cmpeq_epi8(odd1_31a, odd1_31b); + simde__m128i TBodd33_63 = simde_mm_cmpeq_epi8(odd33_63a, odd33_63b); metrics0_15 = simde_mm_unpacklo_epi8(even0_30a, odd1_31a); metrics16_31 = simde_mm_unpackhi_epi8(even0_30a, odd1_31a); metrics32_47 = simde_mm_unpacklo_epi8(even32_62a, odd33_63a); metrics48_63 = simde_mm_unpackhi_epi8(even32_62a, odd33_63a); - TB_ptr[0] = simde_mm_unpacklo_epi8(TBeven0_30, TBodd1_31); - TB_ptr[1] = simde_mm_unpackhi_epi8(TBeven0_30, TBodd1_31); - TB_ptr[2] = simde_mm_unpacklo_epi8(TBeven32_62, TBodd33_63); - TB_ptr[3] = simde_mm_unpackhi_epi8(TBeven32_62, TBodd33_63); - - in+=3; - TB_ptr += 4; + *TB_ptr++ = simde_mm_unpacklo_epi8(TBeven0_30, TBodd1_31); + *TB_ptr++ = simde_mm_unpackhi_epi8(TBeven0_30, TBodd1_31); + *TB_ptr++ = simde_mm_unpacklo_epi8(TBeven32_62, TBodd33_63); + *TB_ptr++ = simde_mm_unpackhi_epi8(TBeven32_62, TBodd33_63); + in += 3; // rescale by subtracting minimum /**************************************************** USE SSSE instruction phminpos!!!!!!! ****************************************************/ - min_state = simde_mm_min_epu8(metrics0_15, metrics16_31); + simde__m128i min_state = simde_mm_min_epu8(metrics0_15, metrics16_31); min_state = simde_mm_min_epu8(min_state, metrics32_47); min_state = simde_mm_min_epu8(min_state, metrics48_63); - min_state2 = min_state; + simde__m128i min_state2 = min_state; min_state = simde_mm_unpacklo_epi8(min_state, min_state); min_state2 = simde_mm_unpackhi_epi8(min_state2, min_state2); min_state = simde_mm_min_epu8(min_state, min_state2); @@ -237,37 +213,34 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n) } // iteration // Traceback - prev_state0 = 0; - maxm = 0; - - for (s=0; s<16; s++) - if (((uint8_t *)&metrics0_15)[s] > maxm) { - maxm = ((uint8_t *)&metrics0_15)[s]; + uint prev_state0 = 0; + uint maxm = 0; + uint s = 0; + for (uint8_t *ptr = (uint8_t *)&metrics0_15; s < 16; s++, ptr++) + if (*ptr > maxm) { + maxm = *ptr; prev_state0 = s; } - for (s=0; s<16; s++) - if (((uint8_t *)&metrics16_31)[s] > maxm) { - maxm = ((uint8_t *)&metrics16_31)[s]; - prev_state0 = s+16; + for (uint8_t *ptr = (uint8_t *)&metrics16_31; s < 32; s++, ptr++) + if (*ptr > maxm) { + maxm = *ptr; + prev_state0 = s; } - - for (s=0; s<16; s++) - if (((uint8_t *)&metrics32_47)[s] > maxm) { - maxm = ((uint8_t *)&metrics32_47)[s]; - prev_state0 = s+32; + for (uint8_t *ptr = (uint8_t *)&metrics32_47; s < 48; s++, ptr++) + if (*ptr > maxm) { + maxm = *ptr; + prev_state0 = s; } - - for (s=0; s<16; s++) - if (((uint8_t *)&metrics48_63)[s] > maxm) { - maxm = ((uint8_t *)&metrics48_63)[s]; - prev_state0 = s+48; + for (uint8_t *ptr = (uint8_t *)&metrics48_63; s < 64; s++, ptr++) + if (*ptr > maxm) { + maxm = *ptr; + prev_state0 = s; } - TB_ptr2 = (uint8_t *)&TB[(n-1)*4]; - - for (position = n-1 ; position>-1; position--) { + uint8_t *TB_ptr2 = (uint8_t *)&TB[(n - 1) * 4]; + for (int position = n - 1; position > -1; position--) { decoded_bytes[(position)>>3] += (prev_state0 & 0x1)<<(7-(position & 0x7)); @@ -278,9 +251,6 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n) TB_ptr2-=64; } - - simde_mm_empty(); - simde_m_empty(); } #ifdef TEST_DEBUG @@ -306,10 +276,8 @@ int test_viterbi(uint8_t dabflag) if (dabflag==0) { ccodelte_init(); - ccodelte_init_inv(); } else { ccodedab_init(); - ccodedab_init_inv(); printf("Running with DAB polynomials\n"); } diff --git a/openair1/PHY/INIT/init_top.c b/openair1/PHY/INIT/init_top.c index 15b27b4db1dd18afd8bff61e862379b76889c6bb..b4ac0617a6673dd8c79061bbafc9ecbe05d9b32c 100644 --- a/openair1/PHY/INIT/init_top.c +++ b/openair1/PHY/INIT/init_top.c @@ -63,7 +63,6 @@ void generate_qpsk_table(void) { void init_lte_top(LTE_DL_FRAME_PARMS *frame_parms) { ccodelte_init(); - ccodelte_init_inv(); phy_generate_viterbi_tables_lte(); load_codinglib(); generate_ul_ref_sigs(); diff --git a/openair1/PHY/INIT/nr_init.c b/openair1/PHY/INIT/nr_init.c index ca14ca99bea5b39f67e6729bfcbf743cf063b435..581338fa454f42b3bdadadddd32c25f30cacc5e2 100644 --- a/openair1/PHY/INIT/nr_init.c +++ b/openair1/PHY/INIT/nr_init.c @@ -138,6 +138,7 @@ void phy_init_nr_gNB(PHY_VARS_gNB *gNB) gNB->max_nb_pdsch = MAX_MOBILES_PER_GNB; init_delay_table(fp->ofdm_symbol_size, MAX_DELAY_COMP, NR_MAX_OFDM_SYMBOL_SIZE, fp->delay_table); + init_delay_table(128, MAX_DELAY_COMP, 128, fp->delay_table128); gNB->bad_pucch = 0; if (gNB->TX_AMP == 0) diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c index c9ec2449e16f964b8e018bf1239c541c50763c6d..6abe02c941513a85ee4956430758fa875c01c5fc 100644 --- a/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c +++ b/openair1/PHY/LTE_ESTIMATION/lte_ue_measurements.c @@ -493,8 +493,6 @@ void conjch0_mult_ch1(int *ch0, dl_ch1_128+=1; ch0conj_ch1_128+=1; } - simde_mm_empty(); - simde_m_empty(); } void construct_HhH_elements(int *ch0conj_ch0, //00_00 @@ -567,8 +565,6 @@ void construct_HhH_elements(int *ch0conj_ch0, //00_00 after_mf_10_128+=1; after_mf_11_128+=1; } - simde_mm_empty(); - simde_m_empty(); } @@ -595,8 +591,6 @@ void squared_matrix_element(int32_t *Hh_h_00, Hh_h_00_sq_128+=1; Hh_h_00_128+=1; } - simde_mm_empty(); - simde_m_empty(); } @@ -645,8 +639,6 @@ void det_HhH(int32_t *after_mf_00, //after_mf_10_128+=1; after_mf_11_128+=1; } - simde_mm_empty(); - simde_m_empty(); } void numer(int32_t *Hh_h_00_sq, @@ -691,8 +683,6 @@ void numer(int32_t *Hh_h_00_sq, h_h_10_sq_128+=1; h_h_11_sq_128+=1; } - simde_mm_empty(); - simde_m_empty(); } void dlsch_channel_level_TM34_meas(int *ch00, @@ -768,8 +758,6 @@ void dlsch_channel_level_TM34_meas(int *ch00, avg_0[0] = min (avg_0[0], avg_1[0]); avg_1[0] = avg_0[0]; - simde_mm_empty(); - simde_m_empty(); } @@ -1339,8 +1327,6 @@ void lte_ue_measurements(PHY_VARS_UE *ue, // printf("in lte_ue_measurements: selected rx_antenna[eNB_id==0]:%u\n", ue->measurements.selected_rx_antennas[eNB_id][i]); } // eNB_id loop - simde_mm_empty(); - simde_m_empty(); } diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c b/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c index 52905cfaf9e39084126df17eed41fea2ad4d671d..8e98677872d61278604118ad63d0493da6adec64 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_coding.c @@ -348,12 +348,12 @@ int dlsch_encoding(PHY_VARS_eNB *eNB, } turboEncode_t arr[hadlsch->C]; - task_ans_t ans[hadlsch->C]; - memset(ans, 0, hadlsch->C * sizeof(task_ans_t)); + task_ans_t ans; + init_task_ans(&ans, hadlsch->C); for (int r = 0, r_offset = 0; r < hadlsch->C; r++) { turboEncode_t *rdata = &arr[r]; - rdata->ans = &ans[r]; + rdata->ans = &ans; rdata->input=hadlsch->c[r]; rdata->Kr_bytes= ( r<hadlsch->Cminus ? hadlsch->Kminus : hadlsch->Kplus) >>3; @@ -382,7 +382,7 @@ int dlsch_encoding(PHY_VARS_eNB *eNB, r_offset += Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); } - join_task_ans(ans, hadlsch->C); + join_task_ans(&ans); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_ENCODING, VCD_FUNCTION_OUT); return(0); @@ -449,12 +449,12 @@ int dlsch_encoding_fembms_pmch(PHY_VARS_eNB *eNB, return(-1); } turboEncode_t arr[hadlsch->C]; - task_ans_t ans[hadlsch->C]; - memset(ans, 0, hadlsch->C * sizeof(task_ans_t)); + task_ans_t ans; + init_task_ans(&ans, hadlsch->C); for (int r = 0, r_offset = 0; r < hadlsch->C; r++) { turboEncode_t *rdata = &arr[r]; - rdata->ans = &ans[r]; + rdata->ans = &ans; rdata->input=hadlsch->c[r]; rdata->Kr_bytes= ( r<hadlsch->Cminus ? hadlsch->Kminus : hadlsch->Kplus) >>3; @@ -483,7 +483,7 @@ int dlsch_encoding_fembms_pmch(PHY_VARS_eNB *eNB, r_offset += Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); } - join_task_ans(ans, hadlsch->C); + join_task_ans(&ans); return(0); } diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c index 9755455762dfaef79356e210af0ff8e75a60c55f..6b499f14db7ba69d26590e965ab52cd24f4a43ee 100644 --- a/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_decoding.c @@ -341,7 +341,7 @@ static int ulsch_decoding_data(PHY_VARS_eNB *eNB, turboDecode_t *rdata = &((turboDecode_t *)t_info->buf)[t_info->len]; DevAssert(t_info->len < t_info->cap); - rdata->ans = &t_info->ans[t_info->len]; + rdata->ans = t_info->ans; t_info->len += 1; rdata->eNB=eNB; diff --git a/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c index 0370285e0754172295cee582a290b1dd63a48db2..25068537054b769c1dbd0ba25e870fb8944606cf 100644 --- a/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c +++ b/openair1/PHY/LTE_TRANSPORT/ulsch_demodulation.c @@ -216,8 +216,6 @@ int32_t ulsch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, (*llrp128)++; } - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -245,8 +243,6 @@ void ulsch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, // print_bytes("rxF[i+1]",&rxF[i+1]); } - simde_mm_empty(); - simde_m_empty(); } void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, @@ -289,8 +285,6 @@ void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, (*llrp32)+=12; } - simde_mm_empty(); - simde_m_empty(); } void ulsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, @@ -347,8 +341,6 @@ void ulsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } void ulsch_extract_rbs_single(int32_t **rxdataF, @@ -524,8 +516,6 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } void ulsch_channel_level(int32_t **drs_ch_estimates_ext, @@ -556,8 +546,6 @@ void ulsch_channel_level(int32_t **drs_ch_estimates_ext, ((float *)&avg128U)[3])/(float)(nb_rb*12)); } - simde_mm_empty(); - simde_m_empty(); } static int ulsch_power_LUT[750]; diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c index 381dfc0cdff6b1897865baa3d16c47e00dffb986..f3d14b80eacc9f2f23881c3e8078614c640385fb 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/dci_ue.c @@ -384,8 +384,6 @@ void pdcch_channel_level(int32_t **dl_ch_estimates_ext, // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); } - simde_mm_empty(); - simde_m_empty(); } @@ -433,8 +431,6 @@ void pdcch_detection_mrc_i(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } @@ -1078,8 +1074,6 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext, } - simde_mm_empty(); - simde_m_empty(); } void pdcch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, @@ -1103,8 +1097,6 @@ void pdcch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c index b9ba9347cf07686b988d5be972a8f4fc45ca1588..0d13a1c38d6b881ef7eb738ba8d91a8c9d881308 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c @@ -1481,8 +1481,6 @@ void dlsch_channel_compensation(int **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } void dlsch_channel_compensation_core(int **rxdataF_ext, @@ -1666,8 +1664,6 @@ void dlsch_channel_compensation_core(int **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } @@ -1706,8 +1702,6 @@ void prec2A_TM56_128(unsigned char pmi,simde__m128i *ch0,simde__m128i *ch1) { ch0[0] = simde_mm_mulhi_epi16(ch0[0],amp); ch0[0] = simde_mm_slli_epi16(ch0[0],1); - simde_mm_empty(); - simde_m_empty(); } // precoding is stream 0 .5(1,1) .5(1,-1) .5(1,1) .5(1,-1) // stream 1 .5(1,-1) .5(1,1) .5(1,-1) .5(1,1) @@ -1740,8 +1734,6 @@ void prec2A_TM3_128(simde__m128i *ch0,simde__m128i *ch1) { //ch1[0] = simde_mm_srai_epi16(ch1[0],1); // print_shorts("prec2A_TM3 ch0 (after):",ch0); // print_shorts("prec2A_TM3 ch1 (after):",ch1); - simde_mm_empty(); - simde_m_empty(); } // pmi = 0 => stream 0 (1,1), stream 1 (1,-1) @@ -1782,8 +1774,6 @@ void prec2A_TM4_128(int pmi,simde__m128i *ch0,simde__m128i *ch1) { // ch1[0] = simde_mm_srai_epi16(ch1[0],1); //divide by 2 //print_shorts("prec2A_TM4 ch0 (end):",ch0); //print_shorts("prec2A_TM4 ch1 (end):",ch1); - simde_mm_empty(); - simde_m_empty(); // print_shorts("prec2A_TM4 ch0 (end):",ch0); //print_shorts("prec2A_TM4 ch1 (end):",ch1); } @@ -2359,8 +2349,6 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,measurements->n0_power_tot); // printf("eNB_id %d, symbol %d: precoded CQI %d dB\n",eNB_id,symbol, // measurements->precoded_cqi_dB[eNB_id][0]); - simde_mm_empty(); - simde_m_empty(); } @@ -2453,8 +2441,6 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } @@ -2526,8 +2512,6 @@ void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } void dlsch_detection_mrc_TM34(LTE_DL_FRAME_PARMS *frame_parms, @@ -2651,8 +2635,6 @@ void dlsch_detection_mrc_TM34(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } void dlsch_scale_channel(int **dl_ch_estimates_ext, @@ -2765,8 +2747,6 @@ void dlsch_channel_level(int **dl_ch_estimates_ext, ((int32_t *)&avg128D)[3])/y; } - simde_mm_empty(); - simde_m_empty(); } void dlsch_channel_level_core(int **dl_ch_estimates_ext, @@ -2809,8 +2789,6 @@ void dlsch_channel_level_core(int **dl_ch_estimates_ext, //printf("Channel level [%d]: %d\n",aatx*n_rx + aarx, avg[aatx*n_rx + aarx]); } - simde_mm_empty(); - simde_m_empty(); /* FIXME This part needs to be adapted like the one above */ } @@ -2857,8 +2835,6 @@ void dlsch_channel_level_median(int **dl_ch_estimates_ext, } } - simde_mm_empty(); - simde_m_empty(); } void mmse_processing_oai(LTE_UE_PDSCH *pdsch_vars, @@ -3189,8 +3165,6 @@ void dlsch_channel_aver_band(int **dl_ch_estimates_ext, } } - simde_mm_empty(); - simde_m_empty(); } void rxdataF_to_float(int32_t **rxdataF_ext, @@ -3558,8 +3532,6 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, // printf("From Chan_level aver stream 1 final =%d\n", avg_1[0]); avg_0[0] = min (avg_0[0], avg_1[0]); avg_1[0] = avg_0[0]; - simde_mm_empty(); - simde_m_empty(); } //compute average channel_level of effective (precoded) channel @@ -3624,8 +3596,6 @@ void dlsch_channel_level_TM56(int **dl_ch_estimates_ext, // choose maximum of the 2 effective channels avg[0] = cmax(avg[0],avg[1]); - simde_mm_empty(); - simde_m_empty(); } //compute average channel_level for TM7 @@ -3684,8 +3654,6 @@ void dlsch_channel_level_TM7(int **dl_bf_ch_estimates_ext, // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); } - simde_mm_empty(); - simde_m_empty(); } //#define ONE_OVER_2_Q15 16384 void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms, @@ -3753,8 +3721,6 @@ void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } //============================================================================================== @@ -5587,8 +5553,6 @@ unsigned short dlsch_extract_rbs_TM7(int **rxdataF, } } - simde_mm_empty(); - simde_m_empty(); return(nb_rb/frame_parms->nb_antennas_rx); } diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c index 9995f9b8d9d50fbb720b2a95eb4286ae7fbd92f2..72422d4dcf2a45653a4937f4d462270753da7ff9 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation_avx2.c @@ -1668,8 +1668,6 @@ void qam64_qam16_avx2(short *stream0_in, } - simde_mm_empty(); - simde_m_empty(); } @@ -3499,8 +3497,6 @@ void qam64_qam64_avx2(int32_t *stream0_in, } - simde_mm_empty(); - simde_m_empty(); } -#endif \ No newline at end of file +#endif diff --git a/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c index cf62a3a945136fc17ff97dee85defa7b388c7359..9eefe212c70cf38d5def31d1f7e2e6dde8346f0b 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/pbch_ue.c @@ -185,8 +185,6 @@ int pbch_channel_level(int **dl_ch_estimates_ext, //msg("Channel level : %d, %d\n",avg1, avg2); } - simde_mm_empty(); - simde_m_empty(); return(avg2); } @@ -277,8 +275,6 @@ void pbch_channel_compensation(int **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } void pbch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, @@ -302,8 +298,6 @@ void pbch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } void pbch_unscrambling(LTE_DL_FRAME_PARMS *frame_parms, diff --git a/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c index a3b33cf54b87adf7128130c4a880130e1c9863c0..04546b2a193b76574c2e4f01d6cba3ad6b71333e 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/pmch_ue.c @@ -244,8 +244,6 @@ void mch_channel_level(int **dl_ch_estimates_ext, // printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); } - simde_mm_empty(); - simde_m_empty(); } void mch_channel_level_khz_1dot25(int **dl_ch_estimates_ext, @@ -285,8 +283,6 @@ void mch_channel_level_khz_1dot25(int **dl_ch_estimates_ext, //printf("Channel level : %d\n",avg[(aatx<<1)+aarx]); } - simde_mm_empty(); - simde_m_empty(); } @@ -393,8 +389,6 @@ void mch_channel_compensation(int **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } @@ -501,8 +495,6 @@ void mch_channel_compensation_khz_1dot25(int **rxdataF_ext, } } - simde_mm_empty(); - simde_m_empty(); } @@ -533,8 +525,6 @@ void mch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } @@ -564,8 +554,6 @@ void mch_detection_mrc_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } @@ -603,8 +591,6 @@ int mch_qpsk_llr(LTE_DL_FRAME_PARMS *frame_parms, } *llr32p = (short *)llr32; - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -634,8 +620,6 @@ int mch_qpsk_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, } *llr32p = (short *)llr32; - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -700,8 +684,6 @@ void mch_16qam_llr(LTE_DL_FRAME_PARMS *frame_parms, llr32 += 8; } - simde_mm_empty(); - simde_m_empty(); } void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, @@ -750,8 +732,6 @@ void mch_16qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, llr32 += 8; } - simde_mm_empty(); - simde_m_empty(); } //---------------------------------------------------------------------------------------------- @@ -843,8 +823,6 @@ void mch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, } *llr_save = llr; - simde_mm_empty(); - simde_m_empty(); } void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, @@ -924,8 +902,6 @@ void mch_64qam_llr_khz_1dot25(LTE_DL_FRAME_PARMS *frame_parms, } *llr_save = llr; - simde_mm_empty(); - simde_m_empty(); } int avg_pmch[4]; diff --git a/openair1/PHY/LTE_UE_TRANSPORT/rar_tools_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/rar_tools_ue.c index 44855e551e80b22b18ea73f944051190d3ad6ad4..ad513a424e124b84214d3fbfff86f717f3756bd7 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/rar_tools_ue.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/rar_tools_ue.c @@ -39,7 +39,7 @@ #include "PHY/LTE_TRANSPORT/transport_vars.h" #include "assertions.h" -static int8_t delta_PUSCH_msg2[8] = {-6, -4, -2, 0, 2, 4, 6, 8}; +static const int8_t delta_PUSCH_msg2[8] = {-6, -4, -2, 0, 2, 4, 6, 8}; int generate_ue_ulsch_params_from_rar(PHY_VARS_UE *ue, UE_rxtx_proc_t *proc, diff --git a/openair1/PHY/LTE_UE_TRANSPORT/sss_ue.c b/openair1/PHY/LTE_UE_TRANSPORT/sss_ue.c index 79b20dc10c8f26165e6b1f06fe49156c29436d69..6d155f896f9a1af2b2b05fde9fd6c52ed62d1a7e 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/sss_ue.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/sss_ue.c @@ -225,26 +225,9 @@ int pss_sss_extract(PHY_VARS_UE *phy_vars_ue, return _do_pss_sss_extract(phy_vars_ue, pss_ext, sss_ext, 1 /* doPss */, 1 /* doSss */, subframe); } -int pss_only_extract(PHY_VARS_UE *phy_vars_ue, - int32_t pss_ext[4][72], - uint8_t subframe) -{ - static int32_t dummy[4][72]; - return _do_pss_sss_extract(phy_vars_ue, pss_ext, dummy, 1 /* doPss */, 0 /* doSss */, subframe); -} - - -int sss_only_extract(PHY_VARS_UE *phy_vars_ue, - int32_t sss_ext[4][72], - uint8_t subframe) -{ - static int32_t dummy[4][72]; - return _do_pss_sss_extract(phy_vars_ue, dummy, sss_ext, 0 /* doPss */, 1 /* doSss */, subframe); -} - -int16_t phase_re[7] = {16383, 25101, 30791, 32767, 30791, 25101, 16383}; -int16_t phase_im[7] = {-28378, -21063, -11208, 0, 11207, 21062, 28377}; +static const int16_t phase_re[7] = {16383, 25101, 30791, 32767, 30791, 25101, 16383}; +static const int16_t phase_im[7] = {-28378, -21063, -11208, 0, 11207, 21062, 28377}; int rx_sss(PHY_VARS_UE *ue,int32_t *tot_metric,uint8_t *flip_max,uint8_t *phase_max) diff --git a/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h b/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h index 18eb375f62b753f7347cceb103c8583689cacf8b..9aab56e52a54d5bdc998545cdd777125951b9e8a 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h +++ b/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h @@ -1082,16 +1082,6 @@ int pss_sss_extract(PHY_VARS_UE *phy_vars_ue, int32_t sss_ext[4][72], uint8_t subframe); -/*! \brief Extract only PSS resource elements - @param phy_vars_ue Pointer to UE variables - @param[out] pss_ext contain the PSS signals after the extraction -@param subframe - @returns 0 on success -*/ -int pss_only_extract(PHY_VARS_UE *phy_vars_ue, - int32_t pss_ext[4][72], - uint8_t subframe); - /*! \brief Extract only SSS resource elements @param phy_vars_ue Pointer to UE variables @param[out] sss_ext contain the SSS signals after the extraction diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c index cae3a301607baa766c0f80f41aad78ad806e07cb..6fdf6cd99538400f331aee2b49b41e0ed8514330 100644 --- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c +++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c @@ -506,8 +506,8 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, int num_jobs = CEILIDIV(gNB->frame_parms.nb_antennas_rx, numAntennas); puschAntennaProc_t rdatas[num_jobs]; memset(rdatas, 0, sizeof(rdatas)); - task_ans_t ans[num_jobs]; - memset(ans, 0, sizeof(ans)); + task_ans_t ans; + init_task_ans(&ans, num_jobs); for (int job_id = 0; job_id < num_jobs; job_id++) { puschAntennaProc_t *rdata = &rdatas[job_id]; task_t task = {.func = nr_pusch_antenna_processing, .args = rdata}; @@ -531,7 +531,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, rdata->pusch_vars = &gNB->pusch_vars[ul_id]; rdata->chest_freq = gNB->chest_freq; rdata->rxdataF = gNB->common_vars.rxdataF; - rdata->ans = &ans[job_id]; + rdata->ans = &ans; // Call the nr_pusch_antenna_processing function if (job_id == num_jobs - 1) { // Run the last job inline @@ -539,10 +539,9 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, } else { pushTpool(&gNB->threadPool, task); } - LOG_D(PHY, "Added Antenna (count %d/%d) to process, in pipe\n", job_id, num_jobs); } // Antenna Loop - join_task_ans(ans, num_jobs - 1); + join_task_ans(&ans); stop_meas(&gNB->pusch_channel_estimation_antenna_processing_stats); for (int aarx = 0; aarx < gNB->frame_parms.nb_antennas_rx; aarx++) { diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c index 8c8b8212a2ee68e84e458b0ae231d0984ecf1a37..396c8d89f4f3c9101dc5f645b7c6602ee80b52c6 100644 --- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c +++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c @@ -223,8 +223,6 @@ static void nr_ulsch_channel_level(int size_est, } } - simde_mm_empty(); - simde_m_empty(); } static void nr_ulsch_channel_compensation(c16_t *rxFext, @@ -334,8 +332,6 @@ static void nr_ulsch_channel_compensation(c16_t *rxFext, } } - simde_mm_empty(); - simde_m_empty(); } // Zero Forcing Rx function: nr_det_HhH() @@ -397,8 +393,6 @@ static void nr_ulsch_det_HhH (int32_t *after_mf_00,//a after_mf_10_128+=1; after_mf_11_128+=1; } - simde_mm_empty(); - simde_m_empty(); } /* Zero Forcing Rx function: nr_conjch0_mult_ch1() @@ -444,8 +438,6 @@ static void nr_ulsch_conjch0_mult_ch1(int *ch0, dl_ch1_128+=1; ch0conj_ch1_128+=1; } - simde_mm_empty(); - simde_m_empty(); } static simde__m128i nr_ulsch_comp_muli_sum(simde__m128i input_x, @@ -507,8 +499,6 @@ static simde__m128i nr_ulsch_comp_muli_sum(simde__m128i input_x, //print_ints("unpack hi:",&tmp_z1[0]); output = simde_mm_packs_epi32(tmp_z0,tmp_z1); - simde_mm_empty(); - simde_m_empty(); return(output); } @@ -625,8 +615,6 @@ static void nr_ulsch_construct_HhH_elements(int *conjch00_ch00, after_mf_10_128 += 1; after_mf_11_128 += 1; } - simde_mm_empty(); - simde_m_empty(); } // MMSE Rx function: nr_ulsch_mmse_2layers() @@ -1017,8 +1005,6 @@ static uint8_t nr_ulsch_mmse_2layers(NR_DL_FRAME_PARMS *frame_parms, after_mf_c_128 += 1; after_mf_d_128 += 1; } - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -1260,7 +1246,6 @@ int nr_rx_pusch_tp(PHY_VARS_gNB *gNB, nfapi_nr_pusch_pdu_t *rel15_ul = &gNB->ulsch[ulsch_id].harq_process->ulsch_pdu; NR_gNB_PUSCH *pusch_vars = &gNB->pusch_vars[ulsch_id]; - int nbSymb = 0; uint32_t bwp_start_subcarrier = ((rel15_ul->rb_start + rel15_ul->bwp_start) * NR_NB_SC_PER_RB + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size; LOG_D(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset); LOG_D(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos); @@ -1483,9 +1468,9 @@ int nr_rx_pusch_tp(PHY_VARS_gNB *gNB, int total_res = 0; int const loop_iter = CEILIDIV(rel15_ul->nr_of_symbols, numSymbols); puschSymbolProc_t arr[loop_iter]; - task_ans_t arr_ans[loop_iter]; + task_ans_t ans; + init_task_ans(&ans, loop_iter); - memset(arr_ans, 0, sizeof(arr_ans)); int sz_arr = 0; for(uint8_t task_index = 0; task_index < loop_iter; task_index++) { int symbol = task_index * numSymbols + rel15_ul->start_symbol_index; @@ -1500,7 +1485,7 @@ int nr_rx_pusch_tp(PHY_VARS_gNB *gNB, total_res += res_per_task; if (res_per_task > 0) { puschSymbolProc_t *rdata = &arr[sz_arr]; - rdata->ans = &arr_ans[sz_arr]; + rdata->ans = &ans; ++sz_arr; rdata->gNB = gNB; @@ -1522,16 +1507,15 @@ int nr_rx_pusch_tp(PHY_VARS_gNB *gNB, } else { task_t t = {.func = &nr_pusch_symbol_processing, .args = rdata}; pushTpool(&gNB->threadPool, t); - nbSymb++; } - LOG_D(PHY, "%d.%d Added symbol %d (count %d) to process, in pipe\n", frame, slot, symbol, nbSymb); + LOG_D(PHY, "%d.%d Added symbol %d to process, in pipe\n", frame, slot, symbol); + } else { + completed_task_ans(&ans); } } // symbol loop - if (nbSymb > 0) { - join_task_ans(arr_ans, sz_arr); - } + join_task_ans(&ans); stop_meas(&gNB->rx_pusch_symbol_processing_stats); // Copy the data to the scope. This cannot be performed in one call to gNBscopeCopy because the data is not contiguous in the diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c index 6c5c37385bd85de75f5de4ded8ec0d47b6884433..286386297fa9794f881a9613c02c2dd7220c899d 100644 --- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c +++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c @@ -388,7 +388,6 @@ void nr_ulsch_qpsk_qpsk(c16_t *stream0_in, c16_t *stream1_in, c16_t *stream0_out } } #endif - simde_mm_empty(); } @@ -1056,7 +1055,6 @@ void nr_ulsch_qam16_qam16(c16_t *stream0_in, stream0_128i_out[3] = simde_mm_unpackhi_epi32(xmm1_128, xmm3_128); // 8 LLRs, 2 REs } #endif - simde_mm_empty(); } /* @@ -1777,7 +1775,6 @@ void nr_ulsch_qam64_qam64(c16_t *stream0_in, } } #endif - simde_mm_empty(); } static void nr_ulsch_shift_llr(int16_t **llr_layers, uint32_t nb_re, uint32_t rxdataF_ext_offset, uint8_t mod_order, int shift) diff --git a/openair1/PHY/NR_TRANSPORT/pucch_rx.c b/openair1/PHY/NR_TRANSPORT/pucch_rx.c index e545766206255e4d40b52d2ee177fcf30f196600..6f2ccc8bc190db0e6d6c0678bf02a4843bf3dd61 100644 --- a/openair1/PHY/NR_TRANSPORT/pucch_rx.c +++ b/openair1/PHY/NR_TRANSPORT/pucch_rx.c @@ -53,6 +53,7 @@ #include "SCHED_NR/sched_nr.h" #include "T.h" +#include "nr_phy_common.h" //#define DEBUG_NR_PUCCH_RX 1 @@ -280,6 +281,7 @@ void nr_decode_pucch0(PHY_VARS_gNB *gNB, } } signal_energy /= (pucch_pdu->nr_of_symbols * frame_parms->nb_antennas_rx); + signal_energy_ant0 /= pucch_pdu->nr_of_symbols; int pucch_power_dBtimes10 = 10 * dB_fixed(signal_energy); //int32_t no_corr = 0; @@ -403,7 +405,7 @@ void nr_decode_pucch0(PHY_VARS_gNB *gNB, uci_pdu->rnti = pucch_pdu->rnti; uci_pdu->ul_cqi = cqi; uci_pdu->timing_advance = 0xffff; // currently not valid - uci_pdu->rssi = 1280 - (10 * dB_fixed(32767 * 32767)) - dB_fixed_times10(signal_energy_ant0); + uci_pdu->rssi = 1280 - (10 * dB_fixed(32767 * 32767) - dB_fixed_times10(signal_energy_ant0)); if (pucch_pdu->bit_len_harq==0) { uci_pdu->sr.sr_confidence_level = SNRtimes10 < gNB->pucch0_thres; @@ -693,10 +695,6 @@ void nr_decode_pucch1(c16_t **rxdataF, table_6_3_2_4_1_1_N_SF_mprime_PUCCH_1_noHop[nrofSymbols - 1]; // only if intra-slot hopping not enabled (PUCCH) int N_SF_mprime_PUCCH_DMRS_1 = table_6_4_1_3_1_1_1_N_SF_mprime_PUCCH_1_noHop[nrofSymbols - 1]; // only if intra-slot hopping not enabled (DM-RS) -#ifdef DEBUG_NR_PUCCH_RX - printf("\t [nr_generate_pucch1] w_index = %d, N_SF_mprime_PUCCH_1 = %d, N_SF_mprime_PUCCH_DMRS_1 = %d, N_SF_mprime0_PUCCH_1 = %d, N_SF_mprime0_PUCCH_DMRS_1 = %d\n", - w_index, N_SF_mprime_PUCCH_1,N_SF_mprime_PUCCH_DMRS_1,N_SF_mprime0_PUCCH_1,N_SF_mprime0_PUCCH_DMRS_1); -#endif if(l%2==1){ for (int m=0; m < N_SF_mprime_PUCCH_1; m++) { @@ -860,164 +858,59 @@ void nr_decode_pucch1(c16_t **rxdataF, } } -static simde__m256i pucch2_3bit[8 * 2]; -static simde__m256i pucch2_4bit[16 * 2]; -static simde__m256i pucch2_5bit[32 * 2]; -static simde__m256i pucch2_6bit[64 * 2]; -static simde__m256i pucch2_7bit[128 * 2]; -static simde__m256i pucch2_8bit[256 * 2]; -static simde__m256i pucch2_9bit[512 * 2]; -static simde__m256i pucch2_10bit[1024 * 2]; -static simde__m256i pucch2_11bit[2048 * 2]; - -static simde__m256i *pucch2_lut[9] = +typedef struct {c16_t cw[16];} cw_t; +static cw_t pucch2_3bit[8] __attribute__((aligned(32))); +static cw_t pucch2_4bit[16] __attribute__((aligned(32))); +static cw_t pucch2_5bit[32] __attribute__((aligned(32))); +static cw_t pucch2_6bit[64] __attribute__((aligned(32))); +static cw_t pucch2_7bit[128] __attribute__((aligned(32))); +static cw_t pucch2_8bit[256] __attribute__((aligned(32))); +static cw_t pucch2_9bit[512] __attribute__((aligned(32))); +static cw_t pucch2_10bit[1024] __attribute__((aligned(32))); +static cw_t pucch2_11bit[2048] __attribute__((aligned(32))); + +static cw_t* pucch2_lut[9] = {pucch2_3bit, pucch2_4bit, pucch2_5bit, pucch2_6bit, pucch2_7bit, pucch2_8bit, pucch2_9bit, pucch2_10bit, pucch2_11bit}; -static simde__m64 pucch2_polar_4bit[16]; -static simde__m128i pucch2_polar_llr_num_lut[256], pucch2_polar_llr_den_lut[256]; +typedef struct { + int16_t cw[4]; +} cw4bit_t; +static cw4bit_t pucch2_polar_4bit[16] __attribute__((aligned(32))); +static simde__m128i pucch2_polar_llr_num_lut[256]; -void init_pucch2_luts() { - - uint32_t out; - int8_t bit; - +void init_pucch2_luts() +{ for (int b=3;b<12;b++) { - for (int i = 0; i < (1 << b); i++) { - out = encodeSmallBlock(i, b); -#ifdef DEBUG_NR_PUCCH_RX - if (b==3) printf("in %d, out %x\n",i,out); -#endif - simde__m256i *lut_i=&pucch2_lut[b-3][i<<1]; - simde__m256i *lut_ip1=&pucch2_lut[b-3][1+(i<<1)]; - bit = (out&0x1) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,0); - bit = (out&0x2) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,0); - bit = (out&0x4) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,1); - bit = (out&0x8) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,1); - bit = (out&0x10) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,2); - bit = (out&0x20) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,2); - bit = (out&0x40) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,3); - bit = (out&0x80) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,3); - bit = (out&0x100) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,4); - bit = (out&0x200) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,4); - bit = (out&0x400) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,5); - bit = (out&0x800) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,5); - bit = (out&0x1000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,6); - bit = (out&0x2000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,6); - bit = (out&0x4000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,7); - bit = (out&0x8000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,7); - bit = (out&0x10000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,8); - bit = (out&0x20000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,8); - bit = (out&0x40000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,9); - bit = (out&0x80000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,9); - bit = (out&0x100000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,10); - bit = (out&0x200000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,10); - bit = (out&0x400000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,11); - bit = (out&0x800000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,11); - bit = (out&0x1000000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,12); - bit = (out&0x2000000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,12); - bit = (out&0x4000000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,13); - bit = (out&0x8000000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,13); - bit = (out&0x10000000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,14); - bit = (out&0x20000000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,14); - bit = (out&0x40000000) > 0 ? -1 : 1; - *lut_i = simde_mm256_insert_epi16(*lut_i,bit,15); - bit = (out&0x80000000) > 0 ? -1 : 1; - *lut_ip1 = simde_mm256_insert_epi16(*lut_ip1,bit,15); + for (int cw = 0; cw < (1 << b); cw++) { + uint32_t out = encodeSmallBlock(cw, b); + uint16_t *tmp = (uint16_t *)pucch2_lut[b - 3][cw].cw; + for (int j = 0; j < 32; j++) + *tmp++ = (out & (1U<<j)) > 0 ? -1 : 1; } } for (int i = 0; i < 16; i++) { - simde__m64 *lut_i=&pucch2_polar_4bit[i]; - - bit = (i&0x1) > 0 ? -1 : 1; - *lut_i = simde_mm_insert_pi16(*lut_i,bit,0); - bit = (i&0x2) > 0 ? -1 : 1; - *lut_i = simde_mm_insert_pi16(*lut_i,bit,1); - bit = (i&0x4) > 0 ? -1 : 1; - *lut_i = simde_mm_insert_pi16(*lut_i,bit,2); - bit = (i&0x8) > 0 ? -1 : 1; - *lut_i = simde_mm_insert_pi16(*lut_i,bit,3); + int16_t *lut_i = pucch2_polar_4bit[i].cw; + *lut_i++ = (i & 0x1) <= 0; + *lut_i++ = (i & 0x2) <= 0; + *lut_i++ = (i & 0x4) <= 0; + *lut_i++ = (i & 0x8) <= 0; } - for (int i=0;i<256;i++) { - simde__m128i *lut_num_i=&pucch2_polar_llr_num_lut[i]; - simde__m128i *lut_den_i=&pucch2_polar_llr_den_lut[i]; - bit = (i&0x1) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 0); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 0); - - bit = (i&0x10) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 1); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 1); - - bit = (i&0x2) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 2); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 2); - - bit = (i&0x20) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 3); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 3); - - bit = (i&0x4) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 4); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 4); - - bit = (i&0x40) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 5); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 5); - - bit = (i&0x8) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 6); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 6); - - bit = (i&0x80) > 0 ? 0 : 1; - *lut_num_i = simde_mm_insert_epi16(*lut_num_i, bit, 7); - *lut_den_i = simde_mm_insert_epi16(*lut_den_i, 1 - bit, 7); - + for (int cw = 0; cw < 256; cw++) { + int16_t *lut_num_i = (int16_t *)&pucch2_polar_llr_num_lut[cw]; + *lut_num_i++ = (cw & 0x1) <= 0; + *lut_num_i++ = (cw & 0x10) <= 0; + *lut_num_i++ = (cw & 0x2) <= 0; + *lut_num_i++ = (cw & 0x20) <= 0; + *lut_num_i++ = (cw & 0x4) <= 0; + *lut_num_i++ = (cw & 0x40) <= 0; + *lut_num_i++ = (cw & 0x8) <= 0; + *lut_num_i++ = (cw & 0x80) <= 0; #ifdef DEBUG_NR_PUCCH_RX - printf("i %d, lut_num (%d,%d,%d,%d,%d,%d,%d,%d)\n", - i, - ((int16_t *)lut_num_i)[0], - ((int16_t *)lut_num_i)[1], - ((int16_t *)lut_num_i)[2], - ((int16_t *)lut_num_i)[3], - ((int16_t *)lut_num_i)[4], - ((int16_t *)lut_num_i)[5], - ((int16_t *)lut_num_i)[6], - ((int16_t *)lut_num_i)[7]); + log_dump(PHY, pucch2_polar_llr_num_lut, 8, LOG_DUMP_C16, "lut_num %d:", i); #endif } } - void nr_decode_pucch2(PHY_VARS_gNB *gNB, c16_t **rxdataF, int frame, @@ -1026,11 +919,14 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, nfapi_nr_pucch_pdu_t* pucch_pdu) { NR_DL_FRAME_PARMS *frame_parms = &gNB->frame_parms; + const simde__m256i conj256 = simde_mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1); + //pucch_GroupHopping_t pucch_GroupHopping = pucch_pdu->group_hop_flag + (pucch_pdu->sequence_hop_flag<<1); + const int nb_symbols=pucch_pdu->nr_of_symbols; - AssertFatal(pucch_pdu->nr_of_symbols == 1 || pucch_pdu->nr_of_symbols == 2, + AssertFatal(nb_symbols == 1 || nb_symbols == 2, "Illegal number of symbols for PUCCH 2 %d\n", - pucch_pdu->nr_of_symbols); + nb_symbols); AssertFatal((pucch_pdu->prb_start-((pucch_pdu->prb_start>>2)<<2))==0, "Current pucch2 receiver implementation requires a PRB offset multiple of 4. The one selected is %d", @@ -1040,35 +936,30 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, int l2 = pucch_pdu->start_symbol_index; int soffset = (slot % RU_RX_SLOT_DEPTH) * frame_parms->symbols_per_slot * frame_parms->ofdm_symbol_size; - int re_offset[2]; + int re_offset[nb_symbols]; re_offset[0] = (12 * (pucch_pdu->prb_start + pucch_pdu->bwp_start) + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size; - if (pucch_pdu->freq_hop_flag == 0) - re_offset[1] = re_offset[0]; - else { - re_offset[1] = 12*(pucch_pdu->second_hop_prb+pucch_pdu->bwp_start) + frame_parms->first_carrier_offset; - if (re_offset[1]>= frame_parms->ofdm_symbol_size) - re_offset[1]-=frame_parms->ofdm_symbol_size; + if (nb_symbols==2) { + if (pucch_pdu->freq_hop_flag ) + re_offset[1] = (12*(pucch_pdu->second_hop_prb+pucch_pdu->bwp_start) + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size ; + else + re_offset[1] = re_offset[0]; } - AssertFatal(pucch_pdu->prb_size * pucch_pdu->nr_of_symbols > 1, + AssertFatal(pucch_pdu->prb_size * nb_symbols > 1, "number of PRB*SYMB (%d,%d)< 2", pucch_pdu->prb_size, - pucch_pdu->nr_of_symbols); + nb_symbols); int Prx = gNB->gNB_config.carrier_config.num_rx_ant.value; // AssertFatal((pucch_pdu->prb_size&1) == 0,"prb_size %d is not a multiple of2\n",pucch_pdu->prb_size); - int Prx2 = (Prx==1)?2:Prx; // use 2 for Nb antennas in case of single antenna to allow the following allocations - int nb_re_pucch = 12*pucch_pdu->prb_size; - int prb_size_ext = pucch_pdu->prb_size+(pucch_pdu->prb_size&1); - - c16_t rp[Prx2][2][nb_re_pucch]; + const int nb_re_pucch = 12 * pucch_pdu->prb_size; + c16_t rp[Prx][nb_symbols][nb_re_pucch]; memset(rp, 0, sizeof(rp)); int64_t pucch2_lev = 0; - for (int aa=0;aa<Prx;aa++){ - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { + for (int symb=0;symb<nb_symbols;symb++) { c16_t *tmp_rp = ((c16_t *)&rxdataF[aa][soffset + (l2 + symb) * frame_parms->ofdm_symbol_size]); if (re_offset[symb] + nb_re_pucch < frame_parms->ofdm_symbol_size) { @@ -1084,23 +975,14 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, } } - pucch2_lev /= Prx * pucch_pdu->nr_of_symbols; + pucch2_lev /= Prx * nb_symbols; int pucch2_levdB = dB_fixed(pucch2_lev); - int scaling = 0; - if (pucch2_levdB > 72) - scaling = 4; - else if (pucch2_levdB > 66) - scaling = 3; - else if (pucch2_levdB > 60) - scaling = 2; - else if (pucch2_levdB > 54) - scaling = 1; - + int scaling = max((log2_approx64(pucch2_lev) >> 1) - 8, 0); LOG_D(NR_PHY, "%d.%d Decoding pucch2 for %d symbols, %d PRB, nb_harq %d, nb_sr %d, nb_csi %d/%d, pucch2_lev %d dB (scaling %d)\n", frame, slot, - pucch_pdu->nr_of_symbols, + nb_symbols, pucch_pdu->prb_size, pucch_pdu->bit_len_harq, pucch_pdu->sr_flag, @@ -1109,68 +991,87 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, pucch2_levdB, scaling); + int prb_size_ext = pucch_pdu->prb_size + (pucch_pdu->prb_size & 1); int nc_group_size=1; // 2 PRB int ngroup = prb_size_ext/nc_group_size/2; - int32_t corr32_re[2][ngroup][Prx2],corr32_im[2][ngroup][Prx2]; - memset(corr32_re, 0, sizeof(corr32_re)); - memset(corr32_im, 0, sizeof(corr32_im)); - - int16_t r_re_ext[Prx2][2][8 * prb_size_ext] __attribute__((aligned(32))); - int16_t r_im_ext[Prx2][2][8 * prb_size_ext] __attribute__((aligned(32))); - int16_t r_re_ext2[Prx2][2][8 * prb_size_ext] __attribute__((aligned(32))); - int16_t r_im_ext2[Prx2][2][8 * prb_size_ext] __attribute__((aligned(32))); - int16_t rd_re_ext[Prx2][2][4 * prb_size_ext] __attribute__((aligned(32))); - int16_t rd_im_ext[Prx2][2][4 * prb_size_ext] __attribute__((aligned(32))); - - if (pucch_pdu->prb_size != prb_size_ext) { - // if the number of PRBs is odd - // we fill the unsed part of the arrays - for (int aa = 0; aa < Prx; aa++) { - for (int symb = 0; symb < pucch_pdu->nr_of_symbols; symb++) { - const int sz = pucch_pdu->prb_size; - memset(r_re_ext[aa][symb] + 8 * sz, 0, 8 * sizeof(int16_t)); - memset(r_im_ext[aa][symb] + 8 * sz, 0, 8 * sizeof(int16_t)); - memset(rd_re_ext[aa][symb] + 4 * sz, 0, 4 * sizeof(int16_t)); - memset(rd_im_ext[aa][symb] + 4 * sz, 0, 4 * sizeof(int16_t)); - } - } - } + c32_t corr32[nb_symbols][ngroup][Prx]; + memset(corr32, 0, sizeof(corr32)); + const int nb_re_data = 8 * prb_size_ext; + const int nb_re_dmrs = 4 * prb_size_ext; + c16_t r_ext[Prx][nb_symbols][nb_re_data] __attribute__((aligned(32))); + c16_t r_ext2[Prx][nb_symbols][nb_re_data] __attribute__((aligned(32))); + const simde__m256i swap = simde_mm256_set_epi8(29, + 28, + 31, + 30, + 25, + 24, + 27, + 26, + 21, + 20, + 23, + 22, + 17, + 16, + 19, + 18, + 13, + 12, + 15, + 14, + 9, + 8, + 11, + 10, + 5, + 4, + 7, + 6, + 1, + 0, + 3, + 2); + // prepare scrambling sequence for data + uint32_t x2 = ((pucch_pdu->rnti) << 15) + pucch_pdu->data_scrambling_id; +#ifdef DEBUG_NR_PUCCH_RX + printf("x2 %x\n", x2); +#endif + c16_t scramb_data[nb_re_data] __attribute__((aligned(32))); - for (int symb=0; symb<pucch_pdu->nr_of_symbols;symb++) { - // 24 REs contains 48x16-bit, so 6x8x16-bit + uint32_t *sGold = gold_cache(x2, nb_symbols * nb_re_data/2); + uint8_t *sGold8 = (uint8_t *)sGold; + for (int i = 0; i < nb_re_data; i += 4) + *(simde__m128i *)(scramb_data + i) = byte2m128i[*sGold8++]; + + + for (int symb=0; symb<nb_symbols;symb++) { + c16_t rdmrs_ext[Prx][nb_re_dmrs] __attribute__((aligned(32))); + + // extract DMRS for (int aa = 0; aa < Prx; aa++) { + c16_t *rdmrs_ext_p = rdmrs_ext[aa]; + c16_t *rp_base = rp[aa][symb]; for (int prb = 0; prb < pucch_pdu->prb_size; prb++) { - int16_t *r_re_ext_p = &r_re_ext[aa][symb][8 * prb]; - int16_t *r_im_ext_p = &r_im_ext[aa][symb][8 * prb]; - int16_t *rd_re_ext_p = &rd_re_ext[aa][symb][4 * prb]; - int16_t *rd_im_ext_p = &rd_im_ext[aa][symb][4 * prb]; - for (int idx = 0; idx < 4; idx++) { - c16_t *rp_base = rp[aa][symb] + prb * 12 + 3 * idx; - AssertFatal(prb * 12 + 3 * idx + 2 < nb_re_pucch, ""); - r_re_ext_p[idx << 1] = rp_base->r >> scaling; - r_im_ext_p[idx << 1] = rp_base->i >> scaling; rp_base++; - rd_re_ext_p[idx] = rp_base->r >> scaling; - rd_im_ext_p[idx] = rp_base->i >> scaling; + *rdmrs_ext_p++ = *rp_base++; rp_base++; - r_re_ext_p[1 + (idx << 1)] = rp_base->r >> scaling; - r_im_ext_p[1 + (idx << 1)] = rp_base->i >> scaling; } + } + if (pucch_pdu->prb_size != prb_size_ext) + // if the number of PRBs is odd + // we fill the unsed part of the arrays + memset(rdmrs_ext[aa] + pucch_pdu->prb_size * 4, 0, 4 * sizeof(c16_t)); + } #ifdef DEBUG_NR_PUCCH_RX - for (int i = 0; i < 8; i++) - printf("Ant %d PRB %d dmrs[%d] -> (%d,%d)\n", aa, prb + (i >> 2), i, rd_re_ext_p[i], rd_im_ext_p[i]); - for (int i = 0; i < 16; i++) - printf("Ant %d PRB %d data[%d] -> (%d,%d)\n", aa, prb + (i >> 3), i, r_re_ext_p[i], r_im_ext_p[i]); + for (int aa = 0; aa < Prx; aa++) + log_dump(PHY, rdmrs_ext[aa], nb_re_dmrs, LOG_DUMP_C16, "Ant %d dmrs:\n", aa); #endif - } - } // first compute DMRS component - const int scramble = pucch_pdu->dmrs_scrambling_id * 2; - // fixme: when MR2754 will be merged, use the gold sequence cache instead of regenerate each time uint32_t x2 = ((1ULL << 17) * ((NR_NUMBER_OF_SYMBOLS_PER_SLOT * slot + pucch_pdu->start_symbol_index + symb + 1) * (scramble + 1)) + scramble) @@ -1180,453 +1081,210 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, slot,pucch_pdu->start_symbol_index,symb,pucch_pdu->dmrs_scrambling_id); #endif uint32_t *sGold = gold_cache(x2, pucch_pdu->prb_start / 4 + ngroup / 2); - for (int group = 0, goldIdx = pucch_pdu->prb_start / 4; group < ngroup; group++) { - // each group has 8*nc_group_size elements, compute 1 complex correlation with DMRS per group - // non-coherent combining across groups - uint8_t *sGold8 = (uint8_t *)&sGold[goldIdx]; - simde__m64 dmrs_re = byte2m64_re[sGold8[(group & 1) << 1]]; - int16_t *dmrs_re16 = (int16_t *)&dmrs_re; - simde__m64 dmrs_im = byte2m64_im[sGold8[(group & 1) << 1]]; - int16_t *dmrs_im16 = (int16_t *)&dmrs_im; -#ifdef DEBUG_NR_PUCCH_RX - printf("Group %d: x2 %x ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - group, - x2, - dmrs_re16[0], - dmrs_im16[0], - dmrs_re16[1], - dmrs_im16[1], - dmrs_re16[2], - dmrs_im16[2], - dmrs_re16[3], - dmrs_im16[3]); -#endif - for (int aa=0;aa<Prx;aa++) { - int16_t *rd_re_ext_p = &rd_re_ext[aa][symb][8 * group]; - int16_t *rd_im_ext_p = &rd_im_ext[aa][symb][8 * group]; + // Compute pilot conjugate + c16_t pil_dmrs[nb_re_dmrs] __attribute__((aligned(32))); + uint8_t *sGold8 = (uint8_t *)(sGold + pucch_pdu->prb_start / 4); + for (int group = 0; group < nb_re_dmrs; group += 4) + *(simde__m128i *)(pil_dmrs + group) = simde_mm_sign_epi16(byte2m128i[*sGold8++], *(simde__m128i *)&conj256); + + // Compute delay + c16_t ch_ls[128] __attribute__((aligned(32))) = {0}; + { + c16_t rdmrs_gold[nb_re_dmrs] __attribute__((aligned(32))); + for (int aa = 0; aa < Prx; aa++) { + mult_complex_vectors(rdmrs_ext[aa], pil_dmrs, rdmrs_gold, nb_re_dmrs, 0); + c16_t *ch_ls_ptr = ch_ls; + c16_t *end = ch_ls_ptr + 128; + for (int i = 0; i < nb_re_dmrs; i++) + for (int k = 0; k < 3 && ch_ls_ptr < end; k++) + *ch_ls_ptr++ = rdmrs_gold[i]; + } + } + c16_t ch_temp[128] __attribute__((aligned(32))) = {0}; + delay_t delay = {0}; + nr_est_delay(128, ch_ls, ch_temp, &delay); + + // Apply delay compensation on the input + if (delay.est_delay != 0) { + int delay_idx = get_delay_idx(delay.est_delay, MAX_DELAY_COMP); + c16_t *delay_table = frame_parms->delay_table128[delay_idx]; + for (int aa = 0; aa < Prx; aa++) + mult_complex_vectors(rp[aa][symb], delay_table, rp[aa][symb], nb_re_pucch, 8); + } -#ifdef DEBUG_NR_PUCCH_RX - printf("Group %d: rd ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - group, - rd_re_ext_p[0],rd_im_ext_p[0], - rd_re_ext_p[1],rd_im_ext_p[1], - rd_re_ext_p[2],rd_im_ext_p[2], - rd_re_ext_p[3],rd_im_ext_p[3]); -#endif - for (int z = 0; z < 4; z++) { - corr32_re[symb][group][aa] += rd_re_ext_p[z] * dmrs_re16[z] + rd_im_ext_p[z] * dmrs_im16[z]; - corr32_im[symb][group][aa] += -rd_re_ext_p[z] * dmrs_im16[z] + rd_im_ext_p[z] * dmrs_re16[z]; + // extract again DMRS, and signal, after delay compensation + for (int aa = 0; aa < Prx; aa++) { + c16_t *r_ext_p = r_ext[aa][symb]; + c16_t *rdmrs_ext_p = rdmrs_ext[aa]; + c16_t *rp_base = rp[aa][symb]; + for (int prb = 0; prb < pucch_pdu->prb_size; prb++) { + for (int idx = 0; idx < 4; idx++) { + *r_ext_p++ = *rp_base++; + *rdmrs_ext_p++ = *rp_base++; + *r_ext_p++ = *rp_base++; } } - dmrs_re = byte2m64_re[sGold8[1 + ((group & 1) << 1)]]; - dmrs_im = byte2m64_im[sGold8[1 + ((group & 1) << 1)]]; + if (pucch_pdu->prb_size != prb_size_ext) { + // if the number of PRBs is odd + // we fill the unsed part of the arrays + memset(rdmrs_ext[aa] + pucch_pdu->prb_size * 4, 0, 4 * sizeof(c16_t)); + memset(r_ext[aa][symb] + pucch_pdu->prb_size * 8, 0, 8 * sizeof(c16_t)); + } + } #ifdef DEBUG_NR_PUCCH_RX - printf("Group %d: s %x ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - group, - ((uint16_t *)&sGold)[1], - dmrs_re16[0], - dmrs_im16[0], - dmrs_re16[1], - dmrs_im16[1], - dmrs_re16[2], - dmrs_im16[2], - dmrs_re16[3], - dmrs_im16[3]); + for (int aa = 0; aa < Prx; aa++) { + log_dump(PHY, rdmrs_ext[aa], nb_re_dmrs, LOG_DUMP_C16, "after delay compensation ant %d dmrs:\n", aa); + log_dump(PHY, r_ext[aa], nb_re_data, LOG_DUMP_C16, "after delay compensation ant %d data:\n", aa); + } #endif + c16_t rdmrs_gold[Prx][nb_re_dmrs] __attribute__((aligned(32))); + for (int aa = 0; aa < Prx; aa++) + mult_complex_vectors(rdmrs_ext[aa], pil_dmrs, rdmrs_gold[aa], nb_re_dmrs, 0); for (int aa=0;aa<Prx;aa++) { - int16_t *rd_re_ext_p = &rd_re_ext[aa][symb][8 * group]; - int16_t *rd_im_ext_p = &rd_im_ext[aa][symb][8 * group]; -#ifdef DEBUG_NR_PUCCH_RX - printf("Group %d: rd ((%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - group, - rd_re_ext_p[4],rd_im_ext_p[4], - rd_re_ext_p[5],rd_im_ext_p[5], - rd_re_ext_p[6],rd_im_ext_p[6], - rd_re_ext_p[7],rd_im_ext_p[7]); -#endif - for (int z = 0; z < 4; z++) { - corr32_re[symb][group][aa] += rd_re_ext_p[z + 4] * dmrs_re16[z] + rd_im_ext_p[z + 4] * dmrs_im16[z]; - corr32_im[symb][group][aa] += -rd_re_ext_p[z + 4] * dmrs_im16[z] + rd_im_ext_p[z + 4] * dmrs_re16[z]; + c16_t *pil_ptr = pil_dmrs; + for (int group = 0; group < ngroup; group++) { + // each group has 8*nc_group_size elements, compute 1 complex correlation with DMRS per group + // non-coherent combining across groups + c16_t *rdmrs_p = &rdmrs_ext[aa][8 * group]; + for (int z = 0; z < 8; z++) { + c16_t tmp = c16mulShift(*rdmrs_p++, *pil_ptr++, scaling); + corr32[symb][group][aa].r += tmp.r; + corr32[symb][group][aa].i += tmp.i; + } } - /* corr32_re[group][aa]>>=5; - corr32_im[group][aa]>>=5;*/ -#ifdef DEBUG_NR_PUCCH_RX - printf("Group %d: corr32 (%d,%d)\n",group,corr32_re[symb][group][aa],corr32_im[symb][group][aa]); -#endif - } //aa - - if ((group & 1) == 1) - goldIdx++; - } // group - } // symb - - // unscrambling - uint32_t x2 = ((pucch_pdu->rnti) << 15) + pucch_pdu->data_scrambling_id; -#ifdef DEBUG_NR_PUCCH_RX - printf("x2 %x\n", x2); -#endif - uint32_t *sGold = gold_cache(x2, pucch_pdu->nr_of_symbols * prb_size_ext / 2); - int goldIdx = 0; - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { - simde__m64 c_re[4], c_im[4]; - int re_off=0; - for (int prb=0;prb<prb_size_ext;prb+=2,re_off+=16) { - uint8_t *sGold8 = (uint8_t *)(sGold + goldIdx); - for (int z = 0; z < 4; z++) { - c_re[z] = byte2m64_re[sGold8[z]]; - c_im[z] = byte2m64_im[sGold8[z]]; } - - for (int aa=0;aa<Prx;aa++) { #ifdef DEBUG_NR_PUCCH_RX - printf("prb %d: rd ((%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - prb, - r_re_ext[aa][symb][re_off], - r_im_ext[aa][symb][re_off], - r_re_ext[aa][symb][re_off + 1], - r_im_ext[aa][symb][re_off + 1], - r_re_ext[aa][symb][re_off + 2], - r_im_ext[aa][symb][re_off + 2], - r_re_ext[aa][symb][re_off + 3], - r_im_ext[aa][symb][re_off + 3], - r_re_ext[aa][symb][re_off + 4], - r_im_ext[aa][symb][re_off + 4], - r_re_ext[aa][symb][re_off + 5], - r_im_ext[aa][symb][re_off + 5], - r_re_ext[aa][symb][re_off + 6], - r_im_ext[aa][symb][re_off + 6], - r_re_ext[aa][symb][re_off + 7], - r_im_ext[aa][symb][re_off + 7]); - printf("prb %d: rd ((%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - prb+1, - r_re_ext[aa][symb][re_off + 8], - r_im_ext[aa][symb][re_off + 8], - r_re_ext[aa][symb][re_off + 9], - r_im_ext[aa][symb][re_off + 9], - r_re_ext[aa][symb][re_off + 10], - r_im_ext[aa][symb][re_off + 10], - r_re_ext[aa][symb][re_off + 11], - r_im_ext[aa][symb][re_off + 11], - r_re_ext[aa][symb][re_off + 12], - r_im_ext[aa][symb][re_off + 12], - r_re_ext[aa][symb][re_off + 13], - r_im_ext[aa][symb][re_off + 13], - r_re_ext[aa][symb][re_off + 14], - r_im_ext[aa][symb][re_off + 14], - r_re_ext[aa][symb][re_off + 15], - r_im_ext[aa][symb][re_off + 15]); + log_dump(PHY, corr32[symb][0], 8, LOG_DUMP_C32, "corr32:"); #endif - simde__m64 *r_re_ext_64 = (simde__m64 *)&r_re_ext[aa][symb][re_off]; - simde__m64 *r_re_ext2_64 = (simde__m64 *)&r_re_ext2[aa][symb][re_off]; - simde__m64 *r_im_ext_64 = (simde__m64 *)&r_im_ext[aa][symb][re_off]; - simde__m64 *r_im_ext2_64 = (simde__m64 *)&r_im_ext2[aa][symb][re_off]; - for (int z = 0; z < 4; z++) { - r_re_ext2_64[z] = simde_mm_mullo_pi16(r_re_ext_64[z], c_im[z]); - r_re_ext_64[z] = simde_mm_mullo_pi16(r_re_ext_64[z], c_re[z]); - r_im_ext2_64[z] = simde_mm_mullo_pi16(r_im_ext_64[z], c_re[z]); - r_im_ext_64[z] = simde_mm_mullo_pi16(r_im_ext_64[z], c_im[z]); - } - -#ifdef DEBUG_NR_PUCCH_RX - printf("prb %d: r ((%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - prb, - r_re_ext[aa][symb][re_off],r_im_ext[aa][symb][re_off], - r_re_ext[aa][symb][re_off+1],r_im_ext[aa][symb][re_off+1], - r_re_ext[aa][symb][re_off+2],r_im_ext[aa][symb][re_off+2], - r_re_ext[aa][symb][re_off+3],r_im_ext[aa][symb][re_off+3], - r_re_ext[aa][symb][re_off+4],r_im_ext[aa][symb][re_off+4], - r_re_ext[aa][symb][re_off+5],r_im_ext[aa][symb][re_off+5], - r_re_ext[aa][symb][re_off+6],r_im_ext[aa][symb][re_off+6], - r_re_ext[aa][symb][re_off+7],r_im_ext[aa][symb][re_off+7]); - printf("prb %d: r ((%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d),(%d,%d))\n", - prb+1, - r_re_ext[aa][symb][re_off+8],r_im_ext[aa][symb][re_off+8], - r_re_ext[aa][symb][re_off+9],r_im_ext[aa][symb][re_off+9], - r_re_ext[aa][symb][re_off+10],r_im_ext[aa][symb][re_off+10], - r_re_ext[aa][symb][re_off+11],r_im_ext[aa][symb][re_off+11], - r_re_ext[aa][symb][re_off+12],r_im_ext[aa][symb][re_off+12], - r_re_ext[aa][symb][re_off+13],r_im_ext[aa][symb][re_off+13], - r_re_ext[aa][symb][re_off+14],r_im_ext[aa][symb][re_off+14], - r_re_ext[aa][symb][re_off+15],r_im_ext[aa][symb][re_off+15]); -#endif + // apply gold sequence on data symbols + for (int aa = 0; aa < Prx; aa++) { + simde__m256i *pil_ptr = (simde__m256i *)scramb_data; + simde__m256i *end = (simde__m256i *)(scramb_data + nb_re_data); + for (simde__m256i *ptr = (simde__m256i *)r_ext[aa][symb], *ptr2 = (simde__m256i *)r_ext2[aa][symb]; pil_ptr < end; + ptr++, pil_ptr++, ptr2++) { + simde__m256i tmp = simde_mm256_srai_epi16(*ptr, scaling); + *ptr2 = simde_mm256_sign_epi16(simde_mm256_sign_epi16(simde_mm256_shuffle_epi8(tmp, swap), *pil_ptr), conj256); + *ptr = simde_mm256_sign_epi16(tmp, *pil_ptr); } - goldIdx++; -#ifdef DEBUG_NR_PUCCH_RX - printf("\n"); -#endif } - } //symb + } + int nb_bit = pucch_pdu->bit_len_harq+pucch_pdu->sr_flag+pucch_pdu->bit_len_csi_part1+pucch_pdu->bit_len_csi_part2; - AssertFatal(nb_bit > 2 && nb_bit< 65,"illegal length (%d : %d,%d,%d,%d)\n",nb_bit,pucch_pdu->bit_len_harq,pucch_pdu->sr_flag,pucch_pdu->bit_len_csi_part1,pucch_pdu->bit_len_csi_part2); + AssertFatal(nb_bit > 2 && nb_bit < 65, + "illegal length (%d : %d,%d,%d,%d)\n", + nb_bit, + pucch_pdu->bit_len_harq, + pucch_pdu->sr_flag, + pucch_pdu->bit_len_csi_part1, + pucch_pdu->bit_len_csi_part2); - uint64_t decodedPayload[2]; + uint64_t decodedPayload[nb_symbols]; + memset(decodedPayload,0,sizeof(decodedPayload)); uint8_t corr_dB; int decoderState = 2; if (pucch2_levdB < gNB->measurements.n0_subband_power_avg_dB + (gNB->pucch0_thres / 10)) decoderState = 1; // assuming missed detection, only attempt to decode for polar case (with CRC) LOG_D(NR_PHY, "n0+thres %d decoderState %d\n", gNB->measurements.n0_subband_power_avg_dB + (gNB->pucch0_thres / 10), decoderState); + if (nb_bit < 12 && decoderState == 2) { // short blocklength case - simde__m256i *rp_re[Prx2][2]; - simde__m256i *rp2_re[Prx2][2]; - simde__m256i *rp_im[Prx2][2]; - simde__m256i *rp2_im[Prx2][2]; - for (int aa=0;aa<Prx;aa++) { - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { - rp_re[aa][symb] = (simde__m256i*)r_re_ext[aa][symb]; - rp_im[aa][symb] = (simde__m256i*)r_im_ext[aa][symb]; - rp2_re[aa][symb] = (simde__m256i*)r_re_ext2[aa][symb]; - rp2_im[aa][symb] = (simde__m256i*)r_im_ext2[aa][symb]; - } - } - simde__m256i prod_re[Prx2],prod_im[Prx2]; uint64_t corr=0; int cw_ML=0; - - for (int cw=0;cw<1<<nb_bit;cw++) { -#ifdef DEBUG_NR_PUCCH_RX - printf("cw %d:",cw); - for (int i=0;i<32;i+=2) { - printf("%d,%d,", - ((int16_t *)&pucch2_lut[nb_bit - 3][cw << 1])[i >> 1], - ((int16_t *)&pucch2_lut[nb_bit - 3][cw << 1])[1 + (i >> 1)]); - } - printf("\n"); -#endif + for (int cw = 0; cw < 1 << nb_bit; cw++) { uint64_t corr_tmp = 0; - - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { + for (int symb=0;symb<nb_symbols;symb++) { for (int group=0;group<ngroup;group++) { // do complex correlation - for (int aa=0;aa<Prx;aa++) { - prod_re[aa] = /*simde_mm256_srai_epi16(*/ simde_mm256_adds_epi16( - simde_mm256_mullo_epi16(pucch2_lut[nb_bit - 3][cw << 1], rp_re[aa][symb][group]), - simde_mm256_mullo_epi16(pucch2_lut[nb_bit - 3][(cw << 1) + 1], rp_im[aa][symb][group])) /*,5)*/; - prod_im[aa] = /*simde_mm256_srai_epi16(*/ simde_mm256_subs_epi16( - simde_mm256_mullo_epi16(pucch2_lut[nb_bit - 3][cw << 1], rp2_im[aa][symb][group]), - simde_mm256_mullo_epi16(pucch2_lut[nb_bit - 3][(cw << 1) + 1], rp2_re[aa][symb][group])) /*,5)*/; -#ifdef DEBUG_NR_PUCCH_RX - printf("prod_re[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa, - ((int16_t*)&prod_re[aa])[0],((int16_t*)&prod_re[aa])[1],((int16_t*)&prod_re[aa])[2],((int16_t*)&prod_re[aa])[3], - ((int16_t*)&prod_re[aa])[4],((int16_t*)&prod_re[aa])[5],((int16_t*)&prod_re[aa])[6],((int16_t*)&prod_re[aa])[7], - ((int16_t*)&prod_re[aa])[8],((int16_t*)&prod_re[aa])[9],((int16_t*)&prod_re[aa])[10],((int16_t*)&prod_re[aa])[11], - ((int16_t*)&prod_re[aa])[12],((int16_t*)&prod_re[aa])[13],((int16_t*)&prod_re[aa])[14],((int16_t*)&prod_re[aa])[15]); - printf("prod_im[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa, - ((int16_t*)&prod_im[aa])[0],((int16_t*)&prod_im[aa])[1],((int16_t*)&prod_im[aa])[2],((int16_t*)&prod_im[aa])[3], - ((int16_t*)&prod_im[aa])[4],((int16_t*)&prod_im[aa])[5],((int16_t*)&prod_im[aa])[6],((int16_t*)&prod_im[aa])[7], - ((int16_t*)&prod_im[aa])[8],((int16_t*)&prod_im[aa])[9],((int16_t*)&prod_im[aa])[10],((int16_t*)&prod_im[aa])[11], - ((int16_t*)&prod_im[aa])[12],((int16_t*)&prod_im[aa])[13],((int16_t*)&prod_im[aa])[14],((int16_t*)&prod_im[aa])[15]); - -#endif - prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1 -#ifdef DEBUG_NR_PUCCH_RX - printf("0.prod_re[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa, - ((int16_t*)&prod_re[aa])[0],((int16_t*)&prod_re[aa])[1],((int16_t*)&prod_re[aa])[2],((int16_t*)&prod_re[aa])[3], - ((int16_t*)&prod_re[aa])[4],((int16_t*)&prod_re[aa])[5],((int16_t*)&prod_re[aa])[6],((int16_t*)&prod_re[aa])[7], - ((int16_t*)&prod_re[aa])[8],((int16_t*)&prod_re[aa])[9],((int16_t*)&prod_re[aa])[10],((int16_t*)&prod_re[aa])[11], - ((int16_t*)&prod_re[aa])[12],((int16_t*)&prod_re[aa])[13],((int16_t*)&prod_re[aa])[14],((int16_t*)&prod_re[aa])[15]); -#endif - prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]); - prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3 -#ifdef DEBUG_NR_PUCCH_RX - printf("1.prod_re[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa, - ((int16_t*)&prod_re[aa])[0],((int16_t*)&prod_re[aa])[1],((int16_t*)&prod_re[aa])[2],((int16_t*)&prod_re[aa])[3], - ((int16_t*)&prod_re[aa])[4],((int16_t*)&prod_re[aa])[5],((int16_t*)&prod_re[aa])[6],((int16_t*)&prod_re[aa])[7], - ((int16_t*)&prod_re[aa])[8],((int16_t*)&prod_re[aa])[9],((int16_t*)&prod_re[aa])[10],((int16_t*)&prod_re[aa])[11], - ((int16_t*)&prod_re[aa])[12],((int16_t*)&prod_re[aa])[13],((int16_t*)&prod_re[aa])[14],((int16_t*)&prod_re[aa])[15]); -#endif - prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]); - prod_re[aa] = simde_mm256_hadds_epi16(prod_re[aa],prod_re[aa]);// 0+1+2+3+4+5+6+7 + for (int aa = 0; aa < Prx; aa++) { + const simde__m256i *coeff = (simde__m256i *)&pucch2_lut[nb_bit - 3][cw].cw; + const simde__m256i *rext = (simde__m256i *)r_ext[aa][symb]; + const simde__m256i *rext2 = (simde__m256i *)r_ext2[aa][symb]; + simde__m256i re = simde_mm256_madd_epi16(coeff[0], rext[group]); + simde__m256i im = simde_mm256_madd_epi16(coeff[0], rext2[group]); + simde__m256i re2 = simde_mm256_madd_epi16(coeff[1], rext[group + 1]); + simde__m256i im2 = simde_mm256_madd_epi16(coeff[1], rext2[group + 1]); + re = simde_mm256_add_epi32(re, re2); + im = simde_mm256_add_epi32(im, im2); + re = simde_mm256_hadd_epi32(re, re); + re = simde_mm256_hadd_epi32(re, re); + im = simde_mm256_hadd_epi32(im, im); + im = simde_mm256_hadd_epi32(im, im); + int32_t *re32 = (int32_t *)&re; + int32_t *im32 = (int32_t *)&im; + c64_t prod = (c64_t){re32[0] + re32[5], im32[0] + im32[5]}; + csum(prod, prod, corr32[symb][group][aa]); + corr_tmp += squaredMod(prod); #ifdef DEBUG_NR_PUCCH_RX - printf("2.prod_re[%d] => (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\n",aa, - ((int16_t*)&prod_re[aa])[0],((int16_t*)&prod_re[aa])[1],((int16_t*)&prod_re[aa])[2],((int16_t*)&prod_re[aa])[3], - ((int16_t*)&prod_re[aa])[4],((int16_t*)&prod_re[aa])[5],((int16_t*)&prod_re[aa])[6],((int16_t*)&prod_re[aa])[7], - ((int16_t*)&prod_re[aa])[8],((int16_t*)&prod_re[aa])[9],((int16_t*)&prod_re[aa])[10],((int16_t*)&prod_re[aa])[11], - ((int16_t*)&prod_re[aa])[12],((int16_t*)&prod_re[aa])[13],((int16_t*)&prod_re[aa])[14],((int16_t*)&prod_re[aa])[15]); -#endif - prod_im[aa] = simde_mm256_hadds_epi16(prod_im[aa],prod_im[aa]); - } - int64_t corr_re=0,corr_im=0; - - - for (int aa=0;aa<Prx;aa++) { - - corr_re = ( corr32_re[symb][group][aa]+((int16_t*)(&prod_re[aa]))[0]+((int16_t*)(&prod_re[aa]))[8]); - corr_im = ( corr32_im[symb][group][aa]+((int16_t*)(&prod_im[aa]))[0]+((int16_t*)(&prod_im[aa]))[8]); -#ifdef DEBUG_NR_PUCCH_RX - printf("pucch2 cw %d group %d aa %d: (%d,%d)+(%d,%d) = (%ld,%ld)\n", + printf("pucch2 cw %d group %d aa %d: (%d,%d)+prod=(%ld,%ld)\n", cw, group, aa, - corr32_re[symb][group][aa], - corr32_im[symb][group][aa], - ((int16_t *)(&prod_re[aa]))[0] + ((int16_t *)(&prod_re[aa]))[8], - ((int16_t *)(&prod_im[aa]))[0] + ((int16_t *)(&prod_im[aa]))[8], - corr_re, - corr_im); + corr32[symb][group][aa].r, + corr32[symb][group][aa].i, + prod.r, + prod.i); #endif - - corr_tmp += corr_re*corr_re + corr_im*corr_im; - } // aa loop + } }// group loop } // symb loop if (corr_tmp > corr) { corr = corr_tmp; cw_ML = cw; -#ifdef DEBUG_NR_PUCCH_RX +#ifdef DEBUG_NR_PUCCH_RX printf("slot %d PUCCH2 cw_ML %d, corr %lu\n", slot, cw_ML, corr); #endif } } // cw loop - corr_dB = dB_fixed64((uint64_t)corr); + corr_dB = dB_fixed64(corr); #ifdef DEBUG_NR_PUCCH_RX printf("slot %d PUCCH2 cw_ML %d, metric %d \n",slot,cw_ML,corr_dB); #endif decodedPayload[0]=(uint64_t)cw_ML; + } else if (nb_bit >= 12) { // polar coded case - simde__m64 *rp_re[Prx2][2]; - simde__m64 *rp2_re[Prx2][2]; - simde__m64 *rp_im[Prx2][2]; - simde__m64 *rp2_im[Prx2][2]; - simde__m128i llrs[pucch_pdu->prb_size*2*pucch_pdu->nr_of_symbols]; - - for (int aa=0;aa<Prx;aa++) { - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { - rp_re[aa][symb] = (simde__m64*)r_re_ext[aa][symb]; - rp_im[aa][symb] = (simde__m64*)r_im_ext[aa][symb]; - rp2_re[aa][symb] = (simde__m64*)r_re_ext2[aa][symb]; - rp2_im[aa][symb] = (simde__m64*)r_im_ext2[aa][symb]; - } - } - simde__m64 prod_re[Prx2],prod_im[Prx2]; - -#ifdef DEBUG_NR_PUCCH_RX - for (int cw=0;cw<16;cw++) { - - printf("cw %d:",cw); - for (int i=0;i<4;i++) { - printf("%d,", ((int16_t *)&pucch2_polar_4bit[cw])[i >> 1]); - } - printf("\n"); - } -#endif + simde__m128i llrs[pucch_pdu->prb_size * 2 * nb_symbols]; // non-coherent LLR computation on groups of 4 REs (half-PRBs) - int32_t corr_re,corr_im,corr_tmp; - simde__m128i corr16,llr_num,llr_den; uint64_t corr = 0; - for (int symb=0;symb<pucch_pdu->nr_of_symbols;symb++) { + const simde__m128i ones = simde_mm_set1_epi16(1); + for (int symb=0;symb<nb_symbols;symb++) { for (int half_prb=0;half_prb<(2*pucch_pdu->prb_size);half_prb++) { - llr_num=simde_mm_set1_epi16(0);llr_den=simde_mm_set1_epi16(0); + simde__m128i llr_num = simde_mm_set1_epi16(0); + simde__m128i llr_den = simde_mm_set1_epi16(0); for (int cw=0;cw<256;cw++) { - corr_tmp=0; + int32_t corr_tmp=0; for (int aa=0;aa<Prx;aa++) { - prod_re[aa] = - simde_mm_srai_pi16(simde_mm_adds_pi16(simde_mm_mullo_pi16(pucch2_polar_4bit[cw & 15], rp_re[aa][symb][half_prb]), - simde_mm_mullo_pi16(pucch2_polar_4bit[cw >> 4], rp_im[aa][symb][half_prb])), - 5); - prod_im[aa] = - simde_mm_srai_pi16(simde_mm_subs_pi16(simde_mm_mullo_pi16(pucch2_polar_4bit[cw & 15], rp2_im[aa][symb][half_prb]), - simde_mm_mullo_pi16(pucch2_polar_4bit[cw >> 4], rp2_re[aa][symb][half_prb])), - 5); - prod_re[aa] = simde_mm_hadds_pi16(prod_re[aa],prod_re[aa]);// 0+1 - prod_im[aa] = simde_mm_hadds_pi16(prod_im[aa],prod_im[aa]); - prod_re[aa] = simde_mm_hadds_pi16(prod_re[aa],prod_re[aa]);// 0+1+2+3 - prod_im[aa] = simde_mm_hadds_pi16(prod_im[aa],prod_im[aa]); - + simde__m128i part1 = simde_mm_set_epi64x(0ULL, *(int64_t *)&pucch2_polar_4bit[cw & 15].cw); + simde__m128i part2 = simde_mm_set_epi64x(0ULL, *(int64_t *)&pucch2_polar_4bit[cw >> 4].cw); + simde__m128i factor = simde_mm_unpacklo_epi16(part1, part2); + simde__m128i re = *(simde__m128i *)&r_ext[aa][symb][half_prb * 4]; + simde__m128i im = *(simde__m128i *)&r_ext2[aa][symb][half_prb * 4]; + simde__m128i prod_re = simde_mm_madd_epi16(re, factor); + simde__m128i prod_im = simde_mm_madd_epi16(im, factor); + prod_re = simde_mm_hadd_epi32(prod_re, prod_re); + prod_im = simde_mm_hadd_epi32(prod_im, prod_im); + prod_re = simde_mm_hadd_epi32(prod_re, prod_re); + prod_im = simde_mm_hadd_epi32(prod_im, prod_im); + simde__m128i prod = simde_mm_srai_epi32(simde_mm_unpacklo_epi32(prod_re, prod_im), 5); + c64_t corr64 = (c64_t){corr32[symb][half_prb >> 2][aa].r / (2 * nc_group_size * 4 / 2), + corr32[symb][half_prb >> 2][aa].i / (2 * nc_group_size * 4 / 2)}; + // _mm_srai_epi64 is missing in SIMDE package, we need to update it + c64_t prod2 = {simde_mm_extract_epi32(prod, 0), simde_mm_extract_epi32(prod, 1)}; + csum(prod2, prod2, corr64); + corr_tmp += squaredMod(prod2) >> (Prx / 2); // this is for UL CQI measurement - if (cw==0) corr += ((int64_t)corr32_re[symb][half_prb>>2][aa]*corr32_re[symb][half_prb>>2][aa])+ - ((int64_t)corr32_im[symb][half_prb>>2][aa]*corr32_im[symb][half_prb>>2][aa]); - - - corr_re = ( corr32_re[symb][half_prb>>2][aa]/(2*nc_group_size*4/2)+((int16_t*)(&prod_re[aa]))[0]); - corr_im = ( corr32_im[symb][half_prb>>2][aa]/(2*nc_group_size*4/2)+((int16_t*)(&prod_im[aa]))[0]); - corr_tmp += (corr_re*corr_re + corr_im*corr_im)>>(Prx/2); - - LOG_D(PHY, - "pucch2 half_prb %d cw %d (%d,%d) aa %d: (%d,%d,%d,%d,%d,%d,%d,%d)x(%d,%d,%d,%d,%d,%d,%d,%d) (%d,%d)+(%d,%d) = " - "(%d,%d) => %d\n", - half_prb, - cw, - cw & 15, - cw >> 4, - aa, - ((int16_t *)&pucch2_polar_4bit[cw & 15])[0], - ((int16_t *)&pucch2_polar_4bit[cw >> 4])[0], - ((int16_t *)&pucch2_polar_4bit[cw & 15])[1], - ((int16_t *)&pucch2_polar_4bit[cw >> 4])[1], - ((int16_t *)&pucch2_polar_4bit[cw & 15])[2], - ((int16_t *)&pucch2_polar_4bit[cw >> 4])[2], - ((int16_t *)&pucch2_polar_4bit[cw & 15])[3], - ((int16_t *)&pucch2_polar_4bit[cw >> 4])[3], - ((int16_t *)&rp_re[aa][half_prb])[0], - ((int16_t *)&rp_im[aa][half_prb])[0], - ((int16_t *)&rp_re[aa][half_prb])[1], - ((int16_t *)&rp_im[aa][half_prb])[1], - ((int16_t *)&rp_re[aa][half_prb])[2], - ((int16_t *)&rp_im[aa][half_prb])[2], - ((int16_t *)&rp_re[aa][half_prb])[3], - ((int16_t *)&rp_im[aa][half_prb])[3], - corr32_re[symb][half_prb >> 2][aa] / (2 * nc_group_size * 4 / 2), - corr32_im[symb][half_prb >> 2][aa] / (2 * nc_group_size * 4 / 2), - ((int16_t *)(&prod_re[aa]))[0], - ((int16_t *)(&prod_im[aa]))[0], - corr_re, - corr_im, - corr_tmp); + if (cw == 0) + corr += squaredMod(corr32[symb][half_prb >> 2][aa]); } - corr16 = simde_mm_set1_epi16((int16_t)(corr_tmp >> 8)); - - LOG_D(PHY, "half_prb %d cw %d corr16 %d\n", half_prb, cw, corr_tmp >> 8); - + simde__m128i corr16 = simde_mm_set1_epi16((int16_t)(corr_tmp >> 8)); + simde__m128i den = simde_mm_xor_si128(pucch2_polar_llr_num_lut[cw], ones); llr_num = simde_mm_max_epi16(simde_mm_mullo_epi16(corr16, pucch2_polar_llr_num_lut[cw]), llr_num); - llr_den = simde_mm_max_epi16(simde_mm_mullo_epi16(corr16, pucch2_polar_llr_den_lut[cw]), llr_den); - - LOG_D(PHY, - "lut_num (%d,%d,%d,%d,%d,%d,%d,%d)\n", - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[0], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[1], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[2], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[3], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[4], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[5], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[6], - ((int16_t *)&pucch2_polar_llr_num_lut[cw])[7]); - - LOG_D(PHY, - "llr_num (%d,%d,%d,%d,%d,%d,%d,%d)\n", - ((int16_t *)&llr_num)[0], - ((int16_t *)&llr_num)[1], - ((int16_t *)&llr_num)[2], - ((int16_t *)&llr_num)[3], - ((int16_t *)&llr_num)[4], - ((int16_t *)&llr_num)[5], - ((int16_t *)&llr_num)[6], - ((int16_t *)&llr_num)[7]); - LOG_D(PHY, - "llr_den (%d,%d,%d,%d,%d,%d,%d,%d)\n", - ((int16_t *)&llr_den)[0], - ((int16_t *)&llr_den)[1], - ((int16_t *)&llr_den)[2], - ((int16_t *)&llr_den)[3], - ((int16_t *)&llr_den)[4], - ((int16_t *)&llr_den)[5], - ((int16_t *)&llr_den)[6], - ((int16_t *)&llr_den)[7]); + llr_den = simde_mm_max_epi16(simde_mm_mullo_epi16(corr16, den), llr_den); } // compute llrs - llrs[half_prb + (symb*2*pucch_pdu->prb_size)] = simde_mm_subs_epi16(llr_num,llr_den); - LOG_D(PHY,"llrs[%d] : (%d,%d,%d,%d,%d,%d,%d,%d)\n", - half_prb, - ((int16_t*)&llrs[half_prb])[0], - ((int16_t*)&llrs[half_prb])[1], - ((int16_t*)&llrs[half_prb])[2], - ((int16_t*)&llrs[half_prb])[3], - ((int16_t*)&llrs[half_prb])[4], - ((int16_t*)&llrs[half_prb])[5], - ((int16_t*)&llrs[half_prb])[6], - ((int16_t*)&llrs[half_prb])[7]); + llrs[half_prb + symb * 2 * pucch_pdu->prb_size] = simde_mm_subs_epi16(llr_num, llr_den); + LOG_DDUMP(PHY, llrs+half_prb + symb * 2 * pucch_pdu->prb_size, 8, LOG_DUMP_I16, "llrs:"); } // half_prb } // symb @@ -1636,10 +1294,12 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, // Decoder reversal decodedPayload[0] = reverse_bits(decodedPayload[0], nb_bit); - if (decoderState>0) decoderState=1; + if (decoderState > 0) + decoderState = 1; corr_dB = dB_fixed64(corr); - LOG_D(PHY,"metric %d dB\n",corr_dB); - } + LOG_D(PHY, "metric %d dB\n", corr_dB); + } else + LOG_E(PHY, "PUCCH not processed: nb_bit %d decoderState %d\n", nb_bit, decoderState); LOG_D(PHY, "UCI decoderState %d, payload[0] %llu\n", decoderState, (unsigned long long)decodedPayload[0]); @@ -1647,42 +1307,46 @@ void nr_decode_pucch2(PHY_VARS_gNB *gNB, // TODO this computation is wrong -> to be ignored at MAC for now int cqi = 0xff; /*int SNRtimes10 = - dB_fixed_times10(signal_energy_nodc((int32_t *)&rxdataF[0][soffset + (l2 * frame_parms->ofdm_symbol_size) + re_offset[0]], - 12 * pucch_pdu->prb_size)) - - (10 * gNB->measurements.n0_power_tot_dB); - int cqi,bit_left; - if (SNRtimes10 < -640) cqi=0; - else if (SNRtimes10 > 635) cqi=255; - else cqi=(640+SNRtimes10)/5;*/ + dB_fixed_times10(signal_energy_nodc((int32_t *)&rxdataF[0][soffset + (l2 * frame_parms->ofdm_symbol_size) + re_offset[0]], + 12 * pucch_pdu->prb_size)) + - (10 * gNB->measurements.n0_power_tot_dB); + int cqi,bit_left; + if (SNRtimes10 < -640) cqi=0; + else if (SNRtimes10 > 635) cqi=255; + else cqi=(640+SNRtimes10)/5;*/ uci_pdu->harq.harq_bit_len = pucch_pdu->bit_len_harq; - uci_pdu->pduBitmap=0; - uci_pdu->rnti=pucch_pdu->rnti; - uci_pdu->handle=pucch_pdu->handle; - uci_pdu->pucch_format=0; - uci_pdu->ul_cqi=cqi; - uci_pdu->timing_advance=0xffff; // currently not valid - uci_pdu->rssi=1280 - (10*dB_fixed(32767*32767)-dB_fixed_times10(signal_energy_nodc(&rxdataF[0][soffset+(l2*frame_parms->ofdm_symbol_size)+re_offset[0]],12*pucch_pdu->prb_size))); - if (pucch_pdu->bit_len_harq>0) { - int harq_bytes=pucch_pdu->bit_len_harq>>3; - if ((pucch_pdu->bit_len_harq&7) > 0) harq_bytes++; - uci_pdu->pduBitmap|=2; - uci_pdu->harq.harq_payload = (uint8_t*)malloc(harq_bytes); + uci_pdu->pduBitmap = 0; + uci_pdu->rnti = pucch_pdu->rnti; + uci_pdu->handle = pucch_pdu->handle; + uci_pdu->pucch_format = 0; + uci_pdu->ul_cqi = cqi; + uci_pdu->timing_advance = 0xffff; // currently not valid + uci_pdu->rssi = + 1280 + - (10 * dB_fixed(32767 * 32767) + - dB_fixed_times10(signal_energy_nodc(&rxdataF[0][soffset + (l2 * frame_parms->ofdm_symbol_size) + re_offset[0]], + 12 * pucch_pdu->prb_size))); + if (pucch_pdu->bit_len_harq > 0) { + int harq_bytes = pucch_pdu->bit_len_harq >> 3; + if ((pucch_pdu->bit_len_harq & 7) > 0) + harq_bytes++; + uci_pdu->pduBitmap |= 2; + uci_pdu->harq.harq_payload = (uint8_t *)malloc(harq_bytes); uci_pdu->harq.harq_crc = decoderState; - LOG_D(PHY,"[DLSCH/PDSCH/PUCCH2] %d.%d HARQ bytes (%d) Decoder state %d\n", - frame,slot,harq_bytes,decoderState); - int i=0; - for (;i<harq_bytes-1;i++) { + LOG_D(PHY, "[DLSCH/PDSCH/PUCCH2] %d.%d HARQ bytes (%d) Decoder state %d\n", frame, slot, harq_bytes, decoderState); + int i = 0; + for (; i < harq_bytes - 1; i++) { uci_pdu->harq.harq_payload[i] = decodedPayload[0] & 255; LOG_D(PHY, "[DLSCH/PDSCH/PUCCH2] %d.%d HARQ payload (%d) = %d\n", frame, slot, i, uci_pdu->harq.harq_payload[i]); - decodedPayload[0]>>=8; + decodedPayload[0] >>= 8; } int bit_left = pucch_pdu->bit_len_harq - ((harq_bytes - 1) << 3); uci_pdu->harq.harq_payload[i] = decodedPayload[0] & ((1 << bit_left) - 1); LOG_D(PHY, "[DLSCH/PDSCH/PUCCH2] %d.%d HARQ payload (%d) = %d\n", frame, slot, i, uci_pdu->harq.harq_payload[i]); decodedPayload[0] >>= pucch_pdu->bit_len_harq; } - + if (pucch_pdu->sr_flag == 1) { uci_pdu->pduBitmap|=1; uci_pdu->sr.sr_bit_len = 1; diff --git a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c index bcd24fef4a8073196e7baffb7356f42081b84c39..c80bc4e5f227119ad2f729b1a699613db4565de5 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c +++ b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c @@ -86,8 +86,6 @@ void nr_det_A_MF_2x2(int32_t *a_mf_00, a_mf_10_128+=1; a_mf_11_128+=1; } - simde_mm_empty(); - simde_m_empty(); } void nr_squared_matrix_element(int32_t *a, @@ -100,8 +98,6 @@ void nr_squared_matrix_element(int32_t *a, a_sq_128+=1; a_128+=1; } - simde_mm_empty(); - simde_m_empty(); } void nr_numer_2x2(int32_t *a_00_sq, @@ -125,8 +121,6 @@ void nr_numer_2x2(int32_t *a_00_sq, a_10_sq_128+=1; a_11_sq_128+=1; } - simde_mm_empty(); - simde_m_empty(); } bool is_csi_rs_in_symbol(const fapi_nr_dl_config_csirs_pdu_rel15_t csirs_config_pdu, const int symbol) { diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c index 217a3700545fe6e021153e08075ae933375acfb5..e5442aab4833262fe971728dbbf609068d9f0b4f 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c @@ -1304,8 +1304,6 @@ static void nr_dlsch_detection_mrc(uint32_t rx_size_symbol, rho128_0[i] = simde_mm_adds_epi16(simde_mm_srai_epi16(rho128_0[i],1),simde_mm_srai_epi16(rho128_1[i],1)); }*/ } - simde_mm_empty(); - simde_m_empty(); } } @@ -1617,8 +1615,6 @@ void nr_conjch0_mult_ch1(int *ch0, dl_ch1_128+=1; ch0conj_ch1_128+=1; } - simde_mm_empty(); - simde_m_empty(); } /* diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c b/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c index b6b76ab8a863dda8a33fd6d4b0314a517172f318..3143333a3cc6356dd81898569fc2bfcf07ad19a5 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_initial_sync.c @@ -308,8 +308,8 @@ nr_initial_sync_t nr_initial_sync(UE_nr_rxtx_proc_t *proc, fp->N_RB_DL, numGscn); - task_ans_t ans[numGscn]; - memset(ans, 0, sizeof(ans)); + task_ans_t ans; + init_task_ans(&ans, numGscn); nr_ue_ssb_scan_t ssb_info[numGscn]; for (int s = 0; s < numGscn; s++) { nr_ue_ssb_scan_t *ssbInfo = &ssb_info[s]; @@ -331,7 +331,7 @@ nr_initial_sync_t nr_initial_sync(UE_nr_rxtx_proc_t *proc, ssbInfo->gscnInfo.gscn, ssbInfo->gscnInfo.ssbFirstSC, ssbInfo->gscnInfo.ssRef); - ssbInfo->ans = &ans[s]; + ssbInfo->ans = &ans; task_t t = {.func = nr_scan_ssb, .args = ssbInfo}; pushTpool(&get_nrUE_params()->Tpool, t); } @@ -339,7 +339,7 @@ nr_initial_sync_t nr_initial_sync(UE_nr_rxtx_proc_t *proc, // Collect the scan results nr_ue_ssb_scan_t res = {0}; if (numGscn > 0) { - join_task_ans(ans, numGscn); + join_task_ans(&ans); for (int i = 0; i < numGscn; i++) { nr_ue_ssb_scan_t *ssbInfo = &ssb_info[i]; if (ssbInfo->syncRes.cell_detected) { diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c b/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c index 229191269f4223dd4db2c0cff051eea0b63f28c4..1de1996126b0265f4321b871a6948f7361cf8fb6 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_pbch.c @@ -263,8 +263,6 @@ void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms, } } - simde_mm_empty(); - simde_m_empty(); } void nr_pbch_unscrambling(int16_t *demod_pbch_e, diff --git a/openair1/PHY/TOOLS/cadd_vv.c b/openair1/PHY/TOOLS/cadd_vv.c index f1c61cc60adc2959babb523dc980ecb13137b165..7eade29bb0f80dc28fa7f3c56aaa4da647704681 100644 --- a/openair1/PHY/TOOLS/cadd_vv.c +++ b/openair1/PHY/TOOLS/cadd_vv.c @@ -47,8 +47,6 @@ int32_t sub_cpx_vector16(int16_t *x, } - simde_mm_empty(); - simde_m_empty(); return(0); } diff --git a/openair1/PHY/TOOLS/cmult_vv.c b/openair1/PHY/TOOLS/cmult_vv.c index 6bee29551f101c327fea487dcbf22faa27483089..111bfcabcb8f6c04018b554556daf7aba0cb80e1 100644 --- a/openair1/PHY/TOOLS/cmult_vv.c +++ b/openair1/PHY/TOOLS/cmult_vv.c @@ -27,7 +27,6 @@ static const int16_t conjug[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1, static const int16_t conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ; #define simd_q15_t simde__m128i -#define simdshort_q15_t simde__m64 #define set1_int16(a) simde_mm_set1_epi16(a) #define setr_int16(a0, a1, a2, a3, a4, a5, a6, a7) simde_mm_setr_epi16(a0, a1, a2, a3, a4, a5, a6, a7 ) @@ -87,8 +86,6 @@ int mult_cpx_conj_vector(int16_t *x1, } - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -150,8 +147,6 @@ int mult_cpx_vector(int16_t *x1, //Q15 x2_128++; y_128++; } - simde_mm_empty(); - simde_m_empty(); return(0); } @@ -207,7 +202,5 @@ int multadd_cpx_vector(int16_t *x1, x2_128++; y_128++; } - simde_mm_empty(); - simde_m_empty(); return(0); } diff --git a/openair1/PHY/TOOLS/invSqrt.c b/openair1/PHY/TOOLS/invSqrt.c index b3e0d9749afed846cd04f2a15aac314166fad728..fce565b1d3f33866d5c23b9215140fcde4813a7d 100644 --- a/openair1/PHY/TOOLS/invSqrt.c +++ b/openair1/PHY/TOOLS/invSqrt.c @@ -19,7 +19,7 @@ * contact@openairinterface.org */ -static short lookup_table[1025] = {0x7fff, 0x16a0, 0x1000, 0xd10, 0xb50, 0xa1e, 0x93c, 0x88d, 0x800, 0x78a, 0x727, 0x6d2, 0x688, 0x646, 0x60c, 0x5d7, 0x5a8, 0x57c, 0x555, 0x530, 0x50f, 0x4f0, 0x4d2, 0x4b7, 0x49e, 0x486, 0x470, 0x45a, 0x446, 0x433, 0x421, 0x410, 0x400, 0x3f0, 0x3e1, 0x3d3, 0x3c5, 0x3b8, 0x3ab, 0x39f, 0x393, 0x388, 0x37d, 0x373, 0x369, 0x35f, 0x356, 0x34c, 0x344, 0x33b, 0x333, 0x32b, 0x323, 0x31b, 0x314, 0x30d, 0x306, 0x2ff, 0x2f8, 0x2f2, 0x2eb, 0x2e5, 0x2df, 0x2d9, 0x2d4, 0x2ce, 0x2c9, 0x2c3, 0x2be, 0x2b9, 0x2b4, 0x2af, 0x2aa, 0x2a5, 0x2a1, 0x29c, 0x298, 0x294, 0x28f, 0x28b, 0x287, 0x283, 0x27f, 0x27b, 0x278, 0x274, 0x270, 0x26d, 0x269, 0x266, 0x262, 0x25f, 0x25b, 0x258, 0x255, 0x252, 0x24f, 0x24c, 0x249, 0x246, 0x243, 0x240, 0x23d, 0x23a, 0x238, 0x235, 0x232, 0x22f, 0x22d, 0x22a, 0x228, 0x225, 0x223, 0x220, 0x21e, 0x21c, 0x219, 0x217, 0x215, 0x213, 0x210, 0x20e, 0x20c, 0x20a, 0x208, 0x206, 0x204, 0x202, 0x200, 0x1fe, 0x1fc, 0x1fa, 0x1f8, 0x1f6, 0x1f4, 0x1f2, 0x1f0, 0x1ee, 0x1ed, 0x1eb, 0x1e9, 0x1e7, 0x1e6, 0x1e4, 0x1e2, 0x1e1, 0x1df, 0x1dd, 0x1dc, 0x1da, 0x1d8, 0x1d7, 0x1d5, 0x1d4, 0x1d2, 0x1d1, 0x1cf, 0x1ce, 0x1cc, 0x1cb, 0x1c9, 0x1c8, 0x1c7, 0x1c5, 0x1c4, 0x1c2, 0x1c1, 0x1c0, 0x1be, 0x1bd, 0x1bc, 0x1ba, 0x1b9, 0x1b8, 0x1b7, 0x1b5, 0x1b4, 0x1b3, 0x1b2, 0x1b0, 0x1af, 0x1ae, 0x1ad, 0x1ac, 0x1ab, 0x1a9, 0x1a8, 0x1a7, 0x1a6, 0x1a5, 0x1a4, 0x1a3, 0x1a2, 0x1a0, 0x19f, 0x19e, 0x19d, 0x19c, 0x19b, 0x19a, 0x199, 0x198, 0x197, 0x196, 0x195, 0x194, 0x193, 0x192, 0x191, 0x190, 0x18f, 0x18e, 0x18d, 0x18c, 0x18b, 0x18b, 0x18a, 0x189, 0x188, 0x187, 0x186, 0x185, 0x184, 0x183, 0x183, 0x182, 0x181, 0x180, 0x17f, 0x17e, 0x17d, 0x17d, 0x17c, 0x17b, 0x17a, 0x179, 0x179, 0x178, 0x177, 0x176, 0x175, 0x175, 0x174, 0x173, 0x172, 0x172, 0x171, 0x170, 0x16f, 0x16f, 0x16e, 0x16d, 0x16c, 0x16c, 0x16b, 0x16a, 0x16a, 0x169, 0x168, 0x167, 0x167, 0x166, 0x165, 0x165, 0x164, 0x163, 0x163, 0x162, 0x161, 0x161, 0x160, 0x15f, 0x15f, 0x15e, 0x15d, 0x15d, 0x15c, 0x15c, 0x15b, 0x15a, 0x15a, 0x159, 0x158, 0x158, 0x157, 0x157, 0x156, 0x155, 0x155, 0x154, 0x154, 0x153, 0x152, 0x152, 0x151, 0x151, 0x150, 0x150, 0x14f, 0x14e, 0x14e, 0x14d, 0x14d, 0x14c, 0x14c, 0x14b, 0x14b, 0x14a, 0x14a, 0x149, 0x148, 0x148, 0x147, 0x147, 0x146, 0x146, 0x145, 0x145, 0x144, 0x144, 0x143, 0x143, 0x142, 0x142, 0x141, 0x141, 0x140, 0x140, 0x13f, 0x13f, 0x13e, 0x13e, 0x13d, 0x13d, 0x13c, 0x13c, 0x13c, 0x13b, 0x13b, 0x13a, 0x13a, 0x139, 0x139, 0x138, 0x138, 0x137, 0x137, 0x136, 0x136, 0x136, 0x135, 0x135, 0x134, 0x134, 0x133, 0x133, 0x133, 0x132, 0x132, 0x131, 0x131, 0x130, 0x130, 0x130, 0x12f, 0x12f, 0x12e, 0x12e, 0x12d, 0x12d, 0x12d, 0x12c, 0x12c, 0x12b, 0x12b, 0x12b, 0x12a, 0x12a, 0x129, 0x129, 0x129, 0x128, 0x128, 0x127, 0x127, 0x127, 0x126, 0x126, 0x126, 0x125, 0x125, 0x124, 0x124, 0x124, 0x123, 0x123, 0x123, 0x122, 0x122, 0x121, 0x121, 0x121, 0x120, 0x120, 0x120, 0x11f, 0x11f, 0x11f, 0x11e, 0x11e, 0x11e, 0x11d, 0x11d, 0x11d, 0x11c, 0x11c, 0x11c, 0x11b, 0x11b, 0x11a, 0x11a, 0x11a, 0x119, 0x119, 0x119, 0x118, 0x118, 0x118, 0x117, 0x117, 0x117, 0x117, 0x116, 0x116, 0x116, 0x115, 0x115, 0x115, 0x114, 0x114, 0x114, 0x113, 0x113, 0x113, 0x112, 0x112, 0x112, 0x111, 0x111, 0x111, 0x111, 0x110, 0x110, 0x110, 0x10f, 0x10f, 0x10f, 0x10e, 0x10e, 0x10e, 0x10e, 0x10d, 0x10d, 0x10d, 0x10c, 0x10c, 0x10c, 0x10c, 0x10b, 0x10b, 0x10b, 0x10a, 0x10a, 0x10a, 0x10a, 0x109, 0x109, 0x109, 0x108, 0x108, 0x108, 0x108, 0x107, 0x107, 0x107, 0x107, 0x106, 0x106, 0x106, 0x105, 0x105, 0x105, 0x105, 0x104, 0x104, 0x104, 0x104, 0x103, 0x103, 0x103, 0x103, 0x102, 0x102, 0x102, 0x102, 0x101, 0x101, 0x101, 0x101, 0x100, 0x100, 0x100, 0x100, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfd, 0xfd, 0xfd, 0xfd, 0xfc, 0xfc, 0xfc, 0xfc, 0xfb, 0xfb, 0xfb, 0xfb, 0xfa, 0xfa, 0xfa, 0xfa, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf7, 0xf7, 0xf7, 0xf7, 0xf6, 0xf6, 0xf6, 0xf6, 0xf6, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf4, 0xf4, 0xf4, 0xf4, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3, 0xf2, 0xf2, 0xf2, 0xf2, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xef, 0xef, 0xef, 0xef, 0xef, 0xee, 0xee, 0xee, 0xee, 0xee, 0xed, 0xed, 0xed, 0xed, 0xed, 0xec, 0xec, 0xec, 0xec, 0xec, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xea, 0xea, 0xea, 0xea, 0xea, 0xe9, 0xe9, 0xe9, 0xe9, 0xe9, 0xe9, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe4, 0xe4, 0xe4, 0xe4, 0xe4, 0xe4, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe2, 0xe2, 0xe2, 0xe2, 0xe2, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd7, 0xd7, 0xd7, 0xd7, 0xd7, 0xd7, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xce, 0xce, 0xce, 0xce, 0xce, 0xce, 0xce, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5}; +static const short lookup_table[1025] = {0x7fff, 0x16a0, 0x1000, 0xd10, 0xb50, 0xa1e, 0x93c, 0x88d, 0x800, 0x78a, 0x727, 0x6d2, 0x688, 0x646, 0x60c, 0x5d7, 0x5a8, 0x57c, 0x555, 0x530, 0x50f, 0x4f0, 0x4d2, 0x4b7, 0x49e, 0x486, 0x470, 0x45a, 0x446, 0x433, 0x421, 0x410, 0x400, 0x3f0, 0x3e1, 0x3d3, 0x3c5, 0x3b8, 0x3ab, 0x39f, 0x393, 0x388, 0x37d, 0x373, 0x369, 0x35f, 0x356, 0x34c, 0x344, 0x33b, 0x333, 0x32b, 0x323, 0x31b, 0x314, 0x30d, 0x306, 0x2ff, 0x2f8, 0x2f2, 0x2eb, 0x2e5, 0x2df, 0x2d9, 0x2d4, 0x2ce, 0x2c9, 0x2c3, 0x2be, 0x2b9, 0x2b4, 0x2af, 0x2aa, 0x2a5, 0x2a1, 0x29c, 0x298, 0x294, 0x28f, 0x28b, 0x287, 0x283, 0x27f, 0x27b, 0x278, 0x274, 0x270, 0x26d, 0x269, 0x266, 0x262, 0x25f, 0x25b, 0x258, 0x255, 0x252, 0x24f, 0x24c, 0x249, 0x246, 0x243, 0x240, 0x23d, 0x23a, 0x238, 0x235, 0x232, 0x22f, 0x22d, 0x22a, 0x228, 0x225, 0x223, 0x220, 0x21e, 0x21c, 0x219, 0x217, 0x215, 0x213, 0x210, 0x20e, 0x20c, 0x20a, 0x208, 0x206, 0x204, 0x202, 0x200, 0x1fe, 0x1fc, 0x1fa, 0x1f8, 0x1f6, 0x1f4, 0x1f2, 0x1f0, 0x1ee, 0x1ed, 0x1eb, 0x1e9, 0x1e7, 0x1e6, 0x1e4, 0x1e2, 0x1e1, 0x1df, 0x1dd, 0x1dc, 0x1da, 0x1d8, 0x1d7, 0x1d5, 0x1d4, 0x1d2, 0x1d1, 0x1cf, 0x1ce, 0x1cc, 0x1cb, 0x1c9, 0x1c8, 0x1c7, 0x1c5, 0x1c4, 0x1c2, 0x1c1, 0x1c0, 0x1be, 0x1bd, 0x1bc, 0x1ba, 0x1b9, 0x1b8, 0x1b7, 0x1b5, 0x1b4, 0x1b3, 0x1b2, 0x1b0, 0x1af, 0x1ae, 0x1ad, 0x1ac, 0x1ab, 0x1a9, 0x1a8, 0x1a7, 0x1a6, 0x1a5, 0x1a4, 0x1a3, 0x1a2, 0x1a0, 0x19f, 0x19e, 0x19d, 0x19c, 0x19b, 0x19a, 0x199, 0x198, 0x197, 0x196, 0x195, 0x194, 0x193, 0x192, 0x191, 0x190, 0x18f, 0x18e, 0x18d, 0x18c, 0x18b, 0x18b, 0x18a, 0x189, 0x188, 0x187, 0x186, 0x185, 0x184, 0x183, 0x183, 0x182, 0x181, 0x180, 0x17f, 0x17e, 0x17d, 0x17d, 0x17c, 0x17b, 0x17a, 0x179, 0x179, 0x178, 0x177, 0x176, 0x175, 0x175, 0x174, 0x173, 0x172, 0x172, 0x171, 0x170, 0x16f, 0x16f, 0x16e, 0x16d, 0x16c, 0x16c, 0x16b, 0x16a, 0x16a, 0x169, 0x168, 0x167, 0x167, 0x166, 0x165, 0x165, 0x164, 0x163, 0x163, 0x162, 0x161, 0x161, 0x160, 0x15f, 0x15f, 0x15e, 0x15d, 0x15d, 0x15c, 0x15c, 0x15b, 0x15a, 0x15a, 0x159, 0x158, 0x158, 0x157, 0x157, 0x156, 0x155, 0x155, 0x154, 0x154, 0x153, 0x152, 0x152, 0x151, 0x151, 0x150, 0x150, 0x14f, 0x14e, 0x14e, 0x14d, 0x14d, 0x14c, 0x14c, 0x14b, 0x14b, 0x14a, 0x14a, 0x149, 0x148, 0x148, 0x147, 0x147, 0x146, 0x146, 0x145, 0x145, 0x144, 0x144, 0x143, 0x143, 0x142, 0x142, 0x141, 0x141, 0x140, 0x140, 0x13f, 0x13f, 0x13e, 0x13e, 0x13d, 0x13d, 0x13c, 0x13c, 0x13c, 0x13b, 0x13b, 0x13a, 0x13a, 0x139, 0x139, 0x138, 0x138, 0x137, 0x137, 0x136, 0x136, 0x136, 0x135, 0x135, 0x134, 0x134, 0x133, 0x133, 0x133, 0x132, 0x132, 0x131, 0x131, 0x130, 0x130, 0x130, 0x12f, 0x12f, 0x12e, 0x12e, 0x12d, 0x12d, 0x12d, 0x12c, 0x12c, 0x12b, 0x12b, 0x12b, 0x12a, 0x12a, 0x129, 0x129, 0x129, 0x128, 0x128, 0x127, 0x127, 0x127, 0x126, 0x126, 0x126, 0x125, 0x125, 0x124, 0x124, 0x124, 0x123, 0x123, 0x123, 0x122, 0x122, 0x121, 0x121, 0x121, 0x120, 0x120, 0x120, 0x11f, 0x11f, 0x11f, 0x11e, 0x11e, 0x11e, 0x11d, 0x11d, 0x11d, 0x11c, 0x11c, 0x11c, 0x11b, 0x11b, 0x11a, 0x11a, 0x11a, 0x119, 0x119, 0x119, 0x118, 0x118, 0x118, 0x117, 0x117, 0x117, 0x117, 0x116, 0x116, 0x116, 0x115, 0x115, 0x115, 0x114, 0x114, 0x114, 0x113, 0x113, 0x113, 0x112, 0x112, 0x112, 0x111, 0x111, 0x111, 0x111, 0x110, 0x110, 0x110, 0x10f, 0x10f, 0x10f, 0x10e, 0x10e, 0x10e, 0x10e, 0x10d, 0x10d, 0x10d, 0x10c, 0x10c, 0x10c, 0x10c, 0x10b, 0x10b, 0x10b, 0x10a, 0x10a, 0x10a, 0x10a, 0x109, 0x109, 0x109, 0x108, 0x108, 0x108, 0x108, 0x107, 0x107, 0x107, 0x107, 0x106, 0x106, 0x106, 0x105, 0x105, 0x105, 0x105, 0x104, 0x104, 0x104, 0x104, 0x103, 0x103, 0x103, 0x103, 0x102, 0x102, 0x102, 0x102, 0x101, 0x101, 0x101, 0x101, 0x100, 0x100, 0x100, 0x100, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfd, 0xfd, 0xfd, 0xfd, 0xfc, 0xfc, 0xfc, 0xfc, 0xfb, 0xfb, 0xfb, 0xfb, 0xfa, 0xfa, 0xfa, 0xfa, 0xf9, 0xf9, 0xf9, 0xf9, 0xf9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf7, 0xf7, 0xf7, 0xf7, 0xf6, 0xf6, 0xf6, 0xf6, 0xf6, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf4, 0xf4, 0xf4, 0xf4, 0xf3, 0xf3, 0xf3, 0xf3, 0xf3, 0xf2, 0xf2, 0xf2, 0xf2, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xef, 0xef, 0xef, 0xef, 0xef, 0xee, 0xee, 0xee, 0xee, 0xee, 0xed, 0xed, 0xed, 0xed, 0xed, 0xec, 0xec, 0xec, 0xec, 0xec, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xea, 0xea, 0xea, 0xea, 0xea, 0xe9, 0xe9, 0xe9, 0xe9, 0xe9, 0xe9, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe4, 0xe4, 0xe4, 0xe4, 0xe4, 0xe4, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe2, 0xe2, 0xe2, 0xe2, 0xe2, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe1, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd7, 0xd7, 0xd7, 0xd7, 0xd7, 0xd7, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd6, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd5, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd4, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd2, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xce, 0xce, 0xce, 0xce, 0xce, 0xce, 0xce, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xcb, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xca, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc5, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc4, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc1, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbe, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbd, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbc, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb9, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb8, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb7, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb6, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5, 0xb5}; short invSqrt(short x) { diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c index dcb206b5a40e3f2adb0f8fbcfa5d4611c1c19882..36bae5bd3ddea5c9781fd6070f746650226fdfd9 100644 --- a/openair1/PHY/TOOLS/oai_dfts.c +++ b/openair1/PHY/TOOLS/oai_dfts.c @@ -1513,7 +1513,6 @@ const static int16_t tw64c[96] __attribute__((aligned(32))) = { -12539,-30273,-20787,-25330,-27244,-18205,-31356,-9512 }; #define simd_q15_t simde__m128i -#define simdshort_q15_t simde__m64 #define shiftright_int16(a,shift) simde_mm_srai_epi16(a,shift) #define mulhi_int16(a,b) simde_mm_mulhrs_epi16 (a,b) #define simd256_q15_t simde__m256i @@ -1661,8 +1660,6 @@ void dft64(int16_t *x,int16_t *y,unsigned char scale) y256[7] = shiftright_int16_simd256(y256[7], 1); } - simde_mm_empty(); - simde_m_empty(); } void idft64(int16_t *x,int16_t *y,unsigned char scale) @@ -1760,8 +1757,6 @@ void idft64(int16_t *x,int16_t *y,unsigned char scale) y256[7] = shiftright_int16_simd256(y256[7],3); } - simde_mm_empty(); - simde_m_empty(); } static const int16_t tw128[128] __attribute__((aligned(32))) = { 32767,0,32727,-1608,32609,-3212,32412,-4808,32137,-6393,31785,-7962,31356,-9512,30851,-11039,30272,-12540,29621,-14010,28897,-15447,28105,-16846,27244,-18205,26318,-19520,25329,-20788,24278,-22005,23169,-23170,22004,-24279,20787,-25330,19519,-26319,18204,-27245,16845,-28106,15446,-28898,14009,-29622,12539,-30273,11038,-30852,9511,-31357,7961,-31786,6392,-32138,4807,-32413,3211,-32610,1607,-32728,0,-32767,-1608,-32728,-3212,-32610,-4808,-32413,-6393,-32138,-7962,-31786,-9512,-31357,-11039,-30852,-12540,-30273,-14010,-29622,-15447,-28898,-16846,-28106,-18205,-27245,-19520,-26319,-20788,-25330,-22005,-24279,-23170,-23170,-24279,-22005,-25330,-20788,-26319,-19520,-27245,-18205,-28106,-16846,-28898,-15447,-29622,-14010,-30273,-12540,-30852,-11039,-31357,-9512,-31786,-7962,-32138,-6393,-32413,-4808,-32610,-3212,-32728,-1608}; @@ -1998,8 +1993,6 @@ void dft256(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } void idft256(int16_t *x,int16_t *y,unsigned char scale) @@ -2082,8 +2075,6 @@ void idft256(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } static const int16_t tw512[512] __attribute__((aligned(32))) = { @@ -2307,8 +2298,6 @@ void dft1024(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } void idft1024(int16_t *x,int16_t *y,unsigned char scale) @@ -2362,8 +2351,6 @@ void idft1024(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } int16_t tw2048[2048] __attribute__((aligned(32))); @@ -2454,8 +2441,6 @@ void dft2048(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void idft2048(int16_t *x,int16_t *y,unsigned char scale) @@ -2543,8 +2528,6 @@ void idft2048(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } int16_t tw4096[3*2*1024]; @@ -2600,8 +2583,6 @@ void dft4096(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } void idft4096(int16_t *x,int16_t *y,unsigned char scale) @@ -2655,8 +2636,6 @@ void idft4096(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } int16_t tw8192[2*4096] __attribute__((aligned(32))); @@ -2747,8 +2726,6 @@ void dft8192(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void idft8192(int16_t *x,int16_t *y,unsigned char scale) @@ -2836,8 +2813,6 @@ void idft8192(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } int16_t tw16384[3*2*4096] __attribute__((aligned(32))); @@ -2893,8 +2868,6 @@ void dft16384(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } void idft16384(int16_t *x,int16_t *y,unsigned char scale) @@ -2948,8 +2921,6 @@ void idft16384(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } int16_t tw32768[2*16384] __attribute__((aligned(32))); @@ -3040,8 +3011,6 @@ void dft32768(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void idft32768(int16_t *x,int16_t *y,unsigned char scale) @@ -3129,8 +3098,6 @@ void idft32768(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } int16_t twa768[512],twb768[512]; @@ -3183,8 +3150,6 @@ void idft768(int16_t *input, int16_t *output, unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void dft768(int16_t *input, int16_t *output, unsigned char scale) @@ -3246,8 +3211,6 @@ void dft768(int16_t *input, int16_t *output, unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } int16_t twa1536[1024],twb1536[1024]; @@ -3299,8 +3262,6 @@ void idft1536(int16_t *input, int16_t *output, unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void dft1536(int16_t *input, int16_t *output, unsigned char scale) @@ -3362,8 +3323,6 @@ void dft1536(int16_t *input, int16_t *output, unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } int16_t twa3072[2048] __attribute__((aligned(32))); @@ -3415,8 +3374,6 @@ void dft3072(int16_t *input, int16_t *output,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } void idft3072(int16_t *input, int16_t *output,unsigned char scale) @@ -3465,8 +3422,6 @@ void idft3072(int16_t *input, int16_t *output,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } @@ -3527,8 +3482,6 @@ void idft6144(int16_t *input, int16_t *output,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } @@ -3590,8 +3543,6 @@ void dft6144(int16_t *input, int16_t *output,unsigned char scale) y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } int16_t twa12288[8192] __attribute__((aligned(32))); @@ -3654,8 +3605,6 @@ void dft12288(int16_t *input, int16_t *output,unsigned char scale) y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } void idft12288(int16_t *input, int16_t *output,unsigned char scale) @@ -3712,8 +3661,6 @@ void idft12288(int16_t *input, int16_t *output,unsigned char scale) y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); #ifndef MR_MAIN if (LOG_DUMPFLAG(DEBUG_DFT)) { LOG_M("idft12288out.m","out",output,6144,1,1); @@ -3768,8 +3715,6 @@ void dft18432(int16_t *input, int16_t *output,unsigned char scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } void idft18432(int16_t *input, int16_t *output,unsigned char scale) { @@ -3816,8 +3761,6 @@ void idft18432(int16_t *input, int16_t *output,unsigned char scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } @@ -3882,8 +3825,6 @@ void dft24576(int16_t *input, int16_t *output,unsigned char scale) y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); #ifndef MR_MAIN if (LOG_DUMPFLAG(DEBUG_DFT)) { LOG_M("out.m","out",output,24576,1,1); @@ -3942,8 +3883,6 @@ void idft24576(int16_t *input, int16_t *output,unsigned char scale) y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); #ifndef MR_MAIN if (LOG_DUMPFLAG(DEBUG_DFT)) { LOG_M("idft24576out.m","out",output,24576,1,1); @@ -4006,8 +3945,6 @@ void dft36864(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); #ifndef MR_MAIN if (LOG_DUMPFLAG(DEBUG_DFT)) { LOG_M("out.m","out",output,36864,1,1); @@ -4059,8 +3996,6 @@ void idft36864(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } int16_t twa49152[32768] __attribute__((aligned(32))); @@ -4111,8 +4046,6 @@ void dft49152(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } void idft49152(int16_t *input, int16_t *output,uint8_t scale) { @@ -4159,8 +4092,6 @@ void idft49152(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } int16_t tw65536[3*2*16384] __attribute__((aligned(32))); @@ -4216,8 +4147,6 @@ void idft65536(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } int16_t twa98304[65536] __attribute__((aligned(32))); @@ -4267,8 +4196,6 @@ void dft98304(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } void idft98304(int16_t *input, int16_t *output,uint8_t scale) { @@ -4315,8 +4242,6 @@ void idft98304(int16_t *input, int16_t *output,uint8_t scale) { y128p+=16; } } - simde_mm_empty(); - simde_m_empty(); } @@ -4470,8 +4395,6 @@ void dft12(int16_t *x,int16_t *y ,unsigned char scale_flag) &y128[10], &y128[11]); - simde_mm_empty(); - simde_m_empty(); } static const int16_t W1_12s_256[16] __attribute__((aligned(32))) = @@ -4641,8 +4564,6 @@ void dft12_simd256(int16_t *x,int16_t *y) &y256[10], &y256[11]); - simde_mm_empty(); - simde_m_empty(); } static int16_t tw24[88]__attribute__((aligned(32))); @@ -4735,8 +4656,6 @@ void dft24(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa36[88]__attribute__((aligned(32))); @@ -4857,8 +4776,6 @@ void dft36(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa48[88]__attribute__((aligned(32))); @@ -5017,8 +4934,6 @@ void dft48(int16_t *x, int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa60[88]__attribute__((aligned(32))); @@ -5201,8 +5116,6 @@ void dft60(int16_t *x,int16_t *y,unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t tw72[280]__attribute__((aligned(32))); @@ -5244,8 +5157,6 @@ void dft72(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t tw96[376]__attribute__((aligned(32))); @@ -5289,8 +5200,6 @@ void dft96(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa108[280]__attribute__((aligned(32))); @@ -5339,8 +5248,6 @@ void dft108(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t tw120[472]__attribute__((aligned(32))); @@ -5380,8 +5287,6 @@ void dft120(int16_t *x,int16_t *y, unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa144[376]__attribute__((aligned(32))); @@ -5430,8 +5335,6 @@ void dft144(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa180[472]__attribute__((aligned(32))); @@ -5481,8 +5384,6 @@ void dft180(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa192[376]__attribute__((aligned(32))); @@ -5539,8 +5440,6 @@ void dft192(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa216[568]__attribute__((aligned(32))); @@ -5590,8 +5489,6 @@ void dft216(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa240[472]__attribute__((aligned(32))); @@ -5648,8 +5545,6 @@ void dft240(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa288[760]__attribute__((aligned(32))); @@ -5699,8 +5594,6 @@ void dft288(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa300[472]__attribute__((aligned(32))); @@ -5764,8 +5657,6 @@ void dft300(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa324[107*2*4]; @@ -5814,8 +5705,6 @@ void dft324(int16_t *x,int16_t *y,unsigned char scale_flag) // 108 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa360[119*2*4]; @@ -5864,8 +5753,6 @@ void dft360(int16_t *x,int16_t *y,unsigned char scale_flag) // 120 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa384[95*2*4]; @@ -5921,8 +5808,6 @@ void dft384(int16_t *x,int16_t *y,unsigned char scale_flag) // 96 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa432[107*2*4]; @@ -5977,8 +5862,6 @@ void dft432(int16_t *x,int16_t *y,unsigned char scale_flag) // 108 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa480[119*2*4]; static int16_t twb480[119*2*4]; @@ -6033,8 +5916,6 @@ void dft480(int16_t *x,int16_t *y,unsigned char scale_flag) // 120 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6084,8 +5965,6 @@ void dft540(int16_t *x,int16_t *y,unsigned char scale_flag) // 180 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa576[191*2*4]; @@ -6135,8 +6014,6 @@ void dft576(int16_t *x,int16_t *y,unsigned char scale_flag) // 192 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6179,8 +6056,6 @@ void dft600(int16_t *x,int16_t *y,unsigned char scale_flag) // 300 x 2 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6230,8 +6105,6 @@ void dft648(int16_t *x,int16_t *y,unsigned char scale_flag) // 216 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6289,8 +6162,6 @@ void dft720(int16_t *x,int16_t *y,unsigned char scale_flag) // 180 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa768p[191*2*4]; @@ -6346,8 +6217,6 @@ void dft768p(int16_t *x,int16_t *y,unsigned char scale_flag) { // 192x 4; } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa384i[256]; @@ -6400,8 +6269,6 @@ void idft384(int16_t *input, int16_t *output, unsigned char scale) } } - simde_mm_empty(); - simde_m_empty(); } @@ -6451,8 +6318,6 @@ void dft864(int16_t *x,int16_t *y,unsigned char scale_flag) // 288 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa900[299*2*4]; @@ -6501,8 +6366,6 @@ void dft900(int16_t *x,int16_t *y,unsigned char scale_flag) // 300 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6560,8 +6423,6 @@ void dft960(int16_t *x,int16_t *y,unsigned char scale_flag) // 240 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6611,8 +6472,6 @@ void dft972(int16_t *x,int16_t *y,unsigned char scale_flag) // 324 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1080[359*2*4]; @@ -6661,8 +6520,6 @@ void dft1080(int16_t *x,int16_t *y,unsigned char scale_flag) // 360 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1152[287*2*4]; @@ -6719,8 +6576,6 @@ void dft1152(int16_t *x,int16_t *y,unsigned char scale_flag) // 288 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; int16_t twa1200[4784]; @@ -6776,8 +6631,6 @@ void dft1200(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } @@ -6828,8 +6681,6 @@ void dft1296(int16_t *x,int16_t *y,unsigned char scale_flag) //432 * 3 } } - simde_mm_empty(); - simde_m_empty(); }; @@ -6879,8 +6730,6 @@ void dft1440(int16_t *x,int16_t *y,unsigned char scale_flag) // 480 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1500[2392]__attribute__((aligned(32))); @@ -6944,8 +6793,6 @@ void dft1500(int16_t *x,int16_t *y,unsigned char scale_flag) } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa1620[539*2*4]; @@ -6994,8 +6841,6 @@ void dft1620(int16_t *x,int16_t *y,unsigned char scale_flag) // 540 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1728[575*2*4]; @@ -7044,8 +6889,6 @@ void dft1728(int16_t *x,int16_t *y,unsigned char scale_flag) // 576 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1800[599*2*4]; @@ -7094,8 +6937,6 @@ void dft1800(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1920[479*2*4]; @@ -7150,8 +6991,6 @@ void dft1920(int16_t *x,int16_t *y,unsigned char scale_flag) // 480 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa1944[647*2*4]; @@ -7200,8 +7039,6 @@ void dft1944(int16_t *x,int16_t *y,unsigned char scale_flag) // 648 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2160[719*2*4]; @@ -7250,8 +7087,6 @@ void dft2160(int16_t *x,int16_t *y,unsigned char scale_flag) // 720 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2304[767*2*4]; @@ -7300,8 +7135,6 @@ void dft2304(int16_t *x,int16_t *y,unsigned char scale_flag) // 768 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2400[599*2*4]; @@ -7357,8 +7190,6 @@ void dft2400(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 x 4 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2592[863*2*4]; @@ -7407,8 +7238,6 @@ void dft2592(int16_t *x,int16_t *y,unsigned char scale_flag) // 864 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2700[899*2*4]; @@ -7457,8 +7286,6 @@ void dft2700(int16_t *x,int16_t *y,unsigned char scale_flag) // 900 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2880[959*2*4]; @@ -7507,8 +7334,6 @@ void dft2880(int16_t *x,int16_t *y,unsigned char scale_flag) // 960 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa2916[971*2*4]; @@ -7557,8 +7382,6 @@ void dft2916(int16_t *x,int16_t *y,unsigned char scale_flag) // 972 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; static int16_t twa3000[599*8]__attribute__((aligned(32))); @@ -7622,8 +7445,6 @@ void dft3000(int16_t *x,int16_t *y,unsigned char scale_flag) // 600 * 5 } } - simde_mm_empty(); - simde_m_empty(); } static int16_t twa3240[1079*2*4]; @@ -7672,8 +7493,6 @@ void dft3240(int16_t *x,int16_t *y,unsigned char scale_flag) // 1080 x 3 } } - simde_mm_empty(); - simde_m_empty(); }; void init_rad4(int N,int16_t *tw) { diff --git a/openair1/PHY/TOOLS/oai_dfts_neon.c b/openair1/PHY/TOOLS/oai_dfts_neon.c index f3a2e5cb46a0bb264a7b6ee0410dfe00cb2ced45..ddf8a59bf524dd99c0a44cb08d535c86a115abeb 100644 --- a/openair1/PHY/TOOLS/oai_dfts_neon.c +++ b/openair1/PHY/TOOLS/oai_dfts_neon.c @@ -909,9 +909,6 @@ const static int16_t tw64c[96] __attribute__((aligned(32))) = { #ifdef simd_q15_t #undef simd_q15_t #endif -#ifdef simdshort_q15_t -#undef simdshort_q15_t -#endif #ifdef shiftright_int16 #undef shiftright_int16 #endif @@ -3633,8 +3630,6 @@ void idft65536(int16_t *x,int16_t *y,unsigned char scale) } - simde_mm_empty(); - simde_m_empty(); } int16_t twa98304[65536] __attribute__((aligned(32))); diff --git a/openair1/PHY/TOOLS/signal_energy.c b/openair1/PHY/TOOLS/signal_energy.c index 012677e8bb0ac54c5257ef3d62fd0af738344f93..93544da75883f13f89648e72d06026eb25b7b48a 100644 --- a/openair1/PHY/TOOLS/signal_energy.c +++ b/openair1/PHY/TOOLS/signal_energy.c @@ -65,52 +65,6 @@ int32_t signal_energy(int32_t *input,uint32_t length) return temp; } -int32_t signal_energy_amp_shift(int32_t *input,uint32_t length) -{ - - int32_t i; - int32_t temp,temp2; - register simde__m64 mm0,mm1,mm2,mm3; - simde__m64 *in = (simde__m64 *)input; - - mm0 = simde_mm_setzero_si64(); - mm3 = simde_mm_setzero_si64(); - - for (i=0; i<length>>1; i++) { - - mm1 = in[i]; - mm2 = mm1; - mm1 = simde_m_pmaddwd(mm1,mm1); - mm1 = simde_m_psradi(mm1,AMP_SHIFT);// shift any 32 bits blocs of the word by the value shift_p9 - mm0 = simde_m_paddd(mm0,mm1);// add the two 64 bits words 4 bytes by 4 bytes - mm3 = simde_m_paddw(mm3,mm2);// add the two 64 bits words 2 bytes by 2 bytes - } - - mm1 = mm0; - mm0 = simde_m_psrlqi(mm0,32); - mm0 = simde_m_paddd(mm0,mm1); - temp = simde_m_to_int(mm0); - temp/=length; // this is the average of x^2 - - - // now remove the DC component - - - mm2 = simde_m_psrlqi(mm3,32); - mm2 = simde_m_paddw(mm2,mm3); - mm2 = simde_m_pmaddwd(mm2,mm2); - mm2 = simde_m_psradi(mm2,AMP_SHIFT); // fixed point representation of elements - temp2 = simde_m_to_int(mm2); - temp2/=(length*length); - - temp -= temp2; - - simde_mm_empty(); - simde_m_empty(); - - return((temp>0)?temp:1); -} - uint32_t signal_energy_nodc(const c16_t *input, uint32_t length) { // init diff --git a/openair1/PHY/TOOLS/simde_operations.c b/openair1/PHY/TOOLS/simde_operations.c index 03a611e08dd69f212e0fe34e9a5f3385be83f028..f0dd306033f91298375948028e9bc4100c2e49d7 100644 --- a/openair1/PHY/TOOLS/simde_operations.c +++ b/openair1/PHY/TOOLS/simde_operations.c @@ -57,4 +57,4 @@ void simde_mm256_separate_real_imag_parts(simde__m256i *out_re, simde__m256i *ou *out_re = simde_mm256_permute4x64_epi64(tmp0, 0xd8); *out_im = simde_mm256_permute4x64_epi64(tmp1, 0xd8); -} \ No newline at end of file +} diff --git a/openair1/PHY/TOOLS/tools_defs.h b/openair1/PHY/TOOLS/tools_defs.h index 41af6ba0a95c8ecd065c4d0e5208bf0035aa0e41..d7e738a9f44fc5cc39f8c25013187d8f61425c33 100644 --- a/openair1/PHY/TOOLS/tools_defs.h +++ b/openair1/PHY/TOOLS/tools_defs.h @@ -41,7 +41,6 @@ #include "common/utils/LOG/log.h" #define simd_q15_t simde__m128i -#define simdshort_q15_t simde__m64 #define shiftright_int16(a,shift) simde_mm_srai_epi16(a,shift) #define set1_int16(a) simde_mm_set1_epi16(a) #define mulhi_int16(a,b) simde_mm_mulhrs_epi16 (a,b) @@ -727,8 +726,6 @@ int32_t sub_cpx_vector16(int16_t *x, */ int32_t signal_energy(int32_t *,uint32_t); -int32_t signal_energy_amp_shift(int32_t *input, uint32_t length); - #ifdef LOCALIZATION /*!\fn int32_t signal_energy(int *,uint32_t); \brief Computes the signal energy per subcarrier diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h index b64cb20752ec503cdc8fac09c5ed21b3022192f6..dd035a73e3b5111609e0f4ddc35467c62697e43f 100644 --- a/openair1/PHY/defs_nr_UE.h +++ b/openair1/PHY/defs_nr_UE.h @@ -34,8 +34,10 @@ #ifdef __cplusplus #include <atomic> +#ifndef _Atomic #define _Atomic(X) std::atomic< X > #endif +#endif #include "defs_nr_common.h" #include "CODING/nrPolar_tools/nr_polar_pbch_defs.h" diff --git a/openair1/PHY/defs_nr_common.h b/openair1/PHY/defs_nr_common.h index 64f3ef303ca6c025c240c03f0c136a401d425d9a..f6f18eb9427e2a8b02da52d0f21f2db83baaebe5 100644 --- a/openair1/PHY/defs_nr_common.h +++ b/openair1/PHY/defs_nr_common.h @@ -217,6 +217,8 @@ struct NR_DL_FRAME_PARMS { c16_t timeshift_symbol_rotation[4096*2] __attribute__ ((aligned (16))); /// Table used to apply the delay compensation in DL/UL c16_t delay_table[2 * MAX_DELAY_COMP + 1][NR_MAX_OFDM_SYMBOL_SIZE]; + /// Table used to apply the delay compensation in PUCCH2 + c16_t delay_table128[2 * MAX_DELAY_COMP + 1][128]; /// SRS configuration from TS 38.331 RRC SRS_NR srs_nr; /// Power used by SSB in order to estimate signal strength and path loss diff --git a/openair1/PHY/nr_phy_common/src/nr_phy_common.c b/openair1/PHY/nr_phy_common/src/nr_phy_common.c index 11a40fd69e21ed2ef234736dc77718ad72cfaf2f..9aa45e20b385f1092739671d25f5f643315ce7da 100644 --- a/openair1/PHY/nr_phy_common/src/nr_phy_common.c +++ b/openair1/PHY/nr_phy_common/src/nr_phy_common.c @@ -25,6 +25,8 @@ #define USE_128BIT #endif +#define PEAK_DETECT_THRESHOLD 15 + int16_t saturating_sub(int16_t a, int16_t b) { int32_t result = (int32_t)a - (int32_t)b; @@ -109,7 +111,6 @@ void nr_16qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag_in, int16_t *llr, uint3 ch_mag_128++; } - simde_mm_empty(); nb_re &= 0x3; int16_t *rxDataF_i16 = (int16_t *)rxF_128; @@ -195,9 +196,6 @@ void nr_64qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int1 simde__m128i *rxF_128 = (simde__m128i *)rxF; simde__m128i *ch_mag_128 = (simde__m128i *)ch_maga; simde__m128i *ch_magb_128 = (simde__m128i *)ch_magb; - - simde__m64 *llr64 = (simde__m64 *)llr_32; - // Each iteration does 4 RE (gives 24 16bit-llrs) for (int i = 0; i < (nb_re >> 2); i++) { simde__m128i xmm0, xmm1, xmm2; @@ -207,12 +205,18 @@ void nr_64qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int1 xmm2 = simde_mm_abs_epi16(xmm1); xmm2 = simde_mm_subs_epi16(*ch_magb_128, xmm2); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm1, 0), simde_mm_extract_epi32(xmm0, 0)); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm0, 1), simde_mm_extract_epi32(xmm2, 0)); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm2, 1), simde_mm_extract_epi32(xmm1, 1)); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm1, 2), simde_mm_extract_epi32(xmm0, 2)); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm0, 3), simde_mm_extract_epi32(xmm2, 2)); - *llr64++ = simde_mm_set_pi32(simde_mm_extract_epi32(xmm2, 3), simde_mm_extract_epi32(xmm1, 3)); + *llr_32++ = simde_mm_extract_epi32(xmm0, 0); + *llr_32++ = simde_mm_extract_epi32(xmm1, 0); + *llr_32++ = simde_mm_extract_epi32(xmm2, 0); + *llr_32++ = simde_mm_extract_epi32(xmm0, 1); + *llr_32++ = simde_mm_extract_epi32(xmm1, 1); + *llr_32++ = simde_mm_extract_epi32(xmm2, 1); + *llr_32++ = simde_mm_extract_epi32(xmm0, 2); + *llr_32++ = simde_mm_extract_epi32(xmm1, 2); + *llr_32++ = simde_mm_extract_epi32(xmm2, 2); + *llr_32++ = simde_mm_extract_epi32(xmm0, 3); + *llr_32++ = simde_mm_extract_epi32(xmm1, 3); + *llr_32++ = simde_mm_extract_epi32(xmm2, 3); rxF_128++; ch_mag_128++; ch_magb_128++; @@ -223,7 +227,7 @@ void nr_64qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int1 int16_t *rxDataF_i16 = (int16_t *)rxF_128; int16_t *ch_mag_i16 = (int16_t *)ch_mag_128; int16_t *ch_magb_i16 = (int16_t *)ch_magb_128; - int16_t *llr_i16 = (int16_t *)llr64; + int16_t *llr_i16 = (int16_t *)llr_32; for (int i = 0; i < nb_re; i++) { int16_t real = rxDataF_i16[2 * i]; int16_t imag = rxDataF_i16[2 * i + 1]; @@ -238,7 +242,6 @@ void nr_64qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int1 llr_i16[6 * i + 4] = saturating_sub(mag_realb, abs(llr_i16[6 * i + 2])); llr_i16[6 * i + 5] = saturating_sub(mag_imagb, abs(llr_i16[6 * i + 3])); } - simde_mm_empty(); } void nr_256qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int32_t *ch_mag3, int16_t *llr, uint32_t nb_re) @@ -350,7 +353,6 @@ void nr_256qam_llr(int32_t *rxdataF_comp, int32_t *ch_mag, int32_t *ch_mag2, int llr_i16[8 * i + 7] = saturating_sub(magc_imag, abs(llr_i16[8 * i + 5])); } } - simde_mm_empty(); } void freq2time(uint16_t ofdm_symbol_size, int16_t *freq_signal, int16_t *time_signal) @@ -367,20 +369,28 @@ void nr_est_delay(int ofdm_symbol_size, const c16_t *ls_est, c16_t *ch_estimates int max_val = delay->delay_max_val; const int sync_pos = 0; + uint64_t mean_val = 0; for (int i = 0; i < ofdm_symbol_size; i++) { int temp = c16amp2(ch_estimates_time[i]) >> 1; + mean_val += temp; if (temp > max_val) { max_pos = i; max_val = temp; } } + mean_val /= ofdm_symbol_size; if (max_pos > ofdm_symbol_size / 2) max_pos = max_pos - ofdm_symbol_size; delay->delay_max_pos = max_pos; delay->delay_max_val = max_val; - delay->est_delay = max_pos - sync_pos; + + // The peak in general is quite clear. It only gives a small peak when the noise is high, generally obtaining an incorrect + // estimated delay, and causing the delay compensation to worsen the result instead of improving it. After analyzing several + // peaks, and doing many tests, a PEAK_DETECT_THRESHOLD = 15 is an adequate value, to apply delay compensation only when there is + // clearly a peak + delay->est_delay = mean_val > 0 && max_val / mean_val > PEAK_DETECT_THRESHOLD ? max_pos - sync_pos : 0; } unsigned int nr_get_tx_amp(int power_dBm, int power_max_dBm, int total_nb_rb, int nb_rb) diff --git a/openair1/SCHED/phy_procedures_lte_eNb.c b/openair1/SCHED/phy_procedures_lte_eNb.c index 2bde391979fbb41869728c6f515514505ec53f01..5d885cac91b80d9bd56f219625b4750ac00fbad9 100644 --- a/openair1/SCHED/phy_procedures_lte_eNb.c +++ b/openair1/SCHED/phy_procedures_lte_eNb.c @@ -1329,8 +1329,9 @@ void pusch_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) { uint32_t harq_pid0 = subframe2harq_pid(&eNB->frame_parms,frame,subframe); turboDecode_t arr[64] = {0}; - task_ans_t ans[64] = {0}; - thread_info_tm_t t_info = {.ans = ans, .cap = 64, .len = 0, .buf = (uint8_t *)arr}; + task_ans_t ans; + init_task_ans(&ans, 64); + thread_info_tm_t t_info = {.ans = &ans, .cap = 64, .len = 0, .buf = (uint8_t *)arr}; for (i = 0; i < NUMBER_OF_ULSCH_MAX; i++) { ulsch = eNB->ulsch[i]; @@ -1420,7 +1421,10 @@ void pusch_procedures(PHY_VARS_eNB *eNB,L1_rxtx_proc_t *proc) { const bool decode = proc->nbDecode; DevAssert(t_info.len == proc->nbDecode); if (proc->nbDecode > 0) { - join_task_ans(t_info.ans, t_info.len); + if (t_info.len != t_info.cap) { + completed_many_task_ans(t_info.ans, t_info.cap - t_info.len); + } + join_task_ans(t_info.ans); for (size_t i = 0; i < t_info.len; ++i) { postDecode(proc, &arr[i]); } diff --git a/openair1/SCHED_NR/nr_ru_procedures.c b/openair1/SCHED_NR/nr_ru_procedures.c index b385a1b540b2dabd9e4487f5cef5d6f59d82c95e..a0b3a69ba8c24becbbc24e8a6aa5fd8d5abb3c9c 100644 --- a/openair1/SCHED_NR/nr_ru_procedures.c +++ b/openair1/SCHED_NR/nr_ru_procedures.c @@ -300,14 +300,14 @@ void nr_feptx_tp(RU_t *ru, int frame_tx, int slot) start_meas(&ru->ofdm_total_stats); size_t const sz = ru->nb_tx + (ru->half_slot_parallelization > 0) * ru->nb_tx; - AssertFatal(sz < 64, "Please, increase the buffer size"); - feptx_cmd_t arr[64] = {0}; - task_ans_t ans[64] = {0}; + feptx_cmd_t arr[sz]; + task_ans_t ans; + init_task_ans(&ans, sz); int nbfeptx = 0; for (int aid = 0; aid < ru->nb_tx; aid++) { feptx_cmd_t *feptx_cmd = &arr[nbfeptx]; - feptx_cmd->ans = &ans[nbfeptx]; + feptx_cmd->ans = &ans; feptx_cmd->aid = aid; feptx_cmd->ru = ru; @@ -321,7 +321,7 @@ void nr_feptx_tp(RU_t *ru, int frame_tx, int slot) nbfeptx++; if (ru->half_slot_parallelization > 0) { feptx_cmd_t *feptx_cmd = &arr[nbfeptx]; - feptx_cmd->ans = &ans[nbfeptx]; + feptx_cmd->ans = &ans; feptx_cmd->aid = aid; feptx_cmd->ru = ru; @@ -334,8 +334,7 @@ void nr_feptx_tp(RU_t *ru, int frame_tx, int slot) nbfeptx++; } } - - join_task_ans(ans, nbfeptx); + join_task_ans(&ans); stop_meas(&ru->ofdm_total_stats); if (ru->idx == 0) @@ -379,13 +378,13 @@ void nr_fep_tp(RU_t *ru, int slot) { start_meas(&ru->ofdm_demod_stats); size_t const sz = ru->nb_rx + (ru->half_slot_parallelization > 0) * ru->nb_rx; - AssertFatal(sz < 64, "Please, increase buffer size"); - feprx_cmd_t arr[64] = {0}; - task_ans_t ans[64] = {0}; + feprx_cmd_t arr[sz]; + task_ans_t ans; + init_task_ans(&ans, sz); for (int aid=0;aid<ru->nb_rx;aid++) { feprx_cmd_t *feprx_cmd = &arr[nbfeprx]; - feprx_cmd->ans = &ans[nbfeprx]; + feprx_cmd->ans = &ans; feprx_cmd->aid = aid; feprx_cmd->ru = ru; @@ -399,7 +398,7 @@ void nr_fep_tp(RU_t *ru, int slot) { nbfeprx++; if (ru->half_slot_parallelization > 0) { feprx_cmd_t *feprx_cmd = &arr[nbfeprx]; - feprx_cmd->ans = &ans[nbfeprx]; + feprx_cmd->ans = &ans; feprx_cmd->aid = aid; feprx_cmd->ru = ru; @@ -413,8 +412,7 @@ void nr_fep_tp(RU_t *ru, int slot) { nbfeprx++; } } - - join_task_ans(ans, nbfeprx); + join_task_ans(&ans); stop_meas(&ru->ofdm_demod_stats); if (ru->idx == 0) VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME( VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPRX, 0 ); diff --git a/openair1/SCHED_NR_UE/phy_frame_config_nr_ue.c b/openair1/SCHED_NR_UE/phy_frame_config_nr_ue.c index 4fbe2412239c43d21caa6a44c2146a72b0b49fc6..049d79483260a38ade0a7d4b4abeee7a97306454 100644 --- a/openair1/SCHED_NR_UE/phy_frame_config_nr_ue.c +++ b/openair1/SCHED_NR_UE/phy_frame_config_nr_ue.c @@ -139,4 +139,4 @@ int sl_nr_ue_slot_select(const sl_nr_phy_config_request_t *cfg, int slot, uint8_ } return slot_type; -} \ No newline at end of file +} diff --git a/openair1/SIMULATION/NR_PHY/pucchsim.c b/openair1/SIMULATION/NR_PHY/pucchsim.c index 81d6fff9b3cd890fe55688accbe7396e1a2d5847..ab73a13626b55c0ccc45ae23f70897c4e869092a 100644 --- a/openair1/SIMULATION/NR_PHY/pucchsim.c +++ b/openair1/SIMULATION/NR_PHY/pucchsim.c @@ -144,6 +144,7 @@ int main(int argc, char **argv) int sr_flag = 0; int pucch_DTX_thres = 0; cpuf = get_cpu_freq_GHz(); + bool print_perf = false; if ((uniqCfg = load_configmodule(argc, argv, CONFIG_ENABLECMDLINEONLY)) == 0) { exit_fun("[NR_PUCCHSIM] Error, configuration module init failed\n"); @@ -153,8 +154,7 @@ int main(int argc, char **argv) logInit(); int c; - while ((c = getopt (argc, argv, "--:O:f:hA:f:g:i:I:P:B:b:t:T:m:n:r:o:s:S:x:y:z:N:F:GR:IL:q:cd:")) != -1) { - + while ((c = getopt(argc, argv, "--:O:f:hA:f:g:i:I:P:B:b:t:T:m:n:r:o:s:S:x:y:z:N:F:GR:IL:q:cd:C")) != -1) { /* ignore long options starting with '--', option '-O' and their arguments that are handled by configmodule */ /* with this opstring getopt returns 1 for non-option arguments, refer to 'man 3 getopt' */ if (c == 1 || c == '-' || c == 'O') @@ -349,6 +349,10 @@ int main(int argc, char **argv) //nacktoack_flag=(uint8_t)atoi(optarg); target_error_rate=0.001; break; + case 'C': + print_perf = 1; + cpu_meas_enabled = 1; + break; default: case 'h': printf("%s -h(elp) -p(extended_prefix) -N cell_id -f output_filename -F input_filename -g channel_model -n n_frames -t Delayspread -s snr0 -S snr1 -x transmission_mode -y TXant -z RXant -i Intefrence0 -j Interference1 -A interpolation_file -C(alibration offset dB) -N CellId\n", argv[0]); @@ -381,6 +385,7 @@ int main(int argc, char **argv) printf("-x Transmission mode (1,2,6 for the moment)\n"); printf("-y Number of TX antennas used in eNB\n"); printf("-z Number of RX antennas used in UE\n"); + printf("-C print CPU cost\n"); exit (-1); break; } @@ -406,8 +411,8 @@ int main(int argc, char **argv) if ((format < 2) && (actual_payload == 4)) do_DTX=1; if (random_payload) { - srand(time(NULL)); // Initialization, should only be called once. - actual_payload = rand(); // Returns a pseudo-random integer between 0 and RAND_MAX. + double tmp = uniformrandom(); + memcpy(&actual_payload, &tmp, sizeof(actual_payload)); } actual_payload &= nr_bit < 64 ? (1UL << nr_bit) - 1: 0xffffffffffffffff; @@ -638,6 +643,7 @@ int main(int argc, char **argv) // noise measurement (all PRBs) gNB_I0_measurements(gNB, nr_slot_tx, 0, gNB->frame_parms.symbols_per_slot, rb_mask_ul); + start_meas(&gNB->phy_proc_rx); if (n_trials==1) printf("noise rxlev %d (%d dB), rxlev pucch %d dB sigma2 %f dB, SNR %f, TX %f, I0 (pucch) %d, I0 (avg) %d\n",rxlev,dB_fixed(rxlev),dB_fixed(rxlev_pucch),sigma2_dB,SNR,10*log10((double)txlev*UE->frame_parms.ofdm_symbol_size/12),gNB->measurements.n0_subband_power_tot_dB[startingPRB],gNB->measurements.n0_subband_power_avg_dB); if(format==0){ @@ -731,10 +737,21 @@ int main(int argc, char **argv) free(uci_pdu.csi_part1.csi_part1_payload); } + stop_meas(&gNB->phy_proc_rx); + n_errors=((actual_payload^payload_received)&1)+(((actual_payload^payload_received)&2)>>1)+(((actual_payload^payload_received)&4)>>2)+n_errors; } if (sr_flag == 1) printf("SR: SNR=%f, n_trials=%d, n_bit_errors=%d\n",SNR,n_trials,sr_errors); + if (print_perf) { + time_stats_t *ts = &gNB->phy_proc_rx; + printf("cpu time for pucch format %d: per block %.2f us; nb blocks %d, max time %.2f;\n", + format, + ts->diff / ts->trials / cpuf / 1000.0, + ts->trials, + ts->max / cpuf / 1000.0); + reset_meas(ts); + } if(nr_bit > 0) printf("ACK/NACK: SNR=%f, n_trials=%d, n_bit_errors=%d\n",SNR,n_trials,ack_nack_errors); if((float)(ack_nack_errors+sr_errors)/(float)(n_trials)<=target_error_rate){ diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c index 8e4fbf7154c361a4512b2a4bbd5fc79bcedb3517..300789084459b2366859f0b1756309c30a9caa10 100644 --- a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c +++ b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c @@ -3416,8 +3416,8 @@ static bool fill_mac_sdu(NR_UE_MAC_INST_t *mac, } else { *header = (NR_MAC_SUBHEADER_LONG){.R = 0, .F = 1, .LCID = lcid, .L = htons(sdu_length)}; #ifdef ENABLE_MAC_PAYLOAD_DEBUG - LOG_I(NR_MAC, "dumping MAC SDU with length %d: \n", sduL); - log_dump(NR_MAC, header, sduL, LOG_DUMP_CHAR, "\n"); + LOG_I(NR_MAC, "dumping MAC SDU with length %d: \n", sdu_length); + log_dump(NR_MAC, header, sdu_length, LOG_DUMP_CHAR, "\n"); #endif } mac_ce_p->cur_ptr += header_sz + sdu_length; @@ -3588,10 +3588,23 @@ static uint8_t nr_ue_get_sdu(NR_UE_MAC_INST_t *mac, LOG_D(NR_MAC, "Filling remainder %d bytes to the UL PDU \n", remain); *(NR_MAC_SUBHEADER_FIXED *)mac_ce_info.cur_ptr = (NR_MAC_SUBHEADER_FIXED){.R = 0, .LCID = UL_SCH_LCID_PADDING}; mac_ce_info.cur_ptr++; - memset(mac_ce_info.cur_ptr, 0, mac_ce_info.pdu_end - mac_ce_info.cur_ptr); + if (get_softmodem_params()->phy_test || get_softmodem_params()->do_ra) { + uint8_t *buf = mac_ce_info.cur_ptr; + uint8_t *end = mac_ce_info.pdu_end; + for (; buf < end && ((intptr_t)buf) % 4; buf++) + *buf = lrand48() & 0xff; + for (; buf < end - 3; buf += 4) { + uint32_t *buf32 = (uint32_t *)buf; + *buf32 = lrand48(); + } + for (; buf < end; buf++) + *buf = lrand48() & 0xff; + } else { + memset(mac_ce_info.cur_ptr, 0, mac_ce_info.pdu_end - mac_ce_info.cur_ptr); + } } #ifdef ENABLE_MAC_PAYLOAD_DEBUG - LOG_I(NR_MAC, "MAC PDU %d bytes \n", mac_ce_p->cur_ptr - ulsch_buffer); + LOG_I(NR_MAC, "MAC PDU %ld bytes\n", mac_ce_info.cur_ptr - ulsch_buffer); log_dump(NR_MAC, ulsch_buffer, buflen, LOG_DUMP_CHAR, "\n"); #endif diff --git a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c index 0f88066cc13c120657b813ca6fbcf158537d43f0..00f3b303e9d0928e0c6e945b20d439c067b90fe0 100644 --- a/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c +++ b/openair2/NR_UE_PHY_INTERFACE/NR_IF_Module.c @@ -70,6 +70,8 @@ static void save_pdsch_pdu_for_crnti(nfapi_nr_dl_tti_request_t *dl_tti_request); void print_ue_mac_stats(const module_id_t mod, const int frame_rx, const int slot_rx) { NR_UE_MAC_INST_t *mac = get_mac_inst(mod); + if (mac->state != UE_CONNECTED) + return; int ret = pthread_mutex_lock(&mac->if_mutex); AssertFatal(!ret, "mutex failed %d\n", ret);