diff --git a/analysis/statistics/1dade7d11ec5fd20c27ec970b182885e1e5a9158.txt b/analysis/statistics/1dade7d11ec5fd20c27ec970b182885e1e5a9158.txt new file mode 100644 index 000000000..c02c44f32 --- /dev/null +++ b/analysis/statistics/1dade7d11ec5fd20c27ec970b182885e1e5a9158.txt @@ -0,0 +1,48 @@ + +changeset: 1807:1dade7d11ec5fd20c27ec970b182885e1e5a9158 +char kNewtonVersion[] = "0.3-alpha-1807 (1dade7d11ec5fd20c27ec970b182885e1e5a9158) (build 03-13-2026-17:53-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/446db116293f217d77614392a6f9f155200977bb.txt b/analysis/statistics/446db116293f217d77614392a6f9f155200977bb.txt new file mode 100644 index 000000000..603e51c59 --- /dev/null +++ b/analysis/statistics/446db116293f217d77614392a6f9f155200977bb.txt @@ -0,0 +1,48 @@ + +changeset: 1803:446db116293f217d77614392a6f9f155200977bb +char kNewtonVersion[] = "0.3-alpha-1803 (446db116293f217d77614392a6f9f155200977bb) (build 03-10-2026-19:20-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/4daf9c891b7b185a7c9c5c6dca3d9a264a20a0c3.txt b/analysis/statistics/4daf9c891b7b185a7c9c5c6dca3d9a264a20a0c3.txt new file mode 100644 index 000000000..969c19f64 --- /dev/null +++ b/analysis/statistics/4daf9c891b7b185a7c9c5c6dca3d9a264a20a0c3.txt @@ -0,0 +1,48 @@ + +changeset: 1801:4daf9c891b7b185a7c9c5c6dca3d9a264a20a0c3 +char kNewtonVersion[] = "0.3-alpha-1801 (4daf9c891b7b185a7c9c5c6dca3d9a264a20a0c3) (build 03-10-2026-17:25-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/508d8a890a4cea4ecd014e1fb89373e433beb777.txt b/analysis/statistics/508d8a890a4cea4ecd014e1fb89373e433beb777.txt new file mode 100644 index 000000000..60a69af0c --- /dev/null +++ b/analysis/statistics/508d8a890a4cea4ecd014e1fb89373e433beb777.txt @@ -0,0 +1,48 @@ + +changeset: 1806:508d8a890a4cea4ecd014e1fb89373e433beb777 +char kNewtonVersion[] = "0.3-alpha-1806 (508d8a890a4cea4ecd014e1fb89373e433beb777) (build 03-12-2026-12:50-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/56ef63cc7539a244c30227b74337e6fa573bfdf4.txt b/analysis/statistics/56ef63cc7539a244c30227b74337e6fa573bfdf4.txt new file mode 100644 index 000000000..85c1406ae --- /dev/null +++ b/analysis/statistics/56ef63cc7539a244c30227b74337e6fa573bfdf4.txt @@ -0,0 +1,48 @@ + +changeset: 1794:56ef63cc7539a244c30227b74337e6fa573bfdf4 +char kNewtonVersion[] = "0.3-alpha-1794 (56ef63cc7539a244c30227b74337e6fa573bfdf4) (build 07-22-2025-22:56-yufeng@firefly-Linux-4.4.178+-aarch64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/5de8c3df61f467493f116c827f577f6e3c762325.txt b/analysis/statistics/5de8c3df61f467493f116c827f577f6e3c762325.txt new file mode 100644 index 000000000..d5b518b4d --- /dev/null +++ b/analysis/statistics/5de8c3df61f467493f116c827f577f6e3c762325.txt @@ -0,0 +1,48 @@ + +changeset: 1791:5de8c3df61f467493f116c827f577f6e3c762325 +char kNewtonVersion[] = "0.3-alpha-1791 (5de8c3df61f467493f116c827f577f6e3c762325) (build 07-21-2025-19:44-yufeng@firefly-Linux-4.4.178+-aarch64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/63e8db18cb6126e71563631153aaabdce43481ac.txt b/analysis/statistics/63e8db18cb6126e71563631153aaabdce43481ac.txt new file mode 100644 index 000000000..af700c36b --- /dev/null +++ b/analysis/statistics/63e8db18cb6126e71563631153aaabdce43481ac.txt @@ -0,0 +1,48 @@ + +changeset: 1792:63e8db18cb6126e71563631153aaabdce43481ac +char kNewtonVersion[] = "0.3-alpha-1792 (63e8db18cb6126e71563631153aaabdce43481ac) (build 07-21-2025-20:13-yufeng@firefly-Linux-4.4.178+-aarch64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/8b50bb4a077ee84ca480bc32a847ca8ee6e612a6.txt b/analysis/statistics/8b50bb4a077ee84ca480bc32a847ca8ee6e612a6.txt new file mode 100644 index 000000000..b566a1d65 --- /dev/null +++ b/analysis/statistics/8b50bb4a077ee84ca480bc32a847ca8ee6e612a6.txt @@ -0,0 +1,48 @@ + +changeset: 1793:8b50bb4a077ee84ca480bc32a847ca8ee6e612a6 +char kNewtonVersion[] = "0.3-alpha-1793 (8b50bb4a077ee84ca480bc32a847ca8ee6e612a6) (build 07-21-2025-21:36-yufeng@firefly-Linux-4.4.178+-aarch64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/9c4975268fa3dac9e85718f0d4660c8d615e3e5c.txt b/analysis/statistics/9c4975268fa3dac9e85718f0d4660c8d615e3e5c.txt new file mode 100644 index 000000000..a96a25f73 --- /dev/null +++ b/analysis/statistics/9c4975268fa3dac9e85718f0d4660c8d615e3e5c.txt @@ -0,0 +1,48 @@ + +changeset: 1780:9c4975268fa3dac9e85718f0d4660c8d615e3e5c +char kNewtonVersion[] = "0.3-alpha-1780 (9c4975268fa3dac9e85718f0d4660c8d615e3e5c) (build 06-08-2025-12:32-xyf@aurum-Linux-6.6.87.1-microsoft-standard-WSL2-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/9d270c140bcaec49865624c0f33ec54a1ecef68e.txt b/analysis/statistics/9d270c140bcaec49865624c0f33ec54a1ecef68e.txt new file mode 100644 index 000000000..82019f1cb --- /dev/null +++ b/analysis/statistics/9d270c140bcaec49865624c0f33ec54a1ecef68e.txt @@ -0,0 +1,48 @@ + +changeset: 1805:9d270c140bcaec49865624c0f33ec54a1ecef68e +char kNewtonVersion[] = "0.3-alpha-1805 (9d270c140bcaec49865624c0f33ec54a1ecef68e) (build 03-11-2026-17:16-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/a5e28b43534774e11ff199bed8410ef5f5c27493.txt b/analysis/statistics/a5e28b43534774e11ff199bed8410ef5f5c27493.txt new file mode 100644 index 000000000..dc5bfb261 --- /dev/null +++ b/analysis/statistics/a5e28b43534774e11ff199bed8410ef5f5c27493.txt @@ -0,0 +1,48 @@ + +changeset: 1804:a5e28b43534774e11ff199bed8410ef5f5c27493 +char kNewtonVersion[] = "0.3-alpha-1804 (a5e28b43534774e11ff199bed8410ef5f5c27493) (build 03-11-2026-16:04-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/bb03344ed167c5f01a902de8f90f728ffb1bef53.txt b/analysis/statistics/bb03344ed167c5f01a902de8f90f728ffb1bef53.txt new file mode 100644 index 000000000..99d1ed820 --- /dev/null +++ b/analysis/statistics/bb03344ed167c5f01a902de8f90f728ffb1bef53.txt @@ -0,0 +1,48 @@ + +changeset: 1781:bb03344ed167c5f01a902de8f90f728ffb1bef53 +char kNewtonVersion[] = "0.3-alpha-1781 (bb03344ed167c5f01a902de8f90f728ffb1bef53) (build 06-12-2025-19:24-xyf@aurum-Linux-6.6.87.1-microsoft-standard-WSL2-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/c4b5e5f9dbd8d5fe177ddde16b70d3eb4f69d9f9.txt b/analysis/statistics/c4b5e5f9dbd8d5fe177ddde16b70d3eb4f69d9f9.txt new file mode 100644 index 000000000..0d07ce9ac --- /dev/null +++ b/analysis/statistics/c4b5e5f9dbd8d5fe177ddde16b70d3eb4f69d9f9.txt @@ -0,0 +1,48 @@ + +changeset: 1778:c4b5e5f9dbd8d5fe177ddde16b70d3eb4f69d9f9 +char kNewtonVersion[] = "0.3-alpha-1778 (c4b5e5f9dbd8d5fe177ddde16b70d3eb4f69d9f9) (build 06-04-2025-20:59-xyf@aurum-Linux-6.6.87.1-microsoft-standard-WSL2-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/d0a3723ea7e63ac184a4e3e2db8be30e3d26d70a.txt b/analysis/statistics/d0a3723ea7e63ac184a4e3e2db8be30e3d26d70a.txt new file mode 100644 index 000000000..4c0481572 --- /dev/null +++ b/analysis/statistics/d0a3723ea7e63ac184a4e3e2db8be30e3d26d70a.txt @@ -0,0 +1,48 @@ + +changeset: 1802:d0a3723ea7e63ac184a4e3e2db8be30e3d26d70a +char kNewtonVersion[] = "0.3-alpha-1802 (d0a3723ea7e63ac184a4e3e2db8be30e3d26d70a) (build 03-10-2026-17:52-yufeng.xia@gala1-Linux-6.8.0-101-generic-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/e22e6908d8904f59081e7b1adbef130bdbf0c3ac.txt b/analysis/statistics/e22e6908d8904f59081e7b1adbef130bdbf0c3ac.txt new file mode 100644 index 000000000..9412a5694 --- /dev/null +++ b/analysis/statistics/e22e6908d8904f59081e7b1adbef130bdbf0c3ac.txt @@ -0,0 +1,48 @@ + +changeset: 1779:e22e6908d8904f59081e7b1adbef130bdbf0c3ac +char kNewtonVersion[] = "0.3-alpha-1779 (e22e6908d8904f59081e7b1adbef130bdbf0c3ac) (build 06-04-2025-23:24-xyf@aurum-Linux-6.6.87.1-microsoft-standard-WSL2-x86_64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/analysis/statistics/f57abd6a8529a6c5b16e8c2cf5ba64c657b1fad0.txt b/analysis/statistics/f57abd6a8529a6c5b16e8c2cf5ba64c657b1fad0.txt new file mode 100644 index 000000000..8ad0872cc --- /dev/null +++ b/analysis/statistics/f57abd6a8529a6c5b16e8c2cf5ba64c657b1fad0.txt @@ -0,0 +1,48 @@ + +changeset: 1785:f57abd6a8529a6c5b16e8c2cf5ba64c657b1fad0 +char kNewtonVersion[] = "0.3-alpha-1785 (f57abd6a8529a6c5b16e8c2cf5ba64c657b1fad0) (build 06-16-2025-23:42-yufeng@firefly-Linux-4.4.178+-aarch64)"; + +./src/noisy/noisy-linux-EN -O0 applications/noisy/helloWorld.n -s + +./src/newton/newton-linux-EN -v 0 -eP applications/newton/invariants/ViolinWithTemperatureDependence-pigroups.nt + +Informational Report: +--------------------- +Invariant "ViolinWithTemperatureDependenceForPiGroups" has 2 unique kernels, each with 2 column(s)... + + Kernel 0 is a valid kernel: + + 1 1 + -0.5 -0 + 1 0 + 0.5 0 + 0 -1 + -0 -1 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 0, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^( 0) P5^(-0) + + Pi group 0, Pi 1 is: P0^(-0) P1^( 1) P2^( 0) P3^( 0) P4^(-1) P5^(-1) + + + Kernel 1 is a valid kernel: + + 1 0 + -0.5 1 + 1 -2 + 0.5 -1 + -0 -2 + 0 -2 + + + The ordering of parameters is: P1 P0 P3 P2 P4 P5 + + Pi group 1, Pi 0 is: P0^(-0.5) P1^( 1) P2^(0.5) P3^( 1) P4^(-0) P5^( 0) + + Pi group 1, Pi 1 is: P0^( 1) P1^( 0) P2^(-1) P3^(-2) P4^(-2) P5^(-2) + + + + diff --git a/applications/newton/llvm-ir/c-files/e_exp.c b/applications/newton/llvm-ir/c-files/e_exp.c index b0f977398..9b132ee8f 100644 --- a/applications/newton/llvm-ir/c-files/e_exp.c +++ b/applications/newton/llvm-ir/c-files/e_exp.c @@ -102,6 +102,13 @@ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ */ typedef double bmx055xAcceleration; +static double __attribute__((noinline)) +__exp_horner_poly(double x) +{ + double t = x * x; + return x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); +} + #ifdef __STDC__ double __ieee754_exp(bmx055xAcceleration x) /* default IEEE double exp */ #else @@ -149,8 +156,7 @@ double __ieee754_exp(x) /* default IEEE double exp */ else k = 0; /* x is now in primary range */ - t = x*x; - c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); + c = __exp_horner_poly(x); if(k==0) return one-((x*c)/(c-2.0)-x); else y = one-((lo-(x*c)/(2.0-c))-hi); if(k >= -1021) { diff --git a/applications/newton/llvm-ir/c-files/e_j0.c b/applications/newton/llvm-ir/c-files/e_j0.c index df73a3030..75ab7d459 100644 --- a/applications/newton/llvm-ir/c-files/e_j0.c +++ b/applications/newton/llvm-ir/c-files/e_j0.c @@ -6,7 +6,7 @@ * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ @@ -30,20 +30,20 @@ * (To avoid cancellation, use * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) * to compute the worse one.) - * + * * 3 Special cases * j0(nan)= nan * j0(0) = 1 * j0(inf) = 0 - * + * * Method -- y0(x): * 1. For x<2. - * Since + * Since * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. * We use the following function to approximate y0, * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 - * where + * where * U(z) = u00 + u01*z + ... + u06*z^6 * V(z) = 1 + v01*z + ... + v04*z^4 * with absolute approximation error bounded by 2**-72. @@ -56,13 +56,7 @@ * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. */ -#define IEEE_IMPLEMENT_SIN_COS - #include "fdlibm.h" -#ifdef IEEE_IMPLEMENT_SIN_COS -#include "s_sin.c" -#include "s_cos.c" -#endif #ifdef __STDC__ static double pzero(double), qzero(double); @@ -76,9 +70,7 @@ static const double static double #endif huge = 1e300, -#ifndef IEEE_IMPLEMENT_SIN_COS one = 1.0, -#endif invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ /* R0/S0 on [0, 2.00] */ @@ -91,9 +83,7 @@ S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ -#ifndef IEEE_IMPLEMENT_SIN_COS static double zero = 0.0; -#endif /* * Definitions generated from Newton @@ -107,9 +97,6 @@ double __ieee754_j0(x) bmx055xAcceleration x; #endif { -#ifdef ASSUME - __builtin_assume(x > -16 && x < 16); -#endif double z, s,c,ss,cc,r,u,v; int hx,ix; @@ -418,100 +405,4 @@ static double qzero(x) r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); return (-.125 + r/s)/x; -} - -// clang ../c-files/e_j0.c -D DEBUG -D ASSUME -O3 -o e_j0_assume -lm -#ifdef DEBUG - -#include -#include -#include -#include -#include - -#define iteration_num 500000 - -typedef struct timespec timespec; -timespec diff(timespec start, timespec end) -{ - timespec temp; - if ((end.tv_nsec-start.tv_nsec)<0) { - temp.tv_sec = end.tv_sec-start.tv_sec-1; - temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; - } else { - temp.tv_sec = end.tv_sec-start.tv_sec; - temp.tv_nsec = end.tv_nsec-start.tv_nsec; - } - return temp; -} - -timespec sum(timespec t1, timespec t2) { - timespec temp; - if (t1.tv_nsec + t2.tv_nsec >= 1000000000) { - temp.tv_sec = t1.tv_sec + t2.tv_sec + 1; - temp.tv_nsec = t1.tv_nsec + t2.tv_nsec - 1000000000; - } else { - temp.tv_sec = t1.tv_sec + t2.tv_sec; - temp.tv_nsec = t1.tv_nsec + t2.tv_nsec; - } - return temp; -} - -void printTimeSpec(timespec t, const char* prefix) { - printf("%s: %d.%09d\n", prefix, (int)t.tv_sec, (int)t.tv_nsec); -} - -timespec tic( ) -{ - timespec start_time; - clock_gettime(CLOCK_REALTIME, &start_time); - return start_time; -} - -void toc( timespec* start_time, const char* prefix ) -{ - timespec current_time; - clock_gettime(CLOCK_REALTIME, ¤t_time); - printTimeSpec( diff( *start_time, current_time ), prefix ); - *start_time = current_time; -} - -/* - * random floating point, [min, max] - * */ -static bmx055xAcceleration -randomDouble(bmx055xAcceleration min, bmx055xAcceleration max) -{ - bmx055xAcceleration randDbValue = min + 1.0 * rand() / RAND_MAX * (max - min); - return randDbValue; -} - -int main(int argc, char** argv) { - double parameters[2]; - char *pEnd; - if (argc == 3) { - for (size_t idx = 0; idx < argc - 1; idx++) { - parameters[idx] = strtod(argv[idx + 1], &pEnd); - } - } else { - parameters[0] = 3.0; - parameters[1] = 10.0; - } - double result[iteration_num]; - bmx055xAcceleration xOps[iteration_num]; - for (size_t idx = 0; idx < iteration_num; idx++) { - xOps[idx] = randomDouble(parameters[0], parameters[1]); - } - - timespec timer = tic(); - for (size_t idx = 0; idx < iteration_num; idx++) { - result[idx] = __ieee754_j0(xOps[idx]); - } - - toc(&timer, "computation delay"); - - printf("results: %f\t%f\t%f\t%f\t%f\n", result[0], result[1], result[2], result[3], result[4]); - - return 0; -} -#endif +} \ No newline at end of file diff --git a/applications/newton/llvm-ir/c-files/e_log.c b/applications/newton/llvm-ir/c-files/e_log.c index 795390784..7689db756 100644 --- a/applications/newton/llvm-ir/c-files/e_log.c +++ b/applications/newton/llvm-ir/c-files/e_log.c @@ -87,6 +87,16 @@ static double zero = 0.0; */ typedef double bmx055xAcceleration; +static double __attribute__((noinline)) +__log_horner_poly(double s) +{ + double z = s * s; + double w = z * z; + double t1 = w*(Lg2+w*(Lg4+w*Lg6)); + double t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); + return t2 + t1; +} + #ifdef __STDC__ double __ieee754_log(bmx055xAcceleration x) #else @@ -125,14 +135,10 @@ double __ieee754_log(x) } s = f/(2.0+f); dk = (double)k; - z = s*s; i = hx-0x6147a; - w = z*z; j = 0x6b851-hx; - t1= w*(Lg2+w*(Lg4+w*Lg6)); - t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); i |= j; - R = t2+t1; + R = __log_horner_poly(s); if(i>0) { hfsq=0.5*f*f; if(k==0) return f-(hfsq-s*(hfsq+R)); else diff --git a/applications/newton/llvm-ir/c-files/e_y0.c b/applications/newton/llvm-ir/c-files/e_y0.c index 735051697..09f82fed6 100644 --- a/applications/newton/llvm-ir/c-files/e_y0.c +++ b/applications/newton/llvm-ir/c-files/e_y0.c @@ -6,7 +6,7 @@ * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ @@ -30,20 +30,20 @@ * (To avoid cancellation, use * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) * to compute the worse one.) - * + * * 3 Special cases * j0(nan)= nan * j0(0) = 1 * j0(inf) = 0 - * + * * Method -- y0(x): * 1. For x<2. - * Since + * Since * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. * We use the following function to approximate y0, * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 - * where + * where * U(z) = u00 + u01*z + ... + u06*z^6 * V(z) = 1 + v01*z + ... + v04*z^4 * with absolute approximation error bounded by 2**-72. @@ -56,13 +56,7 @@ * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. */ -#define IEEE_IMPLEMENT_SIN_COS - #include "fdlibm.h" -#ifdef IEEE_IMPLEMENT_SIN_COS -#include "s_sin.c" -#include "s_cos.c" -#endif #ifdef __STDC__ static double pzero(double), qzero(double); @@ -76,9 +70,7 @@ static const double static double #endif huge = 1e300, -#ifndef IEEE_IMPLEMENT_SIN_COS one = 1.0, -#endif invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ /* R0/S0 on [0, 2.00] */ @@ -91,9 +83,7 @@ S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ -#ifndef IEEE_IMPLEMENT_SIN_COS static double zero = 0.0; -#endif #ifdef __STDC__ double __ieee754_j0(double x) diff --git a/applications/newton/llvm-ir/c-files/sensfusion6.c b/applications/newton/llvm-ir/c-files/sensfusion6.c index 24fd45364..510b6df6d 100644 --- a/applications/newton/llvm-ir/c-files/sensfusion6.c +++ b/applications/newton/llvm-ir/c-files/sensfusion6.c @@ -98,7 +98,6 @@ void sensfusion6UpdateQImpl(float gx, float gy, float gz, float ax, float ay, fl _8qx = 8.0f * qx; _8qy = 8.0f * qy; qwqw = qw * qw; - qwqw = qw * qw; qxqx = qx * qx; qyqy = qy * qy; qzqz = qz * qz; diff --git a/applications/newton/llvm-ir/c-files/sincosf.c b/applications/newton/llvm-ir/c-files/sincosf.c index ee4a91341..c749dfe9c 100644 --- a/applications/newton/llvm-ir/c-files/sincosf.c +++ b/applications/newton/llvm-ir/c-files/sincosf.c @@ -36,6 +36,40 @@ */ typedef double bmx055xAcceleration; +/* + * Horner polynomial evaluation for sin(x) and cos(x): + * cos(x) ≈ c0 + x²(c1 + x²(c2 + x²(c3 + x²*c4))) + * sin(x) ≈ x(1 + x²(s1 + x²(s2 + x²*s3))) + * Extracted for quantization pass identification. + * Input x in [-π/4, π/4], x2 = x*x + */ +static void __attribute__((noinline)) +__sincosf_poly_horner(double x, double x2, const sincos_t *p, int n, + float *sinp, float *cosp) +{ + double x3, x4, x5, x6, s, c, c1, c2, s1; + + x4 = x2 * x2; + x3 = x2 * x; + c2 = p->c3 + x2 * p->c4; + s1 = p->s2 + x2 * p->s3; + + /* Swap sin/cos result based on quadrant. */ + float *tmp = (n & 1 ? cosp : sinp); + cosp = (n & 1 ? sinp : cosp); + sinp = tmp; + + c1 = p->c0 + x2 * p->c1; + x5 = x3 * x2; + x6 = x4 * x2; + + s = x + x3 * p->s1; + c = c1 + x4 * p->c2; + + *sinp = s + x5 * s1; + *cosp = c + x6 * c2; +} + /* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer @@ -65,7 +99,7 @@ libc_sincosf (bmx055xAcceleration y, float *sinp, float *cosp) return; } - sincosf_poly (x, x2, p, 0, sinp, cosp); + __sincosf_poly_horner (x, x2, p, 0, sinp, cosp); } // [0, 5] [6, 10] else if (abstop12 (y) < abstop12 (120.0f)) @@ -78,7 +112,7 @@ libc_sincosf (bmx055xAcceleration y, float *sinp, float *cosp) if (n & 2) p = &__sincosf_table[1]; - sincosf_poly (x * s, x * x, p, n, sinp, cosp); + __sincosf_poly_horner (x * s, x * x, p, n, sinp, cosp); } else if (likely (abstop12 (y) < abstop12 (INFINITY))) { @@ -93,7 +127,7 @@ libc_sincosf (bmx055xAcceleration y, float *sinp, float *cosp) if ((n + sign) & 2) p = &__sincosf_table[1]; - sincosf_poly (x * s, x * x, p, n, sinp, cosp); + __sincosf_poly_horner (x * s, x * x, p, n, sinp, cosp); } else { diff --git a/applications/newton/llvm-ir/performance_test/Makefile b/applications/newton/llvm-ir/performance_test/Makefile index edb89e4f5..5fc072297 100644 --- a/applications/newton/llvm-ir/performance_test/Makefile +++ b/applications/newton/llvm-ir/performance_test/Makefile @@ -387,4 +387,4 @@ default: perf_exp perf_exp_opt perf_log perf_log_opt perf_acosh perf_acosh_opt p clean: $(QUIET)rm -f *.ll *.o *.s *.txt out.* libout.a main_out auto_test cd $(CHStone_DIR) && $(MAKE_CLEAN) - cd $(SUBDIR) && $(MAKE_CLEAN) \ No newline at end of file + cd $(SUBDIR) && $(MAKE_CLEAN) diff --git a/applications/newton/llvm-ir/performance_test/fig/aarch64-library_size_reduction_ratio.png b/applications/newton/llvm-ir/performance_test/fig/aarch64-library_size_reduction_ratio.png new file mode 100644 index 000000000..e708db5a1 Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/fig/aarch64-library_size_reduction_ratio.png differ diff --git a/applications/newton/llvm-ir/performance_test/fig/aarch64-time_consumption_speedup.png b/applications/newton/llvm-ir/performance_test/fig/aarch64-time_consumption_speedup.png new file mode 100644 index 000000000..801951fed Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/fig/aarch64-time_consumption_speedup.png differ diff --git a/applications/newton/llvm-ir/performance_test/fig/perf_woquant.log b/applications/newton/llvm-ir/performance_test/fig/perf_woquant.log new file mode 100644 index 000000000..639cb1fab --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/fig/perf_woquant.log @@ -0,0 +1,511 @@ +test case param instruction count time consumption ir lines library size compile time +perf_exp -2.000000 2.000000 -1 0.00497716 356 7110 62.7846 +perf_exp_opt -2.000000 2.000000 -1 0.00485117 263 5942 102.48 +speed up after optimization -2.000000 2.000000 0% 3% 26% 16% -39% +perf_exp -4.000000 4.000000 -1 0.00512212 356 7110 58.9307 +perf_exp_opt -4.000000 4.000000 -1 0.00498037 263 5942 64.4947 +speed up after optimization -4.000000 4.000000 0% 3% 26% 16% -9% +perf_exp -8.000000 8.000000 -1 0.00518424 356 7110 52.4746 +perf_exp_opt -8.000000 8.000000 -1 0.00505824 263 5942 77.2457 +speed up after optimization -8.000000 8.000000 0% 2% 26% 16% -32% +perf_exp -16.000000 16.000000 -1 0.00523353 356 7110 46.8484 +perf_exp_opt -16.000000 16.000000 -1 0.00508391 263 5942 64.3879 +speed up after optimization -16.000000 16.000000 0% 3% 26% 16% -27% +perf_exp -125.000000 125.000000 -1 0.00525336 356 7110 56.6294 +perf_exp_opt -125.000000 125.000000 -1 0.00509849 263 5942 71.3189 +speed up after optimization -125.000000 125.000000 0% 3% 26% 16% -21% +perf_exp -40.000000 110.000000 -1 0.00529565 356 7110 45.0987 +perf_exp_opt -40.000000 110.000000 -1 0.00510403 263 5942 98.795 +speed up after optimization -40.000000 110.000000 0% 4% 26% 16% -54% +perf_exp -55.000000 150.000000 -1 0.00527786 356 7110 52.1283 +perf_exp_opt -55.000000 150.000000 -1 0.00513758 263 5942 73.8711 +speed up after optimization -55.000000 150.000000 0% 3% 26% 16% -29% +perf_exp 0.000000 100.000000 -1 0.00530353 356 7110 45.8249 +perf_exp_opt 0.000000 100.000000 -1 0.00491183 247 5878 68.5935 +speed up after optimization 0.000000 100.000000 0% 8% 31% 17% -33% +perf_exp 0.000000 70.000000 -1 0.00527728 356 7110 60.1177 +perf_exp_opt 0.000000 70.000000 -1 0.00493341 247 5878 63.9123 +speed up after optimization 0.000000 70.000000 0% 7% 31% 17% -6% +perf_exp 260.000000 1260.000000 -1 0.00311783 356 7110 55.5039 +perf_exp_opt 260.000000 1260.000000 -1 0.00280051 200 5830 73.907 +speed up after optimization 260.000000 1260.000000 0% 11% 44% 18% -25% +perf_exp 10.000000 45.000000 -1 0.00528632 356 7110 50.5312 +perf_exp_opt 10.000000 45.000000 -1 0.00466159 188 5566 88.0591 +speed up after optimization 10.000000 45.000000 0% 13% 47% 22% -43% +perf_exp -55.000000 125.000000 -1 0.00526882 356 7110 58.3165 +perf_exp_opt -55.000000 125.000000 -1 0.00509966 263 5942 82.4976 +speed up after optimization -55.000000 125.000000 0% 3% 26% 16% -29% +perf_exp 20.000000 80.000000 -1 0.00525278 356 7110 47.6023 +perf_exp_opt 20.000000 80.000000 -1 0.00467705 188 5566 79.5849 +speed up after optimization 20.000000 80.000000 0% 12% 47% 22% -40% +perf_exp 0.000000 50.000000 -1 0.0052729 356 7110 93.1995 +perf_exp_opt 0.000000 50.000000 -1 0.0049197 247 5878 82.8763 +speed up after optimization 0.000000 50.000000 0% 7% 31% 17% 12% +perf_exp -0.200000 2.000000 -1 0.00490337 356 7110 60.3932 +perf_exp_opt -0.200000 2.000000 -1 0.0047835 273 5966 82.4083 +speed up after optimization -0.200000 2.000000 0% 3% 23% 16% -27% +perf_exp 30.000000 130.000000 -1 0.00525803 356 7110 56.3092 +perf_exp_opt 30.000000 130.000000 -1 0.00471175 188 5566 66.3511 +speed up after optimization 30.000000 130.000000 0% 12% 47% 22% -15% +perf_exp 1.000000 200.000000 -1 0.00528107 356 7110 46.9243 +perf_exp_opt 1.000000 200.000000 -1 0.00484183 214 5918 67.9836 +speed up after optimization 1.000000 200.000000 0% 9% 40% 17% -31% +perf_log -2.000000 2.000000 -1 0.00340862 413 8030 55.5639 +perf_log_opt -2.000000 2.000000 -1 0.00321933 336 6830 76.0221 +speed up after optimization -2.000000 2.000000 0% 6% 19% 15% -27% +perf_log -4.000000 4.000000 -1 0.00341124 413 8030 73.426 +perf_log_opt -4.000000 4.000000 -1 0.00320708 336 6830 76.6989 +speed up after optimization -4.000000 4.000000 0% 6% 19% 15% -4% +perf_log -8.000000 8.000000 -1 0.00342204 413 8030 54.998 +perf_log_opt -8.000000 8.000000 -1 0.00322342 336 6830 72.5264 +speed up after optimization -8.000000 8.000000 0% 6% 19% 15% -24% +perf_log -16.000000 16.000000 -1 0.0033847 413 8030 81.9909 +perf_log_opt -16.000000 16.000000 -1 0.00331558 336 6830 71.8646 +speed up after optimization -16.000000 16.000000 0% 2% 19% 15% 14% +perf_log -125.000000 125.000000 -1 0.00339258 413 8030 60.4784 +perf_log_opt -125.000000 125.000000 -1 0.00321 336 6830 81.6393 +speed up after optimization -125.000000 125.000000 0% 6% 19% 15% -26% +perf_log -40.000000 110.000000 -1 0.00427864 413 8030 50.9217 +perf_log_opt -40.000000 110.000000 -1 0.00397648 336 6830 90.017 +speed up after optimization -40.000000 110.000000 0% 8% 19% 15% -43% +perf_log -55.000000 150.000000 -1 0.00410248 413 8030 57.0691 +perf_log_opt -55.000000 150.000000 -1 0.00389073 336 6830 96.5907 +speed up after optimization -55.000000 150.000000 0% 5% 19% 15% -41% +perf_log 0.000000 100.000000 -1 0.00485554 413 8030 55.9561 +perf_log_opt 0.000000 100.000000 -1 0.00461405 336 6974 70.5315 +speed up after optimization 0.000000 100.000000 0% 5% 19% 13% -21% +perf_log 0.000000 70.000000 -1 0.00484912 413 8030 58.8897 +perf_log_opt 0.000000 70.000000 -1 0.0046088 336 6974 77.4672 +speed up after optimization 0.000000 70.000000 0% 5% 19% 13% -24% +perf_log 260.000000 1260.000000 -1 0.00485233 413 8030 48.7064 +perf_log_opt 260.000000 1260.000000 -1 0.00449038 270 6406 68.3857 +speed up after optimization 260.000000 1260.000000 0% 8% 35% 20% -29% +perf_log 10.000000 45.000000 -1 0.00485029 413 8030 79.175 +perf_log_opt 10.000000 45.000000 -1 0.00449476 270 6406 83.0578 +speed up after optimization 10.000000 45.000000 0% 8% 35% 20% -5% +perf_log -55.000000 125.000000 -1 0.00399048 413 8030 50.3369 +perf_log_opt -55.000000 125.000000 -1 0.00377319 336 6830 86.7836 +speed up after optimization -55.000000 125.000000 0% 6% 19% 15% -42% +perf_log 20.000000 80.000000 -1 0.00485846 413 8030 60.333 +perf_log_opt 20.000000 80.000000 -1 0.00452101 270 6406 72.0079 +speed up after optimization 20.000000 80.000000 0% 7% 35% 20% -16% +perf_log 0.000000 50.000000 -1 0.00485175 413 8030 52.0146 +perf_log_opt 0.000000 50.000000 -1 0.00459655 336 6974 76.9209 +speed up after optimization 0.000000 50.000000 0% 6% 19% 13% -32% +perf_log -0.200000 2.000000 -1 0.00457409 413 8030 52.8492 +perf_log_opt -0.200000 2.000000 -1 0.0043708 352 7150 69.8841 +speed up after optimization -0.200000 2.000000 0% 5% 15% 11% -24% +perf_log 30.000000 130.000000 -1 0.00485612 413 8030 67.5369 +perf_log_opt 30.000000 130.000000 -1 0.00448747 270 6406 67.8421 +speed up after optimization 30.000000 130.000000 0% 8% 35% 20% 0% +perf_log 1.000000 200.000000 -1 0.00485058 413 8030 49.6473 +perf_log_opt 1.000000 200.000000 -1 0.00448717 270 6406 71.9566 +speed up after optimization 1.000000 200.000000 0% 8% 35% 20% -31% +perf_acosh -2.000000 2.000000 -1 0.00330712 179 4424 50.3887 +perf_acosh_opt -2.000000 2.000000 -1 0.00313388 106 3528 59.0801 +speed up after optimization -2.000000 2.000000 0% 6% 41% 20% -15% +perf_acosh -4.000000 4.000000 -1 0.00353899 179 4424 40.3989 +perf_acosh_opt -4.000000 4.000000 -1 0.00347337 137 3912 85.391 +speed up after optimization -4.000000 4.000000 0% 2% 23% 12% -53% +perf_acosh -8.000000 8.000000 -1 0.00358012 179 4424 40.1185 +perf_acosh_opt -8.000000 8.000000 -1 0.00352032 137 3912 56.9483 +speed up after optimization -8.000000 8.000000 0% 2% 23% 12% -30% +perf_acosh -16.000000 16.000000 -1 0.00361249 179 4424 50.4101 +perf_acosh_opt -16.000000 16.000000 -1 0.00354541 137 3912 70.5256 +speed up after optimization -16.000000 16.000000 0% 2% 23% 12% -29% +perf_acosh -125.000000 125.000000 -1 0.00363932 179 4424 44.73 +perf_acosh_opt -125.000000 125.000000 -1 0.00356495 137 3912 59.8209 +speed up after optimization -125.000000 125.000000 0% 2% 23% 12% -25% +perf_acosh -40.000000 110.000000 -1 0.00444838 179 4424 79.8399 +perf_acosh_opt -40.000000 110.000000 -1 0.00432472 137 3912 58.2674 +speed up after optimization -40.000000 110.000000 0% 3% 23% 12% 37% +perf_acosh -55.000000 150.000000 -1 0.00442563 179 4424 70.5277 +perf_acosh_opt -55.000000 150.000000 -1 0.00432559 137 3912 59.9564 +speed up after optimization -55.000000 150.000000 0% 2% 23% 12% 18% +perf_acosh 0.000000 100.000000 -1 0.00534728 179 4424 44.5001 +perf_acosh_opt 0.000000 100.000000 -1 0.00519591 137 3912 56.8354 +speed up after optimization 0.000000 100.000000 0% 3% 23% 12% -22% +perf_acosh 0.000000 70.000000 -1 0.00529769 179 4424 44.2078 +perf_acosh_opt 0.000000 70.000000 -1 0.00520699 137 3912 81.4004 +speed up after optimization 0.000000 70.000000 0% 2% 23% 12% -46% +perf_acosh 260.000000 1260.000000 -1 0.00532161 179 4424 43.3011 +perf_acosh_opt 260.000000 1260.000000 -1 0.00479984 83 3312 63.7598 +speed up after optimization 260.000000 1260.000000 0% 11% 54% 25% -32% +perf_acosh 10.000000 45.000000 -1 0.00532804 179 4424 51.2069 +perf_acosh_opt 10.000000 45.000000 -1 0.00478672 83 3312 63.5673 +speed up after optimization 10.000000 45.000000 0% 11% 54% 25% -19% +perf_acosh -55.000000 125.000000 -1 0.00433377 179 4424 48.536 +perf_acosh_opt -55.000000 125.000000 -1 0.00421448 137 3912 65.0135 +speed up after optimization -55.000000 125.000000 0% 3% 23% 12% -25% +perf_acosh 20.000000 80.000000 -1 0.00532658 179 4424 66.8366 +perf_acosh_opt 20.000000 80.000000 -1 0.00477739 83 3312 61.8709 +speed up after optimization 20.000000 80.000000 0% 11% 54% 25% 8% +perf_acosh 0.000000 50.000000 -1 0.00529246 179 4424 47.131 +perf_acosh_opt 0.000000 50.000000 -1 0.00516004 137 3912 57.4877 +speed up after optimization 0.000000 50.000000 0% 3% 23% 12% -18% +perf_acosh -0.200000 2.000000 -1 0.00462835 179 4424 54.3767 +perf_acosh_opt -0.200000 2.000000 -1 0.00433523 106 3528 56.7893 +speed up after optimization -0.200000 2.000000 0% 7% 41% 20% -4% +perf_acosh 30.000000 130.000000 -1 0.00531666 179 4424 44.9723 +perf_acosh_opt 30.000000 130.000000 -1 0.00478759 83 3312 65.6412 +speed up after optimization 30.000000 130.000000 0% 11% 54% 25% -31% +perf_acosh 1.000000 200.000000 -1 0.00535166 179 4424 41.7003 +perf_acosh_opt 1.000000 200.000000 -1 0.00532396 122 3816 66.5477 +speed up after optimization 1.000000 200.000000 0% 1% 32% 14% -37% +perf_j0 -2.000000 2.000000 -1 0.00420194 672 12806 55.3299 +perf_j0_opt -2.000000 2.000000 -1 0.0041614 666 12710 98.0083 +speed up after optimization -2.000000 2.000000 0% 1% 1% 1% -44% +perf_j0 -4.000000 4.000000 -1 0.018248 672 12806 56.6481 +perf_j0_opt -4.000000 4.000000 -1 0.0181707 666 12710 83.5066 +speed up after optimization -4.000000 4.000000 0% 0% 1% 1% -32% +perf_j0 -8.000000 8.000000 -1 0.0256564 672 12806 57.6669 +perf_j0_opt -8.000000 8.000000 -1 0.0255748 666 12710 92.1957 +speed up after optimization -8.000000 8.000000 0% 0% 1% 1% -37% +perf_j0 -16.000000 16.000000 -1 0.0292024 672 12806 58.5595 +perf_j0_opt -16.000000 16.000000 -1 0.0289093 666 12710 89.6988 +speed up after optimization -16.000000 16.000000 0% 1% 1% 1% -35% +perf_j0 -125.000000 125.000000 -1 0.0320347 672 12806 61.861 +perf_j0_opt -125.000000 125.000000 -1 0.031762 666 12710 96.4954 +speed up after optimization -125.000000 125.000000 0% 1% 1% 1% -36% +perf_j0 -40.000000 110.000000 -1 0.0317267 672 12806 58.0168 +perf_j0_opt -40.000000 110.000000 -1 0.0314365 666 12710 91.1253 +speed up after optimization -40.000000 110.000000 0% 1% 1% 1% -36% +perf_j0 -55.000000 150.000000 -1 0.0319099 672 12806 72.5605 +perf_j0_opt -55.000000 150.000000 -1 0.0315826 666 12710 16.871 +speed up after optimization -55.000000 150.000000 0% 1% 1% 1% 330% +perf_j0 0.000000 100.000000 -1 0.0319504 672 12806 62.8608 +perf_j0_opt 0.000000 100.000000 -1 0.0336181 630 12390 98.9408 +speed up after optimization 0.000000 100.000000 0% 0% 6% 3% 330% +perf_j0 0.000000 70.000000 -1 0.0317538 672 12806 65.1974 +perf_j0_opt 0.000000 70.000000 -1 0.0311268 630 12390 80.6846 +speed up after optimization 0.000000 70.000000 0% 2% 6% 3% -19% +perf_j0 260.000000 1260.000000 -1 0.03246 672 12806 60.3438 +perf_j0_opt 260.000000 1260.000000 -1 0.0315882 510 11086 77.8234 +speed up after optimization 260.000000 1260.000000 0% 3% 24% 13% -22% +perf_j0 10.000000 45.000000 -1 0.0324477 672 12806 58.2647 +perf_j0_opt 10.000000 45.000000 -1 0.0306913 510 11086 83.4392 +speed up after optimization 10.000000 45.000000 0% 6% 24% 13% -30% +perf_j0 -55.000000 125.000000 -1 0.0318752 672 12806 55.1542 +perf_j0_opt -55.000000 125.000000 -1 0.0315561 666 12710 81.3201 +speed up after optimization -55.000000 125.000000 0% 1% 1% 1% -32% +perf_j0 20.000000 80.000000 -1 0.0323972 672 12806 58.8267 +perf_j0_opt 20.000000 80.000000 -1 0.0306989 510 11086 85.4219 +speed up after optimization 20.000000 80.000000 0% 6% 24% 13% -31% +perf_j0 0.000000 50.000000 -1 0.0314514 672 12806 53.6201 +perf_j0_opt 0.000000 50.000000 -1 0.0310839 630 12390 99.6094 +speed up after optimization 0.000000 50.000000 0% 1% 6% 3% -46% +perf_j0 -0.200000 2.000000 -1 0.00420077 672 12806 67.4524 +perf_j0_opt -0.200000 2.000000 -1 0.00415877 666 12710 84.5473 +speed up after optimization -0.200000 2.000000 0% 1% 1% 1% -20% +perf_j0 30.000000 130.000000 -1 0.0324037 672 12806 61.1679 +perf_j0_opt 30.000000 130.000000 -1 0.0307044 510 11086 81.0235 +speed up after optimization 30.000000 130.000000 0% 6% 24% 13% -25% +perf_j0 1.000000 200.000000 -1 0.0326405 672 12806 84.6439 +perf_j0_opt 1.000000 200.000000 -1 0.0316121 573 12014 83.7322 +speed up after optimization 1.000000 200.000000 0% 3% 15% 6% 1% +perf_y0 -2.000000 2.000000 -1 0.00641767 938 16974 64.5634 +perf_y0_opt -2.000000 2.000000 -1 0.00638355 922 16782 9.38077 +speed up after optimization -2.000000 2.000000 0% 1% 2% 1% 588% +perf_y0 -4.000000 4.000000 -1 0.011725 938 16974 68.9358 +perf_y0_opt -4.000000 4.000000 -1 0.011657 922 16782 101.059 +speed up after optimization -4.000000 4.000000 0% 1% 2% 1% -32% +perf_y0 -8.000000 8.000000 -1 0.0144013 938 16974 68.8102 +perf_y0_opt -8.000000 8.000000 -1 0.0143321 922 16782 104.33 +speed up after optimization -8.000000 8.000000 0% 0% 2% 1% -34% +perf_y0 -16.000000 16.000000 -1 0.0156315 938 16974 65.8035 +perf_y0_opt -16.000000 16.000000 -1 0.0155542 922 16782 97.7837 +speed up after optimization -16.000000 16.000000 0% 0% 2% 1% -33% +perf_y0 -125.000000 125.000000 -1 0.0165642 938 16974 75.9334 +perf_y0_opt -125.000000 125.000000 -1 0.016498 922 16782 97.1625 +speed up after optimization -125.000000 125.000000 0% 0% 2% 1% -22% +perf_y0 -40.000000 110.000000 -1 0.0233374 938 16974 64.8757 +perf_y0_opt -40.000000 110.000000 -1 0.0232607 922 16782 11.421 +speed up after optimization -40.000000 110.000000 0% 0% 2% 1% 468% +perf_y0 -55.000000 150.000000 -1 0.023581 938 16974 81.5739 +perf_y0_opt -55.000000 150.000000 -1 0.0233642 922 16782 22.9922 +speed up after optimization -55.000000 150.000000 0% 1% 2% 1% 255% +perf_y0 0.000000 100.000000 -1 0.0309792 938 16974 68.5489 +perf_y0_opt 0.000000 100.000000 -1 0.0306137 889 16478 13.9858 +speed up after optimization 0.000000 100.000000 0% 1% 5% 3% 390% +perf_y0 0.000000 70.000000 -1 0.030887 938 16974 72.4524 +perf_y0_opt 0.000000 70.000000 -1 0.030486 889 16478 96.5727 +speed up after optimization 0.000000 70.000000 0% 1% 5% 3% -25% +perf_y0 260.000000 1260.000000 -1 0.0313149 938 16974 88.4888 +perf_y0_opt 260.000000 1260.000000 -1 0.0305971 789 15006 97.4049 +speed up after optimization 260.000000 1260.000000 0% 2% 16% 12% -9% +perf_y0 10.000000 45.000000 -1 0.0314213 938 16974 66.8431 +perf_y0_opt 10.000000 45.000000 -1 0.0305965 789 15006 91.9116 +speed up after optimization 10.000000 45.000000 0% 3% 16% 12% -27% +perf_y0 -55.000000 125.000000 -1 0.0221635 938 16974 75.4482 +perf_y0_opt -55.000000 125.000000 -1 0.022132 922 16782 105.394 +speed up after optimization -55.000000 125.000000 0% 0% 2% 1% -28% +perf_y0 20.000000 80.000000 -1 0.0312676 938 16974 68.7983 +perf_y0_opt 20.000000 80.000000 -1 0.0305822 789 15006 104.449 +speed up after optimization 20.000000 80.000000 0% 2% 16% 12% -34% +perf_y0 0.000000 50.000000 -1 0.0306686 938 16974 69.3657 +perf_y0_opt 0.000000 50.000000 -1 0.0302964 889 16478 103.876 +speed up after optimization 0.000000 50.000000 0% 1% 5% 3% -33% +perf_y0 -0.200000 2.000000 -1 0.0101191 938 16974 83.1626 +perf_y0_opt -0.200000 2.000000 -1 0.0101028 936 16950 100.17 +speed up after optimization -0.200000 2.000000 0% 0% 0% 0% -17% +perf_y0 30.000000 130.000000 -1 0.0312904 938 16974 67.3162 +perf_y0_opt 30.000000 130.000000 -1 0.0305956 789 15006 103.896 +speed up after optimization 30.000000 130.000000 0% 2% 16% 12% -35% +perf_y0 1.000000 200.000000 -1 0.0312382 938 16974 70.7459 +perf_y0_opt 1.000000 200.000000 -1 0.0307114 856 16270 102.197 +speed up after optimization 1.000000 200.000000 0% 2% 9% 4% -31% +perf_rem_pio2 -2.000000 2.000000 -1 0.00189491 2036 19234 71.1152 +perf_rem_pio2_opt -2.000000 2.000000 -1 0.00179137 1161 14010 51.731 +speed up after optimization -2.000000 2.000000 0% 6% 43% 27% 37% +perf_rem_pio2 -4.000000 4.000000 -1 0.00254998 2036 19234 59.0818 +perf_rem_pio2_opt -4.000000 4.000000 -1 0.00244994 1268 15034 42.6252 +speed up after optimization -4.000000 4.000000 0% 4% 38% 22% 39% +perf_rem_pio2 -8.000000 8.000000 -1 0.00284689 2036 19234 81.5352 +perf_rem_pio2_opt -8.000000 8.000000 -1 0.00281451 1268 15034 33.7704 +speed up after optimization -8.000000 8.000000 0% 1% 38% 22% 141% +perf_rem_pio2 -16.000000 16.000000 -1 0.00296676 2036 19234 63.4337 +perf_rem_pio2_opt -16.000000 16.000000 -1 0.00292768 1268 15034 30.9702 +speed up after optimization -16.000000 16.000000 0% 1% 38% 22% 105% +perf_rem_pio2 -125.000000 125.000000 -1 0.00319571 2036 19234 73.6002 +perf_rem_pio2_opt -125.000000 125.000000 -1 0.0031088 1274 15026 37.2079 +speed up after optimization -125.000000 125.000000 0% 3% 37% 22% 98% +perf_rem_pio2 -40.000000 110.000000 -1 0.00309684 2036 19234 82.7373 +perf_rem_pio2_opt -40.000000 110.000000 -1 0.00298105 1274 15026 68.982 +speed up after optimization -40.000000 110.000000 0% 4% 37% 22% 20% +perf_rem_pio2 -55.000000 150.000000 -1 0.00306213 2036 19234 65.5023 +perf_rem_pio2_opt -55.000000 150.000000 -1 0.00303442 1274 15026 43.353 +speed up after optimization -55.000000 150.000000 0% 1% 37% 22% 51% +perf_rem_pio2 0.000000 100.000000 -1 0.00292913 2036 19234 64.1709 +perf_rem_pio2_opt 0.000000 100.000000 -1 0.00259314 1149 14114 35.6442 +speed up after optimization 0.000000 100.000000 0% 13% 44% 27% 80% +perf_rem_pio2 0.000000 70.000000 -1 0.00287576 2036 19234 68.3815 +perf_rem_pio2_opt 0.000000 70.000000 -1 0.00255377 1149 14114 26.7769 +speed up after optimization 0.000000 70.000000 0% 13% 44% 27% 155% +perf_rem_pio2 260.000000 1260.000000 -1 0.00287605 2036 19234 57.3938 +perf_rem_pio2_opt 260.000000 1260.000000 -1 0.00206291 906 11258 11.6465 +speed up after optimization 260.000000 1260.000000 0% 39% 56% 41% 393% +perf_rem_pio2 10.000000 45.000000 -1 0.0028078 2036 19234 61.679 +perf_rem_pio2_opt 10.000000 45.000000 -1 0.00206991 898 11426 36.2116 +speed up after optimization 10.000000 45.000000 0% 36% 56% 41% 70% +perf_rem_pio2 -55.000000 125.000000 -1 0.00309626 2036 19234 54.1869 +perf_rem_pio2_opt -55.000000 125.000000 -1 0.00300642 1274 15026 44.3535 +speed up after optimization -55.000000 125.000000 0% 3% 37% 22% 22% +perf_rem_pio2 20.000000 80.000000 -1 0.00292505 2036 19234 54.1094 +perf_rem_pio2_opt 20.000000 80.000000 -1 0.00207457 898 11426 22.4418 +speed up after optimization 20.000000 80.000000 0% 41% 56% 41% 141% +perf_rem_pio2 0.000000 50.000000 -1 0.00284426 2036 19234 51.6562 +perf_rem_pio2_opt 0.000000 50.000000 -1 0.00243477 1149 14114 29.2771 +speed up after optimization 0.000000 50.000000 0% 17% 44% 27% 76% +perf_rem_pio2 -0.200000 2.000000 -1 0.00177883 2036 19234 53.6316 +perf_rem_pio2_opt -0.200000 2.000000 -1 0.00166858 1161 14010 27.0668 +speed up after optimization -0.200000 2.000000 0% 7% 43% 27% 98% +perf_rem_pio2 30.000000 130.000000 -1 0.00289938 2036 19234 62.697 +perf_rem_pio2_opt 30.000000 130.000000 -1 0.0020422 898 11426 20.0622 +speed up after optimization 30.000000 130.000000 0% 42% 56% 41% 213% +perf_rem_pio2 1.000000 200.000000 -1 0.00290697 2036 19234 59.9646 +perf_rem_pio2_opt 1.000000 200.000000 -1 0.00223382 962 11994 31.6586 +speed up after optimization 1.000000 200.000000 0% 30% 53% 38% 89% +perf_sincosf -2.000000 2.000000 -1 0.00514574 614 13152 57.757 +perf_sincosf_opt -2.000000 2.000000 -1 0.00398319 440 11256 103.385 +speed up after optimization -2.000000 2.000000 0% 29% 28% 14% -44% +perf_sincosf -4.000000 4.000000 -1 0.00533299 614 13152 66.8662 +perf_sincosf_opt -4.000000 4.000000 -1 0.00416227 440 11256 100.039 +speed up after optimization -4.000000 4.000000 0% 28% 28% 14% -33% +perf_sincosf -8.000000 8.000000 -1 0.00541115 614 13152 58.0699 +perf_sincosf_opt -8.000000 8.000000 -1 0.00425268 440 11256 97.4972 +speed up after optimization -8.000000 8.000000 0% 27% 28% 14% -40% +perf_sincosf -16.000000 16.000000 -1 0.0054552 614 13152 86.2585 +perf_sincosf_opt -16.000000 16.000000 -1 0.00429906 440 11256 100.684 +speed up after optimization -16.000000 16.000000 0% 27% 28% 14% -14% +perf_sincosf -125.000000 125.000000 -1 0.00557769 614 13152 59.3692 +perf_sincosf_opt -125.000000 125.000000 -1 0.00433814 580 12832 103.527 +speed up after optimization -125.000000 125.000000 0% 29% 6% 2% -43% +perf_sincosf -40.000000 110.000000 -1 0.0054864 614 13152 78.7387 +perf_sincosf_opt -40.000000 110.000000 -1 0.00434864 440 11256 97.2297 +speed up after optimization -40.000000 110.000000 0% 26% 28% 14% -19% +perf_sincosf -55.000000 150.000000 -1 0.00573344 614 13152 67.6164 +perf_sincosf_opt -55.000000 150.000000 -1 0.00442768 580 12832 103.663 +speed up after optimization -55.000000 150.000000 0% 29% 6% 2% -35% +perf_sincosf 0.000000 100.000000 -1 0.00551469 614 13152 71.5489 +perf_sincosf_opt 0.000000 100.000000 -1 0.00434426 440 11256 100.071 +speed up after optimization 0.000000 100.000000 0% 27% 28% 14% -29% +perf_sincosf 0.000000 70.000000 -1 0.00550857 614 13152 77.5803 +perf_sincosf_opt 0.000000 70.000000 -1 0.00431043 440 11256 100.496 +speed up after optimization 0.000000 70.000000 0% 28% 28% 14% -23% +perf_sincosf 260.000000 1260.000000 -1 0.00675395 614 13152 64.3929 +perf_sincosf_opt 260.000000 1260.000000 -1 0.00386215 387 9984 88.7508 +speed up after optimization 260.000000 1260.000000 0% 75% 37% 24% -27% +perf_sincosf 10.000000 45.000000 -1 0.00552665 614 13152 74.3139 +perf_sincosf_opt 10.000000 45.000000 -1 0.00350837 329 9384 91.3322 +speed up after optimization 10.000000 45.000000 0% 58% 46% 29% -19% +perf_sincosf -55.000000 125.000000 -1 0.00558527 614 13152 65.7412 +perf_sincosf_opt -55.000000 125.000000 -1 0.00459188 580 12832 104.207 +speed up after optimization -55.000000 125.000000 0% 22% 6% 2% -37% +perf_sincosf 20.000000 80.000000 -1 0.00550682 614 13152 75.9047 +perf_sincosf_opt 20.000000 80.000000 -1 0.00349729 329 9384 95.4578 +speed up after optimization 20.000000 80.000000 0% 57% 46% 29% -20% +perf_sincosf 0.000000 50.000000 -1 0.00549165 614 13152 58.0776 +perf_sincosf_opt 0.000000 50.000000 -1 0.00431481 440 11256 12.7752 +speed up after optimization 0.000000 50.000000 0% 27% 28% 14% 355% +perf_sincosf -0.200000 2.000000 -1 0.00504483 614 13152 65.7001 +perf_sincosf_opt -0.200000 2.000000 -1 0.00393419 440 11256 91.0493 +speed up after optimization -0.200000 2.000000 0% 28% 28% 14% -28% +perf_sincosf 30.000000 130.000000 -1 0.00568415 614 13152 62.0761 +perf_sincosf_opt 30.000000 130.000000 -1 0.00418035 513 11856 95.1437 +speed up after optimization 30.000000 130.000000 0% 36% 16% 10% -35% +perf_sincosf 1.000000 200.000000 -1 0.00610764 614 13152 75.5469 +perf_sincosf_opt 1.000000 200.000000 -1 0.00444343 513 11856 98.8699 +speed up after optimization 1.000000 200.000000 0% 37% 16% 10% -24% +perf_float64_add -2.000000 2.000000 -1 0.00375628 1264 21904 80.5735 +perf_float64_add_opt -2.000000 2.000000 -1 0.00305017 1113 20384 44.4008 +speed up after optimization -2.000000 2.000000 0% 23% 12% 7% 81% +perf_float64_add -4.000000 4.000000 -1 0.00377815 1264 21904 80.0303 +perf_float64_add_opt -4.000000 4.000000 -1 0.00301838 1113 20384 44.8801 +speed up after optimization -4.000000 4.000000 0% 25% 12% 7% 78% +perf_float64_add -8.000000 8.000000 -1 0.00376678 1264 21904 87.1658 +perf_float64_add_opt -8.000000 8.000000 -1 0.00317879 1113 20384 52.135 +speed up after optimization -8.000000 8.000000 0% 18% 12% 7% 67% +perf_float64_add -16.000000 16.000000 -1 0.00376911 1264 21904 97.0271 +perf_float64_add_opt -16.000000 16.000000 -1 0.00305776 1113 20384 74.3579 +speed up after optimization -16.000000 16.000000 0% 23% 12% 7% 30% +perf_float64_add -125.000000 125.000000 -1 0.00378195 1264 21904 88.5859 +perf_float64_add_opt -125.000000 125.000000 -1 0.00302538 1113 20384 64.6369 +speed up after optimization -125.000000 125.000000 0% 25% 12% 7% 37% +perf_float64_add -40.000000 110.000000 -1 0.00374724 1264 21904 85.0971 +perf_float64_add_opt -40.000000 110.000000 -1 0.00302246 1113 20384 59.3649 +speed up after optimization -40.000000 110.000000 0% 24% 12% 7% 43% +perf_float64_add -55.000000 150.000000 -1 0.00377086 1264 21904 78.445 +perf_float64_add_opt -55.000000 150.000000 -1 0.00301576 1113 20384 44.625 +speed up after optimization -55.000000 150.000000 0% 25% 12% 7% 76% +perf_float64_add 0.000000 100.000000 -1 0.0037522 1264 21904 92.1216 +perf_float64_add_opt 0.000000 100.000000 -1 0.00301576 1111 20424 12.4105 +speed up after optimization 0.000000 100.000000 0% 24% 12% 7% 642% +perf_float64_add 0.000000 70.000000 -1 0.00377203 1264 21904 88.3146 +perf_float64_add_opt 0.000000 70.000000 -1 0.00303646 1111 20424 29.908 +speed up after optimization 0.000000 70.000000 0% 24% 12% 7% 195% +perf_float64_add 260.000000 1260.000000 -1 0.00373586 1264 21904 80.2404 +perf_float64_add_opt 260.000000 1260.000000 -1 0.00302363 1111 20424 21.9625 +speed up after optimization 260.000000 1260.000000 0% 24% 12% 7% 265% +perf_float64_add 10.000000 45.000000 -1 0.00376853 1264 21904 13.7028 +perf_float64_add_opt 10.000000 45.000000 -1 0.00305076 1111 20424 19.2322 +speed up after optimization 10.000000 45.000000 0% 24% 12% 7% -29% +perf_float64_add -55.000000 125.000000 -1 0.00376299 1264 21904 79.0181 +perf_float64_add_opt -55.000000 125.000000 -1 0.0030213 1113 20384 63.5966 +speed up after optimization -55.000000 125.000000 0% 25% 12% 7% 24% +perf_float64_add 20.000000 80.000000 -1 0.00374636 1264 21904 83.169 +perf_float64_add_opt 20.000000 80.000000 -1 0.00302217 1111 20424 19.1877 +speed up after optimization 20.000000 80.000000 0% 24% 12% 7% 333% +perf_float64_add 0.000000 50.000000 -1 0.00378195 1264 21904 87.4471 +perf_float64_add_opt 0.000000 50.000000 -1 0.00302538 1111 20424 18.8555 +speed up after optimization 0.000000 50.000000 0% 25% 12% 7% 364% +perf_float64_add -0.200000 2.000000 -1 0.00374024 1264 21904 84.4192 +perf_float64_add_opt -0.200000 2.000000 -1 0.00300905 1112 20408 25.376 +speed up after optimization -0.200000 2.000000 0% 24% 12% 7% 233% +perf_float64_add 30.000000 130.000000 -1 0.00376678 1264 21904 100.715 +perf_float64_add_opt 30.000000 130.000000 -1 0.00301838 1111 20424 24.9557 +speed up after optimization 30.000000 130.000000 0% 25% 12% 7% 304% +perf_float64_add 1.000000 200.000000 -1 0.00376678 1264 21904 91.1112 +perf_float64_add_opt 1.000000 200.000000 -1 0.00302451 1111 20424 28.6966 +speed up after optimization 1.000000 200.000000 0% 25% 12% 7% 217% +perf_float64_div -2.000000 2.000000 -1 0.00310617 1717 29710 97.1812 +perf_float64_div_opt -2.000000 2.000000 -1 0.00304463 1176 23478 71.3947 +speed up after optimization -2.000000 2.000000 0% 2% 32% 21% 36% +perf_float64_div -4.000000 4.000000 -1 0.00359441 1717 29710 18.2675 +perf_float64_div_opt -4.000000 4.000000 -1 0.00337712 1176 23478 107.904 +speed up after optimization -4.000000 4.000000 0% 6% 32% 21% -83% +perf_float64_div -8.000000 8.000000 -1 0.00382365 1717 29710 99.145 +perf_float64_div_opt -8.000000 8.000000 -1 0.00564623 1176 23478 91.8086 +speed up after optimization -8.000000 8.000000 0% 0% 32% 21% -83% +perf_float64_div -16.000000 16.000000 -1 0.00397794 1717 29710 107.144 +perf_float64_div_opt -16.000000 16.000000 -1 0.00357137 1176 23478 72.1419 +speed up after optimization -16.000000 16.000000 0% 11% 32% 21% 49% +perf_float64_div -125.000000 125.000000 -1 0.00399311 1717 29710 107.928 +perf_float64_div_opt -125.000000 125.000000 -1 0.00343312 1176 23478 76.5478 +speed up after optimization -125.000000 125.000000 0% 16% 32% 21% 41% +perf_float64_div -40.000000 110.000000 -1 0.00545519 1717 29710 107.433 +perf_float64_div_opt -40.000000 110.000000 -1 0.00440668 1176 23478 71.9203 +speed up after optimization -40.000000 110.000000 0% 24% 32% 21% 49% +perf_float64_div -55.000000 150.000000 -1 0.00542749 1717 29710 17.5225 +perf_float64_div_opt -55.000000 150.000000 -1 0.00420281 1198 23854 94.3591 +speed up after optimization -55.000000 150.000000 0% 29% 30% 20% -81% +perf_float64_div 0.000000 100.000000 -1 0.00764672 1717 29710 11.7355 +perf_float64_div_opt 0.000000 100.000000 -1 0.00570339 1142 22886 77.0772 +speed up after optimization 0.000000 100.000000 0% 34% 33% 23% -85% +perf_float64_div 0.000000 70.000000 -1 0.00766685 1717 29710 19.3986 +perf_float64_div_opt 0.000000 70.000000 -1 0.00568998 1142 22886 71.2487 +speed up after optimization 0.000000 70.000000 0% 35% 33% 23% -73% +perf_float64_div 260.000000 1260.000000 -1 0.00752481 1717 29710 15.4851 +perf_float64_div_opt 260.000000 1260.000000 -1 0.00623422 1171 23854 79.5488 +speed up after optimization 260.000000 1260.000000 0% 21% 32% 20% -81% +perf_float64_div 10.000000 45.000000 -1 0.00771497 1717 29710 106.153 +perf_float64_div_opt 10.000000 45.000000 -1 0.00567569 1142 22886 84.1894 +speed up after optimization 10.000000 45.000000 0% 36% 33% 23% 26% +perf_float64_div -55.000000 125.000000 -1 0.00517432 1717 29710 101.536 +perf_float64_div_opt -55.000000 125.000000 -1 0.00425502 1176 23478 78.1505 +speed up after optimization -55.000000 125.000000 0% 22% 32% 21% 30% +perf_float64_div 20.000000 80.000000 -1 0.0076368 1717 29710 100.157 +perf_float64_div_opt 20.000000 80.000000 -1 0.00568444 1142 22886 65.0455 +speed up after optimization 20.000000 80.000000 0% 34% 33% 23% 54% +perf_float64_div 0.000000 50.000000 -1 0.00765868 1717 29710 14.753 +perf_float64_div_opt 0.000000 50.000000 -1 0.00569114 1142 22886 69.7102 +speed up after optimization 0.000000 50.000000 0% 35% 33% 23% -79% +perf_float64_div -0.200000 2.000000 -1 0.00462717 1717 29710 10.3708 +perf_float64_div_opt -0.200000 2.000000 -1 0.0053744 1142 22870 95.6053 +speed up after optimization -0.200000 2.000000 0% 0% 33% 23% -79% +perf_float64_div 30.000000 130.000000 -1 0.00757643 1717 29710 104.466 +perf_float64_div_opt 30.000000 130.000000 -1 0.00567948 1142 22886 74.5501 +speed up after optimization 30.000000 130.000000 0% 33% 33% 23% 40% +perf_float64_div 1.000000 200.000000 -1 0.00761318 1717 29710 9.19747 +perf_float64_div_opt 1.000000 200.000000 -1 0.00525862 1142 22886 85.6298 +speed up after optimization 1.000000 200.000000 0% 45% 33% 23% -89% +perf_float64_mul -2.000000 2.000000 -1 0.0026649 1307 22968 101.778 +perf_float64_mul_opt -2.000000 2.000000 -1 0.00212533 973 20136 63.9451 +speed up after optimization -2.000000 2.000000 0% 25% 26% 12% 59% +perf_float64_mul -4.000000 4.000000 -1 0.00329868 1307 22968 96.8709 +perf_float64_mul_opt -4.000000 4.000000 -1 0.00266052 973 20136 29.6131 +speed up after optimization -4.000000 4.000000 0% 24% 26% 12% 227% +perf_float64_mul -8.000000 8.000000 -1 0.00363992 1307 22968 83.347 +perf_float64_mul_opt -8.000000 8.000000 -1 0.0029446 973 20136 37.1563 +speed up after optimization -8.000000 8.000000 0% 24% 26% 12% 124% +perf_float64_mul -16.000000 16.000000 -1 0.00379596 1307 22968 76.3828 +perf_float64_mul_opt -16.000000 16.000000 -1 0.00311405 973 20136 33.1222 +speed up after optimization -16.000000 16.000000 0% 22% 26% 12% 131% +perf_float64_mul -125.000000 125.000000 -1 0.0039797 1307 22968 99.3285 +perf_float64_mul_opt -125.000000 125.000000 -1 0.00325463 973 20136 28.6396 +speed up after optimization -125.000000 125.000000 0% 22% 26% 12% 247% +perf_float64_mul -40.000000 110.000000 -1 0.00576174 1307 22968 96.119 +perf_float64_mul_opt -40.000000 110.000000 -1 0.00475493 973 20136 41.1219 +speed up after optimization -40.000000 110.000000 0% 21% 26% 12% 134% +perf_float64_mul -55.000000 150.000000 -1 0.00577603 1307 22968 77.5838 +perf_float64_mul_opt -55.000000 150.000000 -1 0.00475727 973 20136 30.7856 +speed up after optimization -55.000000 150.000000 0% 21% 26% 12% 152% +perf_float64_mul 0.000000 100.000000 -1 0.00834439 1307 22968 87.6414 +perf_float64_mul_opt 0.000000 100.000000 -1 0.00681784 971 20160 40.7015 +speed up after optimization 0.000000 100.000000 0% 22% 26% 12% 115% +perf_float64_mul 0.000000 70.000000 -1 0.00837326 1307 22968 79.745 +perf_float64_mul_opt 0.000000 70.000000 -1 0.00678576 971 20160 27.6325 +speed up after optimization 0.000000 70.000000 0% 23% 26% 12% 189% +perf_float64_mul 260.000000 1260.000000 -1 0.0084278 1307 22968 90.7037 +perf_float64_mul_opt 260.000000 1260.000000 -1 0.00672976 948 20016 35.6668 +speed up after optimization 260.000000 1260.000000 0% 25% 27% 13% 154% +perf_float64_mul 10.000000 45.000000 -1 0.00839368 1307 22968 84.2465 +perf_float64_mul_opt 10.000000 45.000000 -1 0.00672888 948 20016 30.0022 +speed up after optimization 10.000000 45.000000 0% 25% 27% 13% 181% +perf_float64_mul -55.000000 125.000000 -1 0.005438 1307 22968 76.6602 +perf_float64_mul_opt -55.000000 125.000000 -1 0.00449623 973 20136 33.6959 +speed up after optimization -55.000000 125.000000 0% 21% 26% 12% 128% +perf_float64_mul 20.000000 80.000000 -1 0.00840534 1307 22968 76.3222 +perf_float64_mul_opt 20.000000 80.000000 -1 0.0067563 948 20016 38.5177 +speed up after optimization 20.000000 80.000000 0% 24% 27% 13% 98% +perf_float64_mul 0.000000 50.000000 -1 0.00821751 1307 22968 87.3592 +perf_float64_mul_opt 0.000000 50.000000 -1 0.00672538 971 20160 24.1762 +speed up after optimization 0.000000 50.000000 0% 22% 26% 12% 261% +perf_float64_mul -0.200000 2.000000 -1 0.00416928 1307 22968 79.5154 +perf_float64_mul_opt -0.200000 2.000000 -1 0.00339259 973 20168 24.8689 +speed up after optimization -0.200000 2.000000 0% 23% 26% 12% 220% +perf_float64_mul 30.000000 130.000000 -1 0.00842663 1307 22968 76.9548 +perf_float64_mul_opt 30.000000 130.000000 -1 0.0067458 948 20016 32.4625 +speed up after optimization 30.000000 130.000000 0% 25% 27% 13% 137% +perf_float64_mul 1.000000 200.000000 -1 0.00843159 1307 22968 81.0339 +perf_float64_mul_opt 1.000000 200.000000 -1 0.00676738 948 20016 54.8219 +speed up after optimization 1.000000 200.000000 0% 25% 27% 13% 48% diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_size_reduction_comparison.png b/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_size_reduction_comparison.png new file mode 100644 index 000000000..cccfcba9e Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_size_reduction_comparison.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_speedup_comparison.png b/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_speedup_comparison.png new file mode 100644 index 000000000..16f94972a Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf/e_j0_speedup_comparison.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_size_reduction_comparison.png b/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_size_reduction_comparison.png new file mode 100644 index 000000000..8459347eb Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_size_reduction_comparison.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_speedup_comparison.png b/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_speedup_comparison.png new file mode 100644 index 000000000..0a95c528f Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf/e_y0_speedup_comparison.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/perf_quant.log b/applications/newton/llvm-ir/performance_test/firefly_perf/perf_quant.log new file mode 100644 index 000000000..2133a5cbd --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/firefly_perf/perf_quant.log @@ -0,0 +1,103 @@ +test case param precision_bits instruction count time consumption ir lines library size compile time +perf_j0 -2.000000 2.000000 12 -1 0.00419785 672 12806 8.47906 +perf_j0_opt -2.000000 2.000000 12 -1 0.00414156 553 9086 8.87451 +speed up after optimization -2.000000 2.000000 0% 1% 21% 40% -4% +perf_j0 -4.000000 4.000000 12 -1 0.0182643 672 12806 8.60355 +perf_j0_opt -4.000000 4.000000 12 -1 0.0162081 553 9070 8.77285 +speed up after optimization -4.000000 4.000000 0% 13% 21% 41% -2% +perf_j0 -8.000000 8.000000 12 -1 0.0256526 672 12806 8.6312 +perf_j0_opt -8.000000 8.000000 12 -1 0.0223749 553 9062 9.12953 +speed up after optimization -8.000000 8.000000 0% 15% 21% 41% -5% +perf_j0 -16.000000 16.000000 12 -1 0.0292059 672 12806 8.71164 +perf_j0_opt -16.000000 16.000000 12 -1 0.025191 553 9054 8.77293 +speed up after optimization -16.000000 16.000000 0% 16% 21% 41% -1% +perf_j0 -125.000000 125.000000 16 -1 0.0320668 672 12806 8.52784 +perf_j0_opt -125.000000 125.000000 16 -1 0.02817 553 9070 9.18924 +speed up after optimization -125.000000 125.000000 0% 14% 21% 41% -7% +perf_j0 -40.000000 110.000000 10 -1 0.0317711 672 12806 8.5338 +perf_j0_opt -40.000000 110.000000 10 -1 0.0317366 551 8990 11.0385 +speed up after optimization -40.000000 110.000000 0% 0% 21% 42% -23% +perf_j0 -55.000000 150.000000 11 -1 0.0320452 672 12806 8.68425 +perf_j0_opt -55.000000 150.000000 11 -1 0.0275624 553 9014 9.18352 +speed up after optimization -55.000000 150.000000 0% 16% 21% 42% -5% +perf_j0 0.000000 100.000000 10 -1 0.0330468 672 12806 8.47503 +perf_j0_opt 0.000000 100.000000 10 -1 0.0272468 517 8694 8.85619 +speed up after optimization 0.000000 100.000000 0% 21% 29% 47% -4% +perf_j0 0.000000 70.000000 10 -1 0.0317489 672 12806 8.50697 +perf_j0_opt 0.000000 70.000000 10 -1 0.0270736 517 8694 8.86639 +speed up after optimization 0.000000 70.000000 0% 17% 29% 47% -4% +perf_j0 260.000000 1260.000000 14 -1 0.0324252 672 12806 8.51517 +perf_j0_opt 260.000000 1260.000000 14 -1 0.0275411 397 7414 9.35105 +speed up after optimization 260.000000 1260.000000 0% 18% 69% 72% -9% +perf_j0 10.000000 45.000000 12 -1 0.0324191 672 12806 8.48592 +perf_j0_opt 10.000000 45.000000 12 -1 0.0276295 397 7430 8.8447 +speed up after optimization 10.000000 45.000000 0% 17% 69% 72% -4% +perf_j0 -55.000000 125.000000 11 -1 0.031883 672 12806 8.67599 +perf_j0_opt -55.000000 125.000000 11 -1 0.0275522 553 9014 9.54866 +speed up after optimization -55.000000 125.000000 0% 16% 21% 42% -9% +perf_j0 20.000000 80.000000 8 -1 0.0324229 672 12806 8.56161 +perf_j0_opt 20.000000 80.000000 8 -1 0.027502 395 7366 9.1907 +speed up after optimization 20.000000 80.000000 0% 18% 70% 73% -7% +perf_j0 0.000000 50.000000 8 -1 0.0314762 672 12806 8.53386 +perf_j0_opt 0.000000 50.000000 8 -1 0.0267758 515 8678 9.13387 +speed up after optimization 0.000000 50.000000 0% 18% 30% 47% -7% +perf_j0 -0.200000 2.000000 14 -1 0.00418735 672 12806 8.51811 +perf_j0_opt -0.200000 2.000000 14 -1 0.00417219 553 9118 9.32047 +speed up after optimization -0.200000 2.000000 0% 0% 21% 40% -9% +perf_j0 30.000000 130.000000 8 -1 0.0324139 672 12806 8.51825 +perf_j0_opt 30.000000 130.000000 8 -1 0.0273428 393 7342 8.94168 +speed up after optimization 30.000000 130.000000 0% 19% 70% 74% -5% +perf_j0 1.000000 200.000000 10 -1 0.0323296 672 12806 8.51264 +perf_j0_opt 1.000000 200.000000 10 -1 0.0273609 458 8286 9.26989 +speed up after optimization 1.000000 200.000000 0% 18% 46% 54% -8% +perf_y0 -2.000000 2.000000 12 -1 0.0064308 938 16974 8.66754 +perf_y0_opt -2.000000 2.000000 12 -1 0.00641738 809 13166 9.19182 +speed up after optimization -2.000000 2.000000 0% 0% 15% 28% -6% +perf_y0 -4.000000 4.000000 12 -1 0.0117311 938 16974 8.93471 +perf_y0_opt -4.000000 4.000000 12 -1 0.0110676 809 13142 9.85309 +speed up after optimization -4.000000 4.000000 0% 6% 15% 29% -9% +perf_y0 -8.000000 8.000000 12 -1 0.0149058 938 16974 8.81807 +perf_y0_opt -8.000000 8.000000 12 -1 0.0130485 809 13134 9.12111 +speed up after optimization -8.000000 8.000000 0% 14% 15% 29% -3% +perf_y0 -16.000000 16.000000 12 -1 0.0156341 938 16974 8.99968 +perf_y0_opt -16.000000 16.000000 12 -1 0.0140399 809 13118 9.26084 +speed up after optimization -16.000000 16.000000 0% 11% 15% 29% -3% +perf_y0 -125.000000 125.000000 16 -1 0.0165835 938 16974 8.58749 +perf_y0_opt -125.000000 125.000000 16 -1 0.0151456 809 13142 9.16466 +speed up after optimization -125.000000 125.000000 0% 9% 15% 29% -6% +perf_y0 -40.000000 110.000000 10 -1 0.0233336 938 16974 8.60218 +perf_y0_opt -40.000000 110.000000 10 -1 0.0208857 807 13070 9.63137 +speed up after optimization -40.000000 110.000000 0% 12% 16% 29% -11% +perf_y0 -55.000000 150.000000 11 -1 0.0233398 938 16974 8.62408 +perf_y0_opt -55.000000 150.000000 11 -1 0.0211171 809 13078 9.74726 +speed up after optimization -55.000000 150.000000 0% 11% 15% 29% -12% +perf_y0 0.000000 100.000000 10 -1 0.0310157 938 16974 8.62416 +perf_y0_opt 0.000000 100.000000 10 -1 0.0274478 776 12774 8.86065 +speed up after optimization 0.000000 100.000000 0% 13% 20% 32% -3% +perf_y0 0.000000 70.000000 10 -1 0.030885 938 16974 8.53802 +perf_y0_opt 0.000000 70.000000 10 -1 0.0274011 776 12774 8.97721 +speed up after optimization 0.000000 70.000000 0% 13% 20% 32% -5% +perf_y0 260.000000 1260.000000 14 -1 0.0312785 938 16974 8.71655 +perf_y0_opt 260.000000 1260.000000 14 -1 0.0274198 676 11318 9.64757 +speed up after optimization 260.000000 1260.000000 0% 14% 38% 49% -10% +perf_y0 10.000000 45.000000 12 -1 0.0312994 938 16974 8.56439 +perf_y0_opt 10.000000 45.000000 12 -1 0.0275052 676 11334 9.71338 +speed up after optimization 10.000000 45.000000 0% 14% 38% 49% -12% +perf_y0 -55.000000 125.000000 11 -1 0.0221734 938 16974 8.6586 +perf_y0_opt -55.000000 125.000000 11 -1 0.0198868 809 13078 9.64884 +speed up after optimization -55.000000 125.000000 0% 11% 15% 29% -10% +perf_y0 20.000000 80.000000 8 -1 0.0313044 938 16974 8.56893 +perf_y0_opt 20.000000 80.000000 8 -1 0.0274119 674 11270 9.54451 +speed up after optimization 20.000000 80.000000 0% 14% 39% 50% -10% +perf_y0 0.000000 50.000000 8 -1 0.0307357 938 16974 8.69925 +perf_y0_opt 0.000000 50.000000 8 -1 0.0270917 774 12750 9.67551 +speed up after optimization 0.000000 50.000000 0% 13% 21% 33% -10% +perf_y0 -0.200000 2.000000 14 -1 0.0101515 938 16974 8.83691 +perf_y0_opt -0.200000 2.000000 14 -1 0.010111 823 13342 9.90323 +speed up after optimization -0.200000 2.000000 0% 0% 13% 27% -11% +perf_y0 30.000000 130.000000 8 -1 0.0312822 938 16974 8.66924 +perf_y0_opt 30.000000 130.000000 8 -1 0.0272322 672 11254 9.36284 +speed up after optimization 30.000000 130.000000 0% 15% 39% 50% -7% +perf_y0 1.000000 200.000000 10 -1 0.0312822 938 16974 8.62417 +perf_y0_opt 1.000000 200.000000 10 -1 0.0274822 741 12558 8.92709 +speed up after optimization 1.000000 200.000000 0% 14% 26% 35% -3% diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf/perf_woquant.log b/applications/newton/llvm-ir/performance_test/firefly_perf/perf_woquant.log new file mode 100644 index 000000000..29927500e --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/firefly_perf/perf_woquant.log @@ -0,0 +1,103 @@ +test case param instruction count time consumption ir lines library size compile time +perf_j0 -2.000000 2.000000 -1 0.0041891 672 12806 8.64551 +perf_j0_opt -2.000000 2.000000 -1 0.00415031 666 12710 9.01051 +speed up after optimization -2.000000 2.000000 0% 1% 0% 0% -4% +perf_j0 -4.000000 4.000000 -1 0.0182625 672 12806 8.6457 +perf_j0_opt -4.000000 4.000000 -1 0.0182068 666 12710 8.87372 +speed up after optimization -4.000000 4.000000 0% 0% 0% 0% -3% +perf_j0 -8.000000 8.000000 -1 0.0256698 672 12806 8.77255 +perf_j0_opt -8.000000 8.000000 -1 0.025617 666 12710 8.77032 +speed up after optimization -8.000000 8.000000 0% 0% 0% 0% 0% +perf_j0 -16.000000 16.000000 -1 0.0292126 672 12806 8.46476 +perf_j0_opt -16.000000 16.000000 -1 0.0291192 666 12710 8.7701 +speed up after optimization -16.000000 16.000000 0% 0% 0% 0% -3% +perf_j0 -125.000000 125.000000 -1 0.0321245 672 12806 8.62104 +perf_j0_opt -125.000000 125.000000 -1 0.0317704 666 12710 8.79059 +speed up after optimization -125.000000 125.000000 0% 1% 0% 0% -2% +perf_j0 -40.000000 110.000000 -1 0.0317742 672 12806 8.54451 +perf_j0_opt -40.000000 110.000000 -1 0.0315552 666 12710 9.03265 +speed up after optimization -40.000000 110.000000 0% 1% 0% 0% -5% +perf_j0 -55.000000 150.000000 -1 0.0319148 672 12806 8.65462 +perf_j0_opt -55.000000 150.000000 -1 0.0316328 666 12710 8.7713 +speed up after optimization -55.000000 150.000000 0% 1% 0% 0% -1% +perf_j0 0.000000 100.000000 -1 0.0319399 672 12806 8.65606 +perf_j0_opt 0.000000 100.000000 -1 0.0314426 630 12390 8.85576 +speed up after optimization 0.000000 100.000000 0% 2% 6% 3% -2% +perf_j0 0.000000 70.000000 -1 0.0317582 672 12806 8.5159 +perf_j0_opt 0.000000 70.000000 -1 0.0313274 630 12390 8.86189 +speed up after optimization 0.000000 70.000000 0% 1% 6% 3% -4% +perf_j0 260.000000 1260.000000 -1 0.0324552 672 12806 8.8024 +perf_j0_opt 260.000000 1260.000000 -1 0.0307152 510 11086 8.74592 +speed up after optimization 260.000000 1260.000000 0% 6% 31% 15% 1% +perf_j0 10.000000 45.000000 -1 0.0324576 672 12806 8.52251 +perf_j0_opt 10.000000 45.000000 -1 0.0311419 510 11086 8.71426 +speed up after optimization 10.000000 45.000000 0% 4% 31% 15% -2% +perf_j0 -55.000000 125.000000 -1 0.0319125 672 12806 8.46449 +perf_j0_opt -55.000000 125.000000 -1 0.0315628 666 12710 8.7548 +speed up after optimization -55.000000 125.000000 0% 1% 0% 0% -3% +perf_j0 20.000000 80.000000 -1 0.0324182 672 12806 8.5206 +perf_j0_opt 20.000000 80.000000 -1 0.0321373 510 11086 8.77055 +speed up after optimization 20.000000 80.000000 0% 1% 31% 15% -3% +perf_j0 0.000000 50.000000 -1 0.0314788 672 12806 8.57177 +perf_j0_opt 0.000000 50.000000 -1 0.0309062 630 12390 8.8181 +speed up after optimization 0.000000 50.000000 0% 2% 6% 3% -3% +perf_j0 -0.200000 2.000000 -1 0.0041856 672 12806 8.46162 +perf_j0_opt -0.200000 2.000000 -1 0.00416198 666 12710 8.85254 +speed up after optimization -0.200000 2.000000 0% 1% 0% 0% -4% +perf_j0 30.000000 130.000000 -1 0.0326378 672 12806 8.4693 +perf_j0_opt 30.000000 130.000000 -1 0.0307111 510 11086 8.75236 +speed up after optimization 30.000000 130.000000 0% 6% 31% 15% -3% +perf_j0 1.000000 200.000000 -1 0.0323319 672 12806 8.70759 +perf_j0_opt 1.000000 200.000000 -1 0.031696 573 12014 8.74695 +speed up after optimization 1.000000 200.000000 0% 2% 17% 6% 0% +perf_y0 -2.000000 2.000000 -1 0.00642437 938 16974 8.57486 +perf_y0_opt -2.000000 2.000000 -1 0.00636692 922 16782 8.90978 +speed up after optimization -2.000000 2.000000 0% 1% 1% 1% -4% +perf_y0 -4.000000 4.000000 -1 0.0117262 938 16974 8.64497 +perf_y0_opt -4.000000 4.000000 -1 0.0116535 922 16782 8.93764 +speed up after optimization -4.000000 4.000000 0% 1% 1% 1% -3% +perf_y0 -8.000000 8.000000 -1 0.0144246 938 16974 8.54897 +perf_y0_opt -8.000000 8.000000 -1 0.0143566 922 16782 9.20771 +speed up after optimization -8.000000 8.000000 0% 0% 1% 1% -7% +perf_y0 -16.000000 16.000000 -1 0.015684 938 16974 8.56913 +perf_y0_opt -16.000000 16.000000 -1 0.0155574 922 16782 8.94524 +speed up after optimization -16.000000 16.000000 0% 1% 1% 1% -4% +perf_y0 -125.000000 125.000000 -1 0.0171761 938 16974 8.55576 +perf_y0_opt -125.000000 125.000000 -1 0.0168725 922 16782 8.95235 +speed up after optimization -125.000000 125.000000 0% 2% 1% 1% -4% +perf_y0 -40.000000 110.000000 -1 0.0233237 938 16974 8.67136 +perf_y0_opt -40.000000 110.000000 -1 0.02359 922 16782 8.91701 +speed up after optimization -40.000000 110.000000 0% 0% 1% 1% -4% +perf_y0 -55.000000 150.000000 -1 0.0233467 938 16974 8.81824 +perf_y0_opt -55.000000 150.000000 -1 0.023303 922 16782 10.975 +speed up after optimization -55.000000 150.000000 0% 0% 1% 1% -20% +perf_y0 0.000000 100.000000 -1 0.0310112 938 16974 8.63068 +perf_y0_opt 0.000000 100.000000 -1 0.0306758 889 16478 9.03746 +speed up after optimization 0.000000 100.000000 0% 1% 5% 3% -5% +perf_y0 0.000000 70.000000 -1 0.0308814 938 16974 10.6839 +perf_y0_opt 0.000000 70.000000 -1 0.0305064 889 16478 8.92305 +speed up after optimization 0.000000 70.000000 0% 1% 5% 3% 20% +perf_y0 260.000000 1260.000000 -1 0.031307 938 16974 8.59541 +perf_y0_opt 260.000000 1260.000000 -1 0.0306499 789 15006 8.83276 +speed up after optimization 260.000000 1260.000000 0% 2% 18% 13% -3% +perf_y0 10.000000 45.000000 -1 0.0315187 938 16974 8.58749 +perf_y0_opt 10.000000 45.000000 -1 0.030616 789 15006 8.85693 +speed up after optimization 10.000000 45.000000 0% 3% 18% 13% -3% +perf_y0 -55.000000 125.000000 -1 0.022237 938 16974 8.55153 +perf_y0_opt -55.000000 125.000000 -1 0.0221264 922 16782 8.97193 +speed up after optimization -55.000000 125.000000 0% 0% 1% 1% -5% +perf_y0 20.000000 80.000000 -1 0.0313478 938 16974 8.56306 +perf_y0_opt 20.000000 80.000000 -1 0.0306105 789 15006 8.82615 +speed up after optimization 20.000000 80.000000 0% 2% 18% 13% -3% +perf_y0 0.000000 50.000000 -1 0.0307467 938 16974 8.54775 +perf_y0_opt 0.000000 50.000000 -1 0.030323 889 16478 8.88378 +speed up after optimization 0.000000 50.000000 0% 1% 5% 3% -4% +perf_y0 -0.200000 2.000000 -1 0.0101133 938 16974 8.67696 +perf_y0_opt -0.200000 2.000000 -1 0.0101148 936 16950 8.94634 +speed up after optimization -0.200000 2.000000 0% 0% 0% 0% -4% +perf_y0 30.000000 130.000000 -1 0.0313155 938 16974 8.65747 +perf_y0_opt 30.000000 130.000000 -1 0.0305937 789 15006 8.90506 +speed up after optimization 30.000000 130.000000 0% 2% 18% 13% -3% +perf_y0 1.000000 200.000000 -1 0.0312584 938 16974 8.55173 +perf_y0_opt 1.000000 200.000000 -1 0.0307293 856 16270 8.88375 +speed up after optimization 1.000000 200.000000 0% 2% 9% 4% -4% diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-library_size_reduction_ratio.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-library_size_reduction_ratio.png new file mode 100644 index 000000000..e708db5a1 Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-library_size_reduction_ratio.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-time_consumption_speedup.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-time_consumption_speedup.png new file mode 100644 index 000000000..801951fed Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64-time_consumption_speedup.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64_average_speedup_woquant.log b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64_average_speedup_woquant.log new file mode 100644 index 000000000..16c59e505 --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/aarch64_average_speedup_woquant.log @@ -0,0 +1,11 @@ +test cast instruction count time consumption ir lines library size compile time +perf_exp 0% 6% 50% 21% 2147483647% +perf_log 0% 6% 31% 19% 2147483647% +perf_acosh 0% 4% 56% 19% 2147483647% +perf_j0 0% 2% 10% 5% 2147483647% +perf_y0 0% 1% 7% 4% 153% +perf_rem_pio2 0% 15% 83% 41% 92% +perf_sincosf 0% 35% 38% 17% 2147483647% +perf_float64_add 0% 23% 14% 7% 166% +perf_float64_div 0% 23% 47% 28% 10% +perf_float64_mul 0% 23% 35% 14% 138% diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_size_reduction.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_size_reduction.png new file mode 100644 index 000000000..918b4568f Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_size_reduction.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_speedup.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_speedup.png new file mode 100644 index 000000000..85838fe69 Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_j0_relative_speedup.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_size_reduction.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_size_reduction.png new file mode 100644 index 000000000..2f74631f0 Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_size_reduction.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_speedup.png b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_speedup.png new file mode 100644 index 000000000..692890a66 Binary files /dev/null and b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/e_y0_relative_speedup.png differ diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_quant.log b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_quant.log new file mode 100644 index 000000000..82bd07fa0 --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_quant.log @@ -0,0 +1,103 @@ +test case param precision_bits instruction count time consumption ir lines library size compile time +perf_j0 -2.000000 2.000000 -1 0.00418385 672 12806 65.4983 +perf_j0_opt -2.000000 2.000000 -1 0.00416664 553 9086 86.3303 +speed up after optimization -2.000000 2.000000 0% 0% 22% 41% -24% +perf_j0 -4.000000 4.000000 -1 0.0182509 672 12806 71.8814 +perf_j0_opt -4.000000 4.000000 -1 0.0162174 553 9070 104.822 +speed up after optimization -4.000000 4.000000 0% 13% 22% 41% -31% +perf_j0 -8.000000 8.000000 -1 0.02565 672 12806 67.7895 +perf_j0_opt -8.000000 8.000000 -1 0.0223525 553 9062 54.1416 +speed up after optimization -8.000000 8.000000 0% 15% 22% 41% 25% +perf_j0 -16.000000 16.000000 -1 0.0291954 672 12806 73.5736 +perf_j0_opt -16.000000 16.000000 -1 0.0251448 553 9054 38.8134 +speed up after optimization -16.000000 16.000000 0% 16% 22% 41% 90% +perf_j0 -125.000000 125.000000 -1 0.0321671 672 12806 98.5516 +perf_j0_opt -125.000000 125.000000 -1 0.0281244 553 9070 13.1054 +speed up after optimization -125.000000 125.000000 0% 14% 22% 41% 652% +perf_j0 -40.000000 110.000000 -1 0.0317273 672 12806 71.1512 +perf_j0_opt -40.000000 110.000000 -1 0.0273153 551 8990 102.597 +speed up after optimization -40.000000 110.000000 0% 16% 22% 42% -31% +perf_j0 -55.000000 150.000000 -1 0.0318999 672 12806 84.472 +perf_j0_opt -55.000000 150.000000 -1 0.0277161 553 9014 89.4097 +speed up after optimization -55.000000 150.000000 0% 15% 22% 42% -6% +perf_j0 0.000000 100.000000 -1 0.0321843 672 12806 92.1985 +perf_j0_opt 0.000000 100.000000 -1 0.0272354 517 8694 107.057 +speed up after optimization 0.000000 100.000000 0% 18% 30% 47% -14% +perf_j0 0.000000 70.000000 -1 0.0317343 672 12806 36.5856 +perf_j0_opt 0.000000 70.000000 -1 0.0270598 517 8694 72.0306 +speed up after optimization 0.000000 70.000000 0% 17% 30% 47% -49% +perf_j0 260.000000 1260.000000 -1 0.0324106 672 12806 101.595 +perf_j0_opt 260.000000 1260.000000 -1 0.027537 397 7414 100.71 +speed up after optimization 260.000000 1260.000000 0% 18% 69% 73% 1% +perf_j0 10.000000 45.000000 -1 0.0324232 672 12806 76.7883 +perf_j0_opt 10.000000 45.000000 -1 0.0276038 397 7430 13.5422 +speed up after optimization 10.000000 45.000000 0% 17% 69% 72% 467% +perf_j0 -55.000000 125.000000 -1 0.0318973 672 12806 63.7482 +perf_j0_opt -55.000000 125.000000 -1 0.0275344 553 9014 44.247 +speed up after optimization -55.000000 125.000000 0% 16% 22% 42% 44% +perf_j0 20.000000 80.000000 -1 0.0324211 672 12806 56.1169 +perf_j0_opt 20.000000 80.000000 -1 0.0274702 395 7366 44.2723 +speed up after optimization 20.000000 80.000000 0% 18% 70% 74% 27% +perf_j0 0.000000 50.000000 -1 0.0314496 672 12806 67.3216 +perf_j0_opt 0.000000 50.000000 -1 0.0267793 515 8678 96.6013 +speed up after optimization 0.000000 50.000000 0% 17% 30% 48% -30% +perf_j0 -0.200000 2.000000 -1 0.00420108 672 12806 73.62 +perf_j0_opt -0.200000 2.000000 -1 0.00414945 553 9118 95.2224 +speed up after optimization -0.200000 2.000000 0% 1% 22% 40% -23% +perf_j0 30.000000 130.000000 -1 0.0324049 672 12806 67.6828 +perf_j0_opt 30.000000 130.000000 -1 0.0273466 393 7342 33.4972 +speed up after optimization 30.000000 130.000000 0% 18% 71% 74% 102% +perf_j0 1.000000 200.000000 -1 0.0323573 672 12806 44.8586 +perf_j0_opt 1.000000 200.000000 -1 0.0273341 458 8286 87.6259 +speed up after optimization 1.000000 200.000000 0% 18% 47% 55% -49% +perf_y0 -2.000000 2.000000 -1 0.00644772 938 16974 68.3557 +perf_y0_opt -2.000000 2.000000 -1 0.00636664 809 13166 59.5284 +speed up after optimization -2.000000 2.000000 0% 1% 16% 29% 15% +perf_y0 -4.000000 4.000000 -1 0.0117244 938 16974 70.4264 +perf_y0_opt -4.000000 4.000000 -1 0.0108363 809 13142 15.0291 +speed up after optimization -4.000000 4.000000 0% 8% 16% 29% 369% +perf_y0 -8.000000 8.000000 -1 0.0143727 938 16974 68.5926 +perf_y0_opt -8.000000 8.000000 -1 0.0130582 809 13134 57.8289 +speed up after optimization -8.000000 8.000000 0% 10% 16% 29% 19% +perf_y0 -16.000000 16.000000 -1 0.0156277 938 16974 78.3375 +perf_y0_opt -16.000000 16.000000 -1 0.0140317 809 13118 20.9558 +speed up after optimization -16.000000 16.000000 0% 11% 16% 29% 274% +perf_y0 -125.000000 125.000000 -1 0.0165931 938 16974 70.9075 +perf_y0_opt -125.000000 125.000000 -1 0.0153389 809 13142 52.1113 +speed up after optimization -125.000000 125.000000 0% 8% 16% 29% 36% +perf_y0 -40.000000 110.000000 -1 0.0233252 938 16974 74.0577 +perf_y0_opt -40.000000 110.000000 -1 0.0208647 807 13070 32.5466 +speed up after optimization -40.000000 110.000000 0% 12% 16% 30% 128% +perf_y0 -55.000000 150.000000 -1 0.0241112 938 16974 84.4127 +perf_y0_opt -55.000000 150.000000 -1 0.0209423 809 13078 57.1981 +speed up after optimization -55.000000 150.000000 0% 15% 16% 30% 48% +perf_y0 0.000000 100.000000 -1 0.0314707 938 16974 91.1942 +perf_y0_opt 0.000000 100.000000 -1 0.027731 776 12774 48.4308 +speed up after optimization 0.000000 100.000000 0% 13% 21% 33% 88% +perf_y0 0.000000 70.000000 -1 0.0376877 938 16974 68.2174 +perf_y0_opt 0.000000 70.000000 -1 0.0273933 776 12774 108.352 +speed up after optimization 0.000000 70.000000 0% 38% 21% 33% -37% +perf_y0 260.000000 1260.000000 -1 0.0312633 938 16974 72.1049 +perf_y0_opt 260.000000 1260.000000 -1 0.0274029 676 11318 18.2844 +speed up after optimization 260.000000 1260.000000 0% 14% 39% 50% 294% +perf_y0 10.000000 45.000000 -1 0.0312991 938 16974 77.1561 +perf_y0_opt 10.000000 45.000000 -1 0.0274994 676 11334 47.5477 +speed up after optimization 10.000000 45.000000 0% 14% 39% 50% 62% +perf_y0 -55.000000 125.000000 -1 0.0221839 938 16974 81.8512 +perf_y0_opt -55.000000 125.000000 -1 0.0199069 809 13078 12.9256 +speed up after optimization -55.000000 125.000000 0% 11% 16% 30% 533% +perf_y0 20.000000 80.000000 -1 0.0312869 938 16974 70.0771 +perf_y0_opt 20.000000 80.000000 -1 0.0273582 674 11270 100.088 +speed up after optimization 20.000000 80.000000 0% 14% 39% 51% -30% +perf_y0 0.000000 50.000000 -1 0.0306875 938 16974 75.8735 +perf_y0_opt 0.000000 50.000000 -1 0.0270651 774 12750 94.9338 +speed up after optimization 0.000000 50.000000 0% 13% 21% 33% -20% +perf_y0 -0.200000 2.000000 -1 0.0101244 938 16974 72.8369 +perf_y0_opt -0.200000 2.000000 -1 0.0101299 823 13342 34.8513 +speed up after optimization -0.200000 2.000000 0% 0% 14% 27% -20% +perf_y0 30.000000 130.000000 -1 0.0312697 938 16974 81.7841 +perf_y0_opt 30.000000 130.000000 -1 0.0273708 672 11254 14.8843 +speed up after optimization 30.000000 130.000000 0% 14% 40% 51% 449% +perf_y0 1.000000 200.000000 -1 0.0312393 938 16974 64.1557 +perf_y0_opt 1.000000 200.000000 -1 0.0274653 741 12558 100.13 +speed up after optimization 1.000000 200.000000 0% 14% 27% 35% -36% diff --git a/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_woquant.log b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_woquant.log new file mode 100644 index 000000000..08abc5cd1 --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/firefly_perf_7_21/perf_woquant.log @@ -0,0 +1,511 @@ +test case param instruction count time consumption ir lines library size compile time +perf_exp -2.000000 2.000000 -1 0.00499058 356 7110 32.1484 +perf_exp_opt -2.000000 2.000000 -1 0.0048885 263 5942 20.8266 +speed up after optimization -2.000000 2.000000 0% 2% 35% 20% 54% +perf_exp -4.000000 4.000000 -1 0.00511774 356 7110 59.2076 +perf_exp_opt -4.000000 4.000000 -1 0.00498504 263 5942 98.2883 +speed up after optimization -4.000000 4.000000 0% 3% 35% 20% -40% +perf_exp -8.000000 8.000000 -1 0.00519474 356 7110 78.6126 +perf_exp_opt -8.000000 8.000000 -1 0.00507049 263 5942 90.1119 +speed up after optimization -8.000000 8.000000 0% 2% 35% 20% -13% +perf_exp -16.000000 16.000000 -1 0.00523003 356 7110 47.4204 +perf_exp_opt -16.000000 16.000000 -1 0.0050807 263 5942 68.1056 +speed up after optimization -16.000000 16.000000 0% 3% 35% 20% -30% +perf_exp -125.000000 125.000000 -1 0.00527465 356 7110 75.6498 +perf_exp_opt -125.000000 125.000000 -1 0.00510928 263 5942 73.9563 +speed up after optimization -125.000000 125.000000 0% 3% 35% 20% 2% +perf_exp -40.000000 110.000000 -1 0.00526386 356 7110 54.0057 +perf_exp_opt -40.000000 110.000000 -1 0.00512358 263 5942 85.5666 +speed up after optimization -40.000000 110.000000 0% 3% 35% 20% -37% +perf_exp -55.000000 150.000000 -1 0.0052487 356 7110 86.9196 +perf_exp_opt -55.000000 150.000000 -1 0.00512591 263 5942 77.8122 +speed up after optimization -55.000000 150.000000 0% 2% 35% 20% 12% +perf_exp 0.000000 100.000000 -1 0.00525949 356 7110 51.6465 +perf_exp_opt 0.000000 100.000000 -1 0.0049235 247 5878 70.4271 +speed up after optimization 0.000000 100.000000 0% 7% 44% 21% -27% +perf_exp 0.000000 70.000000 -1 0.0052869 356 7110 58.7703 +perf_exp_opt 0.000000 70.000000 -1 0.0049235 247 5878 90.5643 +speed up after optimization 0.000000 70.000000 0% 7% 44% 21% -35% +perf_exp 260.000000 1260.000000 -1 0.00314379 356 7110 57.6276 +perf_exp_opt 260.000000 1260.000000 -1 0.00278738 200 5830 73.7146 +speed up after optimization 260.000000 1260.000000 0% 13% 78% 22% -22% +perf_exp 10.000000 45.000000 -1 0.00527407 356 7110 48.4538 +perf_exp_opt 10.000000 45.000000 -1 0.00468434 188 5566 71.7847 +speed up after optimization 10.000000 45.000000 0% 13% 89% 28% -33% +perf_exp -55.000000 125.000000 -1 0.0052802 356 7110 65.5226 +perf_exp_opt -55.000000 125.000000 -1 0.00512124 263 5942 85.4387 +speed up after optimization -55.000000 125.000000 0% 3% 35% 20% -23% +perf_exp 20.000000 80.000000 -1 0.00529828 356 7110 61.5969 +perf_exp_opt 20.000000 80.000000 -1 0.00467296 188 5566 81.7607 +speed up after optimization 20.000000 80.000000 0% 13% 89% 28% -25% +perf_exp 0.000000 50.000000 -1 0.00526707 356 7110 75.8629 +perf_exp_opt 0.000000 50.000000 -1 0.00491883 247 5878 76.9426 +speed up after optimization 0.000000 50.000000 0% 7% 44% 21% -1% +perf_exp -0.200000 2.000000 -1 0.00491329 356 7110 47.8504 +perf_exp_opt -0.200000 2.000000 -1 0.00479487 273 5966 78.3256 +speed up after optimization -0.200000 2.000000 0% 2% 30% 19% -39% +perf_exp 30.000000 130.000000 -1 0.00528865 356 7110 59.5294 +perf_exp_opt 30.000000 130.000000 -1 0.00467238 188 5566 76.0667 +speed up after optimization 30.000000 130.000000 0% 13% 89% 28% -22% +perf_exp 1.000000 200.000000 -1 0.0052802 356 7110 49.2105 +perf_exp_opt 1.000000 200.000000 -1 0.00484767 214 5918 67.7755 +speed up after optimization 1.000000 200.000000 0% 9% 66% 20% -27% +perf_log -2.000000 2.000000 -1 0.00338995 413 8030 52.57 +perf_log_opt -2.000000 2.000000 -1 0.003294 336 6830 72.4853 +speed up after optimization -2.000000 2.000000 0% 3% 23% 18% -27% +perf_log -4.000000 4.000000 -1 0.00341649 413 8030 79.207 +perf_log_opt -4.000000 4.000000 -1 0.00327825 336 6830 78.3962 +speed up after optimization -4.000000 4.000000 0% 4% 23% 18% 1% +perf_log -8.000000 8.000000 -1 0.00341824 413 8030 76.9431 +perf_log_opt -8.000000 8.000000 -1 0.00320446 336 6830 85.609 +speed up after optimization -8.000000 8.000000 0% 7% 23% 18% -10% +perf_log -16.000000 16.000000 -1 0.0033952 413 8030 59.377 +perf_log_opt -16.000000 16.000000 -1 0.00320854 336 6830 70.4764 +speed up after optimization -16.000000 16.000000 0% 6% 23% 18% -16% +perf_log -125.000000 125.000000 -1 0.00339987 413 8030 53.7665 +perf_log_opt -125.000000 125.000000 -1 0.00321292 336 6830 94.9654 +speed up after optimization -125.000000 125.000000 0% 6% 23% 18% -43% +perf_log -40.000000 110.000000 -1 0.00411035 413 8030 58.5388 +perf_log_opt -40.000000 110.000000 -1 0.00388052 336 6830 76.4491 +speed up after optimization -40.000000 110.000000 0% 6% 23% 18% -23% +perf_log -55.000000 150.000000 -1 0.00410394 413 8030 67.6449 +perf_log_opt -55.000000 150.000000 -1 0.00390211 336 6830 74.9838 +speed up after optimization -55.000000 150.000000 0% 5% 23% 18% -10% +perf_log 0.000000 100.000000 -1 0.00484154 413 8030 47.4444 +perf_log_opt 0.000000 100.000000 -1 0.00459713 336 6974 79.8506 +speed up after optimization 0.000000 100.000000 0% 5% 23% 15% -41% +perf_log 0.000000 70.000000 -1 0.00486342 413 8030 79.8139 +perf_log_opt 0.000000 70.000000 -1 0.00459568 336 6974 86.4167 +speed up after optimization 0.000000 70.000000 0% 6% 23% 15% -8% +perf_log 260.000000 1260.000000 -1 0.00485525 413 8030 52.3998 +perf_log_opt 260.000000 1260.000000 -1 0.0044831 270 6406 88.6155 +speed up after optimization 260.000000 1260.000000 0% 8% 53% 25% -41% +perf_log 10.000000 45.000000 -1 0.0048535 413 8030 56.6053 +perf_log_opt 10.000000 45.000000 -1 0.00448368 270 6406 80.2513 +speed up after optimization 10.000000 45.000000 0% 8% 53% 25% -29% +perf_log -55.000000 125.000000 -1 0.00397998 413 8030 52.7002 +perf_log_opt -55.000000 125.000000 -1 0.00379536 336 6830 103.938 +speed up after optimization -55.000000 125.000000 0% 5% 23% 18% -49% +perf_log 20.000000 80.000000 -1 0.00484096 413 8030 59.4987 +perf_log_opt 20.000000 80.000000 -1 0.00450293 270 6406 73.2644 +speed up after optimization 20.000000 80.000000 0% 8% 53% 25% -19% +perf_log 0.000000 50.000000 -1 0.00486983 413 8030 58.8517 +perf_log_opt 0.000000 50.000000 -1 0.00460909 336 6974 84.3776 +speed up after optimization 0.000000 50.000000 0% 6% 23% 15% -30% +perf_log -0.200000 2.000000 -1 0.00457205 413 8030 52.9027 +perf_log_opt -0.200000 2.000000 -1 0.00435943 352 7150 79.764 +speed up after optimization -0.200000 2.000000 0% 5% 17% 12% -34% +perf_log 30.000000 130.000000 -1 0.00484534 413 8030 55.1776 +perf_log_opt 30.000000 130.000000 -1 0.00449447 270 6406 73.512 +speed up after optimization 30.000000 130.000000 0% 8% 53% 25% -25% +perf_log 1.000000 200.000000 -1 0.00485729 413 8030 50.3436 +perf_log_opt 1.000000 200.000000 -1 0.00448601 270 6406 81.5686 +speed up after optimization 1.000000 200.000000 0% 8% 53% 25% -38% +perf_acosh -2.000000 2.000000 -1 0.00330158 179 4424 45.9233 +perf_acosh_opt -2.000000 2.000000 -1 0.00310734 106 3528 70.7089 +speed up after optimization -2.000000 2.000000 0% 6% 69% 25% -35% +perf_acosh -4.000000 4.000000 -1 0.00352791 179 4424 70.6339 +perf_acosh_opt -4.000000 4.000000 -1 0.00347862 137 3912 70.5521 +speed up after optimization -4.000000 4.000000 0% 1% 31% 13% 0% +perf_acosh -8.000000 8.000000 -1 0.00357866 179 4424 60.0815 +perf_acosh_opt -8.000000 8.000000 -1 0.00351566 137 3912 59.7222 +speed up after optimization -8.000000 8.000000 0% 2% 31% 13% 1% +perf_acosh -16.000000 16.000000 -1 0.00363991 179 4424 57.6326 +perf_acosh_opt -16.000000 16.000000 -1 0.00353695 137 3912 65.3716 +speed up after optimization -16.000000 16.000000 0% 3% 31% 13% -12% +perf_acosh -125.000000 125.000000 -1 0.00363058 179 4424 47.0515 +perf_acosh_opt -125.000000 125.000000 -1 0.00356787 137 3912 61.5293 +speed up after optimization -125.000000 125.000000 0% 2% 31% 13% -24% +perf_acosh -40.000000 110.000000 -1 0.00443555 179 4424 63.3698 +perf_acosh_opt -40.000000 110.000000 -1 0.00433114 137 3912 72.3202 +speed up after optimization -40.000000 110.000000 0% 2% 31% 13% -12% +perf_acosh -55.000000 150.000000 -1 0.00443205 179 4424 43.925 +perf_acosh_opt -55.000000 150.000000 -1 0.00433406 137 3912 61.9965 +speed up after optimization -55.000000 150.000000 0% 2% 31% 13% -29% +perf_acosh 0.000000 100.000000 -1 0.00530703 179 4424 50.9217 +perf_acosh_opt 0.000000 100.000000 -1 0.00517695 137 3912 65.7978 +speed up after optimization 0.000000 100.000000 0% 3% 31% 13% -23% +perf_acosh 0.000000 70.000000 -1 0.00529945 179 4424 45.4568 +perf_acosh_opt 0.000000 70.000000 -1 0.00516441 137 3912 71.071 +speed up after optimization 0.000000 70.000000 0% 3% 31% 13% -36% +perf_acosh 260.000000 1260.000000 -1 0.00532657 179 4424 53.1084 +perf_acosh_opt 260.000000 1260.000000 -1 0.00479983 83 3312 60.225 +speed up after optimization 260.000000 1260.000000 0% 11% 116% 34% -12% +perf_acosh 10.000000 45.000000 -1 0.00531345 179 4424 64.3939 +perf_acosh_opt 10.000000 45.000000 -1 0.00486838 83 3312 58.4312 +speed up after optimization 10.000000 45.000000 0% 9% 116% 34% 10% +perf_acosh -55.000000 125.000000 -1 0.00430197 179 4424 40.7759 +perf_acosh_opt -55.000000 125.000000 -1 0.00421039 137 3912 62.0286 +speed up after optimization -55.000000 125.000000 0% 2% 31% 13% -34% +perf_acosh 20.000000 80.000000 -1 0.00528924 179 4424 57.023 +perf_acosh_opt 20.000000 80.000000 -1 0.00479079 83 3312 66.8883 +speed up after optimization 20.000000 80.000000 0% 10% 116% 34% -15% +perf_acosh 0.000000 50.000000 -1 0.00533707 179 4424 51.8054 +perf_acosh_opt 0.000000 50.000000 -1 0.00516266 137 3912 77.026 +speed up after optimization 0.000000 50.000000 0% 3% 31% 13% -33% +perf_acosh -0.200000 2.000000 -1 0.0046333 179 4424 48.4035 +perf_acosh_opt -0.200000 2.000000 -1 0.00433872 106 3528 67.6288 +speed up after optimization -0.200000 2.000000 0% 7% 69% 25% -28% +perf_acosh 30.000000 130.000000 -1 0.00530528 179 4424 46.3748 +perf_acosh_opt 30.000000 130.000000 -1 0.00478788 83 3312 62.2291 +speed up after optimization 30.000000 130.000000 0% 11% 116% 34% -25% +perf_acosh 1.000000 200.000000 -1 0.00533853 179 4424 62.8555 +perf_acosh_opt 1.000000 200.000000 -1 0.00512504 122 3816 59.5002 +speed up after optimization 1.000000 200.000000 0% 4% 47% 16% 6% +perf_j0 -2.000000 2.000000 -1 0.00418327 672 12806 64.3786 +perf_j0_opt -2.000000 2.000000 -1 0.00416285 666 12710 85.0542 +speed up after optimization -2.000000 2.000000 0% 0% 1% 1% -24% +perf_j0 -4.000000 4.000000 -1 0.0182418 672 12806 59.6762 +perf_j0_opt -4.000000 4.000000 -1 0.0181613 666 12710 12.723 +speed up after optimization -4.000000 4.000000 0% 0% 1% 1% 369% +perf_j0 -8.000000 8.000000 -1 0.025636 672 12806 75.1896 +perf_j0_opt -8.000000 8.000000 -1 0.0255826 666 12710 99.6287 +speed up after optimization -8.000000 8.000000 0% 0% 1% 1% -25% +perf_j0 -16.000000 16.000000 -1 0.0291983 672 12806 56.8803 +perf_j0_opt -16.000000 16.000000 -1 0.0289294 666 12710 84.0238 +speed up after optimization -16.000000 16.000000 0% 1% 1% 1% -32% +perf_j0 -125.000000 125.000000 -1 0.032044 672 12806 59.8427 +perf_j0_opt -125.000000 125.000000 -1 0.0317658 666 12710 85.6803 +speed up after optimization -125.000000 125.000000 0% 1% 1% 1% -30% +perf_j0 -40.000000 110.000000 -1 0.0317465 672 12806 63.0613 +perf_j0_opt -40.000000 110.000000 -1 0.0314056 666 12710 84.7155 +speed up after optimization -40.000000 110.000000 0% 1% 1% 1% -26% +perf_j0 -55.000000 150.000000 -1 0.0319142 672 12806 62.9414 +perf_j0_opt -55.000000 150.000000 -1 0.0321808 666 12710 85.7364 +speed up after optimization -55.000000 150.000000 0% 0% 1% 1% -26% +perf_j0 0.000000 100.000000 -1 0.0320271 672 12806 90.9372 +perf_j0_opt 0.000000 100.000000 -1 0.0314067 630 12390 85.9234 +speed up after optimization 0.000000 100.000000 0% 2% 7% 3% 6% +perf_j0 0.000000 70.000000 -1 0.0317454 672 12806 69.6269 +perf_j0_opt 0.000000 70.000000 -1 0.0311757 630 12390 102.637 +speed up after optimization 0.000000 70.000000 0% 2% 7% 3% -32% +perf_j0 260.000000 1260.000000 -1 0.0324517 672 12806 69.7262 +perf_j0_opt 260.000000 1260.000000 -1 0.030719 510 11086 93.3677 +speed up after optimization 260.000000 1260.000000 0% 6% 32% 16% -25% +perf_j0 10.000000 45.000000 -1 0.032434 672 12806 60.4614 +perf_j0_opt 10.000000 45.000000 -1 0.0306796 510 11086 85.2731 +speed up after optimization 10.000000 45.000000 0% 6% 32% 16% -29% +perf_j0 -55.000000 125.000000 -1 0.0318961 672 12806 58.7473 +perf_j0_opt -55.000000 125.000000 -1 0.0315946 666 12710 87.114 +speed up after optimization -55.000000 125.000000 0% 1% 1% 1% -33% +perf_j0 20.000000 80.000000 -1 0.032443 672 12806 59.9357 +perf_j0_opt 20.000000 80.000000 -1 0.0307065 510 11086 85.2741 +speed up after optimization 20.000000 80.000000 0% 6% 32% 16% -30% +perf_j0 0.000000 50.000000 -1 0.0314627 672 12806 66.2057 +perf_j0_opt 0.000000 50.000000 -1 0.030873 630 12390 88.6363 +speed up after optimization 0.000000 50.000000 0% 2% 7% 3% -25% +perf_j0 -0.200000 2.000000 -1 0.00420456 672 12806 68.3014 +perf_j0_opt -0.200000 2.000000 -1 0.00416723 666 12710 81.2873 +speed up after optimization -0.200000 2.000000 0% 1% 1% 1% -16% +perf_j0 30.000000 130.000000 -1 0.032396 672 12806 61.247 +perf_j0_opt 30.000000 130.000000 -1 0.0307068 510 11086 97.3819 +speed up after optimization 30.000000 130.000000 0% 6% 32% 16% -37% +perf_j0 1.000000 200.000000 -1 0.0323704 672 12806 67.0856 +perf_j0_opt 1.000000 200.000000 -1 0.0316497 573 12014 97.4157 +speed up after optimization 1.000000 200.000000 0% 2% 17% 7% -31% +perf_y0 -2.000000 2.000000 -1 0.00644654 938 16974 66.5978 +perf_y0_opt -2.000000 2.000000 -1 0.00634184 922 16782 21.9646 +speed up after optimization -2.000000 2.000000 0% 2% 2% 1% 203% +perf_y0 -4.000000 4.000000 -1 0.0118583 938 16974 62.2263 +perf_y0_opt -4.000000 4.000000 -1 0.0117224 922 16782 92.0489 +speed up after optimization -4.000000 4.000000 0% 1% 2% 1% -32% +perf_y0 -8.000000 8.000000 -1 0.0145039 938 16974 12.1277 +perf_y0_opt -8.000000 8.000000 -1 0.0143537 922 16782 18.8552 +speed up after optimization -8.000000 8.000000 0% 1% 2% 1% -36% +perf_y0 -16.000000 16.000000 -1 0.0156393 938 16974 71.9621 +perf_y0_opt -16.000000 16.000000 -1 0.0155551 922 16782 19.1347 +speed up after optimization -16.000000 16.000000 0% 1% 2% 1% 276% +perf_y0 -125.000000 125.000000 -1 0.0165802 938 16974 73.2502 +perf_y0_opt -125.000000 125.000000 -1 0.0164968 922 16782 15.9086 +speed up after optimization -125.000000 125.000000 0% 1% 2% 1% 360% +perf_y0 -40.000000 110.000000 -1 0.0233114 938 16974 64.9549 +perf_y0_opt -40.000000 110.000000 -1 0.0232773 922 16782 10.1409 +speed up after optimization -40.000000 110.000000 0% 0% 2% 1% 541% +perf_y0 -55.000000 150.000000 -1 0.023333 938 16974 87.6144 +perf_y0_opt -55.000000 150.000000 -1 0.0232499 922 16782 107.911 +speed up after optimization -55.000000 150.000000 0% 0% 2% 1% -19% +perf_y0 0.000000 100.000000 -1 0.0310089 938 16974 69.447 +perf_y0_opt 0.000000 100.000000 -1 0.0306411 889 16478 97.6604 +speed up after optimization 0.000000 100.000000 0% 1% 6% 3% -29% +perf_y0 0.000000 70.000000 -1 0.0308812 938 16974 82.6694 +perf_y0_opt 0.000000 70.000000 -1 0.0304938 889 16478 9.23071 +speed up after optimization 0.000000 70.000000 0% 1% 6% 3% 796% +perf_y0 260.000000 1260.000000 -1 0.0312863 938 16974 78.4957 +perf_y0_opt 260.000000 1260.000000 -1 0.0305775 789 15006 103.671 +speed up after optimization 260.000000 1260.000000 0% 2% 19% 13% -24% +perf_y0 10.000000 45.000000 -1 0.031295 938 16974 91.9115 +perf_y0_opt 10.000000 45.000000 -1 0.030602 789 15006 94.0361 +speed up after optimization 10.000000 45.000000 0% 2% 19% 13% -2% +perf_y0 -55.000000 125.000000 -1 0.0221664 938 16974 62.6938 +perf_y0_opt -55.000000 125.000000 -1 0.0228387 922 16782 104.203 +speed up after optimization -55.000000 125.000000 0% 0% 2% 1% -2% +perf_y0 20.000000 80.000000 -1 0.0313014 938 16974 67.5124 +perf_y0_opt 20.000000 80.000000 -1 0.0305933 789 15006 20.5355 +speed up after optimization 20.000000 80.000000 0% 2% 19% 13% 229% +perf_y0 0.000000 50.000000 -1 0.030696 938 16974 74.6902 +perf_y0_opt 0.000000 50.000000 -1 0.030332 889 16478 22.8171 +speed up after optimization 0.000000 50.000000 0% 1% 6% 3% 227% +perf_y0 -0.200000 2.000000 -1 0.0101168 938 16974 64.7004 +perf_y0_opt -0.200000 2.000000 -1 0.0101092 936 16950 22.6846 +speed up after optimization -0.200000 2.000000 0% 0% 0% 0% 185% +perf_y0 30.000000 130.000000 -1 0.0312732 938 16974 64.9792 +perf_y0_opt 30.000000 130.000000 -1 0.0305974 789 15006 102.389 +speed up after optimization 30.000000 130.000000 0% 2% 19% 13% -37% +perf_y0 1.000000 200.000000 -1 0.0312367 938 16974 70.4959 +perf_y0_opt 1.000000 200.000000 -1 0.0307251 856 16270 107.964 +speed up after optimization 1.000000 200.000000 0% 2% 10% 4% -35% +perf_rem_pio2 -2.000000 2.000000 -1 0.00187653 2036 19234 65.559 +perf_rem_pio2_opt -2.000000 2.000000 -1 0.00174354 1161 14010 53.4014 +speed up after optimization -2.000000 2.000000 0% 8% 75% 37% 23% +perf_rem_pio2 -4.000000 4.000000 -1 0.00255406 2036 19234 62.3821 +perf_rem_pio2_opt -4.000000 4.000000 -1 0.00243127 1268 15034 54.228 +speed up after optimization -4.000000 4.000000 0% 5% 61% 28% 15% +perf_rem_pio2 -8.000000 8.000000 -1 0.00283026 2036 19234 60.1851 +perf_rem_pio2_opt -8.000000 8.000000 -1 0.00278272 1268 15034 38.7881 +speed up after optimization -8.000000 8.000000 0% 2% 61% 28% 55% +perf_rem_pio2 -16.000000 16.000000 -1 0.00294663 2036 19234 57.5065 +perf_rem_pio2_opt -16.000000 16.000000 -1 0.00292767 1268 15034 47.8343 +speed up after optimization -16.000000 16.000000 0% 1% 61% 28% 20% +perf_rem_pio2 -125.000000 125.000000 -1 0.00331879 2036 19234 56.6043 +perf_rem_pio2_opt -125.000000 125.000000 -1 0.00313942 1274 15026 53.4823 +speed up after optimization -125.000000 125.000000 0% 6% 60% 28% 6% +perf_rem_pio2 -40.000000 110.000000 -1 0.00306242 2036 19234 59.583 +perf_rem_pio2_opt -40.000000 110.000000 -1 0.00299855 1274 15026 48.8175 +speed up after optimization -40.000000 110.000000 0% 2% 60% 28% 22% +perf_rem_pio2 -55.000000 150.000000 -1 0.00305717 2036 19234 75.878 +perf_rem_pio2_opt -55.000000 150.000000 -1 0.00301926 1274 15026 35.0261 +speed up after optimization -55.000000 150.000000 0% 1% 60% 28% 117% +perf_rem_pio2 0.000000 100.000000 -1 0.00293351 2036 19234 59.9497 +perf_rem_pio2_opt 0.000000 100.000000 -1 0.00258089 1149 14114 40.1399 +speed up after optimization 0.000000 100.000000 0% 14% 77% 36% 49% +perf_rem_pio2 0.000000 70.000000 -1 0.0028638 2036 19234 70.9895 +perf_rem_pio2_opt 0.000000 70.000000 -1 0.00256018 1149 14114 40.7238 +speed up after optimization 0.000000 70.000000 0% 12% 77% 36% 74% +perf_rem_pio2 260.000000 1260.000000 -1 0.00288305 2036 19234 60.7418 +perf_rem_pio2_opt 260.000000 1260.000000 -1 0.00207457 906 11258 32.7034 +speed up after optimization 260.000000 1260.000000 0% 39% 125% 71% 86% +perf_rem_pio2 10.000000 45.000000 -1 0.00281859 2036 19234 58.9317 +perf_rem_pio2_opt 10.000000 45.000000 -1 0.00206203 898 11426 31.7235 +speed up after optimization 10.000000 45.000000 0% 37% 127% 68% 86% +perf_rem_pio2 -55.000000 125.000000 -1 0.00309159 2036 19234 68.3924 +perf_rem_pio2_opt -55.000000 125.000000 -1 0.00299359 1274 15026 45.6819 +speed up after optimization -55.000000 125.000000 0% 3% 60% 28% 50% +perf_rem_pio2 20.000000 80.000000 -1 0.00293088 2036 19234 74.9148 +perf_rem_pio2_opt 20.000000 80.000000 -1 0.00206582 898 11426 26.5563 +speed up after optimization 20.000000 80.000000 0% 42% 127% 68% 182% +perf_rem_pio2 0.000000 50.000000 -1 0.00278884 2036 19234 56.2858 +perf_rem_pio2_opt 0.000000 50.000000 -1 0.0024406 1149 14114 30.8516 +speed up after optimization 0.000000 50.000000 0% 14% 77% 36% 82% +perf_rem_pio2 -0.200000 2.000000 -1 0.00177533 2036 19234 61.6379 +perf_rem_pio2_opt -0.200000 2.000000 -1 0.00162513 1161 14010 41.3884 +speed up after optimization -0.200000 2.000000 0% 9% 75% 37% 49% +perf_rem_pio2 30.000000 130.000000 -1 0.00290755 2036 19234 80.2821 +perf_rem_pio2_opt 30.000000 130.000000 -1 0.00202936 898 11426 16.7208 +speed up after optimization 30.000000 130.000000 0% 43% 127% 68% 380% +perf_rem_pio2 1.000000 200.000000 -1 0.00293642 2036 19234 69.0075 +perf_rem_pio2_opt 1.000000 200.000000 -1 0.00224228 962 11994 18.2302 +speed up after optimization 1.000000 200.000000 0% 31% 112% 60% 279% +perf_sincosf -2.000000 2.000000 -1 0.00520145 614 13152 59.6089 +perf_sincosf_opt -2.000000 2.000000 -1 0.00401469 440 11256 92.2785 +speed up after optimization -2.000000 2.000000 0% 30% 40% 17% -35% +perf_sincosf -4.000000 4.000000 -1 0.00537907 614 13152 61.5594 +perf_sincosf_opt -4.000000 4.000000 -1 0.00417102 440 11256 94.7917 +speed up after optimization -4.000000 4.000000 0% 29% 40% 17% -35% +perf_sincosf -8.000000 8.000000 -1 0.00540736 614 13152 57.5857 +perf_sincosf_opt -8.000000 8.000000 -1 0.00424947 440 11256 103.415 +speed up after optimization -8.000000 8.000000 0% 27% 40% 17% -44% +perf_sincosf -16.000000 16.000000 -1 0.00547532 614 13152 58.0336 +perf_sincosf_opt -16.000000 16.000000 -1 0.00428243 440 11256 92.5336 +speed up after optimization -16.000000 16.000000 0% 28% 40% 17% -37% +perf_sincosf -125.000000 125.000000 -1 0.00556894 614 13152 60.6626 +perf_sincosf_opt -125.000000 125.000000 -1 0.00432735 580 12832 105.833 +speed up after optimization -125.000000 125.000000 0% 29% 6% 2% -43% +perf_sincosf -40.000000 110.000000 -1 0.00549486 614 13152 63.6104 +perf_sincosf_opt -40.000000 110.000000 -1 0.00431305 440 11256 95.2524 +speed up after optimization -40.000000 110.000000 0% 27% 40% 17% -33% +perf_sincosf -55.000000 150.000000 -1 0.00573752 614 13152 64.2511 +perf_sincosf_opt -55.000000 150.000000 -1 0.00442447 580 12832 19.1659 +speed up after optimization -55.000000 150.000000 0% 30% 6% 2% 235% +perf_sincosf 0.000000 100.000000 -1 0.00553277 614 13152 59.3575 +perf_sincosf_opt 0.000000 100.000000 -1 0.00433493 440 11256 97.2379 +speed up after optimization 0.000000 100.000000 0% 28% 40% 17% -39% +perf_sincosf 0.000000 70.000000 -1 0.00548786 614 13152 65.3212 +perf_sincosf_opt 0.000000 70.000000 -1 0.00432093 440 11256 104.734 +speed up after optimization 0.000000 70.000000 0% 27% 40% 17% -38% +perf_sincosf 260.000000 1260.000000 -1 0.00667199 614 13152 60.307 +perf_sincosf_opt 260.000000 1260.000000 -1 0.00388694 387 9984 99.4341 +speed up after optimization 260.000000 1260.000000 0% 72% 59% 32% -39% +perf_sincosf 10.000000 45.000000 -1 0.00550798 614 13152 59.8283 +perf_sincosf_opt 10.000000 45.000000 -1 0.0034897 329 9384 91.8774 +speed up after optimization 10.000000 45.000000 0% 58% 87% 40% -35% +perf_sincosf -55.000000 125.000000 -1 0.00552169 614 13152 63.8882 +perf_sincosf_opt -55.000000 125.000000 -1 0.0043428 580 12832 103.069 +speed up after optimization -55.000000 125.000000 0% 27% 6% 2% -38% +perf_sincosf 20.000000 80.000000 -1 0.00551061 614 13152 72.1952 +perf_sincosf_opt 20.000000 80.000000 -1 0.00349087 329 9384 93.9119 +speed up after optimization 20.000000 80.000000 0% 58% 87% 40% -23% +perf_sincosf 0.000000 50.000000 -1 0.00549865 614 13152 73.9586 +perf_sincosf_opt 0.000000 50.000000 -1 0.00432122 440 11256 107.843 +speed up after optimization 0.000000 50.000000 0% 27% 40% 17% -31% +perf_sincosf -0.200000 2.000000 -1 0.00507341 614 13152 68.9819 +perf_sincosf_opt -0.200000 2.000000 -1 0.00395052 440 11256 18.0477 +speed up after optimization -0.200000 2.000000 0% 28% 40% 17% 282% +perf_sincosf 30.000000 130.000000 -1 0.00566927 614 13152 81.0595 +perf_sincosf_opt 30.000000 130.000000 -1 0.00416431 513 11856 103.753 +speed up after optimization 30.000000 130.000000 0% 36% 20% 11% -22% +perf_sincosf 1.000000 200.000000 -1 0.00611784 614 13152 64.6863 +perf_sincosf_opt 1.000000 200.000000 -1 0.00444284 513 11856 104.464 +speed up after optimization 1.000000 200.000000 0% 38% 20% 11% -38% +perf_float64_add -2.000000 2.000000 -1 0.00377086 1264 21904 86.2526 +perf_float64_add_opt -2.000000 2.000000 -1 0.00302713 1113 20384 43.0215 +speed up after optimization -2.000000 2.000000 0% 25% 14% 7% 100% +perf_float64_add -4.000000 4.000000 -1 0.00375161 1264 21904 87.9904 +perf_float64_add_opt -4.000000 4.000000 -1 0.003021 1113 20384 54.0384 +speed up after optimization -4.000000 4.000000 0% 24% 14% 7% 63% +perf_float64_add -8.000000 8.000000 -1 0.00374024 1264 21904 91.6398 +perf_float64_add_opt -8.000000 8.000000 -1 0.00302363 1113 20384 57.4735 +speed up after optimization -8.000000 8.000000 0% 24% 14% 7% 59% +perf_float64_add -16.000000 16.000000 -1 0.00374315 1264 21904 106.199 +perf_float64_add_opt -16.000000 16.000000 -1 0.00302188 1113 20384 38.0361 +speed up after optimization -16.000000 16.000000 0% 24% 14% 7% 179% +perf_float64_add -125.000000 125.000000 -1 0.00377757 1264 21904 80.2274 +perf_float64_add_opt -125.000000 125.000000 -1 0.00302421 1113 20384 41.6593 +speed up after optimization -125.000000 125.000000 0% 25% 14% 7% 93% +perf_float64_add -40.000000 110.000000 -1 0.0037764 1264 21904 89.4266 +perf_float64_add_opt -40.000000 110.000000 -1 0.00300992 1113 20384 59.4873 +speed up after optimization -40.000000 110.000000 0% 25% 14% 7% 50% +perf_float64_add -55.000000 150.000000 -1 0.00377378 1264 21904 102.773 +perf_float64_add_opt -55.000000 150.000000 -1 0.00302596 1113 20384 46.0043 +speed up after optimization -55.000000 150.000000 0% 25% 14% 7% 123% +perf_float64_add 0.000000 100.000000 -1 0.00376094 1264 21904 87.0613 +perf_float64_add_opt 0.000000 100.000000 -1 0.00302596 1111 20424 43.0601 +speed up after optimization 0.000000 100.000000 0% 24% 14% 7% 102% +perf_float64_add 0.000000 70.000000 -1 0.00377174 1264 21904 93.1532 +perf_float64_add_opt 0.000000 70.000000 -1 0.0030143 1111 20424 34.7034 +speed up after optimization 0.000000 70.000000 0% 25% 14% 7% 168% +perf_float64_add 260.000000 1260.000000 -1 0.00376211 1264 21904 86.6269 +perf_float64_add_opt 260.000000 1260.000000 -1 0.00304084 1111 20424 45.9554 +speed up after optimization 260.000000 1260.000000 0% 24% 14% 7% 89% +perf_float64_add 10.000000 45.000000 -1 0.00374694 1264 21904 85.5913 +perf_float64_add_opt 10.000000 45.000000 -1 0.00330712 1111 20424 24.3145 +speed up after optimization 10.000000 45.000000 0% 13% 14% 7% 252% +perf_float64_add -55.000000 125.000000 -1 0.00374578 1264 21904 88.6861 +perf_float64_add_opt -55.000000 125.000000 -1 0.0030248 1113 20384 58.115 +speed up after optimization -55.000000 125.000000 0% 24% 14% 7% 53% +perf_float64_add 20.000000 80.000000 -1 0.00380994 1264 21904 86.7379 +perf_float64_add_opt 20.000000 80.000000 -1 0.0030213 1111 20424 23.6127 +speed up after optimization 20.000000 80.000000 0% 26% 14% 7% 267% +perf_float64_add 0.000000 50.000000 -1 0.00377174 1264 21904 79.7359 +perf_float64_add_opt 0.000000 50.000000 -1 0.00303763 1111 20424 25.264 +speed up after optimization 0.000000 50.000000 0% 24% 14% 7% 216% +perf_float64_add -0.200000 2.000000 -1 0.00376736 1264 21904 80.7462 +perf_float64_add_opt -0.200000 2.000000 -1 0.00301196 1112 20408 17.1874 +speed up after optimization -0.200000 2.000000 0% 25% 14% 7% 370% +perf_float64_add 30.000000 130.000000 -1 0.00374724 1264 21904 97.7849 +perf_float64_add_opt 30.000000 130.000000 -1 0.00304142 1111 20424 21.2827 +speed up after optimization 30.000000 130.000000 0% 23% 14% 7% 359% +perf_float64_add 1.000000 200.000000 -1 0.00374111 1264 21904 96.6751 +perf_float64_add_opt 1.000000 200.000000 -1 0.00302655 1111 20424 25.446 +speed up after optimization 1.000000 200.000000 0% 24% 14% 7% 280% +perf_float64_div -2.000000 2.000000 -1 0.00312309 1717 29710 105.43 +perf_float64_div_opt -2.000000 2.000000 -1 0.00301867 1176 23478 80.1477 +speed up after optimization -2.000000 2.000000 0% 3% 46% 27% 32% +perf_float64_div -4.000000 4.000000 -1 0.00420485 1717 29710 31.5244 +perf_float64_div_opt -4.000000 4.000000 -1 0.00338558 1176 23478 78.7279 +speed up after optimization -4.000000 4.000000 0% 24% 46% 27% -60% +perf_float64_div -8.000000 8.000000 -1 0.00386302 1717 29710 101.926 +perf_float64_div_opt -8.000000 8.000000 -1 0.00349933 1176 23478 90.9582 +speed up after optimization -8.000000 8.000000 0% 10% 46% 27% 12% +perf_float64_div -16.000000 16.000000 -1 0.00396248 1717 29710 103.92 +perf_float64_div_opt -16.000000 16.000000 -1 0.00355387 1176 23478 87.6029 +speed up after optimization -16.000000 16.000000 0% 11% 46% 27% 19% +perf_float64_div -125.000000 125.000000 -1 0.00395344 1717 29710 97.5665 +perf_float64_div_opt -125.000000 125.000000 -1 0.00343166 1176 23478 92.4965 +speed up after optimization -125.000000 125.000000 0% 15% 46% 27% 5% +perf_float64_div -40.000000 110.000000 -1 0.00543828 1717 29710 107.206 +perf_float64_div_opt -40.000000 110.000000 -1 0.00440901 1176 23478 78.9705 +speed up after optimization -40.000000 110.000000 0% 23% 46% 27% 36% +perf_float64_div -55.000000 150.000000 -1 0.00544673 1717 29710 24.9018 +perf_float64_div_opt -55.000000 150.000000 -1 0.00422322 1198 23854 80.059 +speed up after optimization -55.000000 150.000000 0% 29% 43% 25% -69% +perf_float64_div 0.000000 100.000000 -1 0.00762864 1717 29710 95.9103 +perf_float64_div_opt 0.000000 100.000000 -1 0.00572206 1142 22886 78.882 +speed up after optimization 0.000000 100.000000 0% 33% 50% 30% 22% +perf_float64_div 0.000000 70.000000 -1 0.00765839 1717 29710 96.4395 +perf_float64_div_opt 0.000000 70.000000 -1 0.00569669 1142 22886 71.6929 +speed up after optimization 0.000000 70.000000 0% 34% 50% 30% 35% +perf_float64_div 260.000000 1260.000000 -1 0.00749214 1717 29710 103.724 +perf_float64_div_opt 260.000000 1260.000000 -1 0.00623451 1171 23854 74.4726 +speed up after optimization 260.000000 1260.000000 0% 20% 47% 25% 39% +perf_float64_div 10.000000 45.000000 -1 0.00769776 1717 29710 96.9194 +perf_float64_div_opt 10.000000 45.000000 -1 0.00571389 1142 22886 94.0095 +speed up after optimization 10.000000 45.000000 0% 35% 50% 30% 3% +perf_float64_div -55.000000 125.000000 -1 0.0051717 1717 29710 103.97 +perf_float64_div_opt -55.000000 125.000000 -1 0.00425356 1176 23478 81.9999 +speed up after optimization -55.000000 125.000000 0% 22% 46% 27% 27% +perf_float64_div 20.000000 80.000000 -1 0.00763418 1717 29710 38.9552 +perf_float64_div_opt 20.000000 80.000000 -1 0.00568881 1142 22886 78.7206 +speed up after optimization 20.000000 80.000000 0% 34% 50% 30% -51% +perf_float64_div 0.000000 50.000000 -1 0.00767472 1717 29710 101.724 +perf_float64_div_opt 0.000000 50.000000 -1 0.00568035 1142 22886 73.3888 +speed up after optimization 0.000000 50.000000 0% 35% 50% 30% 39% +perf_float64_div -0.200000 2.000000 -1 0.00463534 1717 29710 39.9535 +perf_float64_div_opt -0.200000 2.000000 -1 0.00538374 1142 22870 75.6228 +speed up after optimization -0.200000 2.000000 0% 0% 50% 30% 39% +perf_float64_div 30.000000 130.000000 -1 0.00759509 1717 29710 97.7076 +perf_float64_div_opt 30.000000 130.000000 -1 0.00567569 1142 22886 69.7072 +speed up after optimization 30.000000 130.000000 0% 34% 50% 30% 40% +perf_float64_div 1.000000 200.000000 -1 0.00761755 1717 29710 95.5351 +perf_float64_div_opt 1.000000 200.000000 -1 0.00525307 1142 22886 84.429 +speed up after optimization 1.000000 200.000000 0% 45% 50% 30% 13% +perf_float64_mul -2.000000 2.000000 -1 0.00266985 1307 22968 101.272 +perf_float64_mul_opt -2.000000 2.000000 -1 0.0021259 973 20136 30.6939 +speed up after optimization -2.000000 2.000000 0% 26% 34% 14% 230% +perf_float64_mul -4.000000 4.000000 -1 0.00327183 1307 22968 87.0684 +perf_float64_mul_opt -4.000000 4.000000 -1 0.00265322 973 20136 35.2902 +speed up after optimization -4.000000 4.000000 0% 23% 34% 14% 147% +perf_float64_mul -8.000000 8.000000 -1 0.00360461 1307 22968 99.6409 +perf_float64_mul_opt -8.000000 8.000000 -1 0.00294751 973 20136 41.0843 +speed up after optimization -8.000000 8.000000 0% 22% 34% 14% 143% +perf_float64_mul -16.000000 16.000000 -1 0.0038254 1307 22968 93.9277 +perf_float64_mul_opt -16.000000 16.000000 -1 0.0031225 973 20136 42.5257 +speed up after optimization -16.000000 16.000000 0% 23% 34% 14% 121% +perf_float64_mul -125.000000 125.000000 -1 0.0040004 1307 22968 89.2288 +perf_float64_mul_opt -125.000000 125.000000 -1 0.00325287 973 20136 43.2774 +speed up after optimization -125.000000 125.000000 0% 23% 34% 14% 106% +perf_float64_mul -40.000000 110.000000 -1 0.00576493 1307 22968 102.277 +perf_float64_mul_opt -40.000000 110.000000 -1 0.00474384 973 20136 44.5134 +speed up after optimization -40.000000 110.000000 0% 22% 34% 14% 130% +perf_float64_mul -55.000000 150.000000 -1 0.00578156 1307 22968 101.979 +perf_float64_mul_opt -55.000000 150.000000 -1 0.00475084 973 20136 58.1229 +speed up after optimization -55.000000 150.000000 0% 22% 34% 14% 75% +perf_float64_mul 0.000000 100.000000 -1 0.00833678 1307 22968 88.3567 +perf_float64_mul_opt 0.000000 100.000000 -1 0.00684582 971 20160 36.5989 +speed up after optimization 0.000000 100.000000 0% 22% 35% 14% 141% +perf_float64_mul 0.000000 70.000000 -1 0.00831345 1307 22968 98.397 +perf_float64_mul_opt 0.000000 70.000000 -1 0.00679449 971 20160 33.2685 +speed up after optimization 0.000000 70.000000 0% 22% 35% 14% 196% +perf_float64_mul 260.000000 1260.000000 -1 0.00840678 1307 22968 79.8156 +perf_float64_mul_opt 260.000000 1260.000000 -1 0.00673587 948 20016 32.3084 +speed up after optimization 260.000000 1260.000000 0% 25% 38% 15% 147% +perf_float64_mul 10.000000 45.000000 -1 0.00842195 1307 22968 89.6103 +perf_float64_mul_opt 10.000000 45.000000 -1 0.00671749 948 20016 24.7424 +speed up after optimization 10.000000 45.000000 0% 25% 38% 15% 262% +perf_float64_mul -55.000000 125.000000 -1 0.00543623 1307 22968 79.6857 +perf_float64_mul_opt -55.000000 125.000000 -1 0.00447697 973 20136 62.596 +speed up after optimization -55.000000 125.000000 0% 21% 34% 14% 27% +perf_float64_mul 20.000000 80.000000 -1 0.00840824 1307 22968 88.6385 +perf_float64_mul_opt 20.000000 80.000000 -1 0.00674578 948 20016 39.0752 +speed up after optimization 20.000000 80.000000 0% 25% 38% 15% 127% +perf_float64_mul 0.000000 50.000000 -1 0.00824404 1307 22968 83.6942 +perf_float64_mul_opt 0.000000 50.000000 -1 0.0067277 971 20160 36.4499 +speed up after optimization 0.000000 50.000000 0% 23% 35% 14% 130% +perf_float64_mul -0.200000 2.000000 -1 0.00416343 1307 22968 76.4011 +perf_float64_mul_opt -0.200000 2.000000 -1 0.00339141 973 20168 46.9652 +speed up after optimization -0.200000 2.000000 0% 23% 34% 14% 63% +perf_float64_mul 30.000000 130.000000 -1 0.00842982 1307 22968 88.3301 +perf_float64_mul_opt 30.000000 130.000000 -1 0.00673003 948 20016 35.1739 +speed up after optimization 30.000000 130.000000 0% 25% 38% 15% 151% +perf_float64_mul 1.000000 200.000000 -1 0.00844616 1307 22968 79.0253 +perf_float64_mul_opt 1.000000 200.000000 -1 0.00676999 948 20016 30.9037 +speed up after optimization 1.000000 200.000000 0% 25% 38% 15% 156% diff --git a/applications/newton/llvm-ir/performance_test/new_bar_plot.py b/applications/newton/llvm-ir/performance_test/new_bar_plot.py new file mode 100644 index 000000000..ece3142b2 --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/new_bar_plot.py @@ -0,0 +1,213 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from matplotlib.ticker import MultipleLocator + +# The data processing functions remain the same as the last version. +# They are included here so the script is complete. + +def build_precision_map_from_quant_file(file_content): + precision_map = {} + current_benchmark_key = None + lines = file_content.strip().split('\n') + if lines and "test case\tparam\tprecision_bits" in lines[0]: + lines = lines[1:] + + precision_bits_col_index = 2 + min_parts_for_precision_line = precision_bits_col_index + 1 + + for line in lines: + parts = line.split('\t') + if not parts: continue + test_case = parts[0].strip() + if test_case.startswith("perf_j0"): current_benchmark_key = "j0" + elif test_case.startswith("perf_y0"): current_benchmark_key = "y0" + else: continue + + if len(parts) > min_parts_for_precision_line: + params_str = parts[1].strip() + if len(params_str.split()) >= 2: + try: + precision_val = parts[precision_bits_col_index].strip() + if precision_val: + int(precision_val) + map_key = (current_benchmark_key, params_str) + if map_key not in precision_map: + precision_map[map_key] = precision_val + except (IndexError, ValueError): + pass + if not precision_map: + print("Warning: Precision map is empty. Check 'perf_quant.log' format.") + return precision_map + +def parse_summary_perf_data(file_content, quantization_type): + data = [] + current_benchmark_key = None + lines = file_content.strip().split('\n') + if lines and "test case\tparam" in lines[0]: + lines = lines[1:] + + for line in lines: + parts = line.split('\t') + if not parts: + continue + test_case = parts[0].strip() + if test_case.startswith("perf_j0") or (quantization_type=="w/o Auto Quantization" and test_case.startswith("perf_exp")): + current_benchmark_key = "j0" + elif test_case.startswith("perf_y0"): + current_benchmark_key = "y0" + elif test_case == "speed up after optimization": + if current_benchmark_key is None or current_benchmark_key not in ["j0", "y0"]: + continue + if len(parts) < 7: + continue + try: + params_str = parts[1].strip() + param_values = params_str.split() + if len(param_values) < 2: + continue + formatted_params = f"[{float(param_values[0]):.1f}, {float(param_values[1]):.1f}]" + time_speedup_pct = float(parts[3].replace('%', '')) + library_size_reduction_pct = float(parts[5].replace('%', '')) + speedup_factor = 1 + (time_speedup_pct / 100.0) + data.append({ + "benchmark": current_benchmark_key, + "params_raw": params_str, + "params_formatted": formatted_params, + "quant_type": quantization_type, + "speedup_factor": speedup_factor, + "library_size_reduction_pct": library_size_reduction_pct, + "precision_bits": None + }) + except (IndexError, ValueError): + continue + else: + current_benchmark_key = None + continue + return data + +def process_performance_data(wo_quant_path, w_quant_path): + quant_precision_map = {} + try: + with open(w_quant_path, 'r', encoding='utf-8') as f: + content = f.read() + quant_precision_map = build_precision_map_from_quant_file(content) + except FileNotFoundError: + print(f"CRITICAL: '{w_quant_path}' not found.") + return pd.DataFrame() + + all_data_collected = [] + try: + with open(wo_quant_path, 'r', encoding='utf-8') as f: + content = f.read() + all_data_collected.extend(parse_summary_perf_data(content, "w/o Auto Quantization")) + except FileNotFoundError: print(f"Warning: '{wo_quant_path}' not found.") + + try: + with open(w_quant_path, 'r', encoding='utf-8') as f: + content = f.read() + all_data_collected.extend(parse_summary_perf_data(content, "with Auto Quantization")) + except FileNotFoundError: print(f"Warning: '{w_quant_path}' not found for summary parsing.") + + if not all_data_collected: return pd.DataFrame() + + for item in all_data_collected: + item['precision_bits'] = quant_precision_map.get((item['benchmark'], item['params_raw'])) + df = pd.DataFrame(all_data_collected) + + if df.empty: return df + + df['range_over_precision_bits'] = df.apply( + lambda row: f"{row['params_formatted']}/{row['precision_bits']}" if pd.notna(row['precision_bits']) else row['params_formatted'], + axis=1 + ) + + pivot_df = df.pivot_table( + index=['benchmark', 'params_raw', 'range_over_precision_bits'], + columns='quant_type', + values=['speedup_factor', 'library_size_reduction_pct'] + ) + + pivot_df.columns = [f'{val}_{quant}' for val, quant in pivot_df.columns] + pivot_df.reset_index(inplace=True) + + pivot_df['relative_speedup'] = pivot_df.get('speedup_factor_with Auto Quantization', 1) / pivot_df.get('speedup_factor_w/o Auto Quantization', 1) + pivot_df['size_reduction_improvement'] = pivot_df.get('library_size_reduction_pct_with Auto Quantization', 0) - pivot_df.get('library_size_reduction_pct_w/o Auto Quantization', 0) + + # # + # print("------------- Original w/o Auto Quantization -------------") + # for index, row in pivot_df.iterrows(): + # print(f"{row['range_over_precision_bits']}: {row.get('speedup_factor_w/o Auto Quantization', 'N/A')}") + + return pivot_df + +# --- UPDATED Plotting function for single bars and refined Y-axis --- +def create_single_bar_plot(df_benchmark_data, benchmark_id, metric_column, y_axis_label, plot_title_suffix, output_filename): + if df_benchmark_data.empty: + print(f"No data available for {benchmark_id}, skipping '{plot_title_suffix}' plot.") + return + + df_sorted = df_benchmark_data.copy() + df_sorted['sort_key_params_numeric'] = df_sorted['params_raw'].apply(lambda x: tuple(map(float, x.split()))) + df_sorted = df_sorted.sort_values(by='sort_key_params_numeric').reset_index(drop=True) + + is_speedup_plot = "Speedup" in y_axis_label + + num_param_groups = len(df_sorted.index) + x_positions = np.arange(num_param_groups) + fig_width = max(12, num_param_groups * 0.8) + fig, ax = plt.subplots(figsize=(fig_width, 7)) + + bar_data = pd.to_numeric(df_sorted[metric_column], errors='coerce').fillna(0) + ax.bar(x_positions, bar_data, width=0.6, color='#6A0DAD', label=plot_title_suffix) + + ax.set_xlabel("Range/PrecisionBits") + ax.set_ylabel(y_axis_label) + ax.set_title(f"{benchmark_id.upper()} - {plot_title_suffix}") + ax.set_xticks(x_positions) + ax.set_xticklabels(df_sorted['range_over_precision_bits'], rotation=45, ha="right") + + if is_speedup_plot: + ax.set_ylim(0, 1.5) + ax.set_yticks([0, 1, 1.5]) + ax.axhline(1.0, color='grey', linestyle='-.', linewidth=1.0) + else: + ax.axhline(0, color='grey', linestyle='-.', linewidth=1.0) + ax.grid(True, linestyle='--', alpha=0.7, axis='y') + + plt.tight_layout() + plt.savefig(output_filename) + print(f"Plot saved: {output_filename}") + plt.close(fig) + +# --- Main script execution --- +file_path_wo_quant = "perf_woquant.log" +file_path_w_quant = "perf_quant.log" + +# Process data and calculate relative improvements +df_processed = process_performance_data(file_path_wo_quant, file_path_w_quant) + +if not df_processed.empty: + df_j0_final = df_processed[df_processed["benchmark"] == "j0"] + create_single_bar_plot(df_j0_final, "e_j0", "relative_speedup", + "Relative Speedup (Quant / w/o Quant)", + "Quantization Speedup Improvement", + "e_j0_relative_speedup.png") # New filename + create_single_bar_plot(df_j0_final, "e_j0", "size_reduction_improvement", + "Additional Size Reduction (%)", + "Quantization Size Reduction Improvement", + "e_j0_relative_size_reduction.png") # New filename + + df_y0_final = df_processed[df_processed["benchmark"] == "y0"] + create_single_bar_plot(df_y0_final, "e_y0", "relative_speedup", + "Relative Speedup (Quant / w/o Quant)", + "Quantization Speedup Improvement", + "e_y0_relative_speedup.png") # New filename + create_single_bar_plot(df_y0_final, "e_y0", "size_reduction_improvement", + "Additional Size Reduction (%)", + "Quantization Size Reduction Improvement", + "e_y0_relative_size_reduction.png") # New filename + + print("\n--- Plot generation complete ---") +else: + print("\n--- No data processed, plot generation skipped ---") diff --git a/applications/newton/llvm-ir/performance_test/perf_quant.log b/applications/newton/llvm-ir/performance_test/perf_quant.log new file mode 100644 index 000000000..2133a5cbd --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/perf_quant.log @@ -0,0 +1,103 @@ +test case param precision_bits instruction count time consumption ir lines library size compile time +perf_j0 -2.000000 2.000000 12 -1 0.00419785 672 12806 8.47906 +perf_j0_opt -2.000000 2.000000 12 -1 0.00414156 553 9086 8.87451 +speed up after optimization -2.000000 2.000000 0% 1% 21% 40% -4% +perf_j0 -4.000000 4.000000 12 -1 0.0182643 672 12806 8.60355 +perf_j0_opt -4.000000 4.000000 12 -1 0.0162081 553 9070 8.77285 +speed up after optimization -4.000000 4.000000 0% 13% 21% 41% -2% +perf_j0 -8.000000 8.000000 12 -1 0.0256526 672 12806 8.6312 +perf_j0_opt -8.000000 8.000000 12 -1 0.0223749 553 9062 9.12953 +speed up after optimization -8.000000 8.000000 0% 15% 21% 41% -5% +perf_j0 -16.000000 16.000000 12 -1 0.0292059 672 12806 8.71164 +perf_j0_opt -16.000000 16.000000 12 -1 0.025191 553 9054 8.77293 +speed up after optimization -16.000000 16.000000 0% 16% 21% 41% -1% +perf_j0 -125.000000 125.000000 16 -1 0.0320668 672 12806 8.52784 +perf_j0_opt -125.000000 125.000000 16 -1 0.02817 553 9070 9.18924 +speed up after optimization -125.000000 125.000000 0% 14% 21% 41% -7% +perf_j0 -40.000000 110.000000 10 -1 0.0317711 672 12806 8.5338 +perf_j0_opt -40.000000 110.000000 10 -1 0.0317366 551 8990 11.0385 +speed up after optimization -40.000000 110.000000 0% 0% 21% 42% -23% +perf_j0 -55.000000 150.000000 11 -1 0.0320452 672 12806 8.68425 +perf_j0_opt -55.000000 150.000000 11 -1 0.0275624 553 9014 9.18352 +speed up after optimization -55.000000 150.000000 0% 16% 21% 42% -5% +perf_j0 0.000000 100.000000 10 -1 0.0330468 672 12806 8.47503 +perf_j0_opt 0.000000 100.000000 10 -1 0.0272468 517 8694 8.85619 +speed up after optimization 0.000000 100.000000 0% 21% 29% 47% -4% +perf_j0 0.000000 70.000000 10 -1 0.0317489 672 12806 8.50697 +perf_j0_opt 0.000000 70.000000 10 -1 0.0270736 517 8694 8.86639 +speed up after optimization 0.000000 70.000000 0% 17% 29% 47% -4% +perf_j0 260.000000 1260.000000 14 -1 0.0324252 672 12806 8.51517 +perf_j0_opt 260.000000 1260.000000 14 -1 0.0275411 397 7414 9.35105 +speed up after optimization 260.000000 1260.000000 0% 18% 69% 72% -9% +perf_j0 10.000000 45.000000 12 -1 0.0324191 672 12806 8.48592 +perf_j0_opt 10.000000 45.000000 12 -1 0.0276295 397 7430 8.8447 +speed up after optimization 10.000000 45.000000 0% 17% 69% 72% -4% +perf_j0 -55.000000 125.000000 11 -1 0.031883 672 12806 8.67599 +perf_j0_opt -55.000000 125.000000 11 -1 0.0275522 553 9014 9.54866 +speed up after optimization -55.000000 125.000000 0% 16% 21% 42% -9% +perf_j0 20.000000 80.000000 8 -1 0.0324229 672 12806 8.56161 +perf_j0_opt 20.000000 80.000000 8 -1 0.027502 395 7366 9.1907 +speed up after optimization 20.000000 80.000000 0% 18% 70% 73% -7% +perf_j0 0.000000 50.000000 8 -1 0.0314762 672 12806 8.53386 +perf_j0_opt 0.000000 50.000000 8 -1 0.0267758 515 8678 9.13387 +speed up after optimization 0.000000 50.000000 0% 18% 30% 47% -7% +perf_j0 -0.200000 2.000000 14 -1 0.00418735 672 12806 8.51811 +perf_j0_opt -0.200000 2.000000 14 -1 0.00417219 553 9118 9.32047 +speed up after optimization -0.200000 2.000000 0% 0% 21% 40% -9% +perf_j0 30.000000 130.000000 8 -1 0.0324139 672 12806 8.51825 +perf_j0_opt 30.000000 130.000000 8 -1 0.0273428 393 7342 8.94168 +speed up after optimization 30.000000 130.000000 0% 19% 70% 74% -5% +perf_j0 1.000000 200.000000 10 -1 0.0323296 672 12806 8.51264 +perf_j0_opt 1.000000 200.000000 10 -1 0.0273609 458 8286 9.26989 +speed up after optimization 1.000000 200.000000 0% 18% 46% 54% -8% +perf_y0 -2.000000 2.000000 12 -1 0.0064308 938 16974 8.66754 +perf_y0_opt -2.000000 2.000000 12 -1 0.00641738 809 13166 9.19182 +speed up after optimization -2.000000 2.000000 0% 0% 15% 28% -6% +perf_y0 -4.000000 4.000000 12 -1 0.0117311 938 16974 8.93471 +perf_y0_opt -4.000000 4.000000 12 -1 0.0110676 809 13142 9.85309 +speed up after optimization -4.000000 4.000000 0% 6% 15% 29% -9% +perf_y0 -8.000000 8.000000 12 -1 0.0149058 938 16974 8.81807 +perf_y0_opt -8.000000 8.000000 12 -1 0.0130485 809 13134 9.12111 +speed up after optimization -8.000000 8.000000 0% 14% 15% 29% -3% +perf_y0 -16.000000 16.000000 12 -1 0.0156341 938 16974 8.99968 +perf_y0_opt -16.000000 16.000000 12 -1 0.0140399 809 13118 9.26084 +speed up after optimization -16.000000 16.000000 0% 11% 15% 29% -3% +perf_y0 -125.000000 125.000000 16 -1 0.0165835 938 16974 8.58749 +perf_y0_opt -125.000000 125.000000 16 -1 0.0151456 809 13142 9.16466 +speed up after optimization -125.000000 125.000000 0% 9% 15% 29% -6% +perf_y0 -40.000000 110.000000 10 -1 0.0233336 938 16974 8.60218 +perf_y0_opt -40.000000 110.000000 10 -1 0.0208857 807 13070 9.63137 +speed up after optimization -40.000000 110.000000 0% 12% 16% 29% -11% +perf_y0 -55.000000 150.000000 11 -1 0.0233398 938 16974 8.62408 +perf_y0_opt -55.000000 150.000000 11 -1 0.0211171 809 13078 9.74726 +speed up after optimization -55.000000 150.000000 0% 11% 15% 29% -12% +perf_y0 0.000000 100.000000 10 -1 0.0310157 938 16974 8.62416 +perf_y0_opt 0.000000 100.000000 10 -1 0.0274478 776 12774 8.86065 +speed up after optimization 0.000000 100.000000 0% 13% 20% 32% -3% +perf_y0 0.000000 70.000000 10 -1 0.030885 938 16974 8.53802 +perf_y0_opt 0.000000 70.000000 10 -1 0.0274011 776 12774 8.97721 +speed up after optimization 0.000000 70.000000 0% 13% 20% 32% -5% +perf_y0 260.000000 1260.000000 14 -1 0.0312785 938 16974 8.71655 +perf_y0_opt 260.000000 1260.000000 14 -1 0.0274198 676 11318 9.64757 +speed up after optimization 260.000000 1260.000000 0% 14% 38% 49% -10% +perf_y0 10.000000 45.000000 12 -1 0.0312994 938 16974 8.56439 +perf_y0_opt 10.000000 45.000000 12 -1 0.0275052 676 11334 9.71338 +speed up after optimization 10.000000 45.000000 0% 14% 38% 49% -12% +perf_y0 -55.000000 125.000000 11 -1 0.0221734 938 16974 8.6586 +perf_y0_opt -55.000000 125.000000 11 -1 0.0198868 809 13078 9.64884 +speed up after optimization -55.000000 125.000000 0% 11% 15% 29% -10% +perf_y0 20.000000 80.000000 8 -1 0.0313044 938 16974 8.56893 +perf_y0_opt 20.000000 80.000000 8 -1 0.0274119 674 11270 9.54451 +speed up after optimization 20.000000 80.000000 0% 14% 39% 50% -10% +perf_y0 0.000000 50.000000 8 -1 0.0307357 938 16974 8.69925 +perf_y0_opt 0.000000 50.000000 8 -1 0.0270917 774 12750 9.67551 +speed up after optimization 0.000000 50.000000 0% 13% 21% 33% -10% +perf_y0 -0.200000 2.000000 14 -1 0.0101515 938 16974 8.83691 +perf_y0_opt -0.200000 2.000000 14 -1 0.010111 823 13342 9.90323 +speed up after optimization -0.200000 2.000000 0% 0% 13% 27% -11% +perf_y0 30.000000 130.000000 8 -1 0.0312822 938 16974 8.66924 +perf_y0_opt 30.000000 130.000000 8 -1 0.0272322 672 11254 9.36284 +speed up after optimization 30.000000 130.000000 0% 15% 39% 50% -7% +perf_y0 1.000000 200.000000 10 -1 0.0312822 938 16974 8.62417 +perf_y0_opt 1.000000 200.000000 10 -1 0.0274822 741 12558 8.92709 +speed up after optimization 1.000000 200.000000 0% 14% 26% 35% -3% diff --git a/applications/newton/llvm-ir/performance_test/perf_woquant.log b/applications/newton/llvm-ir/performance_test/perf_woquant.log new file mode 100644 index 000000000..29927500e --- /dev/null +++ b/applications/newton/llvm-ir/performance_test/perf_woquant.log @@ -0,0 +1,103 @@ +test case param instruction count time consumption ir lines library size compile time +perf_j0 -2.000000 2.000000 -1 0.0041891 672 12806 8.64551 +perf_j0_opt -2.000000 2.000000 -1 0.00415031 666 12710 9.01051 +speed up after optimization -2.000000 2.000000 0% 1% 0% 0% -4% +perf_j0 -4.000000 4.000000 -1 0.0182625 672 12806 8.6457 +perf_j0_opt -4.000000 4.000000 -1 0.0182068 666 12710 8.87372 +speed up after optimization -4.000000 4.000000 0% 0% 0% 0% -3% +perf_j0 -8.000000 8.000000 -1 0.0256698 672 12806 8.77255 +perf_j0_opt -8.000000 8.000000 -1 0.025617 666 12710 8.77032 +speed up after optimization -8.000000 8.000000 0% 0% 0% 0% 0% +perf_j0 -16.000000 16.000000 -1 0.0292126 672 12806 8.46476 +perf_j0_opt -16.000000 16.000000 -1 0.0291192 666 12710 8.7701 +speed up after optimization -16.000000 16.000000 0% 0% 0% 0% -3% +perf_j0 -125.000000 125.000000 -1 0.0321245 672 12806 8.62104 +perf_j0_opt -125.000000 125.000000 -1 0.0317704 666 12710 8.79059 +speed up after optimization -125.000000 125.000000 0% 1% 0% 0% -2% +perf_j0 -40.000000 110.000000 -1 0.0317742 672 12806 8.54451 +perf_j0_opt -40.000000 110.000000 -1 0.0315552 666 12710 9.03265 +speed up after optimization -40.000000 110.000000 0% 1% 0% 0% -5% +perf_j0 -55.000000 150.000000 -1 0.0319148 672 12806 8.65462 +perf_j0_opt -55.000000 150.000000 -1 0.0316328 666 12710 8.7713 +speed up after optimization -55.000000 150.000000 0% 1% 0% 0% -1% +perf_j0 0.000000 100.000000 -1 0.0319399 672 12806 8.65606 +perf_j0_opt 0.000000 100.000000 -1 0.0314426 630 12390 8.85576 +speed up after optimization 0.000000 100.000000 0% 2% 6% 3% -2% +perf_j0 0.000000 70.000000 -1 0.0317582 672 12806 8.5159 +perf_j0_opt 0.000000 70.000000 -1 0.0313274 630 12390 8.86189 +speed up after optimization 0.000000 70.000000 0% 1% 6% 3% -4% +perf_j0 260.000000 1260.000000 -1 0.0324552 672 12806 8.8024 +perf_j0_opt 260.000000 1260.000000 -1 0.0307152 510 11086 8.74592 +speed up after optimization 260.000000 1260.000000 0% 6% 31% 15% 1% +perf_j0 10.000000 45.000000 -1 0.0324576 672 12806 8.52251 +perf_j0_opt 10.000000 45.000000 -1 0.0311419 510 11086 8.71426 +speed up after optimization 10.000000 45.000000 0% 4% 31% 15% -2% +perf_j0 -55.000000 125.000000 -1 0.0319125 672 12806 8.46449 +perf_j0_opt -55.000000 125.000000 -1 0.0315628 666 12710 8.7548 +speed up after optimization -55.000000 125.000000 0% 1% 0% 0% -3% +perf_j0 20.000000 80.000000 -1 0.0324182 672 12806 8.5206 +perf_j0_opt 20.000000 80.000000 -1 0.0321373 510 11086 8.77055 +speed up after optimization 20.000000 80.000000 0% 1% 31% 15% -3% +perf_j0 0.000000 50.000000 -1 0.0314788 672 12806 8.57177 +perf_j0_opt 0.000000 50.000000 -1 0.0309062 630 12390 8.8181 +speed up after optimization 0.000000 50.000000 0% 2% 6% 3% -3% +perf_j0 -0.200000 2.000000 -1 0.0041856 672 12806 8.46162 +perf_j0_opt -0.200000 2.000000 -1 0.00416198 666 12710 8.85254 +speed up after optimization -0.200000 2.000000 0% 1% 0% 0% -4% +perf_j0 30.000000 130.000000 -1 0.0326378 672 12806 8.4693 +perf_j0_opt 30.000000 130.000000 -1 0.0307111 510 11086 8.75236 +speed up after optimization 30.000000 130.000000 0% 6% 31% 15% -3% +perf_j0 1.000000 200.000000 -1 0.0323319 672 12806 8.70759 +perf_j0_opt 1.000000 200.000000 -1 0.031696 573 12014 8.74695 +speed up after optimization 1.000000 200.000000 0% 2% 17% 6% 0% +perf_y0 -2.000000 2.000000 -1 0.00642437 938 16974 8.57486 +perf_y0_opt -2.000000 2.000000 -1 0.00636692 922 16782 8.90978 +speed up after optimization -2.000000 2.000000 0% 1% 1% 1% -4% +perf_y0 -4.000000 4.000000 -1 0.0117262 938 16974 8.64497 +perf_y0_opt -4.000000 4.000000 -1 0.0116535 922 16782 8.93764 +speed up after optimization -4.000000 4.000000 0% 1% 1% 1% -3% +perf_y0 -8.000000 8.000000 -1 0.0144246 938 16974 8.54897 +perf_y0_opt -8.000000 8.000000 -1 0.0143566 922 16782 9.20771 +speed up after optimization -8.000000 8.000000 0% 0% 1% 1% -7% +perf_y0 -16.000000 16.000000 -1 0.015684 938 16974 8.56913 +perf_y0_opt -16.000000 16.000000 -1 0.0155574 922 16782 8.94524 +speed up after optimization -16.000000 16.000000 0% 1% 1% 1% -4% +perf_y0 -125.000000 125.000000 -1 0.0171761 938 16974 8.55576 +perf_y0_opt -125.000000 125.000000 -1 0.0168725 922 16782 8.95235 +speed up after optimization -125.000000 125.000000 0% 2% 1% 1% -4% +perf_y0 -40.000000 110.000000 -1 0.0233237 938 16974 8.67136 +perf_y0_opt -40.000000 110.000000 -1 0.02359 922 16782 8.91701 +speed up after optimization -40.000000 110.000000 0% 0% 1% 1% -4% +perf_y0 -55.000000 150.000000 -1 0.0233467 938 16974 8.81824 +perf_y0_opt -55.000000 150.000000 -1 0.023303 922 16782 10.975 +speed up after optimization -55.000000 150.000000 0% 0% 1% 1% -20% +perf_y0 0.000000 100.000000 -1 0.0310112 938 16974 8.63068 +perf_y0_opt 0.000000 100.000000 -1 0.0306758 889 16478 9.03746 +speed up after optimization 0.000000 100.000000 0% 1% 5% 3% -5% +perf_y0 0.000000 70.000000 -1 0.0308814 938 16974 10.6839 +perf_y0_opt 0.000000 70.000000 -1 0.0305064 889 16478 8.92305 +speed up after optimization 0.000000 70.000000 0% 1% 5% 3% 20% +perf_y0 260.000000 1260.000000 -1 0.031307 938 16974 8.59541 +perf_y0_opt 260.000000 1260.000000 -1 0.0306499 789 15006 8.83276 +speed up after optimization 260.000000 1260.000000 0% 2% 18% 13% -3% +perf_y0 10.000000 45.000000 -1 0.0315187 938 16974 8.58749 +perf_y0_opt 10.000000 45.000000 -1 0.030616 789 15006 8.85693 +speed up after optimization 10.000000 45.000000 0% 3% 18% 13% -3% +perf_y0 -55.000000 125.000000 -1 0.022237 938 16974 8.55153 +perf_y0_opt -55.000000 125.000000 -1 0.0221264 922 16782 8.97193 +speed up after optimization -55.000000 125.000000 0% 0% 1% 1% -5% +perf_y0 20.000000 80.000000 -1 0.0313478 938 16974 8.56306 +perf_y0_opt 20.000000 80.000000 -1 0.0306105 789 15006 8.82615 +speed up after optimization 20.000000 80.000000 0% 2% 18% 13% -3% +perf_y0 0.000000 50.000000 -1 0.0307467 938 16974 8.54775 +perf_y0_opt 0.000000 50.000000 -1 0.030323 889 16478 8.88378 +speed up after optimization 0.000000 50.000000 0% 1% 5% 3% -4% +perf_y0 -0.200000 2.000000 -1 0.0101133 938 16974 8.67696 +perf_y0_opt -0.200000 2.000000 -1 0.0101148 936 16950 8.94634 +speed up after optimization -0.200000 2.000000 0% 0% 0% 0% -4% +perf_y0 30.000000 130.000000 -1 0.0313155 938 16974 8.65747 +perf_y0_opt 30.000000 130.000000 -1 0.0305937 789 15006 8.90506 +speed up after optimization 30.000000 130.000000 0% 2% 18% 13% -3% +perf_y0 1.000000 200.000000 -1 0.0312584 938 16974 8.55173 +perf_y0_opt 1.000000 200.000000 -1 0.0307293 856 16270 8.88375 +speed up after optimization 1.000000 200.000000 0% 2% 9% 4% -4% diff --git a/applications/newton/llvm-ir/performance_test/plot_sensor_ranges.py b/applications/newton/llvm-ir/performance_test/plot_sensor_ranges.py index dfcbfb418..6dffe6b6d 100644 --- a/applications/newton/llvm-ir/performance_test/plot_sensor_ranges.py +++ b/applications/newton/llvm-ir/performance_test/plot_sensor_ranges.py @@ -51,7 +51,7 @@ merit_num = 4 average_data = [] -with open('average_speedup.log', 'r') as f: +with open('average_speedup_woquant.log.log', 'r') as f: for line in f.readlines(): line_list = line.strip('\n').split('\t') average_data.append(line_list) @@ -78,7 +78,7 @@ "times, lib size reduce: ", format(average_libsize_reduce[i], '.2f'), "%") performance_data = [] -with open('perf.log', 'r') as f: +with open('perf_woquant.log', 'r') as f: for line in f.readlines(): line_list = line.strip('\n').split('\t') performance_data.append(line_list) @@ -153,7 +153,7 @@ perf_data_speedup = [inst_speedup, time_speedup, ir_reduction, lib_size_reduction] -y_labels = ["instruction counts (million)", "time consumption speedup", "IR lines", "library size reduction ratio"] +y_labels = ["instruction counts (million)", "time consumption speedup", "IR lines", "library size reduction ratio","compile time"] machine = platform.machine() # machine = "aarch64" @@ -193,5 +193,5 @@ plt.close() -os.system('cp perf.log ' + machine + "_perf.log") -os.system('cp average_speedup.log ' + machine + "_average_speedup.log") +os.system('cp perf_woquant.log ' + machine + "_perf_woquant.log") +os.system('cp average_speedup_woquant.log ' + machine + "_average_speedup_woquant.log") diff --git a/applications/newton/llvm-ir/replace.py b/applications/newton/llvm-ir/replace.py deleted file mode 100644 index 89ff3ff79..000000000 --- a/applications/newton/llvm-ir/replace.py +++ /dev/null @@ -1,34 +0,0 @@ -import re -import os - -def replace_text_in_file(filepath, patterns_replacements): - # 检查文件是否存在 - if not os.path.isfile(filepath): - print(f"Error: File '{filepath}' does not exist.") - return - - # 读取文件内容 - with open(filepath, 'r', encoding='utf-8') as file: - content = file.read() - - # 对所有模式和替换进行操作 - for pattern, replacement in patterns_replacements: - updated_content = re.sub(pattern, replacement, content) - - # 将更新后的内容写回文件 - with open(filepath, 'w', encoding='utf-8') as file: - file.write(updated_content) - -# 定义文件路径和替换规则 -filepath = 'MadgwickAHRS_opt.ll' -patterns_replacements = [ - (r'declare dso_local i32 @printf\(i8\*\)', 'declare dso_local i32 @printf(i8*, i32)'), - (r'float\*\*', 'i32**'), - (r'float\*', 'i32*'), - (r'call i32 @invSqrt', 'call i32 @fixrsqrt') -] - -# 执行替换 -replace_text_in_file(filepath, patterns_replacements) - -print("Replacement complete.") \ No newline at end of file diff --git a/applications/newton/llvm-ir/replace.sh b/applications/newton/llvm-ir/replace.sh deleted file mode 100755 index d036f676a..000000000 --- a/applications/newton/llvm-ir/replace.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Define the file path -file="MadgwickAHRS_opt.ll" - -# Check if the file exists -if [ ! -f "$file" ]; then - echo "Error: File does not exist." - exit 1 -fi - -# Use sed to replace the text -#sed -i 's/declare dso_local i32 @printf(i8\*)/declare dso_local i32 @printf(i8\*, i32)/g' "$file" - -#Replace float** with i32** first to avoid conflicting replacements -sed -i 's/float\*\*/i32**/g' "$file" -# -##Replace float* with i32* -sed -i 's/float\*/i32*/g' "$file" - -# Now replace the call to invSqrt with the call to fixrsqrt -#sed -i 's/call i32 @invSqrt/call i32 @fixrsqrt/g' "$file" - -# Replace the specific call to MadgwickAHRSupdateIMU -#sed -i 's/call void @MadgwickAHRSupdateIMU(.*)/call void @MadgwickAHRSupdateIMU(float %0, float %1, float %2, float %3, float %4, float %5, i32* %119, i32* %120, i32* %121, i32* %122)/g' "$file" -#sed -i 's/call void @MadgwickAHRSupdateIMU(.*)/call void @MadgwickAHRSupdateIMU(float %0, float %1, float %2, float %3, float %4, float %5)/g' "$file" -#sed -i 's/call void @MadgwickAHRSupdateIMU(.*)/call void @MadgwickAHRSupdateIMU(i32 %83, i32 %84, i32 %85, i32 %86, i32 %87, i32 %88, i32* %89, i32* %90, i32* %91, i32* %92)/g' "$file" - -# Remove all occurrences of "_quantized" in the file -sed -i 's/_quantized//g' "$file" - -echo "Replacement complete." diff --git a/applications/newton/llvm-ir/stm32f303xc.h b/applications/newton/llvm-ir/stm32f303xc.h deleted file mode 100644 index 9a49f5793..000000000 --- a/applications/newton/llvm-ir/stm32f303xc.h +++ /dev/null @@ -1,13485 +0,0 @@ -/** -****************************************************************************** -* @file stm32f303xc.h -* @author MCD Application Team -* @brief CMSIS STM32F303xC Devices Peripheral Access Layer Header File. -* -* This file contains: -* - Data structures and the address mapping for all peripherals -* - Peripheral's registers declarations and bits definition -* - Macros to access peripheral's registers hardware -* -****************************************************************************** -* @attention -* -* Copyright (c) 2016 STMicroelectronics. -* All rights reserved. -* -* This software is licensed under terms that can be found in the LICENSE file -* in the root directory of this software component. -* If no LICENSE file comes with this software, it is provided AS-IS. -* -****************************************************************************** -*/ - -/** @addtogroup CMSIS_Device -* @{ -*/ - -/** @addtogroup stm32f303xc -* @{ -*/ - -#ifndef __STM32F303xC_H -#define __STM32F303xC_H - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** @addtogroup Configuration_section_for_CMSIS -* @{ -*/ - -/** -* @brief Configuration of the Cortex-M4 Processor and Core Peripherals -*/ -#define __CM4_REV 0x0001U /*!< Core revision r0p1 */ -#define __MPU_PRESENT 1U /*!< STM32F303xC devices provide an MPU */ -#define __NVIC_PRIO_BITS 4U /*!< STM32F303xC devices use 4 Bits for the Priority Levels */ -#define __Vendor_SysTickConfig 0U /*!< Set to 1 if different SysTick Config is used */ -#define __FPU_PRESENT 1U /*!< STM32F303xC devices provide an FPU */ - -/** -* @} -*/ - -/** @addtogroup Peripheral_interrupt_number_definition -* @{ -*/ - -/** -* @brief STM32F303xC devices Interrupt Number Definition, according to the selected device -* in @ref Library_configuration_section -*/ -typedef enum -{ - /****** Cortex-M4 Processor Exceptions Numbers ****************************************************************/ - NonMaskableInt_IRQn = -14, /*!< 2 Non Maskable Interrupt */ - HardFault_IRQn = -13, /*!< 3 Cortex-M4 Hard Fault Interrupt */ - MemoryManagement_IRQn = -12, /*!< 4 Cortex-M4 Memory Management Interrupt */ - BusFault_IRQn = -11, /*!< 5 Cortex-M4 Bus Fault Interrupt */ - UsageFault_IRQn = -10, /*!< 6 Cortex-M4 Usage Fault Interrupt */ - SVCall_IRQn = -5, /*!< 11 Cortex-M4 SV Call Interrupt */ - DebugMonitor_IRQn = -4, /*!< 12 Cortex-M4 Debug Monitor Interrupt */ - PendSV_IRQn = -2, /*!< 14 Cortex-M4 Pend SV Interrupt */ - SysTick_IRQn = -1, /*!< 15 Cortex-M4 System Tick Interrupt */ - /****** STM32 specific Interrupt Numbers **********************************************************************/ - WWDG_IRQn = 0, /*!< Window WatchDog Interrupt */ - PVD_IRQn = 1, /*!< PVD through EXTI Line detection Interrupt */ - TAMP_STAMP_IRQn = 2, /*!< Tamper and TimeStamp interrupts through the EXTI line 19 */ - RTC_WKUP_IRQn = 3, /*!< RTC Wakeup interrupt through the EXTI line 20 */ - FLASH_IRQn = 4, /*!< FLASH global Interrupt */ - RCC_IRQn = 5, /*!< RCC global Interrupt */ - EXTI0_IRQn = 6, /*!< EXTI Line0 Interrupt */ - EXTI1_IRQn = 7, /*!< EXTI Line1 Interrupt */ - EXTI2_TSC_IRQn = 8, /*!< EXTI Line2 Interrupt and Touch Sense Controller Interrupt */ - EXTI3_IRQn = 9, /*!< EXTI Line3 Interrupt */ - EXTI4_IRQn = 10, /*!< EXTI Line4 Interrupt */ - DMA1_Channel1_IRQn = 11, /*!< DMA1 Channel 1 Interrupt */ - DMA1_Channel2_IRQn = 12, /*!< DMA1 Channel 2 Interrupt */ - DMA1_Channel3_IRQn = 13, /*!< DMA1 Channel 3 Interrupt */ - DMA1_Channel4_IRQn = 14, /*!< DMA1 Channel 4 Interrupt */ - DMA1_Channel5_IRQn = 15, /*!< DMA1 Channel 5 Interrupt */ - DMA1_Channel6_IRQn = 16, /*!< DMA1 Channel 6 Interrupt */ - DMA1_Channel7_IRQn = 17, /*!< DMA1 Channel 7 Interrupt */ - ADC1_2_IRQn = 18, /*!< ADC1 & ADC2 Interrupts */ - USB_HP_CAN_TX_IRQn = 19, /*!< USB Device High Priority or CAN TX Interrupts */ - USB_LP_CAN_RX0_IRQn = 20, /*!< USB Device Low Priority or CAN RX0 Interrupts */ - CAN_RX1_IRQn = 21, /*!< CAN RX1 Interrupt */ - CAN_SCE_IRQn = 22, /*!< CAN SCE Interrupt */ - EXTI9_5_IRQn = 23, /*!< External Line[9:5] Interrupts */ - TIM1_BRK_TIM15_IRQn = 24, /*!< TIM1 Break and TIM15 Interrupts */ - TIM1_UP_TIM16_IRQn = 25, /*!< TIM1 Update and TIM16 Interrupts */ - TIM1_TRG_COM_TIM17_IRQn = 26, /*!< TIM1 Trigger and Commutation and TIM17 Interrupt */ - TIM1_CC_IRQn = 27, /*!< TIM1 Capture Compare Interrupt */ - TIM2_IRQn = 28, /*!< TIM2 global Interrupt */ - TIM3_IRQn = 29, /*!< TIM3 global Interrupt */ - TIM4_IRQn = 30, /*!< TIM4 global Interrupt */ - I2C1_EV_IRQn = 31, /*!< I2C1 Event Interrupt & EXTI Line23 Interrupt (I2C1 wakeup) */ - I2C1_ER_IRQn = 32, /*!< I2C1 Error Interrupt */ - I2C2_EV_IRQn = 33, /*!< I2C2 Event Interrupt & EXTI Line24 Interrupt (I2C2 wakeup) */ - I2C2_ER_IRQn = 34, /*!< I2C2 Error Interrupt */ - SPI1_IRQn = 35, /*!< SPI1 global Interrupt */ - SPI2_IRQn = 36, /*!< SPI2 global Interrupt */ - USART1_IRQn = 37, /*!< USART1 global Interrupt & EXTI Line25 Interrupt (USART1 wakeup) */ - USART2_IRQn = 38, /*!< USART2 global Interrupt & EXTI Line26 Interrupt (USART2 wakeup) */ - USART3_IRQn = 39, /*!< USART3 global Interrupt & EXTI Line28 Interrupt (USART3 wakeup) */ - EXTI15_10_IRQn = 40, /*!< External Line[15:10] Interrupts */ - RTC_Alarm_IRQn = 41, /*!< RTC Alarm (A and B) through EXTI Line 17 Interrupt */ - USBWakeUp_IRQn = 42, /*!< USB Wakeup Interrupt */ - TIM8_BRK_IRQn = 43, /*!< TIM8 Break Interrupt */ - TIM8_UP_IRQn = 44, /*!< TIM8 Update Interrupt */ - TIM8_TRG_COM_IRQn = 45, /*!< TIM8 Trigger and Commutation Interrupt */ - TIM8_CC_IRQn = 46, /*!< TIM8 Capture Compare Interrupt */ - ADC3_IRQn = 47, /*!< ADC3 global Interrupt */ - SPI3_IRQn = 51, /*!< SPI3 global Interrupt */ - UART4_IRQn = 52, /*!< UART4 global Interrupt & EXTI Line34 Interrupt (UART4 wakeup) */ - UART5_IRQn = 53, /*!< UART5 global Interrupt & EXTI Line35 Interrupt (UART5 wakeup) */ - TIM6_DAC_IRQn = 54, /*!< TIM6 global and DAC underrun error Interrupt */ - TIM7_IRQn = 55, /*!< TIM7 global Interrupt */ - DMA2_Channel1_IRQn = 56, /*!< DMA2 Channel 1 global Interrupt */ - DMA2_Channel2_IRQn = 57, /*!< DMA2 Channel 2 global Interrupt */ - DMA2_Channel3_IRQn = 58, /*!< DMA2 Channel 3 global Interrupt */ - DMA2_Channel4_IRQn = 59, /*!< DMA2 Channel 4 global Interrupt */ - DMA2_Channel5_IRQn = 60, /*!< DMA2 Channel 5 global Interrupt */ - ADC4_IRQn = 61, /*!< ADC4 global Interrupt */ - COMP1_2_3_IRQn = 64, /*!< COMP1, COMP2 and COMP3 global Interrupt via EXTI Line21, 22 and 29*/ - COMP4_5_6_IRQn = 65, /*!< COMP4, COMP5 and COMP6 global Interrupt via EXTI Line30, 31 and 32*/ - COMP7_IRQn = 66, /*!< COMP7 global Interrupt via EXTI Line33 */ - USB_HP_IRQn = 74, /*!< USB High Priority global Interrupt */ - USB_LP_IRQn = 75, /*!< USB Low Priority global Interrupt */ - USBWakeUp_RMP_IRQn = 76, /*!< USB Wakeup Interrupt remap */ - FPU_IRQn = 81, /*!< Floating point Interrupt */ -} IRQn_Type; - -/** -* @} -*/ - -#include "core_cm4.h" /* Cortex-M4 processor and core peripherals */ -#include "system_stm32f3xx.h" /* STM32F3xx System Header */ -#include - -/** @addtogroup Peripheral_registers_structures -* @{ -*/ - -/** -* @brief Analog to Digital Converter -*/ - -typedef struct -{ - __IO uint32_t ISR; /*!< ADC Interrupt and Status Register, Address offset: 0x00 */ - __IO uint32_t IER; /*!< ADC Interrupt Enable Register, Address offset: 0x04 */ - __IO uint32_t CR; /*!< ADC control register, Address offset: 0x08 */ - __IO uint32_t CFGR; /*!< ADC Configuration register, Address offset: 0x0C */ - uint32_t RESERVED0; /*!< Reserved, 0x010 */ - __IO uint32_t SMPR1; /*!< ADC sample time register 1, Address offset: 0x14 */ - __IO uint32_t SMPR2; /*!< ADC sample time register 2, Address offset: 0x18 */ - uint32_t RESERVED1; /*!< Reserved, 0x01C */ - __IO uint32_t TR1; /*!< ADC watchdog threshold register 1, Address offset: 0x20 */ - __IO uint32_t TR2; /*!< ADC watchdog threshold register 2, Address offset: 0x24 */ - __IO uint32_t TR3; /*!< ADC watchdog threshold register 3, Address offset: 0x28 */ - uint32_t RESERVED2; /*!< Reserved, 0x02C */ - __IO uint32_t SQR1; /*!< ADC regular sequence register 1, Address offset: 0x30 */ - __IO uint32_t SQR2; /*!< ADC regular sequence register 2, Address offset: 0x34 */ - __IO uint32_t SQR3; /*!< ADC regular sequence register 3, Address offset: 0x38 */ - __IO uint32_t SQR4; /*!< ADC regular sequence register 4, Address offset: 0x3C */ - __IO uint32_t DR; /*!< ADC regular data register, Address offset: 0x40 */ - uint32_t RESERVED3; /*!< Reserved, 0x044 */ - uint32_t RESERVED4; /*!< Reserved, 0x048 */ - __IO uint32_t JSQR; /*!< ADC injected sequence register, Address offset: 0x4C */ - uint32_t RESERVED5[4]; /*!< Reserved, 0x050 - 0x05C */ - __IO uint32_t OFR1; /*!< ADC offset register 1, Address offset: 0x60 */ - __IO uint32_t OFR2; /*!< ADC offset register 2, Address offset: 0x64 */ - __IO uint32_t OFR3; /*!< ADC offset register 3, Address offset: 0x68 */ - __IO uint32_t OFR4; /*!< ADC offset register 4, Address offset: 0x6C */ - uint32_t RESERVED6[4]; /*!< Reserved, 0x070 - 0x07C */ - __IO uint32_t JDR1; /*!< ADC injected data register 1, Address offset: 0x80 */ - __IO uint32_t JDR2; /*!< ADC injected data register 2, Address offset: 0x84 */ - __IO uint32_t JDR3; /*!< ADC injected data register 3, Address offset: 0x88 */ - __IO uint32_t JDR4; /*!< ADC injected data register 4, Address offset: 0x8C */ - uint32_t RESERVED7[4]; /*!< Reserved, 0x090 - 0x09C */ - __IO uint32_t AWD2CR; /*!< ADC Analog Watchdog 2 Configuration Register, Address offset: 0xA0 */ - __IO uint32_t AWD3CR; /*!< ADC Analog Watchdog 3 Configuration Register, Address offset: 0xA4 */ - uint32_t RESERVED8; /*!< Reserved, 0x0A8 */ - uint32_t RESERVED9; /*!< Reserved, 0x0AC */ - __IO uint32_t DIFSEL; /*!< ADC Differential Mode Selection Register, Address offset: 0xB0 */ - __IO uint32_t CALFACT; /*!< ADC Calibration Factors, Address offset: 0xB4 */ - -} ADC_TypeDef; - -typedef struct -{ - __IO uint32_t CSR; /*!< ADC Common status register, Address offset: ADC1/3 base address + 0x300 */ - uint32_t RESERVED; /*!< Reserved, ADC1/3 base address + 0x304 */ - __IO uint32_t CCR; /*!< ADC common control register, Address offset: ADC1/3 base address + 0x308 */ - __IO uint32_t CDR; /*!< ADC common regular data register for dual - AND triple modes, Address offset: ADC1/3 base address + 0x30C */ -} ADC_Common_TypeDef; - -/** -* @brief Controller Area Network TxMailBox -*/ -typedef struct -{ - __IO uint32_t TIR; /*!< CAN TX mailbox identifier register */ - __IO uint32_t TDTR; /*!< CAN mailbox data length control and time stamp register */ - __IO uint32_t TDLR; /*!< CAN mailbox data low register */ - __IO uint32_t TDHR; /*!< CAN mailbox data high register */ -} CAN_TxMailBox_TypeDef; - -/** -* @brief Controller Area Network FIFOMailBox -*/ -typedef struct -{ - __IO uint32_t RIR; /*!< CAN receive FIFO mailbox identifier register */ - __IO uint32_t RDTR; /*!< CAN receive FIFO mailbox data length control and time stamp register */ - __IO uint32_t RDLR; /*!< CAN receive FIFO mailbox data low register */ - __IO uint32_t RDHR; /*!< CAN receive FIFO mailbox data high register */ -} CAN_FIFOMailBox_TypeDef; - -/** -* @brief Controller Area Network FilterRegister -*/ -typedef struct -{ - __IO uint32_t FR1; /*!< CAN Filter bank register 1 */ - __IO uint32_t FR2; /*!< CAN Filter bank register 1 */ -} CAN_FilterRegister_TypeDef; - -/** -* @brief Controller Area Network -*/ -typedef struct -{ - __IO uint32_t MCR; /*!< CAN master control register, Address offset: 0x00 */ - __IO uint32_t MSR; /*!< CAN master status register, Address offset: 0x04 */ - __IO uint32_t TSR; /*!< CAN transmit status register, Address offset: 0x08 */ - __IO uint32_t RF0R; /*!< CAN receive FIFO 0 register, Address offset: 0x0C */ - __IO uint32_t RF1R; /*!< CAN receive FIFO 1 register, Address offset: 0x10 */ - __IO uint32_t IER; /*!< CAN interrupt enable register, Address offset: 0x14 */ - __IO uint32_t ESR; /*!< CAN error status register, Address offset: 0x18 */ - __IO uint32_t BTR; /*!< CAN bit timing register, Address offset: 0x1C */ - uint32_t RESERVED0[88]; /*!< Reserved, 0x020 - 0x17F */ - CAN_TxMailBox_TypeDef sTxMailBox[3]; /*!< CAN Tx MailBox, Address offset: 0x180 - 0x1AC */ - CAN_FIFOMailBox_TypeDef sFIFOMailBox[2]; /*!< CAN FIFO MailBox, Address offset: 0x1B0 - 0x1CC */ - uint32_t RESERVED1[12]; /*!< Reserved, 0x1D0 - 0x1FF */ - __IO uint32_t FMR; /*!< CAN filter master register, Address offset: 0x200 */ - __IO uint32_t FM1R; /*!< CAN filter mode register, Address offset: 0x204 */ - uint32_t RESERVED2; /*!< Reserved, 0x208 */ - __IO uint32_t FS1R; /*!< CAN filter scale register, Address offset: 0x20C */ - uint32_t RESERVED3; /*!< Reserved, 0x210 */ - __IO uint32_t FFA1R; /*!< CAN filter FIFO assignment register, Address offset: 0x214 */ - uint32_t RESERVED4; /*!< Reserved, 0x218 */ - __IO uint32_t FA1R; /*!< CAN filter activation register, Address offset: 0x21C */ - uint32_t RESERVED5[8]; /*!< Reserved, 0x220-0x23F */ - CAN_FilterRegister_TypeDef sFilterRegister[28]; /*!< CAN Filter Register, Address offset: 0x240-0x31C */ -} CAN_TypeDef; - -/** -* @brief Analog Comparators -*/ -typedef struct -{ - __IO uint32_t CSR; /*!< COMP control and status register, Address offset: 0x00 */ -} COMP_TypeDef; - -typedef struct -{ - __IO uint32_t CSR; /*!< COMP control and status register, used for bits common to several COMP instances, Address offset: 0x00 */ -} COMP_Common_TypeDef; - -/** -* @brief CRC calculation unit -*/ - -typedef struct -{ - __IO uint32_t DR; /*!< CRC Data register, Address offset: 0x00 */ - __IO uint8_t IDR; /*!< CRC Independent data register, Address offset: 0x04 */ - uint8_t RESERVED0; /*!< Reserved, 0x05 */ - uint16_t RESERVED1; /*!< Reserved, 0x06 */ - __IO uint32_t CR; /*!< CRC Control register, Address offset: 0x08 */ - uint32_t RESERVED2; /*!< Reserved, 0x0C */ - __IO uint32_t INIT; /*!< Initial CRC value register, Address offset: 0x10 */ - __IO uint32_t POL; /*!< CRC polynomial register, Address offset: 0x14 */ -} CRC_TypeDef; - -/** -* @brief Digital to Analog Converter -*/ - -typedef struct -{ - __IO uint32_t CR; /*!< DAC control register, Address offset: 0x00 */ - __IO uint32_t SWTRIGR; /*!< DAC software trigger register, Address offset: 0x04 */ - __IO uint32_t DHR12R1; /*!< DAC channel1 12-bit right-aligned data holding register, Address offset: 0x08 */ - __IO uint32_t DHR12L1; /*!< DAC channel1 12-bit left aligned data holding register, Address offset: 0x0C */ - __IO uint32_t DHR8R1; /*!< DAC channel1 8-bit right aligned data holding register, Address offset: 0x10 */ - __IO uint32_t DHR12R2; /*!< DAC channel2 12-bit right aligned data holding register, Address offset: 0x14 */ - __IO uint32_t DHR12L2; /*!< DAC channel2 12-bit left aligned data holding register, Address offset: 0x18 */ - __IO uint32_t DHR8R2; /*!< DAC channel2 8-bit right-aligned data holding register, Address offset: 0x1C */ - __IO uint32_t DHR12RD; /*!< Dual DAC 12-bit right-aligned data holding register, Address offset: 0x20 */ - __IO uint32_t DHR12LD; /*!< DUAL DAC 12-bit left aligned data holding register, Address offset: 0x24 */ - __IO uint32_t DHR8RD; /*!< DUAL DAC 8-bit right aligned data holding register, Address offset: 0x28 */ - __IO uint32_t DOR1; /*!< DAC channel1 data output register, Address offset: 0x2C */ - __IO uint32_t DOR2; /*!< DAC channel2 data output register, Address offset: 0x30 */ - __IO uint32_t SR; /*!< DAC status register, Address offset: 0x34 */ -} DAC_TypeDef; - -/** -* @brief Debug MCU -*/ - -typedef struct -{ - __IO uint32_t IDCODE; /*!< MCU device ID code, Address offset: 0x00 */ - __IO uint32_t CR; /*!< Debug MCU configuration register, Address offset: 0x04 */ - __IO uint32_t APB1FZ; /*!< Debug MCU APB1 freeze register, Address offset: 0x08 */ - __IO uint32_t APB2FZ; /*!< Debug MCU APB2 freeze register, Address offset: 0x0C */ -}DBGMCU_TypeDef; - -/** -* @brief DMA Controller -*/ - -typedef struct -{ - __IO uint32_t CCR; /*!< DMA channel x configuration register */ - __IO uint32_t CNDTR; /*!< DMA channel x number of data register */ - __IO uint32_t CPAR; /*!< DMA channel x peripheral address register */ - __IO uint32_t CMAR; /*!< DMA channel x memory address register */ -} DMA_Channel_TypeDef; - -typedef struct -{ - __IO uint32_t ISR; /*!< DMA interrupt status register, Address offset: 0x00 */ - __IO uint32_t IFCR; /*!< DMA interrupt flag clear register, Address offset: 0x04 */ -} DMA_TypeDef; - -/** -* @brief External Interrupt/Event Controller -*/ - -typedef struct -{ - __IO uint32_t IMR; /*! -#include -#include - -extern int _end; - -void _exit(int status) { - while (1) {} -} - -caddr_t _sbrk(int incr) { - static unsigned char *heap = NULL; - unsigned char *prev_heap; - - if (heap == NULL) { - heap = (unsigned char *)&_end; - } - prev_heap = heap; - heap += incr; - - return (caddr_t) prev_heap; -} - -int _write(int file, char *ptr, int len) { - return len; -} - -int _read(int file, char *ptr, int len) { - return 0; -} - -int _close(int file) { - return -1; -} - -int _fstat(int file, struct stat *st) { - st->st_mode = S_IFCHR; - return 0; -} - -int _isatty(int file) { - return 1; -} - -int _lseek(int file, int ptr, int dir) { - return 0; -} - -int _open(const char *name, int flags, int mode) { - return -1; -} - -int _kill(int pid, int sig) { - errno = EINVAL; - return -1; -} - -int _getpid(void) { - return 1; -} diff --git a/applications/newton/llvm-ir/system_stm32f3xx.c b/applications/newton/llvm-ir/system_stm32f3xx.c deleted file mode 100644 index f52fba4e6..000000000 --- a/applications/newton/llvm-ir/system_stm32f3xx.c +++ /dev/null @@ -1,287 +0,0 @@ -/** - ****************************************************************************** - * @file system_stm32f3xx.c - * @author MCD Application Team - * @brief CMSIS Cortex-M4 Device Peripheral Access Layer System Source File. - * - * 1. This file provides two functions and one global variable to be called from - * user application: - * - SystemInit(): This function is called at startup just after reset and - * before branch to main program. This call is made inside - * the "startup_stm32f3xx.s" file. - * - * - SystemCoreClock variable: Contains the core clock (HCLK), it can be used - * by the user application to setup the SysTick - * timer or configure other parameters. - * - * - SystemCoreClockUpdate(): Updates the variable SystemCoreClock and must - * be called whenever the core clock is changed - * during program execution. - * - * 2. After each device reset the HSI (8 MHz) is used as system clock source. - * Then SystemInit() function is called, in "startup_stm32f3xx.s" file, to - * configure the system clock before to branch to main program. - * - * 3. This file configures the system clock as follows: - *============================================================================= - * Supported STM32F3xx device - *----------------------------------------------------------------------------- - * System Clock source | HSI - *----------------------------------------------------------------------------- - * SYSCLK(Hz) | 8000000 - *----------------------------------------------------------------------------- - * HCLK(Hz) | 8000000 - *----------------------------------------------------------------------------- - * AHB Prescaler | 1 - *----------------------------------------------------------------------------- - * APB2 Prescaler | 1 - *----------------------------------------------------------------------------- - * APB1 Prescaler | 1 - *----------------------------------------------------------------------------- - * USB Clock | DISABLE - *----------------------------------------------------------------------------- - *============================================================================= - ****************************************************************************** - * @attention - * - * Copyright (c) 2016 STMicroelectronics. - * All rights reserved. - * - * This software is licensed under terms that can be found in the LICENSE file - * in the root directory of this software component. - * If no LICENSE file comes with this software, it is provided AS-IS. - * - ****************************************************************************** - */ - -/** @addtogroup CMSIS - * @{ - */ - -/** @addtogroup stm32f3xx_system - * @{ - */ - -/** @addtogroup STM32F3xx_System_Private_Includes - * @{ - */ - -#include "Include/stm32f3xx.h" - -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_TypesDefinitions - * @{ - */ - -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_Defines - * @{ - */ -#if !defined (HSE_VALUE) - #define HSE_VALUE ((uint32_t)8000000) /*!< Default value of the External oscillator in Hz. - This value can be provided and adapted by the user application. */ -#endif /* HSE_VALUE */ - -#if !defined (HSI_VALUE) - #define HSI_VALUE ((uint32_t)8000000) /*!< Default value of the Internal oscillator in Hz. - This value can be provided and adapted by the user application. */ -#endif /* HSI_VALUE */ - -/* Note: Following vector table addresses must be defined in line with linker - configuration. */ -/*!< Uncomment the following line if you need to relocate the vector table - anywhere in Flash or Sram, else the vector table is kept at the automatic - remap of boot address selected */ -/* #define USER_VECT_TAB_ADDRESS */ - -#if defined(USER_VECT_TAB_ADDRESS) -/*!< Uncomment the following line if you need to relocate your vector Table - in Sram else user remap will be done in Flash. */ -/* #define VECT_TAB_SRAM */ -#if defined(VECT_TAB_SRAM) -#define VECT_TAB_BASE_ADDRESS SRAM_BASE /*!< Vector Table base address field. - This value must be a multiple of 0x200. */ -#define VECT_TAB_OFFSET 0x00000000U /*!< Vector Table base offset field. - This value must be a multiple of 0x200. */ -#else -#define VECT_TAB_BASE_ADDRESS FLASH_BASE /*!< Vector Table base address field. - This value must be a multiple of 0x200. */ -#define VECT_TAB_OFFSET 0x00000000U /*!< Vector Table base offset field. - This value must be a multiple of 0x200. */ -#endif /* VECT_TAB_SRAM */ -#endif /* USER_VECT_TAB_ADDRESS */ - -/******************************************************************************/ -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_Macros - * @{ - */ - -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_Variables - * @{ - */ - /* This variable is updated in three ways: - 1) by calling CMSIS function SystemCoreClockUpdate() - 2) by calling HAL API function HAL_RCC_GetHCLKFreq() - 3) each time HAL_RCC_ClockConfig() is called to configure the system clock frequency - Note: If you use this function to configure the system clock there is no need to - call the 2 first functions listed above, since SystemCoreClock variable is - updated automatically. - */ -uint32_t SystemCoreClock = 8000000; - -const uint8_t AHBPrescTable[16] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 6, 7, 8, 9}; -const uint8_t APBPrescTable[8] = {0, 0, 0, 0, 1, 2, 3, 4}; - -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_FunctionPrototypes - * @{ - */ - -/** - * @} - */ - -/** @addtogroup STM32F3xx_System_Private_Functions - * @{ - */ - -/** - * @brief Setup the microcontroller system - * @param None - * @retval None - */ -void SystemInit(void) -{ -/* FPU settings --------------------------------------------------------------*/ -#if (__FPU_PRESENT == 1) && (__FPU_USED == 1) - SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2)); /* set CP10 and CP11 Full Access */ -#endif - - /* Configure the Vector Table location -------------------------------------*/ -#if defined(USER_VECT_TAB_ADDRESS) - SCB->VTOR = VECT_TAB_BASE_ADDRESS | VECT_TAB_OFFSET; /* Vector Table Relocation in Internal SRAM */ -#endif /* USER_VECT_TAB_ADDRESS */ -} - -/** - * @brief Update SystemCoreClock variable according to Clock Register Values. - * The SystemCoreClock variable contains the core clock (HCLK), it can - * be used by the user application to setup the SysTick timer or configure - * other parameters. - * - * @note Each time the core clock (HCLK) changes, this function must be called - * to update SystemCoreClock variable value. Otherwise, any configuration - * based on this variable will be incorrect. - * - * @note - The system frequency computed by this function is not the real - * frequency in the chip. It is calculated based on the predefined - * constant and the selected clock source: - * - * - If SYSCLK source is HSI, SystemCoreClock will contain the HSI_VALUE(*) - * - * - If SYSCLK source is HSE, SystemCoreClock will contain the HSE_VALUE(**) - * - * - If SYSCLK source is PLL, SystemCoreClock will contain the HSE_VALUE(**) - * or HSI_VALUE(*) multiplied/divided by the PLL factors. - * - * (*) HSI_VALUE is a constant defined in stm32f3xx_hal.h file (default value - * 8 MHz) but the real value may vary depending on the variations - * in voltage and temperature. - * - * (**) HSE_VALUE is a constant defined in stm32f3xx_hal.h file (default value - * 8 MHz), user has to ensure that HSE_VALUE is same as the real - * frequency of the crystal used. Otherwise, this function may - * have wrong result. - * - * - The result of this function could be not correct when using fractional - * value for HSE crystal. - * - * @param None - * @retval None - */ -void SystemCoreClockUpdate (void) -{ - uint32_t tmp = 0, pllmull = 0, pllsource = 0, predivfactor = 0; - - /* Get SYSCLK source -------------------------------------------------------*/ - tmp = RCC->CFGR & RCC_CFGR_SWS; - - switch (tmp) - { - case RCC_CFGR_SWS_HSI: /* HSI used as system clock */ - SystemCoreClock = HSI_VALUE; - break; - case RCC_CFGR_SWS_HSE: /* HSE used as system clock */ - SystemCoreClock = HSE_VALUE; - break; - case RCC_CFGR_SWS_PLL: /* PLL used as system clock */ - /* Get PLL clock source and multiplication factor ----------------------*/ - pllmull = RCC->CFGR & RCC_CFGR_PLLMUL; - pllsource = RCC->CFGR & RCC_CFGR_PLLSRC; - pllmull = ( pllmull >> 18) + 2; - -#if defined (STM32F302xE) || defined (STM32F303xE) || defined (STM32F398xx) - predivfactor = (RCC->CFGR2 & RCC_CFGR2_PREDIV) + 1; - if (pllsource == RCC_CFGR_PLLSRC_HSE_PREDIV) - { - /* HSE oscillator clock selected as PREDIV1 clock entry */ - SystemCoreClock = (HSE_VALUE / predivfactor) * pllmull; - } - else - { - /* HSI oscillator clock selected as PREDIV1 clock entry */ - SystemCoreClock = (HSI_VALUE / predivfactor) * pllmull; - } -#else - if (pllsource == RCC_CFGR_PLLSRC_HSI_DIV2) - { - /* HSI oscillator clock divided by 2 selected as PLL clock entry */ - SystemCoreClock = (HSI_VALUE >> 1) * pllmull; - } - else - { - predivfactor = (RCC->CFGR2 & RCC_CFGR2_PREDIV) + 1; - /* HSE oscillator clock selected as PREDIV1 clock entry */ - SystemCoreClock = (HSE_VALUE / predivfactor) * pllmull; - } -#endif /* STM32F302xE || STM32F303xE || STM32F398xx */ - break; - default: /* HSI used as system clock */ - SystemCoreClock = HSI_VALUE; - break; - } - /* Compute HCLK clock frequency ----------------*/ - /* Get HCLK prescaler */ - tmp = AHBPrescTable[((RCC->CFGR & RCC_CFGR_HPRE) >> 4)]; - /* HCLK clock frequency */ - SystemCoreClock >>= tmp; -} - -/** - * @} - */ - -/** - * @} - */ - -/** - * @} - */ diff --git a/src/common/common-data-structures.h b/src/common/common-data-structures.h index 561ffa946..d9b8aaad0 100644 --- a/src/common/common-data-structures.h +++ b/src/common/common-data-structures.h @@ -39,23 +39,19 @@ #include -typedef enum -{ - kCommonVerbosityDebugLexer = (1 << 0), - kCommonVerbosityDebugParser = (1 << 1), - kCommonVerbosityDebugAST = (1 << 2), - kCommonVerbosityDebugFF = (1 << 3), - +typedef enum { + kCommonVerbosityDebugLexer = (1 << 0), + kCommonVerbosityDebugParser = (1 << 1), + kCommonVerbosityDebugAST = (1 << 2), + kCommonVerbosityDebugFF = (1 << 3), + /* * Code depends on this bringing up the rear. - */ + */ kCommonVerbosityDebugMax, } VerbosityType; - - -typedef enum -{ +typedef enum { /* * Noisy Tokens */ @@ -254,7 +250,6 @@ typedef enum kNoisyIrNodeType_Txor, kNoisyIrNodeType_TxorAssign, - /* * Code depends on this bringing up the rear for Noisy tokens. */ @@ -270,9 +265,6 @@ typedef enum kNoisyIrNodeType_Zepsilon, kNoisyIrNodeType_Zeof, - - - /* * Noisy grammar productions. */ @@ -434,11 +426,10 @@ typedef enum */ kNoisyIrNodeTypeMax, - /* * Newton tokens */ - kNewtonIrNodeType_TMin, + kNewtonIrNodeType_TMin, /* * From auto-generated sets: */ @@ -587,8 +578,6 @@ typedef enum */ kNewtonIrNodeType_TMax, - - /* * Newton grammar productions */ @@ -671,7 +660,6 @@ typedef enum */ kNewtonIrNodeType_PMax, - /* * Code depends on this bringing up the rear for Newton. */ @@ -683,15 +671,12 @@ typedef enum kCommonIrNodeTypeMax } IrNodeType; - - -typedef enum -{ - kCommonVerbosityVerbose = (1 << 0), - kCommonVerbosityActionTrace = (1 << 1), - kCommonVerbosityCallTrace = (1 << 2), - kCommonVerbosityPostScanStreamCheck = (1 << 3), - kCommonVerbosityPreScanStreamCheck = (1 << 4), +typedef enum { + kCommonVerbosityVerbose = (1 << 0), + kCommonVerbosityActionTrace = (1 << 1), + kCommonVerbosityCallTrace = (1 << 2), + kCommonVerbosityPostScanStreamCheck = (1 << 3), + kCommonVerbosityPreScanStreamCheck = (1 << 4), /* * Code depends on this bringing up the rear. @@ -699,78 +684,68 @@ typedef enum kCommonVerbosityMax, } CommonVerbosity; - - -typedef enum -{ +typedef enum { kNewtonIrPassDimensionalMatrixAnnotation = (1 << 0), kNewtonIrPassDimensionalMatrixPiGroups = (1 << 1), - kNewtonIrPassDimensionalMatrixKernelRowCanonicalization = (1 << 2), + kNewtonIrPassDimensionalMatrixKernelRowCanonicalization = (1 << 2), kNewtonIrPassDimensionalMatrixPiGroupSorted = (1 << 3), - kNewtonIrPassDimensionalMatrixPiGroupsWeedOutDuplicates = (1 << 4), + kNewtonIrPassDimensionalMatrixPiGroupsWeedOutDuplicates = (1 << 4), kNewtonIrPassDimensionalMatrixKernelPrinter = (1 << 5), kNewtonIrPassDimensionalMatrixConvertToList = (1 << 6), - kNewtonIrPassDimensionalMatrixAnnotationByBody = (1 << 7), + kNewtonIrPassDimensionalMatrixAnnotationByBody = (1 << 7), kNewtonIrPassDimensionalMatrixKernelPrinterFromBody = (1 << 8), KNewtonIrPassDimensionalMatrixConstantPi = (1 << 9), kNewtonIrPassInvariantSignalAnnotation = (1 << 10), - kNewtonIrPassPiGroupsSignalAnnotation = (1 << 11), - kNewtonIrPassLLVMIRDimensionCheck = (1 << 12), - kNewtonIrPassSensorsDisable = (1 << 13), - kNewtonIrPassLLVMIRLivenessAnalysis = (1 << 14), - kNewtonirPassLLVMIROptimizeByRange = (1 << 15), - kNewtonirPassLLVMIRAutoQuantization = (1 << 16), - kNewtonirPassLLVMIREnableOverload = (1 << 17), - kNewtonirPassLLVMIREnableBuiltinAssume = (1 << 18), + kNewtonIrPassPiGroupsSignalAnnotation = (1 << 11), + kNewtonIrPassLLVMIRDimensionCheck = (1 << 12), + kNewtonIrPassSensorsDisable = (1 << 13), + kNewtonIrPassLLVMIRLivenessAnalysis = (1 << 14), + kNewtonirPassLLVMIROptimizeByRange = (1 << 15), + kNewtonirPassLLVMIRAutoQuantization = (1 << 16), + kNewtonirPassLLVMIREnableOverload = (1 << 17), + kNewtonirPassLLVMIREnableBuiltinAssume = (1 << 18), + kNewtonirPassLLVMIRQuantDeciderEnabled = (1 << 19), /* * Code depends on this bringing up the rear. */ kNewtonIrPassMax, } NewtonIrPasses; - - -typedef enum -{ +typedef enum { /* * Code depends on this bringing up the rear. */ kNoisyIrPassMax, } NoisyIrPasses; - - -typedef enum -{ - kNewtonIrBackendDot = (1 << 0), - kNewtonIrBackendProtobuf = (1 << 1), - kNewtonIrBackendSmt = (1 << 2), - kNewtonIrBackendC = (1 << 3), - kNewtonIrBackendRTL = (1 << 4), - kNewtonIrBackendTargetParam = (1 << 5), +typedef enum { + kNewtonIrBackendDot = (1 << 0), + kNewtonIrBackendProtobuf = (1 << 1), + kNewtonIrBackendSmt = (1 << 2), + kNewtonIrBackendC = (1 << 3), + kNewtonIrBackendRTL = (1 << 4), + kNewtonIrBackendTargetParam = (1 << 5), /* * The LaTeX backend isn't a true backend per se, but rather * the flag enables dumping the LaTeX / KaTeX when the kernel * dumping is happening. */ - kNewtonIrBackendLatex = (1 << 6), - kNewtonIrBackendEstimatorSynthesis = (1 << 7), - kNewtonIrBackendIpsa = (1 << 8), + kNewtonIrBackendLatex = (1 << 6), + kNewtonIrBackendEstimatorSynthesis = (1 << 7), + kNewtonIrBackendIpsa = (1 << 8), - kNewtonIrBackendSignalTypedefHeader = (1 << 9), + kNewtonIrBackendSignalTypedefHeader = (1 << 9), /* * Code depends on this bringing up the rear. */ kNewtonIrBackendMax, } NewtonIrBackends; - -typedef enum -{ - kNoisyIrBackendDot = (1 << 0), - kNoisyIrBackendProtobuf = (1 << 1), +typedef enum { + kNoisyIrBackendDot = (1 << 0), + kNoisyIrBackendProtobuf = (1 << 1), /* * Code depends on this bringing up the rear. @@ -778,24 +753,20 @@ typedef enum kNoisyIrBackendMax, } NoisyIrBackends; -typedef enum -{ - kCommonDotDetailLevelNoText = (1 << 0), - kCommonDotDetailLevelNoNilNodes = (1 << 1), - +typedef enum { + kCommonDotDetailLevelNoText = (1 << 0), + kCommonDotDetailLevelNoNilNodes = (1 << 1), + /* * Code depends on this bringing up the rear. */ kCommonDotDetailLevelMax, } DetailLevel; - - -typedef enum -{ - kCommonIrNodeColorDotBackendColoring = (1 << 0), - kCommonIrNodeColorProtobufBackendColoring = (1 << 1), - kCommonIrNodeColorTreeTransformedColoring = (1 << 2), +typedef enum { + kCommonIrNodeColorDotBackendColoring = (1 << 0), + kCommonIrNodeColorProtobufBackendColoring = (1 << 1), + kCommonIrNodeColorTreeTransformedColoring = (1 << 2), /* * Code depends on this bringing up the rear. @@ -803,22 +774,19 @@ typedef enum kCommonIrNodeColor, } IrNodeColor; - - -typedef enum -{ - kCommonMaxBufferLength = 65536, - kCommonChunkBufferLength = 8192, - kCommonMaxErrorTokenCount = 32, - kCommonStreamchkWidth = 32, - kCommonMaxPrintBufferLength = 8192, - kCommonMaxTokenCharacters = 32, - kCommonMaxFilenameLength = 128, - kCommonTimestampTimelineLength = 4000000 /* Set to, e.g., 4000000 if we want to capture very long traces for debug; set to 1 otherwise */, - kCommonCgiRandomDigits = 10, - kCommonRlimitCpuSeconds = 5*60, /* 5 mins */ - kCommonRlimitRssBytes = 2*1024*1024*1024UL, /* 2GB */ - kCommonProgressTimerSeconds = 5, +typedef enum { + kCommonMaxBufferLength = 65536, + kCommonChunkBufferLength = 8192, + kCommonMaxErrorTokenCount = 32, + kCommonStreamchkWidth = 32, + kCommonMaxPrintBufferLength = 8192, + kCommonMaxTokenCharacters = 32, + kCommonMaxFilenameLength = 128, + kCommonTimestampTimelineLength = 4000000 /* Set to, e.g., 4000000 if we want to capture very long traces for debug; set to 1 otherwise */, + kCommonCgiRandomDigits = 10, + kCommonRlimitCpuSeconds = 5 * 60, /* 5 mins */ + kCommonRlimitRssBytes = 2 * 1024 * 1024 * 1024UL, /* 2GB */ + kCommonProgressTimerSeconds = 5, /* * Code depends on this bringing up the rear. @@ -826,14 +794,11 @@ typedef enum kCommonConstantMax, } Constant; - - -typedef enum -{ - kCommonModeDefault = (0 << 0), - kCommonModeCallTracing = (1 << 0), - kCommonModeCallStatistics = (1 << 1), - kCommonModeCGI = (1 << 2), +typedef enum { + kCommonModeDefault = (0 << 0), + kCommonModeCallTracing = (1 << 0), + kCommonModeCallStatistics = (1 << 1), + kCommonModeCGI = (1 << 2), /* * Code depends on this bringing up the rear. @@ -841,11 +806,8 @@ typedef enum kCommonModeMax } CommonMode; - - -typedef enum -{ - kCommonPostFileWriteActionRenderDot = (1 << 0), +typedef enum { + kCommonPostFileWriteActionRenderDot = (1 << 0), /* * Code depends on this bringing up the rear. @@ -853,142 +815,130 @@ typedef enum kCommonPostFileWriteActionMax, } PostFileWriteAction; -typedef enum -{ - noisyBasicTypeInit, - noisyBasicTypeBool, - noisyBasicTypeInt4, - noisyBasicTypeInt8, - noisyBasicTypeInt16, - noisyBasicTypeInt32, - noisyBasicTypeInt64, - noisyBasicTypeInt128, - noisyBasicTypeNat4, - noisyBasicTypeNat8, - noisyBasicTypeNat16, - noisyBasicTypeNat32, - noisyBasicTypeNat64, - noisyBasicTypeNat128, - noisyBasicTypeIntegerConstType, - noisyBasicTypeFloat16, - noisyBasicTypeFloat32, - noisyBasicTypeFloat64, - noisyBasicTypeFloat128, - noisyBasicTypeRealConstType, - noisyBasicTypeArithType, - noisyBasicTypeString, - noisyBasicTypeArrayType, - noisyBasicTypeNilType, +typedef enum { + noisyBasicTypeInit, + noisyBasicTypeBool, + noisyBasicTypeInt4, + noisyBasicTypeInt8, + noisyBasicTypeInt16, + noisyBasicTypeInt32, + noisyBasicTypeInt64, + noisyBasicTypeInt128, + noisyBasicTypeNat4, + noisyBasicTypeNat8, + noisyBasicTypeNat16, + noisyBasicTypeNat32, + noisyBasicTypeNat64, + noisyBasicTypeNat128, + noisyBasicTypeIntegerConstType, + noisyBasicTypeFloat16, + noisyBasicTypeFloat32, + noisyBasicTypeFloat64, + noisyBasicTypeFloat128, + noisyBasicTypeRealConstType, + noisyBasicTypeArithType, + noisyBasicTypeString, + noisyBasicTypeArrayType, + noisyBasicTypeNilType, noisyBasicTypeNamegenType, - noisyBasicTypeErrorType + noisyBasicTypeErrorType } NoisyBasicType; - - -typedef struct Scope Scope; -typedef struct Symbol Symbol; -typedef struct Token Token; -typedef struct IrNode IrNode; -typedef struct SourceInfo SourceInfo; -typedef struct Dimension Dimension; -typedef struct Physics Physics; -typedef struct IntegralList IntegralList; -typedef struct Invariant Invariant; -typedef struct Signal Signal; -typedef struct Sensor Sensor; -typedef struct Modality Modality; - - -typedef struct NoisyType NoisyType; - -enum -{ +typedef struct Scope Scope; +typedef struct Symbol Symbol; +typedef struct Token Token; +typedef struct IrNode IrNode; +typedef struct SourceInfo SourceInfo; +typedef struct Dimension Dimension; +typedef struct Physics Physics; +typedef struct IntegralList IntegralList; +typedef struct Invariant Invariant; +typedef struct Signal Signal; +typedef struct Sensor Sensor; +typedef struct Modality Modality; + +typedef struct NoisyType NoisyType; + +enum { kNoisyStaticArrayMaxNumberOfDimensions = 128 }; -struct NoisyType -{ - NoisyBasicType basicType; - int dimensions; - NoisyBasicType arrayType; - Symbol * functionDefinition; - int sizeOfDimension[kNoisyStaticArrayMaxNumberOfDimensions]; +struct NoisyType { + NoisyBasicType basicType; + int dimensions; + NoisyBasicType arrayType; + Symbol * functionDefinition; + int sizeOfDimension[kNoisyStaticArrayMaxNumberOfDimensions]; }; +struct Dimension { + char * name; + char * abbreviation; + double exponent; // Default value is 1 if exists + Scope * scope; + SourceInfo * sourceInfo; + int primeNumber; - - -struct Dimension -{ - char * name; - char * abbreviation; - double exponent; // Default value is 1 if exists - Scope * scope; - SourceInfo * sourceInfo; - int primeNumber; - - Dimension * next; + Dimension * next; }; -struct Invariant -{ - char * identifier; // Name of the physics quantity, of type Tidentifier - Scope * scope; - SourceInfo * sourceInfo; - IrNode * parameterList; // This is just bunch of IrNode's in Xseq - uint64_t id; - IrNode * constraints; - double * dimensionalMatrix; // Dimensional matrix - int dimensionalMatrixRowCount; // Number of dimensional matrix rows - int dimensionalMatrixColumnCount; // Number of dimensional matrix columns - char ** dimensionalMatrixRowLabels; // Labels of dimensional matrix rows - char ** dimensionalMatrixColumnLabels; // Labels of dimensional matrix columns - double *** nullSpace; // Initial null space and parameter used in kernel printer - double *** nullSpaceWithoutDuplicates; // Duplicate kernels are taken out - double *** nullSpaceRowReordered; // Reorders the rows of the kernels lexicographically - double *** nullSpaceCanonicallyReordered; // Canonically reordered - char *** canonicallyReorderedLabels; // Debugging use - int kernelColumnCount; - int numberOfUniqueKernels; // Saves the unique kernel count - int numberOfTotalKernels; // Saves the total kernels before canonicalisation - int * permutedIndexArrayPointer; // Saves the permutation indeces - int ** numberOfConstPiArray; // Saves the number of constant Pi in each kernel - - Invariant * next; +struct Invariant { + char * identifier; // Name of the physics quantity, of type Tidentifier + Scope * scope; + SourceInfo * sourceInfo; + IrNode * parameterList; // This is just bunch of IrNode's in Xseq + uint64_t id; + IrNode * constraints; + double * dimensionalMatrix; // Dimensional matrix + int dimensionalMatrixRowCount; // Number of dimensional matrix rows + int dimensionalMatrixColumnCount; // Number of dimensional matrix columns + char ** dimensionalMatrixRowLabels; // Labels of dimensional matrix rows + char ** dimensionalMatrixColumnLabels; // Labels of dimensional matrix columns + double *** nullSpace; // Initial null space and parameter used in kernel printer + double *** nullSpaceWithoutDuplicates; // Duplicate kernels are taken out + double *** nullSpaceRowReordered; // Reorders the rows of the kernels lexicographically + double *** nullSpaceCanonicallyReordered; // Canonically reordered + char *** canonicallyReorderedLabels; // Debugging use + int kernelColumnCount; + int numberOfUniqueKernels; // Saves the unique kernel count + int numberOfTotalKernels; // Saves the total kernels before canonicalisation + int * permutedIndexArrayPointer; // Saves the permutation indeces + int ** numberOfConstPiArray; // Saves the number of constant Pi in each kernel + + Invariant * next; }; struct Signal { - IrNode * baseNode; // The baseSignalDefinition IrNode. - char * identifier; // The signal identifier. - char * invariantExpressionIdentifier; //Identifier used in invariant expressions. - int axis; // The axis of the multi axis signal that the signal corresponds to. Default value is zero. - char * sensorIdentifier; // Identifier of the sensor associated to a signal. - int physicalGroupNumber; // Conveys information about the physical origin of the signal. (e.g. The I2C bus number of a sensor connected to Ipsa). - int dimensionIndex; // Conveys information about the dimension of the signal. Currently used for storing the dimension index for Ipsa. - Signal * relatedSignalList; // List of signals that should be co-sampled with this signal. - Signal * relatedSignalListNext; // Move to the next element of the relatedSignalList. - Signal * relatedSignalListPrev; // Move to the previous element of the relatedSignalList. + IrNode * baseNode; // The baseSignalDefinition IrNode. + char * identifier; // The signal identifier. + char * invariantExpressionIdentifier; // Identifier used in invariant expressions. + int axis; // The axis of the multi axis signal that the signal corresponds to. Default value is zero. + char * sensorIdentifier; // Identifier of the sensor associated to a signal. + int physicalGroupNumber; // Conveys information about the physical origin of the signal. (e.g. The I2C bus number of a sensor connected to Ipsa). + int dimensionIndex; // Conveys information about the dimension of the signal. Currently used for storing the dimension index for Ipsa. + Signal * relatedSignalList; // List of signals that should be co-sampled with this signal. + Signal * relatedSignalListNext; // Move to the next element of the relatedSignalList. + Signal * relatedSignalListPrev; // Move to the previous element of the relatedSignalList. }; -struct Physics -{ - char * identifier; // Name of the physics quantity. of type Tidentifier - uint64_t id; - int subindex; // Index for further identification. e.g.) acceleration along x, y, z axes - Scope * scope; - Scope * uncertaintyScope; - SourceInfo * sourceInfo; - bool isVector; - Physics * vectorCounterpart; // Non-NULL if a scalar AND counterpart defined in vectorScalarPairScope - Physics * scalarCounterpart; // Non-NULl if a vector AND counterpart defined in vectorScalarPairScope - double value; // For constants like Pi or gravitational acceleration - bool isConstant; - Dimension * dimensions; - char * dimensionAlias; - char * dimensionAliasAbbreviation; - Physics * definition; - - Physics * next; +struct Physics { + char * identifier; // Name of the physics quantity. of type Tidentifier + uint64_t id; + int subindex; // Index for further identification. e.g.) acceleration along x, y, z axes + Scope * scope; + Scope * uncertaintyScope; + SourceInfo * sourceInfo; + bool isVector; + Physics * vectorCounterpart; // Non-NULL if a scalar AND counterpart defined in vectorScalarPairScope + Physics * scalarCounterpart; // Non-NULl if a vector AND counterpart defined in vectorScalarPairScope + double value; // For constants like Pi or gravitational acceleration + bool isConstant; + Dimension * dimensions; + char * dimensionAlias; + char * dimensionAliasAbbreviation; + Physics * definition; + + Physics * next; }; typedef enum { @@ -1001,185 +951,175 @@ typedef enum { } SensorInterfaceType; struct Modality { - char * identifier; /* Modality name, e.g, "bmx055xAcceleration" */ - Signal * signal; /* Signal type to follow */ - Physics * _physics; /* Temporary field */ - double rangeLowerBound; - double rangeUpperBound; + char * identifier; /* Modality name, e.g, "bmx055xAcceleration" */ + Signal * signal; /* Signal type to follow */ + Physics * _physics; /* Temporary field */ + double rangeLowerBound; + double rangeUpperBound; - int precisionBits; - double precisionCost; + int precisionBits; + double precisionCost; - double accuracy; - double accuracyCost; + double accuracy; + double accuracyCost; // Signal * accuracySignal; - double resolution; - - SensorInterfaceType interfaceType; /* WiP */ + double resolution; + + SensorInterfaceType interfaceType; /* WiP */ /* Missing register address for modality */ - uint64_t registerAddress; + uint64_t registerAddress; - Modality * next; + Modality * next; // Modality * prev; }; struct Sensor { - IrNode * baseNode; /* Pointer to AST node of definition */ - char * identifier; /* Definition identifier */ - Modality * modalityList; /* List of sensor modalities */ - uint16_t erasureToken; + IrNode * baseNode; /* Pointer to AST node of definition */ + char * identifier; /* Definition identifier */ + Modality * modalityList; /* List of sensor modalities */ + uint16_t erasureToken; - Sensor * next; + Sensor * next; // Sensor * prev; }; -struct IntegralList -{ - Physics * head; - IntegralList * next; +struct IntegralList { + Physics * head; + IntegralList * next; }; -struct IrNode -{ - IrNodeType type; +struct IrNode { + IrNodeType type; /* * Syntactic (AST) information. */ - char * tokenString; - Token * token; - SourceInfo * sourceInfo; - IrNode * irParent; - IrNode * irLeftChild; - IrNode * irRightChild; + char * tokenString; + Token * token; + SourceInfo * sourceInfo; + IrNode * irParent; + IrNode * irLeftChild; + IrNode * irRightChild; - Symbol * symbol; + Symbol * symbol; /* * Used for evaluating dimensions in expressions */ - Physics * physics; - NoisyType noisyType; + Physics * physics; + NoisyType noisyType; /* * Used for connecting invariant parameters to signals. */ - Signal * signal; + Signal * signal; /* - * Used for type checking invariant parameters during invariant call. - */ - Invariant * invariant; - + * Used for type checking invariant parameters during invariant call. + */ + Invariant * invariant; + /* * Only if this node belongs to a ParseNumericExpression subtree */ - double value; - int integerValue; + double value; + int integerValue; - int subindexStart; - int subindexEnd; + int subindexStart; + int subindexEnd; /* * A parameter tuple of length n has ordering from zero to n - 1 */ - int parameterNumber; + int parameterNumber; /* * Used for coloring the IR tree, e.g., during Graphviz/dot generation */ - IrNodeColor nodeColor; + IrNodeColor nodeColor; /* Used to keep track of whether the node was visited or not */ - bool isVisited; + bool isVisited; }; - -struct SourceInfo -{ +struct SourceInfo { /* * Not yet used; for when we implement includes, this will be * the 'genealogy' of includes leading to this token. */ - char ** genealogy; - - char * fileName; - uint64_t lineNumber; - uint64_t columnNumber; - uint64_t length; -}; - + char ** genealogy; -struct Token -{ - IrNodeType type; - char * identifier; - int64_t integerConst; - double realConst; - char * stringConst; - SourceInfo * sourceInfo; - - Token * prev; - Token * next; + char * fileName; + uint64_t lineNumber; + uint64_t columnNumber; + uint64_t length; }; +struct Token { + IrNodeType type; + char * identifier; + int64_t integerConst; + double realConst; + char * stringConst; + SourceInfo * sourceInfo; + Token * prev; + Token * next; +}; -struct Scope -{ +struct Scope { /* * For named scopes (at the moment, only Progtypes) */ - char * identifier; + char * identifier; - int currentSubindex; + int currentSubindex; /* * Hierarchy. The firstChild is used to access its siblings via firstChild->next */ - Scope * parent; - Scope * firstChild; + Scope * parent; + Scope * firstChild; /* * Symbols in this scope. The list of symbols is accesed via firstSymbol->next */ - Symbol * firstSymbol; + Symbol * firstSymbol; /* * Each invariant scope will have its own list of parameters */ - IrNode * scopeParameterList; // This is just bunch of IrNode's in Xseq + IrNode * scopeParameterList; // This is just bunch of IrNode's in Xseq /* * For the config file, we only have one global scope that keeps track of all * dimensions ad physics quantities. */ - Dimension * firstDimension; - Physics * firstPhysics; + Dimension * firstDimension; + Physics * firstPhysics; /* * Where in source scope begins and ends */ - SourceInfo * begin; - SourceInfo * end; + SourceInfo * begin; + SourceInfo * end; /* * For chaining together scopes (currently only used for Progtype * scopes and for chaining together children). */ - Scope * next; - Scope * prev; + Scope * next; + Scope * prev; /* * Used for coloring the IR tree, e.g., during Graphviz/dot generation */ - IrNodeColor nodeColor; + IrNodeColor nodeColor; }; -typedef enum -{ +typedef enum { kNoisySymbolTypeTypeError, kNoisySymbolTypeProgtype, kNoisySymbolTypeConstantDeclaration, @@ -1199,243 +1139,237 @@ typedef enum kNoisySymbolTypeMax, } NoisySymbolType; - -struct Symbol -{ - char * identifier; +struct Symbol { + char * identifier; /* * This field is duplicated in the AST node, since only * identifiers get into the symbol table: */ - SourceInfo * sourceInfo; + SourceInfo * sourceInfo; /* * Declaration, type definition, use, etc. (kNoisySymbolTypeXXX) */ - NoisySymbolType symbolType; - NoisyType noisyType; + NoisySymbolType symbolType; + NoisyType noisyType; /* - * The IrNode where function definition starts. Used for loading functions. - */ - IrNode * functionDefinition; + * The IrNode where function definition starts. Used for loading functions. + */ + IrNode * functionDefinition; /* - * Number of parameters. Used only for functions and Noisy - * code generation. - */ - int parameterNum; - bool isTypeComplete; - bool isSensorChannel; - bool isChannel; - int paramPosition; - LLVMValueRef llvmPointer; - LLVMValueRef inputChanAddress; + * Number of parameters. Used only for functions and Noisy + * code generation. + */ + int parameterNum; + bool isTypeComplete; + bool isSensorChannel; + bool isChannel; + int paramPosition; + LLVMValueRef llvmPointer; + LLVMValueRef inputChanAddress; /* * Scope within which sym appears */ - Scope * scope; + Scope * scope; /* * If an identifier use, definition's Sym, if any */ - Symbol * definition; + Symbol * definition; /* * Subtree in AST that represents typeexpr */ - IrNode * typeTree; + IrNode * typeTree; /* * If an I_CONST, its value. */ - int intConst; - double realConst; - char * stringConst; - + int intConst; + double realConst; + char * stringConst; + /* * For chaining together sibling symbols in the same scope */ - Symbol * next; - Symbol * prev; + Symbol * next; + Symbol * prev; }; - typedef struct { /* * Timestamps to track lifecycle */ - uint64_t initializationTimestamp; - TimeStamp * timestamps; - uint64_t timestampCount; - uint64_t timestampSlots; + uint64_t initializationTimestamp; + TimeStamp * timestamps; + uint64_t timestampCount; + uint64_t timestampSlots; /* * Track aggregate time spent in all routines, by incrementing * timeAggregates[timeAggregatesLastKey] by (now - timeAggregatesLastTimestamp) */ - uint64_t * timeAggregates; - TimeStampKey timeAggregatesLastKey; - uint64_t timeAggregatesLastTimestamp; - uint64_t timeAggregateTotal; - uint64_t * callAggregates; - uint64_t callAggregateTotal; + uint64_t * timeAggregates; + TimeStampKey timeAggregatesLastKey; + uint64_t timeAggregatesLastTimestamp; + uint64_t timeAggregateTotal; + uint64_t * callAggregates; + uint64_t callAggregateTotal; /* * Used to get error status from FlexLib routines */ - FlexErrState * Fe; + FlexErrState * Fe; /* * State for the portable/monitoring allocator (FlexM) */ - FlexMstate * Fm; + FlexMstate * Fm; /* * State for portable/buffering print routines (FlexP) * We have one buffer for informational messages, another * for errors and warnings. */ - FlexPrintBuf * Fperr; - FlexPrintBuf * Fpinfo; - FlexPrintBuf * Fpsmt2; - FlexPrintBuf * Fpc; - FlexPrintBuf * Fph; - FlexPrintBuf * Fpg; - FlexPrintBuf * Fprtl; - FlexPrintBuf * Fpmathjax; - FlexPrintBuf * Fpipsa; + FlexPrintBuf * Fperr; + FlexPrintBuf * Fpinfo; + FlexPrintBuf * Fpsmt2; + FlexPrintBuf * Fpc; + FlexPrintBuf * Fph; + FlexPrintBuf * Fpg; + FlexPrintBuf * Fprtl; + FlexPrintBuf * Fpmathjax; + FlexPrintBuf * Fpipsa; /* * The output file of the last render. TODO: Not very happy * with this solution as it stands... (inherited from Sal/svm) */ - char * lastDotRender; - + char * lastDotRender; /* * This is the name of the module that the file we're parsing implements */ - char * moduleOfFile; + char * moduleOfFile; /* * This is the target parameter for the targetParam backend */ - char * targetParam; + char * targetParam; /* * This is the invariant where the target parameter appears only once (for the targetParam backend) */ - int targetParamLocatedKernel; + int targetParamLocatedKernel; /* * This is data type that a signal will be typedef'ed to * in the signal typedef generation backend */ - char * signalTypedefDatatype; + char * signalTypedefDatatype; /* * We keep a global handle on the list of module scopes, for easy reference. * In this use case, the node->identifier holds the scopes string name, and we * chain then using their prev/next fields. */ - Scope * moduleScopes; + Scope * moduleScopes; /* * Lexer state */ - FILE * filePointer; - char * fileName; - char * lineBuffer; - uint64_t columnNumber; - uint64_t lineNumber; - uint64_t lineLength; - char * currentToken; - uint64_t currentTokenLength; - Token * tokenList; - Token * lastToken; - Symbol * currentFunction; + FILE * filePointer; + char * fileName; + char * lineBuffer; + uint64_t columnNumber; + uint64_t lineNumber; + uint64_t lineLength; + char * currentToken; + uint64_t currentTokenLength; + Token * tokenList; + Token * lastToken; + Symbol * currentFunction; /* * The root of the IR tree, and top scope */ - IrNode * noisyIrRoot; - IrNode * newtonIrRoot; - Scope * noisyIrTopScope; - Scope * newtonIrTopScope; + IrNode * noisyIrRoot; + IrNode * newtonIrRoot; + Scope * noisyIrTopScope; + Scope * newtonIrTopScope; /* * Output file name when emitting bytecode/protobuf */ - char * outputFilePath; - char * outputSmtFilePath; - char * outputCFilePath; - char * outputSignalTypedefHeaderFilePath; - char * outputRTLFilePath; - char * outputEstimatorSynthesisFilePath; - char * outputIpsaFilePath; - + char * outputFilePath; + char * outputSmtFilePath; + char * outputCFilePath; + char * outputSignalTypedefHeaderFilePath; + char * outputRTLFilePath; + char * outputEstimatorSynthesisFilePath; + char * outputIpsaFilePath; + /* * Invariant identifiers specified for State Estimator Synthesis */ - char * estimatorProcessModel; - char * estimatorMeasurementModel; - bool autodiff; - + char * estimatorProcessModel; + char * estimatorMeasurementModel; + bool autodiff; + /* * LLVM IR input file */ - char * llvmIR; - + char * llvmIR; + /* * Variables for storing lists of identifiers attached * to a physical group number. */ - char * physicalGroup1; - char * physicalGroup2; + char * physicalGroup1; + char * physicalGroup2; /* * Variables to keep track of the kernel number and pi number * specified by the user for Pi Groups Signal Annotation. */ - int kernelNumber; - int piNumber; - bool enableKernelSelect; - bool enablePiSelect; + int kernelNumber; + int piNumber; + bool enableKernelSelect; + bool enablePiSelect; - CommonMode mode; - uint64_t verbosityLevel; - uint64_t dotDetailLevel; - uint64_t optimizationLevel; - uint64_t irPasses; - uint64_t irBackends; + CommonMode mode; + uint64_t verbosityLevel; + uint64_t dotDetailLevel; + uint64_t optimizationLevel; + uint64_t irPasses; + uint64_t irBackends; - - jmp_buf jmpbuf; - bool jmpbufIsValid; + jmp_buf jmpbuf; + bool jmpbufIsValid; /* * Global index of which prime numbers we have used for the dimension id's */ - int primeNumbersIndex; - Invariant * invariantList; - Sensor * sensorList; + int primeNumbersIndex; + Invariant * invariantList; + Sensor * sensorList; } State; - -void fatal(State * C, const char * msg) __attribute__((noreturn)); -void error(State * C, const char * msg); -void timestampsInit(State * C); -void timeStampDumpTimeline(State * C); -void timeStampDumpResidencies(State * C); -State * init(CommonMode mode); -void dealloc(State * C); -void runPasses(State * C); -uint64_t checkRss(State * C); -void consolePrintBuffers(State * C); -void printToFile(State * C, const char * msg, const char * fileName, PostFileWriteAction action); -void renderDotInFile(State * C, char * pathName, char * randomizedFileName); -void checkCgiCompletion(State * C, const char * pathName, const char * renderExtension); +void fatal(State * C, const char * msg) __attribute__((noreturn)); +void error(State * C, const char * msg); +void timestampsInit(State * C); +void timeStampDumpTimeline(State * C); +void timeStampDumpResidencies(State * C); +State * init(CommonMode mode); +void dealloc(State * C); +void runPasses(State * C); +uint64_t checkRss(State * C); +void consolePrintBuffers(State * C); +void printToFile(State * C, const char * msg, const char * fileName, PostFileWriteAction action); +void renderDotInFile(State * C, char * pathName, char * randomizedFileName); +void checkCgiCompletion(State * C, const char * pathName, const char * renderExtension); diff --git a/src/newton/Makefile b/src/newton/Makefile index a4f119402..3b2864964 100644 --- a/src/newton/Makefile +++ b/src/newton/Makefile @@ -121,6 +121,7 @@ SOURCES =\ newton-irPass-LLVMIR-quantization.cpp\ newton-irPass-LLVMIR-memoryAlignment.cpp\ newton-irPass-LLVMIR-emitAssume.cpp\ + newton-irPass-LLVMIR-quantDecider.cpp\ # @@ -157,6 +158,7 @@ OBJS =\ newton-irPass-LLVMIR-shrinkTypeByRange.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-quantization.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-emitAssume.$(OBJECTEXTENSION)\ + newton-irPass-LLVMIR-quantDecider.$(OBJECTEXTENSION)\ newton-irPass-invariantSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-piGroupsSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-ipsaBackend.$(OBJECTEXTENSION)\ @@ -206,6 +208,7 @@ CGIOBJS =\ newton-irPass-LLVMIR-shrinkTypeByRange.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-quantization.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-emitAssume.$(OBJECTEXTENSION)\ + newton-irPass-LLVMIR-quantDecider.$(OBJECTEXTENSION)\ newton-irPass-invariantSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-piGroupsSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-estimatorSynthesisBackend/$(OBJECTEXTENSION)\ @@ -254,6 +257,7 @@ LIBNEWTONOBJS =\ newton-irPass-LLVMIR-shrinkTypeByRange.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-quantization.$(OBJECTEXTENSION)\ newton-irPass-LLVMIR-emitAssume.$(OBJECTEXTENSION)\ + newton-irPass-LLVMIR-quantDecider.$(OBJECTEXTENSION)\ newton-irPass-invariantSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-piGroupsSignalAnnotation.$(OBJECTEXTENSION)\ newton-irPass-ipsaBackend.$(OBJECTEXTENSION)\ @@ -293,6 +297,7 @@ HEADERS =\ newton-irPass-LLVMIR-dimension-check.h\ newton-irPass-LLVMIR-livenessAnalysis.h\ newton-irPass-LLVMIR-optimizeByRange.h\ + newton-irPass-LLVMIR-quantDecider.h\ newton-irPass-invariantSignalAnnotation.h\ newton-irPass-piGroupsSignalAnnotation.h\ newton-irPass-ipsaBackend.h\ @@ -403,6 +408,10 @@ newton-irPass-LLVMIR-emitAssume.$(OBJECTEXTENSION): newton-irPass-LLVMIR-emitAss $(CXX) $(FLEXFLAGS) $(INCDIRS) $(CXXFLAGS) $(WFLAGS) $(OPTFLAGS) $(LINTFLAGS) $< $(CXX) $(FLEXFLAGS) $(INCDIRS) $(CXXFLAGS) $(WFLAGS) $(OPTFLAGS) $< +newton-irPass-LLVMIR-quantDecider.$(OBJECTEXTENSION): newton-irPass-LLVMIR-quantDecider.cpp maxprec.cfg + $(CXX) $(FLEXFLAGS) $(INCDIRS) $(CXXFLAGS) $(WFLAGS) $(OPTFLAGS) $(LINTFLAGS) $< + $(CXX) $(FLEXFLAGS) $(INCDIRS) $(CXXFLAGS) $(WFLAGS) $(OPTFLAGS) $< + .PHONY: rebuild-quant-opt @@ -479,4 +488,4 @@ test: clean: rm -rf version.c $(OBJS) $(CGIOBJS) $(LIBNEWTONOBJS) $(CGI_TARGET) $(CGI_TARGET).dSYM $(TARGET) $(TARGET).dSYM $(CGI_TARGET) $(CGI_TARGET).dsym lib$(LIBNEWTON)-$(OSTYPE)-$(NEWTON_L10N).a *.o *.plist - cd ../common && make clean \ No newline at end of file + cd ../common && make clean diff --git a/src/newton/main.c b/src/newton/main.c index 138deea5a..e33b982ac 100644 --- a/src/newton/main.c +++ b/src/newton/main.c @@ -60,16 +60,14 @@ #include "newton-symbolTable.h" #include "newton.h" - int -main(int argc, char *argv[]) +main(int argc, char * argv[]) { - int jumpParameter; - State * N; - + int jumpParameter; + State * N; N = init(kCommonModeDefault); - + if (N == NULL) { fatal(NULL, Emalloc); @@ -81,51 +79,51 @@ main(int argc, char *argv[]) while (1) { - char tmp; - char * ep = &tmp; - int optionIndex = 0, c; - static struct option options[] = - { - {"verbose", required_argument, 0, 'v'}, - {"help", no_argument, 0, 'h'}, - {"version", no_argument, 0, 'V'}, - {"dot", required_argument, 0, 'd'}, - {"smt", required_argument, 0, 'S'}, - {"bytecode", required_argument, 0, 'b'}, - {"trace", no_argument, 0, 't'}, - {"statistics", no_argument, 0, 's'}, - {"optimize", required_argument, 0, 'O'}, - {"dmatrixannote", no_argument, 0, 'm'}, - {"pigroups", no_argument, 0, 'p'}, - {"pigroupsfrombody", no_argument, 0, 'i'}, - {"kernelrowcanon", no_argument, 0, 'c'}, - {"pigroupsort", no_argument, 0, 'r'}, - {"pigroupdedup", no_argument, 0, 'e'}, - {"pikernelprinter", no_argument, 0, 'P'}, - {"pigrouptoast", no_argument, 0, 'a'}, - {"codegen", required_argument, 0, 'g'}, - {"latex", no_argument, 0, 'x'}, - {"RTLcodegen", required_argument, 0, 'l'}, - {"targetParam", required_argument, 0, 'T'}, - {"llvm-ir", required_argument, 0, 'I'}, - {"llvm-ir-liveness-check", no_argument, 0, 'L'}, - {"llvm-ir-enable-overload", no_argument, 0, 'o'}, - {"llvm-ir-enable-builtin-assume", no_argument, 0, 'A'}, - {"llvm-ir-auto-quantization", no_argument, 0, 'Q'}, - {"estimator-synthesis", required_argument, 0, 420}, - {"process", required_argument, 0, 421}, - {"measurement", required_argument, 0, 422}, - {"auto-diff", no_argument, 0, 423}, - {"ipsa", required_argument, 0, 489}, - {"kernelNumber", required_argument, 0, 494}, - {"piNumber", required_argument, 0, 495}, - {"physicalGroup1", required_argument, 0, 491}, - {"physicalGroup2", required_argument, 0, 492}, - {"generate-header", required_argument, 0, 493}, - {"signal-typedef-to", required_argument, 0, 496}, - {"no-sensors", required_argument, 0, 550}, - {0, 0, 0, 0} - }; + char tmp; + char * ep = &tmp; + int optionIndex = 0, c; + static struct option options[] = + { + {"verbose", required_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {"dot", required_argument, 0, 'd'}, + {"smt", required_argument, 0, 'S'}, + {"bytecode", required_argument, 0, 'b'}, + {"trace", no_argument, 0, 't'}, + {"statistics", no_argument, 0, 's'}, + {"optimize", required_argument, 0, 'O'}, + {"dmatrixannote", no_argument, 0, 'm'}, + {"pigroups", no_argument, 0, 'p'}, + {"pigroupsfrombody", no_argument, 0, 'i'}, + {"kernelrowcanon", no_argument, 0, 'c'}, + {"pigroupsort", no_argument, 0, 'r'}, + {"pigroupdedup", no_argument, 0, 'e'}, + {"pikernelprinter", no_argument, 0, 'P'}, + {"pigrouptoast", no_argument, 0, 'a'}, + {"codegen", required_argument, 0, 'g'}, + {"latex", no_argument, 0, 'x'}, + {"RTLcodegen", required_argument, 0, 'l'}, + {"targetParam", required_argument, 0, 'T'}, + {"llvm-ir", required_argument, 0, 'I'}, + {"llvm-ir-liveness-check", no_argument, 0, 'L'}, + {"llvm-ir-enable-overload", no_argument, 0, 'o'}, + {"llvm-ir-enable-builtin-assume", no_argument, 0, 'A'}, + {"llvm-ir-auto-quantization", no_argument, 0, 'Q'}, + {"llvm-ir-enable-quant-decider", no_argument, 0, 551}, + {"estimator-synthesis", required_argument, 0, 420}, + {"process", required_argument, 0, 421}, + {"measurement", required_argument, 0, 422}, + {"auto-diff", no_argument, 0, 423}, + {"ipsa", required_argument, 0, 489}, + {"kernelNumber", required_argument, 0, 494}, + {"piNumber", required_argument, 0, 495}, + {"physicalGroup1", required_argument, 0, 491}, + {"physicalGroup2", required_argument, 0, 492}, + {"generate-header", required_argument, 0, 493}, + {"signal-typedef-to", required_argument, 0, 496}, + {"no-sensors", required_argument, 0, 550}, + {0, 0, 0, 0}}; c = getopt_long(argc, argv, "v:hVd:S:b:stO:mpicl:rePapg:xT:L:", options, &optionIndex); @@ -292,7 +290,6 @@ main(int argc, char *argv[]) break; } - case 'c': { @@ -426,34 +423,34 @@ main(int argc, char *argv[]) break; } - case 'o': - { - N->irPasses |= kNewtonirPassLLVMIREnableOverload; - break; - } + case 'o': + { + N->irPasses |= kNewtonirPassLLVMIREnableOverload; + break; + } - case 'A': - { - N->irPasses |= kNewtonirPassLLVMIREnableBuiltinAssume; - break; - } + case 'A': + { + N->irPasses |= kNewtonirPassLLVMIREnableBuiltinAssume; + break; + } - case 'Q': - { - N->irPasses |= kNewtonirPassLLVMIRAutoQuantization; - break; - } + case 'Q': + { + N->irPasses |= kNewtonirPassLLVMIRAutoQuantization; + break; + } case 494: { - N->kernelNumber = atoi(optarg); + N->kernelNumber = atoi(optarg); N->enableKernelSelect = true; break; } case 495: { - N->piNumber = atoi(optarg); + N->piNumber = atoi(optarg); N->enablePiSelect = true; break; } @@ -500,6 +497,12 @@ main(int argc, char *argv[]) break; } + case 551: + { + N->irPasses |= kNewtonirPassLLVMIRQuantDeciderEnabled; + break; + } + case '?': { /* @@ -558,46 +561,45 @@ main(int argc, char *argv[]) return 0; } - - void -version(State * N) +version(State * N) { flexprint(N->Fe, N->Fm, N->Fperr, "\nNewton version %s.\n\n", kNewtonVersion); } - void -usage(State * N) +usage(State * N) { version(N); - flexprint(N->Fe, N->Fm, N->Fperr, "Usage: newton--%s\n" - " [ (--help, -h) \n" - " | (--version, --V) \n" - " | (--verbose , -v ) \n" - " | (--dot , -d ) \n" - " | (--smt , -S ) \n" - " | (--bytecode , -b ) \n" - " | (--optimize , -O ) \n" - " | (--dmatrixannote, -m) \n" - " | (--pigroups, -p) \n" - " | (--pigroupsfrombody, -i) \n" - " | (--kernelrowcanon, -c) \n" - " | (--pigroupsort, -r) \n" - " | (--pigroupdedup, -e) \n" - " | (--pikernelprinter, -P) \n" - " | (--pigrouptoast, -a) \n" - " | (--codegen , -g )\n" - " | (--RTLcodegen , -l )\n" - " | (--generate-header= \n" - " | (--signal-typedef-to= \n" - " | (--trace, -t) \n" - " | (--statistics, -s) \n" - " | (--latex, -x) \n" - " | (--estimator-synthesis=) \n" - " | (--process=) \n" - " | (--measurement=) \n" - " | (--auto-diff) ] \n" - " \n" - " \n\n", kNewtonL10N); + flexprint(N->Fe, N->Fm, N->Fperr, + "Usage: newton--%s\n" + " [ (--help, -h) \n" + " | (--version, --V) \n" + " | (--verbose , -v ) \n" + " | (--dot , -d ) \n" + " | (--smt , -S ) \n" + " | (--bytecode , -b ) \n" + " | (--optimize , -O ) \n" + " | (--dmatrixannote, -m) \n" + " | (--pigroups, -p) \n" + " | (--pigroupsfrombody, -i) \n" + " | (--kernelrowcanon, -c) \n" + " | (--pigroupsort, -r) \n" + " | (--pigroupdedup, -e) \n" + " | (--pikernelprinter, -P) \n" + " | (--pigrouptoast, -a) \n" + " | (--codegen , -g )\n" + " | (--RTLcodegen , -l )\n" + " | (--generate-header= \n" + " | (--signal-typedef-to= \n" + " | (--trace, -t) \n" + " | (--statistics, -s) \n" + " | (--latex, -x) \n" + " | (--estimator-synthesis=) \n" + " | (--process=) \n" + " | (--measurement=) \n" + " | (--auto-diff) ] \n" + " \n" + " \n\n", + kNewtonL10N); } diff --git a/src/newton/maxprec.cfg b/src/newton/maxprec.cfg new file mode 100644 index 000000000..b1bd38b62 --- /dev/null +++ b/src/newton/maxprec.cfg @@ -0,0 +1 @@ +13 diff --git a/src/newton/newton-irPass-LLVMIR-constantSubstitution.cpp b/src/newton/newton-irPass-LLVMIR-constantSubstitution.cpp index fcd1a6651..268eb9793 100644 --- a/src/newton/newton-irPass-LLVMIR-constantSubstitution.cpp +++ b/src/newton/newton-irPass-LLVMIR-constantSubstitution.cpp @@ -35,17 +35,25 @@ using namespace llvm; extern "C" { -static bool isFromQuantizedGlobal(Value *V) { - if (auto *loadInst = dyn_cast(V)) { - if (auto *gv = dyn_cast(loadInst->getPointerOperand())) { - if (gv->getName().contains("_quantized")) { +static bool +isFromQuantizedGlobal(Value * V) +{ + if (auto * loadInst = dyn_cast(V)) + { + if (auto * gv = dyn_cast(loadInst->getPointerOperand())) + { + if (gv->getName().contains("_quantized")) + { return true; } } } - if (auto *inst = dyn_cast(V)) { - for (auto &op : inst->operands()) { - if (isFromQuantizedGlobal(op)) { + if (auto * inst = dyn_cast(V)) + { + for (auto & op : inst->operands()) + { + if (isFromQuantizedGlobal(op)) + { return true; } } @@ -53,7 +61,6 @@ static bool isFromQuantizedGlobal(Value *V) { return false; } - /* * Steps of constantSubstitution: * 1. for each instruction (that is the case statement), get the range of current instruction from boundInfo @@ -133,17 +140,18 @@ constantSubstitution(State * N, BoundInfo * boundInfo, llvm::Function & llvmIrFu * store double 0.000000e+00, double* %12, align 8, !dbg !595 * ... * */ - if (isa(llvmIrInstruction) && isa(llvmIrInstruction->getOperand(0))) { - break; - } + if (isa(llvmIrInstruction) && isa(llvmIrInstruction->getOperand(0))) + { + break; + } - /* - * if it's a pointer, skip it - * */ - if (llvmIrInstruction->getType()->isPointerTy()) - { - break; - } + /* + * if it's a pointer, skip it + * */ + if (llvmIrInstruction->getType()->isPointerTy()) + { + break; + } auto lowerBound = vrIt->second.first; auto upperBound = vrIt->second.second; @@ -152,7 +160,8 @@ constantSubstitution(State * N, BoundInfo * boundInfo, llvm::Function & llvmIrFu * */ if (fabs(lowerBound - upperBound) < DBL_EPSILON) { - if (isFromQuantizedGlobal(llvmIrInstruction)) { + if (isFromQuantizedGlobal(llvmIrInstruction)) + { break; } @@ -183,7 +192,7 @@ constantSubstitution(State * N, BoundInfo * boundInfo, llvm::Function & llvmIrFu if (newConstant != nullptr) { llvmIrInstruction->replaceAllUsesWith(newConstant); - llvmIrInstruction->removeFromParent(); + llvmIrInstruction->eraseFromParent(); } } } @@ -196,7 +205,7 @@ constantSubstitution(State * N, BoundInfo * boundInfo, llvm::Function & llvmIrFu * store double 0.000000e+00, double 0.000000e+00, align 8 * */ if (isa(llvmIrStoreInstruction->getPointerOperand())) - llvmIrStoreInstruction->removeFromParent(); + llvmIrStoreInstruction->eraseFromParent(); } break; case Instruction::ICmp: diff --git a/src/newton/newton-irPass-LLVMIR-optimizeByRange.cpp b/src/newton/newton-irPass-LLVMIR-optimizeByRange.cpp index cb8edc9a4..4bf3c7371 100644 --- a/src/newton/newton-irPass-LLVMIR-optimizeByRange.cpp +++ b/src/newton/newton-irPass-LLVMIR-optimizeByRange.cpp @@ -41,11 +41,11 @@ All rights reserved. #include "newton-irPass-LLVMIR-constantSubstitution.h" #include "newton-irPass-LLVMIR-shrinkTypeByRange.h" #include "newton-irPass-LLVMIR-quantization.h" +#include "newton-irPass-LLVMIR-quantDecider.h" #include "newton-irPass-LLVMIR-optimizeByRange.h" #include "newton-irPass-LLVMIR-memoryAlignment.h" #include "newton-irPass-LLVMIR-emitAssume.h" - #endif /* __cplusplus */ #include @@ -73,6 +73,7 @@ All rights reserved. #include "llvm/Support/raw_ostream.h" #include "llvm/Support/FileSystem.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Verifier.h" #include "config.h" @@ -80,13 +81,65 @@ All rights reserved. #include #include - using namespace llvm; +using namespace llvm; // #define FRAC_BASE (1 << maxPrecisionBits) #define FRAC_BASE (1 << MAX_PRECISION_BITS) +namespace +{ +bool +isOverflowCheckTarget(const llvm::Value * V) +{ + if (!V) + return false; + + auto * I = llvm::dyn_cast(V); + if (!I) + return false; + + switch (I->getOpcode()) + { + case llvm::Instruction::FAdd: + case llvm::Instruction::FSub: + case llvm::Instruction::FMul: + case llvm::Instruction::FDiv: + case llvm::Instruction::FRem: + case llvm::Instruction::FNeg: + return true; + case llvm::Instruction::Call: + { + auto * CI = llvm::dyn_cast(I); + if (!CI) + return false; + + llvm::Function * callee = CI->getCalledFunction(); + if (!callee || !callee->hasName()) + return false; + + llvm::StringRef funcName = callee->getName(); + return funcName == "log" || funcName == "exp" || + funcName == "sqrt" || funcName == "log1p" || + funcName == "scalbn" || funcName == "sin" || + funcName == "cos" || + funcName.startswith("llvm.fabs") || + funcName.startswith("llvm.floor") || + funcName.startswith("llvm.ceil"); + } + default: + return false; + } +} +} // namespace + void -checkOverflow(State * N, BoundInfo * boundInfo, int FRAC_Q) +checkOverflow(State * N, BoundInfo * boundInfo, int FRAC_Q, llvm::Function * ownerFunction) { + if (!ownerFunction || ownerFunction->isDeclaration()) + return; + + if (!ownerFunction->hasFnAttribute("newton.dequantize")) + return; + int maxVal, minVal; if (BIT_WIDTH == 16) { @@ -104,10 +157,18 @@ checkOverflow(State * N, BoundInfo * boundInfo, int FRAC_Q) return; } + const double fracBase = std::ldexp(1.0, FRAC_Q); + for (const auto & entry : boundInfo->virtualRegisterRange) { - double scaledMin = entry.second.first * FRAC_BASE; - double scaledMax = entry.second.second * FRAC_BASE; + if (!isOverflowCheckTarget(entry.first)) + continue; + + if (!std::isfinite(entry.second.first) || !std::isfinite(entry.second.second)) + continue; + + double scaledMin = entry.second.first * fracBase; + double scaledMax = entry.second.second * fracBase; std::string instStr = "unknown"; if (Instruction * inst = dyn_cast(entry.first)) @@ -133,20 +194,20 @@ checkOverflow(State * N, BoundInfo * boundInfo, int FRAC_Q) } void -autoWhitelistFunctions(Module &Mod, std::set &whitelist) +autoWhitelistFunctions(Module & Mod, std::set & whitelist) { - for (Function &F : Mod) + for (Function & F : Mod) { - if (F.isDeclaration()) continue; + if (F.isDeclaration()) + continue; bool hasSensorParams = false; - for (auto &Arg : F.args()) + for (auto & Arg : F.args()) { if (Arg.hasName()) { std::string argName = Arg.getName().str(); - // 这里你可以加更丰富的匹配,比如查 typedef 类型名(需要调试信息) if (argName.find("bmx055") != std::string::npos) { hasSensorParams = true; @@ -163,514 +224,38 @@ autoWhitelistFunctions(Module &Mod, std::set &whitelist) } } - - -// #define IS_POINTER 1 -std::set whitelist = { - "MadgwickAHRSupdate", - "MahonyAHRSupdate", - "sensfusion6UpdateQImpl", - "matrixMul", - "pzero", - "qzero", - "pone", - "qone", - "__ieee754_exp"}; - - -void eraseUnusedConstant(Module &M) { - std::set quantizedBaseNames; - for (auto &GV : M.globals()) { - if (GV.getName().endswith("_quantized")) { - std::string baseName = GV.getName().str().substr(0, GV.getName().size() - 10); // remove "_quantized" - quantizedBaseNames.insert(baseName); - } - } - std::vector toDelete; - for (auto &GV : M.globals()) { - std::string name = GV.getName().str(); - - if (GV.use_empty() && quantizedBaseNames.count(name)) { - toDelete.push_back(&GV); - } - if (GV.use_empty() && name.size() > 10 && name.substr(name.size() - 10) == "_quantized") { - toDelete.push_back(&GV); - } - } - for (auto *GV : toDelete) { - GV->eraseFromParent(); - } -} - - - - -void -handleGlobalStore(StoreInst * storeInst, IRBuilder<> & Builder, int maxPrecisionBits) -{ - auto * pointerOperand = storeInst->getPointerOperand(); - - // Ensure the operation is on a global variable - if (auto * quantizedGlobalVar = dyn_cast(pointerOperand)) - { - llvm::errs() << "Processing quantized global variable: " << quantizedGlobalVar->getName() << "\n"; - - // Identify the corresponding original global variable (e.g., remove "_quantized" suffix) - std::string originalName = quantizedGlobalVar->getName().str(); - if (originalName.size() > 10 && originalName.compare(originalName.size() - 10, 10, "_quantized") == 0) - { - originalName = originalName.substr(0, originalName.size() - 10); - } - else - { - llvm::errs() << "Skipping: No matching original global for " << quantizedGlobalVar->getName() << "\n"; - return; - } - - // Find the original global variable - GlobalVariable * originalGlobalVar = quantizedGlobalVar->getParent()->getNamedGlobal(originalName); - if (!originalGlobalVar || !originalGlobalVar->getType()->getElementType()->isFloatingPointTy()) - { - llvm::errs() << "Skipping: Original global variable not found or not floating-point: " << originalName << "\n"; - return; - } - - llvm::errs() << "Found corresponding original global variable: " << originalGlobalVar->getName() << "\n"; - - // Check if the previous instruction is `trunc` - Instruction * prevInst = storeInst->getPrevNode(); - if (!prevInst || !isa(prevInst)) - { - llvm::errs() << "Skipping: Previous instruction is not trunc.\n"; - return; - } - - // Load the integer value from the quantized global variable - auto * loadInst = Builder.CreateLoad(quantizedGlobalVar->getType()->getPointerElementType(), quantizedGlobalVar); - - // Convert the integer value to a floating-point value - Value * convertedFloat = Builder.CreateSIToFP(loadInst, Type::getFloatTy(storeInst->getContext())); - - // Perform dequantization - Value * dequantizedValue = Builder.CreateFMul( - convertedFloat, ConstantFP::get(Type::getFloatTy(storeInst->getContext()), 1.0 / FRAC_BASE)); - - // Store the dequantized floating-point value back into the original global variable - Builder.CreateStore(dequantizedValue, originalGlobalVar); - - llvm::errs() << "Dequantized and stored value for original global variable: " << originalGlobalVar->getName() << "\n"; - } - else - { - llvm::errs() << "Pointer operand is not a global variable. Skipping.\n"; - } -} - -void -handlePointerStore(StoreInst * storeInst, IRBuilder<> & Builder, int maxPrecisionBits) -{ - auto * pointerOperand = storeInst->getPointerOperand(); - - if (!pointerOperand->getType()->getPointerElementType()->isIntegerTy(BIT_WIDTH)) - { - llvm::errs() << "Pointer operand type is not an integer of expected bit width.\n"; - return; - } - - auto * loadInst = Builder.CreateLoad(pointerOperand->getType()->getPointerElementType(), pointerOperand); - if (isa(loadInst->getPointerOperand())) - { - llvm::errs() << "Skipping StoreInst due to global variable in load operand.\n"; - return; - } - - Value * convertedFloat = Builder.CreateSIToFP(loadInst, Type::getFloatTy(storeInst->getContext())); - Value * dividedValue = Builder.CreateFMul( - convertedFloat, ConstantFP::get(Type::getFloatTy(storeInst->getContext()), 1.0 / FRAC_BASE)); - - if (auto * bitcastInst = dyn_cast(pointerOperand)) - { - Value * finalStorePtr = nullptr; - bool isValidSource = false; - llvm::errs() << "BIT_WIDTH: " << BIT_WIDTH << "\n"; - // Determine the final store pointer based on bit width - switch (BIT_WIDTH) - - { - case 16: - if (bitcastInst->getSrcTy()->getPointerElementType()->isIntegerTy(32)) - { - auto * i32Ptr = bitcastInst->getOperand(0); - if (auto * floatBitcast = dyn_cast(i32Ptr)) - { - if (floatBitcast->getSrcTy()->getPointerElementType()->isFloatTy()) - { - finalStorePtr = floatBitcast->getOperand(0); // Original float* - isValidSource = true; - } - } - } - break; - - case 32: - if (bitcastInst->getSrcTy()->getPointerElementType()->isFloatTy()) - { - finalStorePtr = bitcastInst->getOperand(0); // Original float* - isValidSource = true; - } - break; - - default: - llvm::errs() << "Unsupported BIT_WIDTH: " << BIT_WIDTH << "\n"; - return; - } - - if (isValidSource && finalStorePtr) - { - Builder.CreateStore(dividedValue, finalStorePtr); - llvm::errs() << "Dequantized and stored value for pointer.\n"; - } - else - { - llvm::errs() << "Invalid source for StoreInst: " << *storeInst << "\n"; - } - } -} - void -handleMatrixStore(StoreInst * storeInst, IRBuilder<> & Builder, int maxPrecisionBits) +eraseUnusedConstant(Module & M) { - Value * valueOperand = storeInst->getValueOperand(); - Value * pointerOperand = storeInst->getPointerOperand(); - - // Ensure the stored value is an integer and the destination is a float pointer - Type * valueType = valueOperand->getType(); - Type * pointerElementType = pointerOperand->getType()->getPointerElementType(); - - if (valueType->isIntegerTy() && pointerElementType->isFloatingPointTy()) - { - llvm::errs() << "Processing matrix store (quantized to dequantized): " << *storeInst << "\n"; - - // Convert integer value to floating-point (dequantization step 1) - llvm::errs() << "Converting integer to float: " << *valueOperand << "\n"; - Value * convertedFloat = Builder.CreateSIToFP(valueOperand, Type::getFloatTy(storeInst->getContext()), storeInst->getName() + ".dequantized"); - - // Perform dequantization by multiplying by (1 / fracBase) - Value * dequantizedValue = Builder.CreateFMul( - convertedFloat, ConstantFP::get(Type::getFloatTy(storeInst->getContext()), 1.0 / FRAC_BASE), storeInst->getName() + ".scaled_back"); - - // Store the dequantized floating-point value back to the original float memory location - Builder.CreateStore(dequantizedValue, pointerOperand); - - llvm::errs() << "Dequantized and stored float value at: " << *pointerOperand << "\n"; - - // Remove the original store instruction - storeInst->eraseFromParent(); - } - else - { - llvm::errs() << "Skipping store: Not storing i32 into float*.\n"; - } -} - -void -handleReturnValue(ReturnInst * retInst, int maxPrecisionBits) -{ - if (!retInst->getReturnValue()) - return; - - Value * retVal = retInst->getReturnValue(); - - if (!retVal->getType()->isIntegerTy()) - { - errs() << "Return value is not integer type, skipping dequantization.\n"; - return; - } - - IRBuilder<> Builder(retInst); - Type * targetType = Type::getDoubleTy(retInst->getContext()); - - Value * fpVal = Builder.CreateSIToFP(retVal, targetType); - - llvm::Constant * oneDivFrac = llvm::ConstantFP::get(targetType, 1.0 / FRAC_BASE); - - Value * dequantizedVal = Builder.CreateFMul(fpVal, oneDivFrac); - ReturnInst * newRet = ReturnInst::Create(retInst->getContext(), dequantizedVal, retInst); - - retInst->eraseFromParent(); - - errs() << "Replaced return with dequantized value: " << *newRet << "\n"; -} - -void -dequantizeResults(StoreInst * storeInst, Function & F, int maxPrecisionBits) -{ - IRBuilder<> Builder(storeInst->getNextNode()); - llvm::errs() << "Processing StoreInst in function: " << F.getName() << " | Store instruction: " << *storeInst << "\n"; - -#if IS_MATRIX - handleMatrixStore(storeInst, Builder, maxPrecisionBits); -#elif IS_POINTER - llvm::errs() << "Handling pointer store.\n"; - handlePointerStore(storeInst, Builder, maxPrecisionBits); - -#else - handleGlobalStore(storeInst, Builder, maxPrecisionBits); -#endif - -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -bool enableAutoQuantization = false; - -void -detectFloatingPointOps(Module & Mod) -{ - bool hasFloatOps = false; - std::map floatOpCounts; // Map to store the count of floating-point operations per function - - for (auto & F : Mod) - { - int functionFloatOpCount = 0; // Counter for floating-point operations in the current function - - // Analyze function parameters - int paramCount = 0; - int returnCount = 0; - std::vector paramTypes; // To store parameter types - for (auto & Arg : F.args()) - { - paramCount++; - if (Arg.getType()->isPointerTy()) - { - paramTypes.push_back("pointer"); - } - else if (Arg.getType()->isFloatingPointTy()) - { - paramTypes.push_back("floating-point"); - } - else if (Arg.getType()->isIntegerTy()) - { - paramTypes.push_back("integer"); - } - else - { - paramTypes.push_back("unknown"); - } - } - - // Analyze function return type - std::string returnType = "void"; // Default return type - if (!F.getReturnType()->isVoidTy()) - { - if (F.getReturnType()->isPointerTy()) - { - returnType = "pointer"; - } - else if (F.getReturnType()->isFloatingPointTy()) - { - returnType = "floating-point"; - } - else if (F.getReturnType()->isIntegerTy()) - { - returnType = "integer"; - } - else - { - returnType = "unknown"; - } - } - - for (auto & BB : F) - { - for (auto & I : BB) - { - // Check if the instruction is a floating-point operation - if (I.getOpcode() == Instruction::FAdd || - I.getOpcode() == Instruction::FMul || - I.getOpcode() == Instruction::FSub || - I.getOpcode() == Instruction::FDiv) - { - hasFloatOps = true; - functionFloatOpCount++; - } - - // Check if the instruction is a return - if (isa(I)) - { - returnCount++; - } - } - } - - // Store the count for this function - if (functionFloatOpCount > 0) - { - floatOpCounts[F.getName().str()] = functionFloatOpCount; - } - - // Output function details - llvm::errs() << "Function: " << F.getName() << "\n"; - llvm::errs() << " Return Type: " << returnType << "\n"; - llvm::errs() << " Parameter Count: " << paramCount << "\n"; - llvm::errs() << " Parameter Types: "; - for (const auto & type : paramTypes) - { - llvm::errs() << type << " "; - } - llvm::errs() << "\n"; - // f - } - - // Output the results - if (hasFloatOps) + std::set quantizedBaseNames; + for (auto & GV : M.globals()) { - llvm::errs() << "Floating-point operations detected in the module.\n"; - for (const auto & entry : floatOpCounts) + if (GV.getName().endswith("_quantized")) { - llvm::errs() << "Function: " << entry.first - << " - Floating-point operations: " << entry.second << "\n"; + std::string baseName = GV.getName().str().substr(0, GV.getName().size() - 10); // remove "_quantized" + quantizedBaseNames.insert(baseName); } - llvm::errs() << "Enabling Auto-Quantization.\n"; - enableAutoQuantization = true; } - else + std::vector toDelete; + for (auto & GV : M.globals()) { - llvm::errs() << "No floating-point operations detected. Skipping Auto-Quantization.\n"; - } -} - -void checkFPUAvailability(Module &Mod) -{ - bool hasFPU = false; - std::set detectedFeatures; + std::string name = GV.getName().str(); - // 1. Check target-features from function attributes - for (auto &F : Mod) - { - if (F.hasFnAttribute("target-features")) + if (GV.use_empty() && quantizedBaseNames.count(name)) { - std::string features = F.getFnAttribute("target-features").getValueAsString().str(); - detectedFeatures.insert(features); - - // x86 FPU features - if (features.find("+sse") != std::string::npos || - features.find("+sse2") != std::string::npos || - features.find("+avx") != std::string::npos || - features.find("+x87") != std::string::npos || - features.find("+fma") != std::string::npos) - { - hasFPU = true; - } - - // ARM FPU features - if (features.find("+vfp") != std::string::npos || - features.find("+neon") != std::string::npos || - features.find("+fp-armv8") != std::string::npos || - features.find("+fp16") != std::string::npos) - { - hasFPU = true; - } + toDelete.push_back(&GV); } - } - - // 2. Supplementary check: parse target triple for architecture - std::string triple = Mod.getTargetTriple(); - llvm::errs() << "Target Triple: " << triple << "\n"; - - if (triple.find("armv7e-m") != std::string::npos || - triple.find("armv8-m.main") != std::string::npos || - triple.find("aarch64") != std::string::npos) - { - hasFPU = true; - llvm::errs() << "Triple indicates hardware FPU support.\n"; - } - else if (triple.find("armv6-m") != std::string::npos || - triple.find("armv7-m") != std::string::npos) - { - hasFPU = false; - llvm::errs() << "Triple indicates no FPU support.\n"; - } - else - { - llvm::errs() << "Triple is not recognized. Assuming conservative FPU support = false.\n"; - } - - // 3. (Removed hardcoded CPU name database for better generality) - - // Summary - if (!detectedFeatures.empty()) - { - llvm::errs() << "Target Features: "; - for (const auto &feature : detectedFeatures) + if (GV.use_empty() && name.size() > 10 && name.substr(name.size() - 10) == "_quantized") { - llvm::errs() << feature << " "; + toDelete.push_back(&GV); } - llvm::errs() << "\n"; - } - - if (hasFPU) - { - llvm::errs() << "FPU detected (from features and/or triple).\n"; } - else - { - llvm::errs() << "No FPU detected. Enabling Auto-Quantization.\n"; - enableAutoQuantization = true; - } -} - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -// Process functions that are whitelisted for dequantization - -void -processWhitelistedFunctions(Module & module, const std::set & whitelist, int maxPrecisionBits) -{ - for (Function & F : module) + for (auto * GV : toDelete) { - if (whitelist.find(F.getName().str()) != whitelist.end()) - { - llvm::errs() << "Found whitelisted function: " << F.getName() << "\n"; - std::vector retWorkList; - for (BasicBlock & BB : F) - { - for (Instruction & I : BB) - { - if (ReturnInst * retInst = dyn_cast(&I)) - { - if (retInst->getReturnValue() && retInst->getReturnValue()->getType()->isIntegerTy()) - { - retWorkList.push_back(retInst); - } - } - } - } - for (ReturnInst * retInst : retWorkList) - { - handleReturnValue(retInst, maxPrecisionBits); - } - - for (BasicBlock & BB : F) - { - for (Instruction & I : BB) - { - // llvm::errs() << "Processing instruction: " << I << "\n"; - if (auto * storeInst = dyn_cast(&I)) - { - llvm::errs() << "Found valid StoreInst.\n"; - dequantizeResults(storeInst, F, maxPrecisionBits); - } - } - } - } + GV->eraseFromParent(); } } - - // Function to save the IR of a module to a file void saveModuleIR(llvm::Module & M, const std::string & fileName) @@ -700,51 +285,36 @@ dumpIR(State * N, std::string fileSuffix, const std::unique_ptr & Mod) std::string dirPath = std::string(sys::path::parent_path(filePath)) + "/"; std::string fileName = std::string(sys::path::stem(filePath)) + "_" + fileSuffix + ".bc"; std::string filePathStr = dirPath + fileName; + std::string tmpPathStr = filePathStr + ".tmp"; filePath = StringRef(filePathStr); flexprint(N->Fe, N->Fm, N->Fpinfo, "Dump IR of: %s\n", filePath.str().c_str()); - std::error_code errorCode(errno, std::generic_category()); - raw_fd_ostream dumpedFile(filePath, errorCode); + std::error_code errorCode; + raw_fd_ostream dumpedFile(tmpPathStr, errorCode, llvm::sys::fs::OF_None); + if (errorCode) + { + flexprint(N->Fe, N->Fm, N->Fperr, "Error: failed to open %s for bitcode output: %s\n", + tmpPathStr.c_str(), errorCode.message().c_str()); + return; + } + WriteBitcodeToFile(*Mod, dumpedFile); + if (dumpedFile.has_error()) + { + flexprint(N->Fe, N->Fm, N->Fperr, "Error: failed to write bitcode to %s\n", + filePath.str().c_str()); + dumpedFile.clear_error(); + } + dumpedFile.flush(); dumpedFile.close(); -} -// void dumpIR(State *N, std::string fileSuffix, const std::unique_ptr &Mod) { -// StringRef filePath(N->llvmIR); -// std::string dirPath = std::string(sys::path::parent_path(filePath)) + "/"; -// std::string fileName = std::string(sys::path::stem(filePath)) + "_" + fileSuffix + ".bc"; -// std::string filePathStr = dirPath + fileName; -// filePath = StringRef(filePathStr); -// -// // 输出调试信息:目标 IR 文件路径 -// flexprint(N->Fe, N->Fm, N->Fpinfo, "Dump IR of: %s\n", filePath.str().c_str()); -// llvm::errs() << "DumpIR: File path = " << filePath.str() << "\n"; -// -// // 使用 errorCode 检查创建文件是否成功 -// std::error_code errorCode; -// raw_fd_ostream dumpedFile(filePath, errorCode); -// if (errorCode) { -// // 输出错误信息 -// flexprint(N->Fe, N->Fm, N->Fpinfo, "Error opening file %s: %s\n", filePath.str().c_str(), errorCode.message().c_str()); -// llvm::errs() << "DumpIR: Failed to open file: " << filePath.str() << " (" << errorCode.message() << ")\n"; -// return; -// } else { -// llvm::errs() << "DumpIR: File opened successfully.\n"; -// } -// -// // 写入 IR 并检查写入过程 -// WriteBitcodeToFile(*Mod, dumpedFile); -// dumpedFile.flush(); -// if (dumpedFile.has_error()) { -// llvm::errs() << "DumpIR: Error during WriteBitcodeToFile: " << dumpedFile.error().message() << "\n"; -// flexprint(N->Fe, N->Fm, N->Fpinfo, "Error during WriteBitcodeToFile: %s\n", dumpedFile.error().message().c_str()); -// } else { -// llvm::errs() << "DumpIR: WriteBitcodeToFile completed successfully.\n"; -// } -// -// dumpedFile.close(); -// llvm::errs() << "DumpIR: File closed.\n"; -// } + errorCode = llvm::sys::fs::rename(tmpPathStr, filePathStr); + if (errorCode) + { + flexprint(N->Fe, N->Fm, N->Fperr, "Error: failed to finalize bitcode file %s: %s\n", + filePath.str().c_str(), errorCode.message().c_str()); + } +} void mergeBoundInfo(BoundInfo * dst, const BoundInfo * src) @@ -865,7 +435,8 @@ overloadFunc(std::unique_ptr & Mod, std::map & } void -irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverload, bool enableBuiltinAssume) +irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverload, + bool enableBuiltinAssume, bool enableQuantDecider) { llvm::errs() << "Entering irPassLLVMIROptimizeByRange\n"; if (N->llvmIR == nullptr) @@ -942,45 +513,69 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl // } // int MAX_PRECISION_BITS = maxPrecisionBits; - - int maxPrecisionBits = MAX_PRECISION_BITS; - /** - * Precision Analysis - */ flexprint(N->Fe, N->Fm, N->Fpinfo, "Precision Analysis"); double minResolution = 0.0; bool isFirstSensor = true; - for (Modality * currentModality = N->sensorList->modalityList; currentModality != NULL; currentModality = currentModality->next) + if (N->sensorList != NULL) { - // Calculate resolution - double resolution = (currentModality->rangeUpperBound - currentModality->rangeLowerBound) / - (1 << currentModality->precisionBits); + for (Modality * currentModality = N->sensorList->modalityList; currentModality != NULL; + currentModality = currentModality->next) + { + // Calculate resolution + double resolution = (currentModality->rangeUpperBound - currentModality->rangeLowerBound) / + (1 << currentModality->precisionBits); - // Store and print - currentModality->resolution = resolution; + // Store and print + currentModality->resolution = resolution; - // Initialize or compare for minimum - if (isFirstSensor) - { - minResolution = resolution; - isFirstSensor = false; - } - else if (resolution < minResolution) - { - minResolution = resolution; + // Initialize or compare for minimum + if (isFirstSensor) + { + minResolution = resolution; + isFirstSensor = false; + } + else if (resolution < minResolution) + { + minResolution = resolution; + } } } - double fracQ_exact = -log2(minResolution) - 1.0; - int fracQ = (int)ceil(fracQ_exact); + if (!isFirstSensor && std::isfinite(minResolution) && minResolution > 0.0) + { + double fracQ_exact = -log2(minResolution) - 1.0; + int fracQ = (int)ceil(fracQ_exact); + + const int maxSafeFracQ = (BIT_WIDTH > 1) ? (BIT_WIDTH - 1) : 0; + if (fracQ < 0) + fracQ = 0; + if (fracQ > maxSafeFracQ) + fracQ = maxSafeFracQ; + + maxPrecisionBits = fracQ; + + maxPrecisionBits = 16; + + - flexprint(N->Fe, N->Fm, N->Fpinfo, "Minimum resolution across all sensors: %f\n", minResolution); - flexprint(N->Fe, N->Fm, N->Fpinfo, "Required FRAC_Q: ceil(-log2(minResolution) - 1) = ceil(%f) = %d\n", - fracQ_exact, fracQ); + flexprint(N->Fe, N->Fm, N->Fpinfo, "Minimum resolution across all sensors: %f\n", minResolution); + flexprint(N->Fe, N->Fm, N->Fpinfo, + "Required FRAC_Q: ceil(-log2(minResolution) - 1) = ceil(%f) = %d\n", + fracQ_exact, fracQ); + flexprint(N->Fe, N->Fm, N->Fpinfo, + "Using computed precision bits (maxPrecisionBits/FRAC_Q): %d\n", + maxPrecisionBits); + } + else + { + flexprint(N->Fe, N->Fm, N->Fpinfo, + "Precision analysis unavailable; fallback to build-time MAX_PRECISION_BITS=%d\n", + maxPrecisionBits); + } /** * Config @@ -1062,9 +657,6 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl } } - detectFloatingPointOps(*Mod); - checkFPUAvailability(*Mod); - /* * analyze the range of all local variables in each function * */ @@ -1089,10 +681,9 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl flexprint(N->Fe, N->Fm, N->Fpinfo, "checking for potential overflows\n"); for (auto & funcPair : funcBoundInfo) { - checkOverflow(N, funcPair.second, maxPrecisionBits); + checkOverflow(N, funcPair.second, maxPrecisionBits, Mod->getFunction(funcPair.first)); } - flexprint(N->Fe, N->Fm, N->Fpinfo, "shrink data type by range\n"); for (auto & mi : *Mod) { @@ -1111,9 +702,27 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl { flexprint(N->Fe, N->Fm, N->Fpinfo, "auto quantization\n"); llvm::errs() << "Auto quantization enabled\n"; + std::map decisionMap; + if (enableQuantDecider) + { + irPassLLVMIRQuantDecider(N, *Mod, maxPrecisionBits, decisionMap); + } std::vector functionsToInsert; for (auto & mi : *Mod) { + if (enableQuantDecider) + { + auto decisionIt = decisionMap.find(mi.getName().str()); + if (decisionIt != decisionMap.end() && !decisionIt->second.shouldQuantize) + { + llvm::errs() << "QuantDecider: skipping function " << mi.getName() + << " (origCost=" << decisionIt->second.originalCost + << ", quantCost=" << decisionIt->second.quantizedCost + << ")\n"; + continue; + } + } + llvm::errs() << "Quantizing function: " << mi.getName() << "\n"; irPassLLVMIRAutoQuantization(N, mi, functionsToInsert, maxPrecisionBits); @@ -1129,8 +738,9 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl * Check for potential overflows */ flexprint(N->Fe, N->Fm, N->Fpinfo, "checking for potential overflows\n"); - for (auto & funcPair : funcBoundInfo) { - checkOverflow(N, funcPair.second, maxPrecisionBits); + for (auto & funcPair : funcBoundInfo) + { + checkOverflow(N, funcPair.second, maxPrecisionBits, Mod->getFunction(funcPair.first)); } flexprint(N->Fe, N->Fm, N->Fpinfo, "memory alignment\n"); @@ -1170,8 +780,8 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl // } /* - * simplify the condition of each branch - * */ + * simplify the condition of each branch + * */ flexprint(N->Fe, N->Fm, N->Fpinfo, "simplify control flow by range\n"); for (auto & mi : *Mod) { @@ -1192,9 +802,6 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl passManager.add(createGlobalDCEPass()); passManager.run(*Mod); - - - // /* // * remove the functions that are optimized by passes. // * */ @@ -1204,19 +811,19 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl // if (useOverLoad) // overloadFunc(Mod, callerMap); // - // flexprint(N->Fe, N->Fm, N->Fpinfo, "infer bound\n"); - // callerMap.clear(); - // funcBoundInfo.clear(); - // useOverLoad = false; - // for (auto & mi : *Mod) - // { - // auto boundInfo = new BoundInfo(); - // mergeBoundInfo(boundInfo, globalBoundInfo); - // rangeAnalysis(N, mi, boundInfo, callerMap, typeRange, virtualRegisterVectorRange, useOverLoad); - // funcBoundInfo.emplace(mi.getName().str(), boundInfo); - // std::vector calleeNames; - // collectCalleeInfo(calleeNames, funcBoundInfo, boundInfo); - // } + flexprint(N->Fe, N->Fm, N->Fpinfo, "infer bound\n"); + callerMap.clear(); + funcBoundInfo.clear(); + useOverLoad = false; + for (auto & mi : *Mod) + { + auto boundInfo = new BoundInfo(); + mergeBoundInfo(boundInfo, globalBoundInfo); + rangeAnalysis(N, mi, boundInfo, callerMap, typeRange, virtualRegisterVectorRange, useOverLoad); + funcBoundInfo.emplace(mi.getName().str(), boundInfo); + std::vector calleeNames; + collectCalleeInfo(calleeNames, funcBoundInfo, boundInfo); + } // flexprint(N->Fe, N->Fm, N->Fpinfo, "constant substitution\n"); for (auto & mi : *Mod) @@ -1241,8 +848,6 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl // if (useOverLoad) // overloadFunc(Mod, callerMap); - - // Finally, erase old functions // eraseOldFunctions(); @@ -1250,35 +855,42 @@ irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverl // eraseOldGlobals(); eraseUnusedConstant(*Mod); -// eraseOldFunctions(); -// eraseOldFunctions(); - + // eraseOldFunctions(); + // eraseOldFunctions(); - processWhitelistedFunctions(*Mod, whitelist, maxPrecisionBits); + irPassLLVMIRApplyDequantization(*Mod, maxPrecisionBits); -// eraseOldFunctions(); + // eraseOldFunctions(); eraseOldFunctions(*Mod); + StringRef inputPath(N->llvmIR); + std::string outDir = std::string(sys::path::parent_path(inputPath)); + std::string outStem = std::string(sys::path::stem(inputPath)) + "_output.ll"; + std::string outPath = outDir + "/" + outStem; + saveModuleIR(*Mod, outPath); + if (verifyModule(*Mod, &llvm::errs())) + { + llvm::errs() << "Warning: in-memory module verification failed before sanitized bitcode emission.\n"; + } - - - const char * homeDir = getenv("HOME"); - if (!homeDir) + LLVMContext dumpContext; + SMDiagnostic dumpErr; + std::unique_ptr sanitizedOutputMod = parseIRFile(outPath, dumpErr, dumpContext); + if (!sanitizedOutputMod) { - llvm::errs() << "Error: HOME environment variable not set.\n"; - return; + llvm::errs() << "Warning: failed to parse serialized LLVM IR for sanitized bitcode output: " + << outPath << "\n"; + llvm::errs() << "Falling back to in-memory module for bitcode emission.\n"; + } + else if (verifyModule(*sanitizedOutputMod, &llvm::errs())) + { + llvm::errs() << "Warning: sanitized output module verification failed; falling back to in-memory module.\n"; + sanitizedOutputMod.reset(); } - // Save the optimized IR to a file - // std::string fileName = std::string(homeDir) + "/CoSense/applications/newton/llvm-ir/MadgwickAHRS_output.ll"; - // saveModuleIR(*Mod, fileName); - // Save the optimized IR to a file - saveModuleIR(*Mod, "/home/xyf/CoSense/applications/newton/llvm-ir/MadgwickAHRS_output.ll"); - saveModuleIR(*Mod, "/home/xyf/CoSense/applications/newton/llvm-ir/MahonyAHRS_output.ll"); - saveModuleIR(*Mod, "/home/xyf/CoSense/applications/newton/llvm-ir/sensfusion6_output.ll"); - /* - * Dump BC file to a file. - * */ - dumpIR(N, "output", Mod); + if (sanitizedOutputMod) + dumpIR(N, "output", sanitizedOutputMod); + else + dumpIR(N, "output", Mod); llvm::errs() << "Exiting irPassLLVMIROptimizeByRange\n"; -} \ No newline at end of file +} diff --git a/src/newton/newton-irPass-LLVMIR-optimizeByRange.h b/src/newton/newton-irPass-LLVMIR-optimizeByRange.h index 92a49ad09..497385567 100644 --- a/src/newton/newton-irPass-LLVMIR-optimizeByRange.h +++ b/src/newton/newton-irPass-LLVMIR-optimizeByRange.h @@ -39,15 +39,15 @@ POSSIBILITY OF SUCH DAMAGE. #define NEWTON_IR_PASS_LLVM_IR_OPTIMIZE_BY_RANGE #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif /* __cplusplus */ void -irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverload, bool enableBuiltinAssume); +irPassLLVMIROptimizeByRange(State * N, bool enableQuantization, bool enableOverload, + bool enableBuiltinAssume, bool enableQuantDecider); #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ -#endif /* NEWTON_IR_PASS_LLVM_IR_OPTIMIZE_BY_RANGE */ \ No newline at end of file +#endif /* NEWTON_IR_PASS_LLVM_IR_OPTIMIZE_BY_RANGE */ diff --git a/src/newton/newton-irPass-LLVMIR-quantDecider.cpp b/src/newton/newton-irPass-LLVMIR-quantDecider.cpp new file mode 100644 index 000000000..a2737c605 --- /dev/null +++ b/src/newton/newton-irPass-LLVMIR-quantDecider.cpp @@ -0,0 +1,524 @@ +#include "newton-irPass-LLVMIR-quantDecider.h" + +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "newton-irPass-LLVMIR-quantization.h" + +using namespace llvm; + +namespace +{ +struct CostBreakdown { + double cfp; + double cint; + double cq; + double cdq; + double ccf; + int instructionCount; + int fpClusterInstructionCount; +}; + +struct TargetProfile { + bool hasFPU; + double fpFactor; + double intFactor; + double qFactor; + double dqFactor; + double marginFactor; + bool confident; +}; + +static bool +isMathRuntimeCall(const CallBase & callBase) +{ + Function * callee = callBase.getCalledFunction(); + if (!callee || !callee->hasName()) + return false; + + StringRef name = callee->getName(); + return name == "log" || name == "logf" || + name == "exp" || name == "expf" || + name == "sqrt" || name == "sqrtf" || + name == "sin" || name == "sinf" || + name == "cos" || name == "cosf" || + name == "log1p" || name == "log1pf" || + name.startswith("llvm.sqrt") || + name.startswith("llvm.log") || + name.startswith("llvm.exp") || + name.startswith("llvm.sin") || + name.startswith("llvm.cos"); +} + +static bool +usesOrProducesFloatingPoint(const Instruction & instruction) +{ + if (instruction.getType()->isFloatingPointTy()) + return true; + + for (const Value * operand : instruction.operands()) + { + if (operand->getType()->isFloatingPointTy()) + return true; + if (operand->getType()->isPointerTy() && operand->getType()->getPointerElementType()->isFloatingPointTy()) + return true; + } + + if (const auto * callBase = dyn_cast(&instruction)) + { + if (isMathRuntimeCall(*callBase)) + return true; + if (callBase->getType()->isFloatingPointTy()) + return true; + for (const Value * arg : callBase->args()) + { + if (arg->getType()->isFloatingPointTy()) + return true; + } + } + + return false; +} + +static bool +isCoreFloatingPointOp(const Instruction & instruction) +{ + switch (instruction.getOpcode()) + { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FNeg: + return true; + case Instruction::Call: + { + const auto * callBase = dyn_cast(&instruction); + return callBase && isMathRuntimeCall(*callBase); + } + default: + return false; + } +} + +static double +floatingPointOpCost(const Instruction & instruction) +{ + switch (instruction.getOpcode()) + { + case Instruction::FAdd: + case Instruction::FSub: + return 4.0; + case Instruction::FMul: + return 5.0; + case Instruction::FDiv: + case Instruction::FRem: + return 14.0; + case Instruction::FNeg: + return 3.0; + case Instruction::Call: + return 15.0; + default: + return 0.0; + } +} + +static double +integerOpCost(const Instruction & instruction) +{ + switch (instruction.getOpcode()) + { + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::AShr: + case Instruction::LShr: + return 1.0; + case Instruction::Mul: + return 2.0; + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return 8.0; + default: + return 0.0; + } +} + +static double +quantizationBoundaryCost(const Instruction & instruction) +{ + switch (instruction.getOpcode()) + { + case Instruction::FPToSI: + case Instruction::FPToUI: + return 6.0; + default: + return 0.0; + } +} + +static double +dequantizationBoundaryCost(const Instruction & instruction) +{ + switch (instruction.getOpcode()) + { + case Instruction::SIToFP: + case Instruction::UIToFP: + return 6.0; + default: + return 0.0; + } +} + +static double +estimateBasicBlockWeight(const Function & function, const BasicBlock & block, + const std::map & blockOrder) +{ + double weight = 1.0; + int predecessorCount = 0; + for (const BasicBlock * ignored : predecessors(&block)) + { + (void)ignored; + predecessorCount++; + } + weight += 0.10 * predecessorCount; + + const Instruction * terminator = block.getTerminator(); + if (terminator) + { + if (terminator->getOpcode() == Instruction::Br) + { + const auto * branchInst = cast(terminator); + if (branchInst->isConditional()) + weight += 0.35; + } + else if (terminator->getOpcode() == Instruction::Switch) + { + const auto * switchInst = cast(terminator); + weight += 0.05 * (switchInst->getNumCases() + 1); + } + } + + auto currentIt = blockOrder.find(&block); + if (currentIt != blockOrder.end()) + { + int currentIndex = currentIt->second; + for (const BasicBlock * successor : successors(&block)) + { + auto succIt = blockOrder.find(successor); + if (succIt != blockOrder.end() && succIt->second <= currentIndex) + { + weight += 0.70; + } + } + } + + if (function.getEntryBlock().getName().equals(block.getName())) + weight += 0.05; + + return weight; +} + +static CostBreakdown +estimateCostBreakdown(const Function & function) +{ + CostBreakdown breakdown = {0.0, 0.0, 0.0, 0.0, 0.0, 0, 0}; + + std::map blockOrder; + int index = 0; + for (const BasicBlock & block : function) + { + blockOrder.emplace(&block, index++); + } + + for (const BasicBlock & block : function) + { + double blockWeight = estimateBasicBlockWeight(function, block, blockOrder); + double blockCfp = 0.0; + double blockCint = 0.0; + double blockCq = 0.0; + double blockCdq = 0.0; + + for (const Instruction & instruction : block) + { + if (isCoreFloatingPointOp(instruction)) + { + blockCfp += floatingPointOpCost(instruction); + breakdown.fpClusterInstructionCount++; + } + else if (usesOrProducesFloatingPoint(instruction)) + { + blockCfp += 1.0; + breakdown.fpClusterInstructionCount++; + } + + blockCint += integerOpCost(instruction); + blockCq += quantizationBoundaryCost(instruction); + blockCdq += dequantizationBoundaryCost(instruction); + breakdown.instructionCount++; + } + + double blockControlFlowCost = 0.0; + if (const Instruction * terminator = block.getTerminator()) + { + switch (terminator->getOpcode()) + { + case Instruction::Br: + { + const auto * branchInst = cast(terminator); + blockControlFlowCost += branchInst->isConditional() ? 2.0 : 0.7; + break; + } + case Instruction::Switch: + { + const auto * switchInst = cast(terminator); + blockControlFlowCost += 3.0 + 0.25 * (switchInst->getNumCases() + 1); + break; + } + default: + break; + } + } + + breakdown.cfp += blockWeight * blockCfp; + breakdown.cint += blockWeight * blockCint; + breakdown.cq += blockWeight * blockCq; + breakdown.cdq += blockWeight * blockCdq; + breakdown.ccf += blockWeight * blockControlFlowCost; + } + + return breakdown; +} + +static std::string +collectTargetFeatures(const Module & module) +{ + std::string merged; + for (const Function & function : module) + { + if (function.hasFnAttribute("target-features")) + { + if (!merged.empty()) + merged += ","; + merged += function.getFnAttribute("target-features").getValueAsString().str(); + } + } + return merged; +} + +static bool +containsToken(const std::string & haystack, const std::string & token) +{ + return haystack.find(token) != std::string::npos; +} + +static TargetProfile +detectTargetProfile(const Module & module) +{ + TargetProfile profile = {false, 1.0, 1.0, 1.0, 1.0, 0.05, false}; + + std::string triple = module.getTargetTriple(); + std::string features = collectTargetFeatures(module); + + bool explicitFPU = containsToken(features, "+vfp") || containsToken(features, "+neon") || + containsToken(features, "+fp-armv8") || containsToken(features, "+fp16") || + containsToken(features, "+vfp4") || containsToken(features, "+fpv4-sp-d16"); + bool softFloat = containsToken(features, "+soft-float") || containsToken(features, "+softfp"); + bool isAArch64 = containsToken(triple, "aarch64"); + bool isThumb = containsToken(triple, "thumb"); + bool isARM = containsToken(triple, "arm") || isThumb || isAArch64; + bool isMProfile = containsToken(triple, "armv6-m") || containsToken(triple, "armv6m") || + containsToken(triple, "armv7-m") || containsToken(triple, "armv7m") || + containsToken(triple, "armv7e-m") || containsToken(triple, "armv7em") || + containsToken(triple, "armv8-m") || containsToken(triple, "armv8m") || + isThumb; + bool isAProfile = isAArch64 || containsToken(triple, "armv7-a") || containsToken(triple, "armv8-a"); + + if (isAProfile && explicitFPU) + { + profile.hasFPU = true; + profile.fpFactor = 0.85; + profile.intFactor = 1.00; + profile.qFactor = 1.15; + profile.dqFactor = 1.15; + profile.marginFactor = 0.10; + profile.confident = true; + return profile; + } + + if (isMProfile && (!explicitFPU || softFloat)) + { + profile.hasFPU = false; + profile.fpFactor = 2.30; + profile.intFactor = 0.95; + profile.qFactor = 1.25; + profile.dqFactor = 1.30; + profile.marginFactor = 0.02; + profile.confident = true; + return profile; + } + + if (isMProfile && explicitFPU) + { + profile.hasFPU = explicitFPU; + profile.fpFactor = explicitFPU ? 1.05 : 1.80; + profile.intFactor = 0.95; + profile.qFactor = explicitFPU ? 1.10 : 1.20; + profile.dqFactor = explicitFPU ? 1.15 : 1.25; + profile.marginFactor = explicitFPU ? 0.08 : 0.03; + profile.confident = true; + return profile; + } + + profile.hasFPU = explicitFPU; + profile.fpFactor = explicitFPU ? 1.00 : (isARM ? 1.80 : 1.40); + profile.intFactor = 1.00; + profile.qFactor = explicitFPU ? 1.15 : (isARM ? 1.20 : 1.10); + profile.dqFactor = explicitFPU ? 1.20 : (isARM ? 1.25 : 1.15); + profile.marginFactor = 0.06; + profile.confident = false; + return profile; +} + +static bool +hasFloatingPointCluster(const Function & function) +{ + for (const BasicBlock & block : function) + { + for (const Instruction & instruction : block) + { + if (isCoreFloatingPointOp(instruction) || usesOrProducesFloatingPoint(instruction)) + return true; + } + } + return false; +} + +static int +countFloatingPointOperations(const Function & function) +{ + int operationCount = 0; + for (const BasicBlock & block : function) + { + for (const Instruction & instruction : block) + { + if (isCoreFloatingPointOp(instruction)) + operationCount++; + } + } + return operationCount; +} +} // namespace + +extern "C" { + +QuantDeciderResult +irPassLLVMIRQuantDecideFunction(void * N, Module & module, Function & llvmIrFunction, int maxPrecisionBits) +{ + QuantDeciderResult result = {false, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, false}; + + if (llvmIrFunction.isDeclaration()) + return result; + + if (!hasFloatingPointCluster(llvmIrFunction)) + return result; + + TargetProfile profile = detectTargetProfile(module); + result.targetHasFPU = profile.hasFPU; + + CostBreakdown originalBreakdown = estimateCostBreakdown(llvmIrFunction); + result.originalInstructionCount = originalBreakdown.instructionCount; + result.fpClusterInstructionCount = originalBreakdown.fpClusterInstructionCount; + + std::unique_ptr clonedModule = CloneModule(module); + if (!clonedModule) + return result; + + Function * clonedFunction = clonedModule->getFunction(llvmIrFunction.getName()); + if (!clonedFunction) + return result; + + std::vector functionsToInsert; + irPassLLVMIRAutoQuantization(reinterpret_cast(N), *clonedFunction, functionsToInsert, maxPrecisionBits); + + CostBreakdown quantizedBreakdown = estimateCostBreakdown(*clonedFunction); + result.quantizedInstructionCount = quantizedBreakdown.instructionCount; + + result.cfpCost = profile.fpFactor * originalBreakdown.cfp; + result.cintCost = profile.intFactor * quantizedBreakdown.cint; + result.cqCost = profile.qFactor * quantizedBreakdown.cq; + result.cdqCost = profile.dqFactor * quantizedBreakdown.cdq; + result.controlFlowCost = 0.5 * (originalBreakdown.ccf + quantizedBreakdown.ccf); + + result.originalCost = result.cfpCost + originalBreakdown.ccf; + result.quantizedCost = result.cintCost + result.cqCost + result.cdqCost + quantizedBreakdown.ccf; + + double effectiveCfp = result.cfpCost + result.controlFlowCost; + double effectiveQuant = result.cintCost + result.cqCost + result.cdqCost + result.controlFlowCost; + result.decisionMargin = effectiveCfp * profile.marginFactor; + if (!profile.confident) + result.decisionMargin = (std::max)(result.decisionMargin, 4.0); + + result.shouldQuantize = effectiveCfp > (effectiveQuant + result.decisionMargin); + + errs() << "[quant-decider] function=" << llvmIrFunction.getName() + << " targetHasFPU=" << (result.targetHasFPU ? "true" : "false") + << " Cfp=" << result.cfpCost + << " Cint=" << result.cintCost + << " Cq=" << result.cqCost + << " Cdq=" << result.cdqCost + << " Ccf=" << result.controlFlowCost + << " margin=" << result.decisionMargin + << " shouldQuantize=" << (result.shouldQuantize ? "true" : "false") << "\n"; + + return result; +} + +void +irPassLLVMIRQuantDecider(void * N, Module & module, int maxPrecisionBits, + std::map & decisionMap) +{ + decisionMap.clear(); + + for (Function & function : module) + { + if (function.isDeclaration()) + continue; + + QuantDeciderResult result = irPassLLVMIRQuantDecideFunction(N, module, function, maxPrecisionBits); + decisionMap.emplace(function.getName().str(), result); + } + + for (const auto & entry : decisionMap) + { + Function * function = module.getFunction(entry.first); + if (!function) + continue; + + int fpOperationCount = countFloatingPointOperations(*function); + if (fpOperationCount > 0) + { + errs() << "Function: " << entry.first + << " - Floating-point operations: " << fpOperationCount << "\n"; + } + } +} +} diff --git a/src/newton/newton-irPass-LLVMIR-quantDecider.h b/src/newton/newton-irPass-LLVMIR-quantDecider.h new file mode 100644 index 000000000..eefa23c90 --- /dev/null +++ b/src/newton/newton-irPass-LLVMIR-quantDecider.h @@ -0,0 +1,43 @@ +#ifndef NEWTON_IR_PASS_LLVM_IR_QUANT_DECIDER +#define NEWTON_IR_PASS_LLVM_IR_QUANT_DECIDER + +#include +#include + +#ifdef __cplusplus +namespace llvm +{ +class Module; +class Function; +} // namespace llvm +extern "C" { +#endif + +typedef struct QuantDeciderResult { + bool shouldQuantize; + double originalCost; + double quantizedCost; + double cfpCost; + double cintCost; + double cqCost; + double cdqCost; + double controlFlowCost; + double decisionMargin; + int originalInstructionCount; + int quantizedInstructionCount; + int fpClusterInstructionCount; + bool targetHasFPU; +} QuantDeciderResult; + +QuantDeciderResult +irPassLLVMIRQuantDecideFunction(void * N, llvm::Module & module, llvm::Function & llvmIrFunction, int maxPrecisionBits); + +void +irPassLLVMIRQuantDecider(void * N, llvm::Module & module, int maxPrecisionBits, + std::map & decisionMap); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/newton/newton-irPass-LLVMIR-quantization.h b/src/newton/newton-irPass-LLVMIR-quantization.h index 28adbfada..26109bc5b 100644 --- a/src/newton/newton-irPass-LLVMIR-quantization.h +++ b/src/newton/newton-irPass-LLVMIR-quantization.h @@ -32,37 +32,25 @@ POSSIBILITY OF SUCH DAMAGE. #include "newton-irPass-LLVMIR-rangeAnalysis.h" #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif /* __cplusplus */ -extern std::vector functionsToErase; -extern std::vector globalsToErase; -//extern std::vector instructionsToErase; +extern std::vector functionsToErase; +extern std::vector globalsToErase; +// extern std::vector instructionsToErase; void -irPassLLVMIRAutoQuantization(State * N, llvm::Function & llvmIrFunction, std::vector& functionsToInsert,int maxPrecisionBits); - -//void -//irPassLLVMIRAutoQuantization(State *N, llvm::Function &llvmIrFunction, std::vector &functionsToInsert, -// std::map>> &virtualRegisterVectorRange, -// int maxPrecisionBits); - -//void irPassLLVMIRAutoQuantization(State *N, Function &F, std::vector &functionsToInsert, -// int maxPrecisionBits, std::map>> &virtualRegisterVectorRange, -// int bitWidth, bool enableVectorization, bool enableRangeAnalysis); - - - - - +irPassLLVMIRAutoQuantization(State * N, llvm::Function & llvmIrFunction, std::vector & functionsToInsert, int maxPrecisionBits); +void +irPassLLVMIRApplyDequantization(llvm::Module & module, int maxPrecisionBits); extern -//void eraseOldFunctions(); - void eraseOldFunctions(llvm::Module &M); + // void eraseOldFunctions(); + void + eraseOldFunctions(llvm::Module & M); void eraseOldInstructions(); void eraseOldGlobals(); #ifdef __cplusplus } /* extern "C" */ -#endif /* __cplusplus */ \ No newline at end of file +#endif /* __cplusplus */ diff --git a/src/newton/newton-irPass-LLVMIR-rangeAnalysis.cpp b/src/newton/newton-irPass-LLVMIR-rangeAnalysis.cpp index b2fb8c7f7..925caa4ac 100644 --- a/src/newton/newton-irPass-LLVMIR-rangeAnalysis.cpp +++ b/src/newton/newton-irPass-LLVMIR-rangeAnalysis.cpp @@ -307,7 +307,10 @@ checkPhiRange(State * N, PHINode * phiNode, BoundInfo * boundInfo, if (minValueVec.empty() && minPHIValueVectors.empty()) { - flexprint(N->Fe, N->Fm, N->Fperr, "Error: min value vectors are both empty."); + if (valueRangeDebug) + { + flexprint(N->Fe, N->Fm, N->Fpinfo, "Call: skip empty PHI incoming range set.\n"); + } } return true; @@ -999,13 +1002,14 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, */ if (derivedVariableType->getTag() == llvm::dwarf::DW_TAG_pointer_type) { - auto baseType = derivedVariableType->getBaseType(); - /* - * the type is `void*` - * */ - if (nullptr != baseType) { - baseTypeName = derivedVariableType->getBaseType()->getName().str(); - } + auto baseType = derivedVariableType->getBaseType(); + /* + * the type is `void*` + * */ + if (nullptr != baseType) + { + baseTypeName = derivedVariableType->getBaseType()->getName().str(); + } } else { @@ -1023,15 +1027,21 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, } else { - flexprint(N->Fe, N->Fm, N->Fperr, "\tTODO: Call: Didn't support current type!\n"); + if (valueRangeDebug) + { + flexprint(N->Fe, N->Fm, N->Fpinfo, "\tCall: Skip unsupported debug type for range seeding.\n"); + } } } else if (const auto * basicVariableType = dyn_cast(variableType)) { - /* - * if it's a basic type, insert the basic + /* + * if it's a basic type, insert the basic * */ - flexprint(N->Fe, N->Fm, N->Fperr, "\tTODO: Call: Didn't support basic type!\n"); + if (valueRangeDebug) + { + flexprint(N->Fe, N->Fm, N->Fpinfo, "\tCall: Skip unsupported basic debug type for range seeding.\n"); + } } }; @@ -1189,9 +1199,9 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, * */ flexprint(N->Fe, N->Fm, N->Fpinfo, "\tCall: detect calledFunction %s.\n", calledFunction->getName().str().c_str()); - std::string newFuncName = calledFunction->getName().str(); - auto innerBoundInfo = new BoundInfo(); - bool hasSpecificRange = false; + std::string newFuncName = calledFunction->getName().str(); + auto innerBoundInfo = new BoundInfo(); + bool hasSpecificRange = false; /* * check if the ranges have been set to the function name * */ @@ -1369,26 +1379,32 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, returnRange = rangeAnalysis(N, *realCallee, overloadBoundInfo, callerMap, typeRange, virtualRegisterVectorRange, useOverLoad); - /* - * If the "realCallee" pass arguments by pointer, update the pointer argus. - * If the outer function have such operand value, but doesn't exist after the callee, - * remove it from boundInfo->virtualRegisterRange - * If both exist before and after callee, then update its value. - * */ - for (size_t idx = 0; idx < llvmIrCallInstruction->getNumOperands() - 1; idx++) { - auto operand = llvmIrCallInstruction->getOperand(idx); - if (operand->getType()->getTypeID() == Type::PointerTyID) { - auto vrIt = boundInfo->virtualRegisterRange.find(operand); - if (vrIt != boundInfo->virtualRegisterRange.end()) { - auto ibIt = innerBoundInfo->virtualRegisterRange.find(operand); - if (ibIt != innerBoundInfo->virtualRegisterRange.end()) { - vrIt->second = ibIt->second; - } else { - boundInfo->virtualRegisterRange.erase(vrIt); - } - } - } - } + /* + * If the "realCallee" pass arguments by pointer, update the pointer argus. + * If the outer function have such operand value, but doesn't exist after the callee, + * remove it from boundInfo->virtualRegisterRange + * If both exist before and after callee, then update its value. + * */ + for (size_t idx = 0; idx < llvmIrCallInstruction->getNumOperands() - 1; idx++) + { + auto operand = llvmIrCallInstruction->getOperand(idx); + if (operand->getType()->getTypeID() == Type::PointerTyID) + { + auto vrIt = boundInfo->virtualRegisterRange.find(operand); + if (vrIt != boundInfo->virtualRegisterRange.end()) + { + auto ibIt = innerBoundInfo->virtualRegisterRange.find(operand); + if (ibIt != innerBoundInfo->virtualRegisterRange.end()) + { + vrIt->second = ibIt->second; + } + else + { + boundInfo->virtualRegisterRange.erase(vrIt); + } + } + } + } if (returnRange.first != nullptr) { boundInfo->virtualRegisterRange.emplace(llvmIrCallInstruction, returnRange.second); @@ -1404,26 +1420,32 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, realCallee = calledFunction; returnRange = rangeAnalysis(N, *realCallee, innerBoundInfo, callerMap, typeRange, virtualRegisterVectorRange, useOverLoad); - /* - * If the "realCallee" pass arguments by pointer, update the pointer argus. - * If the outer function have such operand value, but doesn't exist after the callee, - * remove it from boundInfo->virtualRegisterRange - * If both exist before and after callee, then update its value. - * */ - for (size_t idx = 0; idx < llvmIrCallInstruction->getNumOperands() - 1; idx++) { - auto operand = llvmIrCallInstruction->getOperand(idx); - if (operand->getType()->getTypeID() == Type::PointerTyID) { - auto vrIt = boundInfo->virtualRegisterRange.find(operand); - if (vrIt != boundInfo->virtualRegisterRange.end()) { - auto ibIt = innerBoundInfo->virtualRegisterRange.find(operand); - if (ibIt != innerBoundInfo->virtualRegisterRange.end()) { - vrIt->second = ibIt->second; - } else { - boundInfo->virtualRegisterRange.erase(vrIt); - } - } - } - } + /* + * If the "realCallee" pass arguments by pointer, update the pointer argus. + * If the outer function have such operand value, but doesn't exist after the callee, + * remove it from boundInfo->virtualRegisterRange + * If both exist before and after callee, then update its value. + * */ + for (size_t idx = 0; idx < llvmIrCallInstruction->getNumOperands() - 1; idx++) + { + auto operand = llvmIrCallInstruction->getOperand(idx); + if (operand->getType()->getTypeID() == Type::PointerTyID) + { + auto vrIt = boundInfo->virtualRegisterRange.find(operand); + if (vrIt != boundInfo->virtualRegisterRange.end()) + { + auto ibIt = innerBoundInfo->virtualRegisterRange.find(operand); + if (ibIt != innerBoundInfo->virtualRegisterRange.end()) + { + vrIt->second = ibIt->second; + } + else + { + boundInfo->virtualRegisterRange.erase(vrIt); + } + } + } + } if (returnRange.first != nullptr) { boundInfo->virtualRegisterRange.emplace(llvmIrCallInstruction, returnRange.second); @@ -1452,10 +1474,11 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, * But we still believe in the range we inferred from the function body. */ DISubprogram * subProgram = realCallee->getSubprogram(); - if (nullptr == subProgram) { - break; - } - DITypeRefArray typeArray = subProgram->getType()->getTypeArray(); + if (nullptr == subProgram) + { + break; + } + DITypeRefArray typeArray = subProgram->getType()->getTypeArray(); if (typeArray[0] != nullptr) { StringRef returnTypeName = typeArray[0]->getName(); @@ -1989,10 +2012,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, { switch (bitWidth) { - case 1: - lowerBound = static_cast(static_cast(vrRangeIt->second.first)); - upperBound = static_cast(static_cast(vrRangeIt->second.second)); - break; + case 1: + lowerBound = static_cast(static_cast(vrRangeIt->second.first)); + upperBound = static_cast(static_cast(vrRangeIt->second.second)); + break; case 8: lowerBound = static_cast(static_cast(vrRangeIt->second.first)); upperBound = static_cast(static_cast(vrRangeIt->second.second)); @@ -2021,18 +2044,18 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, vrRangeIt = boundInfo->virtualRegisterRange.find(rightOperand); if (vrRangeIt != boundInfo->virtualRegisterRange.end()) { - auto leftMin = lowerBound; - auto leftMax = upperBound; - double rightMin = vrRangeIt->second.first; - double rightMax = vrRangeIt->second.second; - lowerBound = min(min(min((uint64_t)leftMin << (int64_t)rightMin, - (uint64_t)leftMin << (int64_t)rightMax), - (uint64_t)leftMax << (int64_t)rightMin), - (uint64_t)leftMax << (int64_t)rightMax); - upperBound = max(max(max((uint64_t)leftMin << (int64_t)rightMin, - (uint64_t)leftMin << (int64_t)rightMax), - (uint64_t)leftMax << (int64_t)rightMin), - (uint64_t)leftMax << (int64_t)rightMax); + auto leftMin = lowerBound; + auto leftMax = upperBound; + double rightMin = vrRangeIt->second.first; + double rightMax = vrRangeIt->second.second; + lowerBound = min(min(min((uint64_t)leftMin << (int64_t)rightMin, + (uint64_t)leftMin << (int64_t)rightMax), + (uint64_t)leftMax << (int64_t)rightMin), + (uint64_t)leftMax << (int64_t)rightMax); + upperBound = max(max(max((uint64_t)leftMin << (int64_t)rightMin, + (uint64_t)leftMin << (int64_t)rightMax), + (uint64_t)leftMax << (int64_t)rightMin), + (uint64_t)leftMax << (int64_t)rightMax); } else { @@ -2063,10 +2086,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double lowerBound, upperBound; switch (bitWidth) { - case 1: - lowerBound = constValue << (static_cast(vrRangeIt->second.first)); - upperBound = constValue << (static_cast(vrRangeIt->second.second)); - break; + case 1: + lowerBound = constValue << (static_cast(vrRangeIt->second.first)); + upperBound = constValue << (static_cast(vrRangeIt->second.second)); + break; case 8: lowerBound = constValue << (static_cast(vrRangeIt->second.first)); upperBound = constValue << (static_cast(vrRangeIt->second.second)); @@ -2111,10 +2134,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double resMin = 0, resMax = 0; switch (bitWidth) { - case 1: - resMin = static_cast(vrRangeIt->second.first) << constValue; - resMax = static_cast(vrRangeIt->second.second) << constValue; - break; + case 1: + resMin = static_cast(vrRangeIt->second.first) << constValue; + resMax = static_cast(vrRangeIt->second.second) << constValue; + break; case 8: resMin = static_cast(vrRangeIt->second.first) << constValue; resMax = static_cast(vrRangeIt->second.second) << constValue; @@ -2134,13 +2157,13 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, default: assert(false); } - /* - * lhs of shl can be negative, but the result should be positive. - * so we need to further check the real min value and real max value - * */ + /* + * lhs of shl can be negative, but the result should be positive. + * so we need to further check the real min value and real max value + * */ boundInfo->virtualRegisterRange.emplace(llvmIrBinaryOperator, - std::make_pair(min(resMin, resMax), - max(resMin, resMax))); + std::make_pair(min(resMin, resMax), + max(resMin, resMax))); } else { @@ -2155,9 +2178,9 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, } break; - /* - * Sign extend - * */ + /* + * Sign extend + * */ case Instruction::AShr: if (auto llvmIrBinaryOperator = dyn_cast(&llvmIrInstruction)) { @@ -2185,8 +2208,8 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, auto vrRangeIt = boundInfo->virtualRegisterRange.find(leftOperand); if (vrRangeIt != boundInfo->virtualRegisterRange.end()) { - leftMin = vrRangeIt->second.first; - leftMax = vrRangeIt->second.second; + leftMin = vrRangeIt->second.first; + leftMax = vrRangeIt->second.second; } else { @@ -2198,16 +2221,16 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, if (vrRangeIt != boundInfo->virtualRegisterRange.end()) { double rightMin = 0, rightMax = 0; - rightMin = vrRangeIt->second.first; - rightMax = vrRangeIt->second.second; + rightMin = vrRangeIt->second.first; + rightMax = vrRangeIt->second.second; lowerBound = min(min(min(static_cast(leftMin) >> static_cast(rightMin), - static_cast(leftMin) >> static_cast(rightMax)), - static_cast(leftMax) >> static_cast(rightMin)), - static_cast(leftMax) >> static_cast(rightMax)); + static_cast(leftMin) >> static_cast(rightMax)), + static_cast(leftMax) >> static_cast(rightMin)), + static_cast(leftMax) >> static_cast(rightMax)); upperBound = max(max(max(static_cast(leftMin) >> static_cast(rightMin), - static_cast(leftMin) >> static_cast(rightMax)), - static_cast(leftMax) >> static_cast(rightMin)), - static_cast(leftMax) >> static_cast(rightMax)); + static_cast(leftMin) >> static_cast(rightMax)), + static_cast(leftMax) >> static_cast(rightMin)), + static_cast(leftMax) >> static_cast(rightMax)); } else { @@ -2238,10 +2261,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double lowerBound, upperBound; switch (bitWidth) { - case 1: - lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); - upperBound = constValue >> (static_cast(vrRangeIt->second.second)); - break; + case 1: + lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); + upperBound = constValue >> (static_cast(vrRangeIt->second.second)); + break; case 8: lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); upperBound = constValue >> (static_cast(vrRangeIt->second.second)); @@ -2286,10 +2309,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double resMin = 0, resMax = 0; switch (bitWidth) { - case 1: - resMin = static_cast(vrRangeIt->second.first) >> constValue; - resMax = static_cast(vrRangeIt->second.second) >> constValue; - break; + case 1: + resMin = static_cast(vrRangeIt->second.first) >> constValue; + resMax = static_cast(vrRangeIt->second.second) >> constValue; + break; case 8: resMin = static_cast(vrRangeIt->second.first) >> constValue; resMax = static_cast(vrRangeIt->second.second) >> constValue; @@ -2325,9 +2348,9 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, } break; - /* - * Zero extend - * */ + /* + * Zero extend + * */ case Instruction::LShr: if (auto llvmIrBinaryOperator = dyn_cast(&llvmIrInstruction)) { @@ -2357,10 +2380,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, { switch (bitWidth) { - case 1: - leftMin = static_cast(static_cast(vrRangeIt->second.first)); - leftMax = static_cast(static_cast(vrRangeIt->second.second)); - break; + case 1: + leftMin = static_cast(static_cast(vrRangeIt->second.first)); + leftMax = static_cast(static_cast(vrRangeIt->second.second)); + break; case 8: leftMin = static_cast(static_cast(vrRangeIt->second.first)); leftMax = static_cast(static_cast(vrRangeIt->second.second)); @@ -2393,10 +2416,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double rightMin = 0, rightMax = 0; switch (bitWidth) { - case 1: - rightMin = static_cast(vrRangeIt->second.first); - rightMax = static_cast(vrRangeIt->second.second); - break; + case 1: + rightMin = static_cast(vrRangeIt->second.first); + rightMax = static_cast(vrRangeIt->second.second); + break; case 8: rightMin = static_cast(vrRangeIt->second.first); rightMax = static_cast(vrRangeIt->second.second); @@ -2454,10 +2477,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double lowerBound, upperBound; switch (bitWidth) { - case 1: - lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); - upperBound = constValue >> (static_cast(vrRangeIt->second.second)); - break; + case 1: + lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); + upperBound = constValue >> (static_cast(vrRangeIt->second.second)); + break; case 8: lowerBound = constValue >> (static_cast(vrRangeIt->second.first)); upperBound = constValue >> (static_cast(vrRangeIt->second.second)); @@ -2502,10 +2525,10 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double resMin = 0, resMax = 0; switch (bitWidth) { - case 1: - resMin = (static_cast(vrRangeIt->second.first)) >> constValue; - resMax = (static_cast(vrRangeIt->second.second)) >> constValue; - break; + case 1: + resMin = (static_cast(vrRangeIt->second.first)) >> constValue; + resMax = (static_cast(vrRangeIt->second.second)) >> constValue; + break; case 8: resMin = (static_cast(vrRangeIt->second.first)) >> constValue; resMax = (static_cast(vrRangeIt->second.second)) >> constValue; @@ -2949,7 +2972,7 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, * and reinterpret it if necessary * */ unionAddress.emplace(llvmIrBitCastInstruction, llvmIrBitCastInstruction->getOperand(0)); - assert(llvmIrBitCastInstruction->getDestTy()->getTypeID() == Type::PointerTyID); + // assert(llvmIrBitCastInstruction->getDestTy()->getTypeID() == Type::PointerTyID); auto vrRangeIt = boundInfo->virtualRegisterRange.find(llvmIrBitCastInstruction->getOperand(0)); if (vrRangeIt != boundInfo->virtualRegisterRange.end()) { @@ -2962,13 +2985,25 @@ rangeAnalysis(State * N, llvm::Function & llvmIrFunction, BoundInfo * boundInfo, double originLow = vrRangeIt->second.first; double originHigh = vrRangeIt->second.second; double lowRange, highRange; - auto DestEleType = llvmIrBitCastInstruction->getDestTy()->getPointerElementType(); + Type * destType = llvmIrBitCastInstruction->getDestTy(); + if (!destType->isPointerTy()) + { + flexprint(N->Fe, N->Fm, N->Fpinfo, "\tBitCast: destination type is not pointer, skipping union reinterpretation.\n"); + boundInfo->virtualRegisterRange.emplace(llvmIrBitCastInstruction, vrRangeIt->second); + break; + } + auto DestEleType = destType->getPointerElementType(); + Type * srcType = llvmIrBitCastInstruction->getSrcTy(); + bool srcStructType = srcType->isStructTy(); + if (!srcStructType && srcType->isPointerTy()) + { + srcStructType = srcType->getPointerElementType()->isStructTy(); + } /* * if it's a structure type, we use reinterpret_cast * todo: not very sure, need further check * */ - if (llvmIrBitCastInstruction->getSrcTy()->isStructTy() || - llvmIrBitCastInstruction->getSrcTy()->getPointerElementType()->isStructTy()) + if (srcStructType) { switch (DestEleType->getTypeID()) { diff --git a/src/newton/newton-irPass-LLVMIR-shrinkTypeByRange.cpp b/src/newton/newton-irPass-LLVMIR-shrinkTypeByRange.cpp index 347bda48e..914961ef4 100644 --- a/src/newton/newton-irPass-LLVMIR-shrinkTypeByRange.cpp +++ b/src/newton/newton-irPass-LLVMIR-shrinkTypeByRange.cpp @@ -44,8 +44,7 @@ using namespace llvm; -extern "C" -{ +extern "C" { enum varType { INT1 = 1, @@ -62,28 +61,28 @@ enum varType { varType getIntegerTypeEnum(double min, double max, bool signFlag) { - varType finalType; - if ((!signFlag && max < UINT8_MAX) || (signFlag && min > INT8_MIN && max < INT8_MAX)) - { - finalType = INT8; - } - else if ((!signFlag && max < UINT16_MAX) || (signFlag && min > INT16_MIN && max < INT16_MAX)) - { - finalType = INT16; - } - else if ((!signFlag && max < UINT32_MAX) || (signFlag && min > INT32_MIN && max < INT32_MAX)) - { - finalType = INT32; - } - else if ((!signFlag && max < UINT64_MAX) || (signFlag && min > INT64_MIN && max < INT64_MAX)) - { - finalType = INT64; - } - else - { - finalType = UNKNOWN; - } - return finalType; + varType finalType; + if ((!signFlag && max < UINT8_MAX) || (signFlag && min > INT8_MIN && max < INT8_MAX)) + { + finalType = INT8; + } + else if ((!signFlag && max < UINT16_MAX) || (signFlag && min > INT16_MIN && max < INT16_MAX)) + { + finalType = INT16; + } + else if ((!signFlag && max < UINT32_MAX) || (signFlag && min > INT32_MIN && max < INT32_MAX)) + { + finalType = INT32; + } + else if ((!signFlag && max < UINT64_MAX) || (signFlag && min > INT64_MIN && max < INT64_MAX)) + { + finalType = INT64; + } + else + { + finalType = UNKNOWN; + } + return finalType; } #else /* @@ -122,13 +121,13 @@ varType getFloatingTypeEnum(double min, double max) { varType finalType; - if ((FLT_EPSILON < std::abs(min) && std::abs(min) < FLT_MAX) && - (FLT_EPSILON < std::abs(max) && std::abs(max) < FLT_MAX)) + if ((FLT_EPSILON < std::abs(min) && std::abs(min) < FLT_MAX) && + (FLT_EPSILON < std::abs(max) && std::abs(max) < FLT_MAX)) { finalType = FLOAT; } - else if ((DBL_EPSILON < std::abs(min) && std::abs(min) < DBL_MAX) && - (DBL_EPSILON < std::abs(max) && std::abs(max) < DBL_MAX)) + else if ((DBL_EPSILON < std::abs(min) && std::abs(min) < DBL_MAX) && + (DBL_EPSILON < std::abs(max) && std::abs(max) < DBL_MAX)) { finalType = DOUBLE; } @@ -282,7 +281,7 @@ getTypeInfo(State * N, Value * inValue, case Type::FloatTyID: break; case Type::DoubleTyID: -// typeInformation = getShrinkDoubleType(N, inValue, vrRangeIt->second); + // typeInformation = getShrinkDoubleType(N, inValue, vrRangeIt->second); break; default: break; @@ -324,11 +323,14 @@ getRawType(Type * inputType, std::vector indexValue = std::vectorgetNumContainedTypes()) { - eleType = stType->getContainedType(0); - } else { - return stType; - } + if (0 != stType->getNumContainedTypes()) + { + eleType = stType->getContainedType(0); + } + else + { + return stType; + } } else { @@ -470,10 +472,10 @@ rollbackType(State * N, Instruction * inInstruction, unsigned operandIdx, BasicB { newValue = Builder.CreateFPCast(valueInst, instPrevTypeInfo.valueType); } - else - { - newValue = Builder.CreateBitCast(valueInst, instPrevTypeInfo.valueType); - } + else + { + newValue = Builder.CreateBitCast(valueInst, instPrevTypeInfo.valueType); + } inInstruction->setOperand(operandIdx, newValue); typeChangedInst.emplace(newValue, instPrevTypeInfo); } @@ -489,10 +491,10 @@ rollbackType(State * N, Instruction * inInstruction, unsigned operandIdx, BasicB { newValue = Builder.CreateFPCast(valueInst, instPrevTypeInfo.valueType); } - else - { - newValue = Builder.CreateBitCast(valueInst, instPrevTypeInfo.valueType); - } + else + { + newValue = Builder.CreateBitCast(valueInst, instPrevTypeInfo.valueType); + } inInstruction->replaceUsesOfWith(valueInst, newValue); typeChangedInst.emplace(newValue, instPrevTypeInfo); } @@ -568,7 +570,7 @@ rollbackType(State * N, Instruction * inInstruction, unsigned operandIdx, BasicB bool isSignedValue(Value * inValue) { - // todo: get the sign bit from type system + // todo: get the sign bit from type system bool signFlag = true; if (Instruction * valueInst = llvm::dyn_cast(inValue)) { @@ -781,11 +783,12 @@ matchOperandType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasi Value * nonConstOperand = inInstruction->getOperand(nonConstOperandIdx); auto constType = constOperand->getType(); auto nonConstType = nonConstOperand->getType(); - bool nonConstSign = true; - auto tcIt = typeChangedInst.find(nonConstOperand); - if (tcIt != typeChangedInst.end()) { - nonConstSign = tcIt->second.signFlag; - } + bool nonConstSign = true; + auto tcIt = typeChangedInst.find(nonConstOperand); + if (tcIt != typeChangedInst.end()) + { + nonConstSign = tcIt->second.signFlag; + } if (!isa(constOperand)) { /* @@ -891,7 +894,7 @@ matchOperandType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasi { typeInfo backType; backType.valueType = constType; - backType.signFlag = nonConstSign; + backType.signFlag = nonConstSign; if (isa(inInstruction)) { backType.valueType = changeStoreInstSiblingType(backType.valueType, nonConstType); @@ -955,12 +958,13 @@ matchDestType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBl { unsigned ptAddressSpace = srcType->getPointerAddressSpace(); srcType = srcType->getPointerElementType(); - if (srcType->isAggregateType()) { - /* - * we don't shrink the aggregate type - * */ - return; - } + if (srcType->isAggregateType()) + { + /* + * we don't shrink the aggregate type + * */ + return; + } std::vector indexValue; for (size_t idx = 0; idx < inInstruction->getNumOperands() - 1; idx++) { @@ -985,11 +989,11 @@ matchDestType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBl typeInfo backType; backType.signFlag = isSignedValue(inInstruction); backType.valueType = inInstType; - if (isa(inInstruction)) - { - unsigned ptAddressSpace = srcType->getPointerAddressSpace(); - backType.valueType = backType.valueType->getPointerTo(ptAddressSpace); - } + if (isa(inInstruction)) + { + unsigned ptAddressSpace = srcType->getPointerAddressSpace(); + backType.valueType = backType.valueType->getPointerTo(ptAddressSpace); + } for (size_t id = 0; id < inInstruction->getNumOperands(); id++) { auto newTypeValue = rollbackType(N, inInstruction, id, llvmIrBasicBlock, typeChangedInst, backType); @@ -1039,13 +1043,13 @@ matchDestType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBl /* * roll back operands to typeInformation.valueType * */ - if (isa(inInstruction)) - { - unsigned ptAddressSpace = srcType->getPointerAddressSpace(); - typeInformation.valueType = typeInformation.valueType->getPointerTo(ptAddressSpace); - } - size_t roll_backed_op_num = isa(inInstruction) ? 1 : inInstruction->getNumOperands(); - for (size_t id = 0; id < roll_backed_op_num; id++) + if (isa(inInstruction)) + { + unsigned ptAddressSpace = srcType->getPointerAddressSpace(); + typeInformation.valueType = typeInformation.valueType->getPointerTo(ptAddressSpace); + } + size_t roll_backed_op_num = isa(inInstruction) ? 1 : inInstruction->getNumOperands(); + for (size_t id = 0; id < roll_backed_op_num; id++) { typeInfo operandPrevTypeInfo{typeInformation.valueType, isSignedValue(inInstruction->getOperand(id))}; @@ -1115,7 +1119,7 @@ shrinkInstructionType(State * N, Instruction * inInstruction, BasicBlock & llvmI typeInformation.valueType = typeInformation.valueType->getPointerElementType(); } - changed = true; + changed = true; auto inInstType = inInstruction->getType(); IRBuilder<> Builder(&llvmIrBasicBlock); Instruction * insertPoint = inInstruction->getNextNode(); @@ -1142,10 +1146,10 @@ shrinkInstructionType(State * N, Instruction * inInstruction, BasicBlock & llvmI { castInst = Builder.CreateFPCast(cloneInst, typeInformation.valueType); } - else - { - castInst = Builder.CreateBitCast(cloneInst, typeInformation.valueType); - } + else + { + castInst = Builder.CreateBitCast(cloneInst, typeInformation.valueType); + } auto vrIt = virtualRegisterRange.find(inInstruction); if (castInst != nullptr && vrIt != virtualRegisterRange.end()) { @@ -1276,66 +1280,82 @@ rollBackDependencyLink(State * N, const std::vector & depLink, * %srcInst = op bigType %a, %b * %inst = trunc bigType %srcInst to smallType * */ -bool matchCastType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBlock, - std::map> & virtualRegisterRange, - std::map & typeChangedInst) { - bool changed = false; - - auto inInstType = inInstruction->getType(); - auto srcInst = inInstruction->getOperand(0); - auto srcType = srcInst->getType(); - - // todo: get the sign bit from type system - bool signFlag = true; - auto tcIt = typeChangedInst.find(inInstruction); - if (tcIt != typeChangedInst.end()) { - signFlag = tcIt->second.signFlag; - } - - Value * castInst; - IRBuilder<> Builder(&llvmIrBasicBlock); - Builder.SetInsertPoint(inInstruction->getNextNode()); - - if (compareType(inInstType, srcType) > 0) { - if (inInstruction->getOpcode() == Instruction::Trunc) { - castInst = Builder.CreateIntCast(srcInst, inInstType, signFlag); - changed = true; - } else if (inInstruction->getOpcode() == Instruction::FPTrunc) { - castInst = Builder.CreateFPCast(srcInst, inInstType); - changed = true; - } - } else if (compareType(inInstType, srcType) > 0) { - if (inInstruction->getOpcode() == Instruction::ZExt || - inInstruction->getOpcode() == Instruction::SExt) { - castInst = Builder.CreateIntCast(srcInst, inInstType, signFlag); - changed = true; - } else if (inInstruction->getOpcode() == Instruction::FPExt) { - castInst = Builder.CreateFPCast(srcInst, inInstType); - changed = true; - } - } else { - /* mergeCast will do this */ - } - - if (!changed) { - return changed; - } - - auto vrIt = virtualRegisterRange.find(inInstruction); - if (castInst != nullptr && vrIt != virtualRegisterRange.end()) - { - virtualRegisterRange.emplace(castInst, vrIt->second); - } - - if (tcIt != typeChangedInst.end()) { - typeChangedInst.emplace(castInst, tcIt->second); - } - - Instruction * newCastInst = llvm::dyn_cast(castInst); - inInstruction->replaceAllUsesWith(newCastInst); - inInstruction->removeFromParent(); - - return changed; +bool +matchCastType(State * N, Instruction * inInstruction, BasicBlock & llvmIrBasicBlock, + std::map> & virtualRegisterRange, + std::map & typeChangedInst) +{ + bool changed = false; + + auto inInstType = inInstruction->getType(); + auto srcInst = inInstruction->getOperand(0); + auto srcType = srcInst->getType(); + + // todo: get the sign bit from type system + bool signFlag = true; + auto tcIt = typeChangedInst.find(inInstruction); + if (tcIt != typeChangedInst.end()) + { + signFlag = tcIt->second.signFlag; + } + + Value * castInst; + IRBuilder<> Builder(&llvmIrBasicBlock); + Builder.SetInsertPoint(inInstruction->getNextNode()); + + if (compareType(inInstType, srcType) > 0) + { + if (inInstruction->getOpcode() == Instruction::Trunc) + { + castInst = Builder.CreateIntCast(srcInst, inInstType, signFlag); + changed = true; + } + else if (inInstruction->getOpcode() == Instruction::FPTrunc) + { + castInst = Builder.CreateFPCast(srcInst, inInstType); + changed = true; + } + } + else if (compareType(inInstType, srcType) > 0) + { + if (inInstruction->getOpcode() == Instruction::ZExt || + inInstruction->getOpcode() == Instruction::SExt) + { + castInst = Builder.CreateIntCast(srcInst, inInstType, signFlag); + changed = true; + } + else if (inInstruction->getOpcode() == Instruction::FPExt) + { + castInst = Builder.CreateFPCast(srcInst, inInstType); + changed = true; + } + } + else + { + /* mergeCast will do this */ + } + + if (!changed) + { + return changed; + } + + auto vrIt = virtualRegisterRange.find(inInstruction); + if (castInst != nullptr && vrIt != virtualRegisterRange.end()) + { + virtualRegisterRange.emplace(castInst, vrIt->second); + } + + if (tcIt != typeChangedInst.end()) + { + typeChangedInst.emplace(castInst, tcIt->second); + } + + Instruction * newCastInst = llvm::dyn_cast(castInst); + inInstruction->replaceAllUsesWith(newCastInst); + inInstruction->eraseFromParent(); + + return changed; } /* @@ -1386,10 +1406,10 @@ shrinkInstType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction) { castValue = Builder.CreateFPCast(paramOp, typeInformation.valueType); } - else - { - castValue = Builder.CreateBitCast(paramOp, typeInformation.valueType); - } + else + { + castValue = Builder.CreateBitCast(paramOp, typeInformation.valueType); + } auto vrIt = boundInfo->virtualRegisterRange.find(paramOp); if (castValue != nullptr && vrIt != boundInfo->virtualRegisterRange.end()) { @@ -1503,31 +1523,31 @@ shrinkInstType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction) boundInfo->virtualRegisterRange, typeChangedInst); break; - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPExt: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - matchCastType(N, llvmIrInstruction, llvmIrBasicBlock, - boundInfo->virtualRegisterRange, - typeChangedInst); - /* - * update the llvmIrInstruction, - * maybe there's a better way - * - * question: why `--` get the next instruction? - * */ - llvmIrInstruction = &*itBB--; - llvmIrInstruction = &*itBB++; - shrinkInstructionType(N, llvmIrInstruction, llvmIrBasicBlock, - boundInfo->virtualRegisterRange, - typeChangedInst); - break; + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPExt: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + matchCastType(N, llvmIrInstruction, llvmIrBasicBlock, + boundInfo->virtualRegisterRange, + typeChangedInst); + /* + * update the llvmIrInstruction, + * maybe there's a better way + * + * question: why `--` get the next instruction? + * */ + llvmIrInstruction = &*itBB--; + llvmIrInstruction = &*itBB++; + shrinkInstructionType(N, llvmIrInstruction, llvmIrBasicBlock, + boundInfo->virtualRegisterRange, + typeChangedInst); + break; /* * the return type of storeInst is always void * the return type of cmpInst is always i1 @@ -1566,17 +1586,17 @@ shrinkInstType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction) assert(retValue != nullptr && "return void"); if (funcRetType->isIntegerTy()) { - // todo: get the sign bit from type system + // todo: get the sign bit from type system castInst = Builder.CreateIntCast(retValue, funcRetType, true); } else if (funcRetType->isDoubleTy()) { castInst = Builder.CreateFPCast(retValue, funcRetType); } - else - { - castInst = Builder.CreateBitCast(retValue, funcRetType); - } + else + { + castInst = Builder.CreateBitCast(retValue, funcRetType); + } auto vrIt = boundInfo->virtualRegisterRange.find(retValue); if (castInst != nullptr && vrIt != boundInfo->virtualRegisterRange.end()) { @@ -1585,7 +1605,7 @@ shrinkInstType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction) ReturnInst::Create(llvmIrReturnInstruction->getContext(), castInst, llvmIrReturnInstruction->getParent()); - llvmIrReturnInstruction->removeFromParent(); + llvmIrReturnInstruction->eraseFromParent(); } } break; @@ -1676,10 +1696,10 @@ mergeCast(State * N, Function & llvmIrFunction, Instruction * llvmIrInstruction = &*itBB++; switch (llvmIrInstruction->getOpcode()) { - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::SIToFP: - case Instruction::UIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPExt: @@ -1696,7 +1716,7 @@ mergeCast(State * N, Function & llvmIrFunction, if (sourceOp->getType() == llvmIrInstruction->getType()) { llvmIrInstruction->replaceAllUsesWith(sourceOp); - llvmIrInstruction->removeFromParent(); + llvmIrInstruction->eraseFromParent(); break; } auto sourceInst = llvm::dyn_cast(sourceOp); @@ -1724,23 +1744,23 @@ mergeCast(State * N, Function & llvmIrFunction, * */ Value * castInst; auto valueType = llvmIrInstruction->getType(); - if ((valueType->isFloatTy() || valueType->isDoubleTy()) && - sourceOperand->getType()->isIntegerTy()) - { - // float fa = (float)ia; - bool isSigned = sourceInst->getOpcode() == Instruction::SIToFP; - castInst = isSigned ? Builder.CreateSIToFP(sourceOperand, valueType) - : Builder.CreateUIToFP(sourceOperand, valueType); - } - else if (valueType->isIntegerTy() && - (sourceOperand->getType()->isFloatTy() || sourceOperand->getType()->isDoubleTy())) - { - // int iq = (int)fq; - bool isSigned = sourceInst->getOpcode() == Instruction::FPToSI; - castInst = isSigned ? Builder.CreateFPToSI(sourceOperand, valueType) - : Builder.CreateFPToUI(sourceOperand, valueType); - } - else if (valueType->isIntegerTy()) + if ((valueType->isFloatTy() || valueType->isDoubleTy()) && + sourceOperand->getType()->isIntegerTy()) + { + // float fa = (float)ia; + bool isSigned = sourceInst->getOpcode() == Instruction::SIToFP; + castInst = isSigned ? Builder.CreateSIToFP(sourceOperand, valueType) + : Builder.CreateUIToFP(sourceOperand, valueType); + } + else if (valueType->isIntegerTy() && + (sourceOperand->getType()->isFloatTy() || sourceOperand->getType()->isDoubleTy())) + { + // int iq = (int)fq; + bool isSigned = sourceInst->getOpcode() == Instruction::FPToSI; + castInst = isSigned ? Builder.CreateFPToSI(sourceOperand, valueType) + : Builder.CreateFPToUI(sourceOperand, valueType); + } + else if (valueType->isIntegerTy()) { castInst = Builder.CreateIntCast(sourceOperand, valueType, llvmIrInstruction->getOpcode() == Instruction::SExt); @@ -1749,10 +1769,10 @@ mergeCast(State * N, Function & llvmIrFunction, { castInst = Builder.CreateFPCast(sourceOperand, valueType); } - else - { - castInst = Builder.CreateBitCast(sourceOperand, valueType); - } + else + { + castInst = Builder.CreateBitCast(sourceOperand, valueType); + } auto vrIt = virtualRegisterRange.find(sourceOperand); if (castInst != nullptr && vrIt != virtualRegisterRange.end()) { @@ -1777,7 +1797,7 @@ mergeCast(State * N, Function & llvmIrFunction, llvmIrInstruction->replaceAllUsesWith(newCastInst); sourceInstVec.emplace_back(newCastInst); } - llvmIrInstruction->removeFromParent(); + llvmIrInstruction->eraseFromParent(); } else { @@ -1851,10 +1871,10 @@ countCastInst(State * N, Function & llvmIrFunction) { switch (llvmIrInstruction.getOpcode()) { - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::SIToFP: - case Instruction::UIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPExt: @@ -2039,157 +2059,173 @@ getDependencyLink(State * N, Function & llvmIrFunction) * */ void upDateInstSignFlag(State * N, Function & llvmIrFunction, - std::map> & virtualRegisterRange, - std::map & typeChangedInst) { - for (BasicBlock & llvmIrBasicBlock : llvmIrFunction) { - for (BasicBlock::iterator itBB = llvmIrBasicBlock.begin(); itBB != llvmIrBasicBlock.end();) { - Instruction *llvmIrInstruction = &*itBB++; - if (llvmIrInstruction->getNumOperands() < 2) { - continue; - } - auto lhs = llvmIrInstruction->getOperand(0); - auto rhs = llvmIrInstruction->getOperand(1); - auto lhsIt = typeChangedInst.find(lhs); - auto rhsIt = typeChangedInst.find(rhs); - if ((lhsIt != typeChangedInst.end() || rhsIt != typeChangedInst.end())) { - // debug info: to check the range of operands - auto vrLhsIt = virtualRegisterRange.find(lhs); - auto vrRhsIt = virtualRegisterRange.find(rhs); -// assert(vrLhsIt != virtualRegisterRange.end() && vrRhsIt != virtualRegisterRange.end()); - switch (llvmIrInstruction->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::Shl: - { - /* - * nsw/nuw - * Implement when meet - * */ -// if (lhsIt->second.signFlag || rhsIt->second.signFlag) { - if (llvmIrInstruction->hasNoUnsignedWrap()) { - /* - * change to `nsw` - * */ - llvmIrInstruction->setHasNoUnsignedWrap(false); - } -// } else { - if (llvmIrInstruction->hasNoSignedWrap()) { - /* - * change to `nuw` - * */ - llvmIrInstruction->setHasNoSignedWrap(false); - } -// } -// flexprint(N->Fe, N->Fm, N->Fperr, -// "\tupDateInstSignFlag with nsw/nuw: Not Implement!\n"); - break; - } - /* - * Different inst for signed/unsigned. - * Should also care about - * 1. the extent. - * 2. one operand is signed, the other is unsigned. - * Check the LLVM Ref: https://llvm.org/docs/LangRef.html#llvm-language-reference-manual - * Implement when meet. - * */ - case Instruction::SDiv: - { - if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) { - IRBuilder<> Builder(&llvmIrBasicBlock); - Builder.SetInsertPoint(llvmIrInstruction); - auto UDivInst = Builder.CreateUDiv(lhs, rhs); - llvmIrInstruction->replaceAllUsesWith(UDivInst); - llvmIrInstruction->removeFromParent(); - } - break; - } - case Instruction::SRem: - { - if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) { - IRBuilder<> Builder(&llvmIrBasicBlock); - Builder.SetInsertPoint(llvmIrInstruction); - auto URemInst = Builder.CreateURem(lhs, rhs); - llvmIrInstruction->replaceAllUsesWith(URemInst); - llvmIrInstruction->removeFromParent(); - } - break; - } - case Instruction::AShr: - { - if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) { - IRBuilder<> Builder(&llvmIrBasicBlock); - Builder.SetInsertPoint(llvmIrInstruction); - auto LShrInst = Builder.CreateLShr(lhs, rhs); - llvmIrInstruction->replaceAllUsesWith(LShrInst); - llvmIrInstruction->removeFromParent(); - } - break; - } - case Instruction::ICmp: - if (auto llvmIrICmpInstruction = dyn_cast(llvmIrInstruction)) - { - if (llvmIrICmpInstruction->isUnsigned()) { - break; - } - auto lhs = llvmIrICmpInstruction->getOperand(0); - auto rhs = llvmIrICmpInstruction->getOperand(1); - /* - * If either of the operand is constant, - * and the variable operand can only change from `signed` to `unsigned`, - * so we only care about when the variable operand is `unsigned`. - * Note: here's instruction is signed! - * if the constant operand is negative value, the `scf by range` should simplify it - * if the constant operand is positive value, we can use `unsigned` flag - * */ - if ((isa(lhs) && !isa(rhs))) - { - llvmIrICmpInstruction->swapOperands(); - lhs = llvmIrICmpInstruction->getOperand(0); - rhs = llvmIrICmpInstruction->getOperand(1); - } - if (!isa(lhs) && isa(rhs)) { - ConstantInt * constInt = llvm::dyn_cast(rhs); - assert(nullptr != constInt && "ICmp: it's not a const int!!!!!!!!!!!\n"); - if (constInt->getSExtValue() < 0) { - /* - * the `scf by range` should simplify it - * */ - break; - } - - auto originalPred = llvmIrICmpInstruction->getPredicate(); - llvmIrICmpInstruction->setPredicate(ICmpInst::getUnsignedPredicate(originalPred)); - } else if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) { - /* - * If both of the operands are variable with different sign bit, - * we check the range of them (if we can), e.g. - * - * %c = icmp slt i16 %a, %b - * - * if the %a is unsigned, but the max range is less than 32767, we can ignore it. - * otherwise, it overflows, and we should extend the operands, like, - * - * %c = sext i16 %a to i32 - * %d = sext i16 %b to i32 - * %e = icmp slt i32 %c, %d - * %f = trunc i32 %c to i16 - * %g = trunc i32 %d to i16 - * - * Then we replace the `%f`, `%g` to `%a`, `%b`. - * And also replace the `%e` to the previous icmp result. - * */ - auto originalPred = llvmIrICmpInstruction->getPredicate(); - llvmIrICmpInstruction->setPredicate(ICmpInst::getUnsignedPredicate(originalPred)); -// flexprint(N->Fe, N->Fm, N->Fperr, -// "\tupDateInstSignFlag ICmp with both variable: Not Implement!\n"); - } - break; - } - } - } - } - } + std::map> & virtualRegisterRange, + std::map & typeChangedInst) +{ + for (BasicBlock & llvmIrBasicBlock : llvmIrFunction) + { + for (BasicBlock::iterator itBB = llvmIrBasicBlock.begin(); itBB != llvmIrBasicBlock.end();) + { + Instruction * llvmIrInstruction = &*itBB++; + if (llvmIrInstruction->getNumOperands() < 2) + { + continue; + } + auto lhs = llvmIrInstruction->getOperand(0); + auto rhs = llvmIrInstruction->getOperand(1); + auto lhsIt = typeChangedInst.find(lhs); + auto rhsIt = typeChangedInst.find(rhs); + if ((lhsIt != typeChangedInst.end() || rhsIt != typeChangedInst.end())) + { + // debug info: to check the range of operands + auto vrLhsIt = virtualRegisterRange.find(lhs); + auto vrRhsIt = virtualRegisterRange.find(rhs); + // assert(vrLhsIt != virtualRegisterRange.end() && vrRhsIt != virtualRegisterRange.end()); + switch (llvmIrInstruction->getOpcode()) + { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + { + /* + * nsw/nuw + * Implement when meet + * */ + // if (lhsIt->second.signFlag || rhsIt->second.signFlag) { + if (llvmIrInstruction->hasNoUnsignedWrap()) + { + /* + * change to `nsw` + * */ + llvmIrInstruction->setHasNoUnsignedWrap(false); + } + // } else { + if (llvmIrInstruction->hasNoSignedWrap()) + { + /* + * change to `nuw` + * */ + llvmIrInstruction->setHasNoSignedWrap(false); + } + // } + // flexprint(N->Fe, N->Fm, N->Fperr, + // "\tupDateInstSignFlag with nsw/nuw: Not Implement!\n"); + break; + } + /* + * Different inst for signed/unsigned. + * Should also care about + * 1. the extent. + * 2. one operand is signed, the other is unsigned. + * Check the LLVM Ref: https://llvm.org/docs/LangRef.html#llvm-language-reference-manual + * Implement when meet. + * */ + case Instruction::SDiv: + { + if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) + { + IRBuilder<> Builder(&llvmIrBasicBlock); + Builder.SetInsertPoint(llvmIrInstruction); + auto UDivInst = Builder.CreateUDiv(lhs, rhs); + llvmIrInstruction->replaceAllUsesWith(UDivInst); + llvmIrInstruction->eraseFromParent(); + } + break; + } + case Instruction::SRem: + { + if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) + { + IRBuilder<> Builder(&llvmIrBasicBlock); + Builder.SetInsertPoint(llvmIrInstruction); + auto URemInst = Builder.CreateURem(lhs, rhs); + llvmIrInstruction->replaceAllUsesWith(URemInst); + llvmIrInstruction->eraseFromParent(); + } + break; + } + case Instruction::AShr: + { + if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) + { + IRBuilder<> Builder(&llvmIrBasicBlock); + Builder.SetInsertPoint(llvmIrInstruction); + auto LShrInst = Builder.CreateLShr(lhs, rhs); + llvmIrInstruction->replaceAllUsesWith(LShrInst); + llvmIrInstruction->eraseFromParent(); + } + break; + } + case Instruction::ICmp: + if (auto llvmIrICmpInstruction = dyn_cast(llvmIrInstruction)) + { + if (llvmIrICmpInstruction->isUnsigned()) + { + break; + } + auto lhs = llvmIrICmpInstruction->getOperand(0); + auto rhs = llvmIrICmpInstruction->getOperand(1); + /* + * If either of the operand is constant, + * and the variable operand can only change from `signed` to `unsigned`, + * so we only care about when the variable operand is `unsigned`. + * Note: here's instruction is signed! + * if the constant operand is negative value, the `scf by range` should simplify it + * if the constant operand is positive value, we can use `unsigned` flag + * */ + if ((isa(lhs) && !isa(rhs))) + { + llvmIrICmpInstruction->swapOperands(); + lhs = llvmIrICmpInstruction->getOperand(0); + rhs = llvmIrICmpInstruction->getOperand(1); + } + if (!isa(lhs) && isa(rhs)) + { + ConstantInt * constInt = llvm::dyn_cast(rhs); + assert(nullptr != constInt && "ICmp: it's not a const int!!!!!!!!!!!\n"); + if (constInt->getSExtValue() < 0) + { + /* + * the `scf by range` should simplify it + * */ + break; + } + + auto originalPred = llvmIrICmpInstruction->getPredicate(); + llvmIrICmpInstruction->setPredicate(ICmpInst::getUnsignedPredicate(originalPred)); + } + else if (!lhsIt->second.signFlag && !rhsIt->second.signFlag) + { + /* + * If both of the operands are variable with different sign bit, + * we check the range of them (if we can), e.g. + * + * %c = icmp slt i16 %a, %b + * + * if the %a is unsigned, but the max range is less than 32767, we can ignore it. + * otherwise, it overflows, and we should extend the operands, like, + * + * %c = sext i16 %a to i32 + * %d = sext i16 %b to i32 + * %e = icmp slt i32 %c, %d + * %f = trunc i32 %c to i16 + * %g = trunc i32 %d to i16 + * + * Then we replace the `%f`, `%g` to `%a`, `%b`. + * And also replace the `%e` to the previous icmp result. + * */ + auto originalPred = llvmIrICmpInstruction->getPredicate(); + llvmIrICmpInstruction->setPredicate(ICmpInst::getUnsignedPredicate(originalPred)); + // flexprint(N->Fe, N->Fm, N->Fperr, + // "\tupDateInstSignFlag ICmp with both variable: Not Implement!\n"); + } + break; + } + } + } + } + } } void @@ -2199,10 +2235,10 @@ shrinkType(State * N, BoundInfo * boundInfo, Function & llvmIrFunction) * 1. construct instruction dependency link * 2. work with roll back strategies * */ - std::map typeChangedInst = shrinkInstType(N, boundInfo, llvmIrFunction); + std::map typeChangedInst = shrinkInstType(N, boundInfo, llvmIrFunction); mergeCast(N, llvmIrFunction, boundInfo->virtualRegisterRange, typeChangedInst); - upDateInstSignFlag(N, llvmIrFunction, boundInfo->virtualRegisterRange, typeChangedInst); + upDateInstSignFlag(N, llvmIrFunction, boundInfo->virtualRegisterRange, typeChangedInst); } } diff --git a/src/newton/newton.c b/src/newton/newton.c index 742630eab..c0d8f4742 100644 --- a/src/newton/newton.c +++ b/src/newton/newton.c @@ -91,14 +91,13 @@ #include "newton-irPass-signalTypedefGenerationBackend.h" #include "newton-irPass-sensors.h" -extern char * gNewtonAstNodeStrings[kNoisyIrNodeTypeMax]; +extern char * gNewtonAstNodeStrings[kNoisyIrNodeTypeMax]; static State * processNewtonFileDimensionPass(char * filename); - void -processNewtonFile(State * N, char * filename) +processNewtonFile(State * N, char * filename) { TimeStampTraceMacro(kNewtonTimeStampKey); @@ -112,12 +111,12 @@ processNewtonFile(State * N, char * filename) */ N->newtonIrTopScope = commonSymbolTableAllocScope(N); - State * N_dim = processNewtonFileDimensionPass(filename); + State * N_dim = processNewtonFileDimensionPass(filename); N->newtonIrTopScope->firstDimension = N_dim->newtonIrTopScope->firstDimension; if (N->newtonIrTopScope->firstDimension == NULL) { - char * details; + char * details; asprintf(&details, "%s\n", EnoValidDimensions); newtonParserSemanticError(N, kNewtonIrNodeType_PnewtonDescription, details); @@ -131,7 +130,7 @@ processNewtonFile(State * N, char * filename) if (!(N->irPasses & kNewtonIrPassSensorsDisable)) { irPassSensors(N); - } + } if (N->irPasses & kNewtonIrPassDimensionalMatrixAnnotation) { @@ -204,10 +203,12 @@ processNewtonFile(State * N, char * filename) } if (N->irPasses & kNewtonirPassLLVMIROptimizeByRange) { - bool enableQuantization = N->irPasses & kNewtonirPassLLVMIRAutoQuantization; - bool enableOverload = N->irPasses & kNewtonirPassLLVMIREnableOverload; - bool enableBuiltinAssume = N->irPasses & kNewtonirPassLLVMIREnableBuiltinAssume; - irPassLLVMIROptimizeByRange(N, enableQuantization, enableOverload, enableBuiltinAssume); + bool enableQuantization = N->irPasses & kNewtonirPassLLVMIRAutoQuantization; + bool enableOverload = N->irPasses & kNewtonirPassLLVMIREnableOverload; + bool enableBuiltinAssume = N->irPasses & kNewtonirPassLLVMIREnableBuiltinAssume; + bool enableQuantDecider = N->irPasses & kNewtonirPassLLVMIRQuantDeciderEnabled; + irPassLLVMIROptimizeByRange(N, enableQuantization, enableOverload, + enableBuiltinAssume, enableQuantDecider); } /* * Dot backend. @@ -215,7 +216,7 @@ processNewtonFile(State * N, char * filename) if (N->irBackends & kNewtonIrBackendDot) { fprintf(stdout, "%s\n", irPassDotBackend(N, N->newtonIrTopScope, N->newtonIrRoot, gNewtonAstNodeStrings)); - } + } /* * Smt backend @@ -256,15 +257,13 @@ processNewtonFile(State * N, char * filename) if (N->mode & kCommonModeCallStatistics) { - uint64_t irNodeCount = 0, symbolTableNodeCount = 0; - + uint64_t irNodeCount = 0, symbolTableNodeCount = 0; timeStampDumpResidencies(N); - irNodeCount = irPassHelperIrSize(N, N->newtonIrRoot); + irNodeCount = irPassHelperIrSize(N, N->newtonIrRoot); symbolTableNodeCount = irPassHelperSymbolTableSize(N, N->newtonIrTopScope); - flexprint(N->Fe, N->Fm, N->Fpinfo, "Intermediate Representation Information:\n\n"); flexprint(N->Fe, N->Fm, N->Fpinfo, " IR node count : %llu\n", irNodeCount); flexprint(N->Fe, N->Fm, N->Fpinfo, " Symbol Table node count : %llu\n", symbolTableNodeCount); @@ -311,11 +310,10 @@ processNewtonFile(State * N, char * filename) } } -static State* +static State * processNewtonFileDimensionPass(char * filename) { - State * N = init(kCommonModeDefault); - + State * N = init(kCommonModeDefault); /* * In this case, put macro here since it needs 'N'