From 859cbe2447af079f45fe3e16cc557085437c5148 Mon Sep 17 00:00:00 2001
From: jhale <jhale@ip-172-31-72-110.ec2.internal>
Date: Thu, 2 Feb 2023 15:15:22 +0000
Subject: [PATCH 1/6] Setup for paper.

---
 compilers.yaml | 28 +++++++++++++++++++++++-----
 run.py         |  6 +++++-
 run.sh         | 13 +++++++++++++
 utils.py       |  6 +-----
 4 files changed, 42 insertions(+), 11 deletions(-)
 mode change 100644 => 100755 run.py
 create mode 100644 run.sh

diff --git a/compilers.yaml b/compilers.yaml
index 74676ba..8fd1681 100644
--- a/compilers.yaml
+++ b/compilers.yaml
@@ -1,9 +1,27 @@
-gcc-10:
+gcc-12:
   version:
-    - 10
+    - 12
   cpp:
-    - g++-10
+    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/g++ 
   cc:
-    - gcc-10
+    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/gcc
   flags:
-    - -Ofast -march=native -mprefer-vector-width=256
\ No newline at end of file
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=256
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=scalable
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-tree-vectorize
+    - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb
+    - -O2
+      
+clang-15:
+  version:
+    - 15
+  cc: 
+    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang
+  cpp: 
+    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang++
+  flags:
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -mprefer-vector-width=256
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb
+    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-slp-vectorize
+    - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb
+    - -O2
diff --git a/run.py b/run.py
old mode 100644
new mode 100755
index 2f7389a..e3471c8
--- a/run.py
+++ b/run.py
@@ -20,6 +20,9 @@
 
     parser.add_argument('--conf', dest='conf', type=str, default="compilers.yaml",
                         help="Configuration file describing the compilers and flags.")
+    
+    parser.add_argument('--output_file', dest='output_file', type=str, default="output/output.csv",
+                        help="Path to the CSV output file.")
 
     parser.add_argument('--degree', dest='degree', default=range(1, 4), nargs='+',
                         help='Polynomial degree to evaluate the operators.')
@@ -55,9 +58,10 @@
     scalar_type = args.scalar_type
     mpi_size = args.mpi_size
     cell_type = args.cell_type
+    output_file = args.output_file
 
     machine = utils.machine_name()
-    out_file = utils.create_ouput(problem)
+    out_file = utils.create_output(problem, output_file)
     compilers = utils.parse_compiler_configuration(conf_file)
 
     # Set rank to 1 for matrix free, 2 otherwise
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..48aa4aa
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -l
+#SBATCH --job-name=mpi_job_test      # Job name
+#SBATCH --cpus-per-task=1            # Number of cores per MPI task 
+#SBATCH --nodes=1                    # Maximum number of nodes to be allocated
+#SBATCH --ntasks-per-node=8          # Maximum number of tasks on each node
+#SBATCH --output=mpi_test_%j.log     # Path to the standard output and error files relative to the working directory
+#SBATCH -p small
+
+spack env activate ffcx
+spack load cmake
+python3 run.py --problem Laplacian  --degree 8 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/laplacian_8_action.csv
+python3 run.py --problem Laplacian  --degree 3 --form_compiler=ffcx --global_size 10000000 --output_file=output/laplacian_3.csv
+python3 run.py --problem N1curl --degree 4 --form_compiler=ffcx --global_size 10000000 --output_file=output/n1curl_4.csv
diff --git a/utils.py b/utils.py
index 1fcf22c..0857fb0 100644
--- a/utils.py
+++ b/utils.py
@@ -41,14 +41,21 @@ def machine_name():
     return machine
 
 
-def create_ouput(problem):
+def create_output(problem, out_file):
+    """Return ``out_file``, creating it with the CSV header if it is missing.
+
+    ``problem`` is not used to build the path; the caller supplies the
+    full location in ``out_file``. Parent directories that do not exist
+    yet are created before the file is opened.
+    """
     header = "machine,problem,compiler,version,flags,degree,fcomp,scalar,batch_size,rank,cell_type,ncells,time"
-    path = "output/"
-    out_file = path + str(problem) + ".txt"
 
     if not os.path.exists(out_file):
-        if not os.path.isdir(path):
-            os.mkdir(path)
+        # open() does not create missing parent directories, so make them
+        # here; a bare file name has no directory part to create.
+        out_dir = os.path.dirname(out_file)
+        if out_dir:
+            os.makedirs(out_dir, exist_ok=True)
         with open(out_file, "a") as f:
             f.write(header)
     return out_file

From 49a3783f2af75f9955dc9245d541ed1e7e48e56d Mon Sep 17 00:00:00 2001
From: Michal Habera <michal.habera@rafinex.com>
Date: Tue, 12 Dec 2023 15:23:49 +0000
Subject: [PATCH 2/6] Works with 0.7.2

---
 compilers.yaml       | 30 +++++++-----------------------
 ffcx/compile.py      | 30 +++++++++++++++---------------
 forms/Elasticity.ufl |  6 ++++--
 run.sh               |  5 ++---
 4 files changed, 28 insertions(+), 43 deletions(-)
 mode change 100644 => 100755 run.sh

diff --git a/compilers.yaml b/compilers.yaml
index 8fd1681..14af9b9 100644
--- a/compilers.yaml
+++ b/compilers.yaml
@@ -1,27 +1,11 @@
-gcc-12:
+gcc-13:
   version:
-    - 12
+    - 13
   cpp:
-    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/g++ 
+    - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/bin/g++
   cc:
-    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/gcc
+    - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/bin/gcc
   flags:
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=256
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=scalable
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-tree-vectorize
-    - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb
-    - -O2
-      
-clang-15:
-  version:
-    - 15
-  cc: 
-    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang
-  cpp: 
-    - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang++
-  flags:
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -mprefer-vector-width=256
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb
-    - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-slp-vectorize
-    - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb
-    - -O2
+    - -Ofast -mcpu=neoverse-v1
+    - -Ofast -mcpu=neoverse-v1 -fno-tree-vectorize
+    - -O2 -fno-tree-vectorize
diff --git a/ffcx/compile.py b/ffcx/compile.py
index 334cca3..b2a08ff 100644
--- a/ffcx/compile.py
+++ b/ffcx/compile.py
@@ -2,9 +2,9 @@
 from ffcx.codegeneration.backend import FFCXBackend
 from ffcx.analysis import analyze_ufl_objects
 from ffcx.ir.representation import compute_ir
-from ffcx.codegeneration.integrals import IntegralGenerator
-from ffcx.element_interface import create_element
-from ffcx.codegeneration.C.format_lines import format_indented_lines
+from ffcx.codegeneration.integral_generator import IntegralGenerator
+from ffcx.codegeneration.C.c_implementation import CFormatter
+from basix import create_element
 from ffcx.options import get_options
 import basix
 import ufl
@@ -80,13 +80,7 @@
 
 
 def compute_integral_body(ir, backend):
-    # Configure kernel generator
-    ig = IntegralGenerator(ir, backend)
-    # Generate code ast for the tabulate_tensor body
-    parts = ig.generate()
-    # Format code as string
-    body = format_indented_lines(parts.cs_format(ir.precision), 1)
-    return body
+    return IntegralGenerator(ir, backend).generate()
 
 
 def compile_form(form: ufl.Form, name: str,
@@ -121,8 +115,13 @@ def compile_form(form: ufl.Form, name: str,
     settings = {"scalar_type": scalar_type, "geom_type": geom_type}
     arguments = _arguments.format(**settings)
     signature = "inline void " + name + arguments
-    body = compute_integral_body(integral_ir, backend)
-    code = signature + " {\n" + body + "\n}\n"
+    # Configure kernel generator
+    ig = IntegralGenerator(integral_ir, backend)
+    # Generate code ast for the tabulate_tensor body
+    parts = ig.generate()
+    formatter = CFormatter(scalar_type)
+    body_c = formatter.c_format(parts)
+    code = signature + " {\n" + body_c + "\n}\n"
 
     return code
 
@@ -146,17 +145,18 @@ def generate_code(action, scalar_type, global_size, batch_size):
             [problem.a], parameters).form_data[0].num_coefficients
         rank = 2
 
-    element = create_element(problem.element)
+    print(dir(problem.element))
+    element = problem.element
     num_nodes = element.cell().num_vertices()
     geom_type = scalar_type.replace(' _Complex', '')
 
     if batch_size > 1:
-        headers = _headers_batched.format(dim=element.dim, global_size=global_size,
+        headers = _headers_batched.format(dim=element.value_shape()[0], global_size=global_size,
                                           scalar_type=scalar_type, rank=rank, geom_type=geom_type,
                                           batch_size=batch_size, num_nodes=num_nodes,
                                           num_coefficients=num_coefficients)
     else:
-        headers = _headers.format(dim=element.dim, global_size=global_size,
+        headers = _headers.format(dim=element.value_shape()[0], global_size=global_size,
                                   scalar_type=scalar_type, rank=rank, geom_type=geom_type,
                                   batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients)
 
diff --git a/forms/Elasticity.ufl b/forms/Elasticity.ufl
index 4f06939..c3de508 100644
--- a/forms/Elasticity.ufl
+++ b/forms/Elasticity.ufl
@@ -1,4 +1,6 @@
-from ufl import *
+import basix.ufl
+from ufl import (Mesh, FiniteElement, FunctionSpace, TrialFunction, TestFunction, Coefficient,
+                sym, grad, inner, dx, action, tetrahedron, VectorElement)
 
 element = VectorElement("Lagrange", $cell, $degree)
 mesh = Mesh(VectorElement("Lagrange", $cell, 1))
@@ -17,4 +19,4 @@ def eps(v):
 a = k*inner(eps(u), eps(v))*dx
 
 un = Coefficient(V)
-L = action(a, un)
\ No newline at end of file
+L = action(a, un)
diff --git a/run.sh b/run.sh
old mode 100644
new mode 100755
index 48aa4aa..acc25ca
--- a/run.sh
+++ b/run.sh
@@ -8,6 +8,5 @@
 
 spack env activate ffcx
 spack load cmake
-python3 run.py --problem Laplacian  --degree 8 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/laplacian_8_action.csv
-python3 run.py --problem Laplacian  --degree 3 --form_compiler=ffcx --global_size 10000000 --output_file=output/laplacian_3.csv
-python3 run.py --problem N1curl --degree 4 --form_compiler=ffcx --global_size 10000000 --output_file=output/n1curl_4.csv
+python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_2_action.csv
+python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv

From 07c9fee7d69d584c7fe541b66cb9c0961ab879d8 Mon Sep 17 00:00:00 2001
From: Michal Habera <michal.habera@rafinex.com>
Date: Tue, 12 Dec 2023 15:52:51 +0000
Subject: [PATCH 3/6] Fix bug

---
 ffcx/compile.py | 12 +++++-------
 ffcx/main.cpp   |  2 +-
 run.sh          |  4 ++--
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/ffcx/compile.py b/ffcx/compile.py
index b2a08ff..bc4d6ad 100644
--- a/ffcx/compile.py
+++ b/ffcx/compile.py
@@ -4,7 +4,7 @@
 from ffcx.ir.representation import compute_ir
 from ffcx.codegeneration.integral_generator import IntegralGenerator
 from ffcx.codegeneration.C.c_implementation import CFormatter
-from basix import create_element
+from basix.ufl import convert_ufl_element
 from ffcx.options import get_options
 import basix
 import ufl
@@ -112,8 +112,7 @@ def compile_form(form: ufl.Form, name: str,
         geom_type += str(batch_size)
         scalar_type += str(batch_size)
 
-    settings = {"scalar_type": scalar_type, "geom_type": geom_type}
-    arguments = _arguments.format(**settings)
+    arguments = _arguments.format(scalar_type=scalar_type, geom_type=geom_type)
     signature = "inline void " + name + arguments
     # Configure kernel generator
     ig = IntegralGenerator(integral_ir, backend)
@@ -145,18 +144,17 @@ def generate_code(action, scalar_type, global_size, batch_size):
             [problem.a], parameters).form_data[0].num_coefficients
         rank = 2
 
-    print(dir(problem.element))
-    element = problem.element
+    element = convert_ufl_element(problem.element)
     num_nodes = element.cell().num_vertices()
     geom_type = scalar_type.replace(' _Complex', '')
 
     if batch_size > 1:
-        headers = _headers_batched.format(dim=element.value_shape()[0], global_size=global_size,
+        headers = _headers_batched.format(dim=element.dim, global_size=global_size,
                                           scalar_type=scalar_type, rank=rank, geom_type=geom_type,
                                           batch_size=batch_size, num_nodes=num_nodes,
                                           num_coefficients=num_coefficients)
     else:
-        headers = _headers.format(dim=element.value_shape()[0], global_size=global_size,
+        headers = _headers.format(dim=element.dim, global_size=global_size,
                                   scalar_type=scalar_type, rank=rank, geom_type=geom_type,
                                   batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients)
 
diff --git a/ffcx/main.cpp b/ffcx/main.cpp
index 08ec743..ced55a6 100644
--- a/ffcx/main.cpp
+++ b/ffcx/main.cpp
@@ -61,4 +61,4 @@ int main(int argc, char *argv[])
   MPI_Finalize();
 
   return 0;
-}
\ No newline at end of file
+}
diff --git a/run.sh b/run.sh
index acc25ca..e26710b 100755
--- a/run.sh
+++ b/run.sh
@@ -8,5 +8,5 @@
 
 spack env activate ffcx
 spack load cmake
-python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_2_action.csv
-python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv
+python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv
+python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 100000 --output_file=output/elasticity_6_action.csv

From 17a4b9534e11b71928db41d48f2760b340ca6d05 Mon Sep 17 00:00:00 2001
From: Michal Habera <michal.habera@rafinex.com>
Date: Tue, 12 Dec 2023 16:12:38 +0000
Subject: [PATCH 4/6] Tweak.

---
 run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run.sh b/run.sh
index e26710b..f44a828 100755
--- a/run.sh
+++ b/run.sh
@@ -9,4 +9,4 @@
 spack env activate ffcx
 spack load cmake
 python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv
-python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 100000 --output_file=output/elasticity_6_action.csv
+python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv

From 3249b780d1fa2cdf3ded362bf776a750920cbe40 Mon Sep 17 00:00:00 2001
From: Michal Habera <michal.habera@rafinex.com>
Date: Thu, 14 Dec 2023 10:22:00 +0000
Subject: [PATCH 5/6] LLVM17 - results now match GCC13.

---
 compilers.yaml | 12 ++++++++++++
 run.sh         |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/compilers.yaml b/compilers.yaml
index 14af9b9..85950c6 100644
--- a/compilers.yaml
+++ b/compilers.yaml
@@ -9,3 +9,15 @@ gcc-13:
     - -Ofast -mcpu=neoverse-v1
     - -Ofast -mcpu=neoverse-v1 -fno-tree-vectorize
     - -O2 -fno-tree-vectorize
+
+llvm-17:
+  version:
+    - 17
+  cpp:
+    - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-13.2.0/llvm-17.0.4-ti5dvewumkoer6wsn6v4eipjrrkb4cg5/bin/clang++
+  cc:
+    - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-13.2.0/llvm-17.0.4-ti5dvewumkoer6wsn6v4eipjrrkb4cg5/bin/clang
+  flags:
+    - -Ofast -mcpu=neoverse-v1 -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64
+    - -Ofast -mcpu=neoverse-v1 -fno-vectorize -fno-slp-vectorize -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64
+    - -O2 -fno-vectorize -fno-slp-vectorize -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64
diff --git a/run.sh b/run.sh
index f44a828..123d1ba 100755
--- a/run.sh
+++ b/run.sh
@@ -6,7 +6,9 @@
 #SBATCH --output=mpi_test_%j.log     # Path to the standard output and error files relative to the working directory
 #SBATCH -p small
 
-spack env activate ffcx
 spack load cmake
+spack load gcc@13.2.0+binutils
+spack load llvm
+spack env activate ffcx
 python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv
 python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv

From b32af42cdb46c880b40e80594a254be2edeaf7d9 Mon Sep 17 00:00:00 2001
From: Michal Habera <michal.habera@rafinex.com>
Date: Thu, 21 Dec 2023 14:57:27 +0000
Subject: [PATCH 6/6] Run script.

---
 run.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/run.sh b/run.sh
index 123d1ba..b4c55d6 100755
--- a/run.sh
+++ b/run.sh
@@ -10,5 +10,5 @@ spack load cmake
 spack load gcc@13.2.0+binutils
 spack load llvm
 spack env activate ffcx
-python3 run.py --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv
-python3 run.py --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv
+python3 run.py --nrepeats 1 --problem Elasticity  --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv
+python3 run.py --nrepeats 1 --problem Elasticity  --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv