From 859cbe2447af079f45fe3e16cc557085437c5148 Mon Sep 17 00:00:00 2001 From: jhale Date: Thu, 2 Feb 2023 15:15:22 +0000 Subject: [PATCH 1/6] Setup for paper. --- compilers.yaml | 28 +++++++++++++++++++++++----- run.py | 6 +++++- run.sh | 13 +++++++++++++ utils.py | 6 +----- 4 files changed, 42 insertions(+), 11 deletions(-) mode change 100644 => 100755 run.py create mode 100644 run.sh diff --git a/compilers.yaml b/compilers.yaml index 74676ba..8fd1681 100644 --- a/compilers.yaml +++ b/compilers.yaml @@ -1,9 +1,27 @@ -gcc-10: +gcc-12: version: - - 10 + - 12 cpp: - - g++-10 + - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/g++ cc: - - gcc-10 + - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/gcc flags: - - -Ofast -march=native -mprefer-vector-width=256 \ No newline at end of file + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=256 + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=scalable + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-tree-vectorize + - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb + - -O2 + +clang-15: + version: + - 15 + cc: + - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang + cpp: + - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang++ + flags: + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -mprefer-vector-width=256 + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb + - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-slp-vectorize + - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb + - -O2 diff --git a/run.py b/run.py old mode 100644 new mode 100755 index 2f7389a..e3471c8 --- a/run.py +++ b/run.py @@ -20,6 +20,9 @@ parser.add_argument('--conf', dest='conf', type=str, default="compilers.yaml", help="Configuration file describing the compilers and flags.") + + parser.add_argument('--output_file', dest='output_file', type=str, default="output/output.csv", + help="Configuration file describing the compilers and flags.") parser.add_argument('--degree', dest='degree', default=range(1, 4), nargs='+', help='Polynomial degree to evaluate the operators.') @@ -55,9 +58,10 @@ scalar_type = args.scalar_type mpi_size = args.mpi_size cell_type = args.cell_type + output_file = args.output_file machine = utils.machine_name() - out_file = utils.create_ouput(problem) + out_file = utils.create_output(problem, output_file) compilers = utils.parse_compiler_configuration(conf_file) # Set rank to 1 for matrix free, 2 otherwise diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..48aa4aa --- /dev/null +++ b/run.sh @@ -0,0 +1,13 @@ +#!/bin/bash -l +#SBATCH --job-name=mpi_job_test # Job name +#SBATCH --cpus-per-task=1 # Number of cores per MPI task +#SBATCH --nodes=1 # Maximum number of nodes to be allocated +#SBATCH --ntasks-per-node=8 # Maximum number of tasks on each node +#SBATCH --output=mpi_test_%j.log # Path to the standard output and error files relative to the working directory +#SBATCH -p small + +spack env activate ffcx +spack load cmake +python3 run.py --problem Laplacian --degree 8 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/laplacian_8_action.csv +python3 run.py --problem Laplacian --degree 3 --form_compiler=ffcx --global_size 10000000 --output_file=output/laplacian_3.csv +python3 run.py --problem N1curl --degree 4 --form_compiler=ffcx --global_size 10000000 --output_file=output/n1curl_4.csv diff --git a/utils.py b/utils.py index 1fcf22c..0857fb0 100644 --- a/utils.py +++ b/utils.py @@ -41,14 +41,10 @@ def machine_name(): return machine -def create_ouput(problem): +def create_output(problem, out_file): header = "machine,problem,compiler,version,flags,degree,fcomp,scalar,batch_size,rank,cell_type,ncells,time" - path = "output/" - out_file = path + str(problem) + ".txt" if not os.path.exists(out_file): - if not os.path.isdir(path): - os.mkdir(path) with open(out_file, "a") as f: f.write(header) return out_file From 49a3783f2af75f9955dc9245d541ed1e7e48e56d Mon Sep 17 00:00:00 2001 From: Michal Habera Date: Tue, 12 Dec 2023 15:23:49 +0000 Subject: [PATCH 2/6] Works with 0.7.2 --- compilers.yaml | 30 +++++++----------------------- ffcx/compile.py | 30 +++++++++++++++--------------- forms/Elasticity.ufl | 6 ++++-- run.sh | 5 ++--- 4 files changed, 28 insertions(+), 43 deletions(-) mode change 100644 => 100755 run.sh diff --git a/compilers.yaml b/compilers.yaml index 8fd1681..14af9b9 100644 --- a/compilers.yaml +++ b/compilers.yaml @@ -1,27 +1,11 @@ -gcc-12: +gcc-13: version: - - 12 + - 13 cpp: - - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/g++ + - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/bin/g++ cc: - - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-12.2.0-huc2sescmw646r26mnp2vgu54rybd4kr/bin/gcc + - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/bin/gcc flags: - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=256 - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -msve-vector-bits=scalable - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-tree-vectorize - - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb - - -O2 - -clang-15: - version: - - 15 - cc: - - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang - cpp: - - /home/jhale/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-12.2.0/llvm-15.0.7-yogi5jj4qamtiwozx7tj7b3imdf3nsku/bin/clang++ - flags: - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -mprefer-vector-width=256 - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb - - -Ofast -march=armv8.4-a+sve -mtune=neoverse-512tvb -fno-slp-vectorize - - -O2 -march=armv8.4-a+sve -mtune=neoverse-512tvb - - -O2 + - -Ofast -mcpu=neoverse-v1 + - -Ofast -mcpu=neoverse-v1 -fno-tree-vectorize + - -O2 -fno-tree-vectorize diff --git a/ffcx/compile.py b/ffcx/compile.py index 334cca3..b2a08ff 100644 --- a/ffcx/compile.py +++ b/ffcx/compile.py @@ -2,9 +2,9 @@ from ffcx.codegeneration.backend import FFCXBackend from ffcx.analysis import analyze_ufl_objects from ffcx.ir.representation import compute_ir -from ffcx.codegeneration.integrals import IntegralGenerator -from ffcx.element_interface import create_element -from ffcx.codegeneration.C.format_lines import format_indented_lines +from ffcx.codegeneration.integral_generator import IntegralGenerator +from ffcx.codegeneration.C.c_implementation import CFormatter +from basix import create_element from ffcx.options import get_options import basix import ufl @@ -80,13 +80,7 @@ def compute_integral_body(ir, backend): - # Configure kernel generator - ig = IntegralGenerator(ir, backend) - # Generate code ast for the tabulate_tensor body - parts = ig.generate() - # Format code as string - body = format_indented_lines(parts.cs_format(ir.precision), 1) - return body + return parts def compile_form(form: ufl.Form, name: str, @@ -121,8 +115,13 @@ def compile_form(form: ufl.Form, name: str, settings = {"scalar_type": scalar_type, "geom_type": geom_type} arguments = _arguments.format(**settings) signature = "inline void " + name + arguments - body = compute_integral_body(integral_ir, backend) - code = signature + " {\n" + body + "\n}\n" + # Configure kernel generator + ig = IntegralGenerator(integral_ir, backend) + # Generate code ast for the tabulate_tensor body + parts = ig.generate() + formatter = CFormatter(scalar_type) + body_c = formatter.c_format(parts) + code = signature + " {\n" + body_c + "\n}\n" return code @@ -146,17 +145,18 @@ def generate_code(action, scalar_type, global_size, batch_size): [problem.a], parameters).form_data[0].num_coefficients rank = 2 - element = create_element(problem.element) + print(dir(problem.element)) + element = problem.element num_nodes = element.cell().num_vertices() geom_type = scalar_type.replace(' _Complex', '') if batch_size > 1: - headers = _headers_batched.format(dim=element.dim, global_size=global_size, + headers = _headers_batched.format(dim=element.value_shape()[0], global_size=global_size, scalar_type=scalar_type, rank=rank, geom_type=geom_type, batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients) else: - headers = _headers.format(dim=element.dim, global_size=global_size, + headers = _headers.format(dim=element.value_shape()[0], global_size=global_size, scalar_type=scalar_type, rank=rank, geom_type=geom_type, batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients) diff --git a/forms/Elasticity.ufl b/forms/Elasticity.ufl index 4f06939..c3de508 100644 --- a/forms/Elasticity.ufl +++ b/forms/Elasticity.ufl @@ -1,4 +1,6 @@ -from ufl import * +import basix.ufl +from ufl import (Mesh, FiniteElement, FunctionSpace, TrialFunction, TestFunction, Coefficient, + sym, grad, inner, dx, action, tetrahedron, VectorElement) element = VectorElement("Lagrange", $cell, $degree) mesh = Mesh(VectorElement("Lagrange", $cell, 1)) @@ -17,4 +19,4 @@ def eps(v): a = k*inner(eps(u), eps(v))*dx un = Coefficient(V) -L = action(a, un) \ No newline at end of file +L = action(a, un) diff --git a/run.sh b/run.sh old mode 100644 new mode 100755 index 48aa4aa..acc25ca --- a/run.sh +++ b/run.sh @@ -8,6 +8,5 @@ spack env activate ffcx spack load cmake -python3 run.py --problem Laplacian --degree 8 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/laplacian_8_action.csv -python3 run.py --problem Laplacian --degree 3 --form_compiler=ffcx --global_size 10000000 --output_file=output/laplacian_3.csv -python3 run.py --problem N1curl --degree 4 --form_compiler=ffcx --global_size 10000000 --output_file=output/n1curl_4.csv +python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_2_action.csv +python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv From 07c9fee7d69d584c7fe541b66cb9c0961ab879d8 Mon Sep 17 00:00:00 2001 From: Michal Habera Date: Tue, 12 Dec 2023 15:52:51 +0000 Subject: [PATCH 3/6] Fix bug --- ffcx/compile.py | 12 +++++------- ffcx/main.cpp | 2 +- run.sh | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/ffcx/compile.py b/ffcx/compile.py index b2a08ff..bc4d6ad 100644 --- a/ffcx/compile.py +++ b/ffcx/compile.py @@ -4,7 +4,7 @@ from ffcx.ir.representation import compute_ir from ffcx.codegeneration.integral_generator import IntegralGenerator from ffcx.codegeneration.C.c_implementation import CFormatter -from basix import create_element +from basix.ufl import convert_ufl_element from ffcx.options import get_options import basix import ufl @@ -112,8 +112,7 @@ def compile_form(form: ufl.Form, name: str, geom_type += str(batch_size) scalar_type += str(batch_size) - settings = {"scalar_type": scalar_type, "geom_type": geom_type} - arguments = _arguments.format(**settings) + arguments = _arguments.format(scalar_type=scalar_type, geom_type=geom_type) signature = "inline void " + name + arguments # Configure kernel generator ig = IntegralGenerator(integral_ir, backend) @@ -145,18 +144,17 @@ def generate_code(action, scalar_type, global_size, batch_size): [problem.a], parameters).form_data[0].num_coefficients rank = 2 - print(dir(problem.element)) - element = problem.element + element = convert_ufl_element(problem.element) num_nodes = element.cell().num_vertices() geom_type = scalar_type.replace(' _Complex', '') if batch_size > 1: - headers = _headers_batched.format(dim=element.value_shape()[0], global_size=global_size, + headers = _headers_batched.format(dim=element.dim, global_size=global_size, scalar_type=scalar_type, rank=rank, geom_type=geom_type, batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients) else: - headers = _headers.format(dim=element.value_shape()[0], global_size=global_size, + headers = _headers.format(dim=element.dim, global_size=global_size, scalar_type=scalar_type, rank=rank, geom_type=geom_type, batch_size=batch_size, num_nodes=num_nodes, num_coefficients=num_coefficients) diff --git a/ffcx/main.cpp b/ffcx/main.cpp index 08ec743..ced55a6 100644 --- a/ffcx/main.cpp +++ b/ffcx/main.cpp @@ -61,4 +61,4 @@ int main(int argc, char *argv[]) MPI_Finalize(); return 0; -} \ No newline at end of file +} diff --git a/run.sh b/run.sh index acc25ca..e26710b 100755 --- a/run.sh +++ b/run.sh @@ -8,5 +8,5 @@ spack env activate ffcx spack load cmake -python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_2_action.csv -python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv +python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv +python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 100000 --output_file=output/elasticity_6_action.csv From 17a4b9534e11b71928db41d48f2760b340ca6d05 Mon Sep 17 00:00:00 2001 From: Michal Habera Date: Tue, 12 Dec 2023 16:12:38 +0000 Subject: [PATCH 4/6] Tweak. --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index e26710b..f44a828 100755 --- a/run.sh +++ b/run.sh @@ -9,4 +9,4 @@ spack env activate ffcx spack load cmake python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv -python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 100000 --output_file=output/elasticity_6_action.csv +python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv From 3249b780d1fa2cdf3ded362bf776a750920cbe40 Mon Sep 17 00:00:00 2001 From: Michal Habera Date: Thu, 14 Dec 2023 10:22:00 +0000 Subject: [PATCH 5/6] LLVM17 - results now match GCC13. --- compilers.yaml | 12 ++++++++++++ run.sh | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/compilers.yaml b/compilers.yaml index 14af9b9..85950c6 100644 --- a/compilers.yaml +++ b/compilers.yaml @@ -9,3 +9,15 @@ gcc-13: - -Ofast -mcpu=neoverse-v1 - -Ofast -mcpu=neoverse-v1 -fno-tree-vectorize - -O2 -fno-tree-vectorize + +llvm-17: + version: + - 17 + cpp: + - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-13.2.0/llvm-17.0.4-ti5dvewumkoer6wsn6v4eipjrrkb4cg5/bin/clang++ + cc: + - /home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-13.2.0/llvm-17.0.4-ti5dvewumkoer6wsn6v4eipjrrkb4cg5/bin/clang + flags: + - -Ofast -mcpu=neoverse-v1 -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64 + - -Ofast -mcpu=neoverse-v1 -fno-vectorize -fno-slp-vectorize -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64 + - -O2 -fno-vectorize -fno-slp-vectorize -Wl,-rpath,/home/mhabera/spack/opt/spack/linux-amzn2-neoverse_v1/gcc-7.3.1/gcc-13.2.0-nxk6zirma34efpxq7yod2wuyjhwwagwh/lib64 diff --git a/run.sh b/run.sh index f44a828..123d1ba 100755 --- a/run.sh +++ b/run.sh @@ -6,7 +6,9 @@ #SBATCH --output=mpi_test_%j.log # Path to the standard output and error files relative to the working directory #SBATCH -p small -spack env activate ffcx spack load cmake +spack load gcc@13.2.0+binutils +spack load llvm +spack env activate ffcx python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv From b32af42cdb46c880b40e80594a254be2edeaf7d9 Mon Sep 17 00:00:00 2001 From: Michal Habera Date: Thu, 21 Dec 2023 14:57:27 +0000 Subject: [PATCH 6/6] Run script. --- run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index 123d1ba..b4c55d6 100755 --- a/run.sh +++ b/run.sh @@ -10,5 +10,5 @@ spack load cmake spack load gcc@13.2.0+binutils spack load llvm spack env activate ffcx -python3 run.py --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv -python3 run.py --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv +python3 run.py --nrepeats 1 --problem Elasticity --degree 2 --form_compiler=ffcx --action --global_size 1000000 --output_file=output/elasticity_2_action.csv +python3 run.py --nrepeats 1 --problem Elasticity --degree 6 --form_compiler=ffcx --action --global_size 10000000 --output_file=output/elasticity_6_action.csv