diff --git a/fcm_bdiff.py b/fcm_bdiff.py deleted file mode 100644 index ec51ce23..00000000 --- a/fcm_bdiff.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python3 -# *********************************COPYRIGHT************************************ -# (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file COPYRIGHT.txt -# which you should have received as part of this distribution. -# *********************************COPYRIGHT************************************ -""" -This module provides the functionality to return a list of local files to -run tests on based on the branch-difference (to allow checking of only files -which a developer has actually modified on their branch) -""" - -import os -import re -import subprocess -import time - - -# ------------------------------------------------------------------------------ -class FCMError(Exception): - """ - Exception class for FCM commands - """ - - def __str__(self): - return '\nFCM command: "{0:s}"\nFailed with error: "{1:s}"'.format( - " ".join(self.args[0]), self.args[1].strip() - ) - - -# ------------------------------------------------------------------------------ -def is_trunk(url): - """ - Given an FCM url, returns True if it appears to be pointing to the - UM main trunk - """ - search = re.search( - r""" - (svn://fcm\d+/\w+_svn/\w+/trunk| - .*/svn/[\w\.]+/\w+/trunk| - ..*_svn/\w+/trunk) - """, - url, - flags=re.VERBOSE, - ) - return search is not None - - -# ------------------------------------------------------------------------------ -def text_decoder(bytes_type_string, codecs=["utf8", "cp1252"]): - """ - Given a bytes type string variable, attempt to decode it using the codecs - listed. - """ - - errors = [] - for codec in codecs: - try: - return bytes_type_string.decode(codec) - except UnicodeDecodeError as err: - errors.append(err) - - for error in errors: - print(error) - raise errors[0] - - -# ------------------------------------------------------------------------------ -def get_branch_info(branch, snooze=300, retries=0): - """ - Extract the output of the branch info command - (if the branch is the mirror, allow for a few retries in case - it hasn't picked up the latest commit yet) - """ - - command = ["fcm", "binfo", branch] - return run_fcm_command(command, retries, snooze) - - -# ------------------------------------------------------------------------------ -def get_bdiff_summarize(branch, snooze=300, retries=0): - """ - Extract the output of the branch diff command - (if the branch is the mirror, allow for a few retries in case - it hasn't picked up the latest commit yet) - """ - command = ["fcm", "bdiff", "--summarize", branch] - return run_fcm_command(command, retries, snooze) - - -# ------------------------------------------------------------------------------ -def get_branch_diff_filenames(branch=".", path_override=None): - """ - The main routine of this module, given the path to a working copy or the - URL of a branch (or simply run from within a working copy), returns a list - of filenames based on the FCM branch diff. In most cases it should try - to resolve to local filenames; - The base file path can be overridden, which may be helpful in suites. - If no working copy exists and the base path was not overridden, it will - return URLs in that case. - """ - - branch, retries = use_mirror(branch) - - # Get information about the branch - info = get_branch_info(branch, retries=retries) - - branch_url = get_url(info) - - # The branch should not be the trunk (a branch-diff would make no sense) - if is_trunk(branch_url): - print("{} appears to be the trunk, nothing to do!".format(branch_url)) - return [] - - # The branch parent should be the trunk; if it isn't assume this is a - # branch-of-branch (a test branch), and redirect the request to point at - # the parent branch - parent = get_branch_parent(info) - while not is_trunk(parent): - branch = parent - info = get_branch_info(branch, retries=retries) - parent = get_branch_parent(info) - - # The command `fcm bdiff --summarize ` returns a different - # format if the branch has been reversed off the trunk. The expected format - # is svn://fcm1/um.xm_svn/main/trunk/rose-stem/bin/suite_report.py - # but if it has been reversed then we get - # svn://fcm1/um.xm_svn/main/branches/dev/USER/BRANCH_NAME/PATH - # This results in an invalid path provided by relative_paths - bdiff = get_bdiff_summarize(branch, retries=retries) - - # Extract files from the bdiff that have been modified (M) or added (A). - # Strip whitespace, and remove blank lines while turning the output into - # a list of strings. - bdiff_files = [x.strip() for x in bdiff.split("\n") if x.strip()] - bdiff_files = [ - bfile.split()[1] - for bfile in bdiff_files - if bfile.split()[0].strip() == "M" or bfile.split()[0].strip() == "A" - ] - - # Convert the file paths to be relative to the current URL; to do this - # construct the base path of the trunk URL and compare it to the results - # of the bdiff command above - repos_root = get_repository_root(info) - relative_paths = [ - os.path.relpath(bfile, os.path.join(repos_root, "main", "trunk")) - for bfile in bdiff_files - ] - - # These relative paths can be joined to an appropriate base to complete - # the filenames to return - base_source_key = "SOURCE_UM_BASE" - if path_override is not None: - # Allows for 'user directed' path reconstruction. - # Particularly useful in rose stem. - base = path_override - bdiff_files = [os.path.join(base, bfile) for bfile in relative_paths] - elif base_source_key in os.environ: - # If running as a suite, the base path to the working copy can be used - # However, unless the suite task is running on a machine with the same - # path to the working copy, the task can't really make much use of - # this. - base = os.environ[base_source_key] - bdiff_files = [os.path.join(base, bfile) for bfile in relative_paths] - else: - # Otherwise stick to the original path/URL to the branch - bdiff_files = [os.path.join(branch, bfile) for bfile in relative_paths] - - return bdiff_files - - -# ------------------------------------------------------------------------------ -def run_fcm_command(command, max_retries, snooze): - """ - Run an fcm command, optionally retrying on failure. - """ - retries = 0 - while True: - result = subprocess.run( - command, - capture_output=True, - text=True, - timeout=120, - shell=False, - check=False, - ) - if result.returncode == 0: - return result.stdout - else: - retries += 1 - if retries > max_retries: - raise FCMError(command, result.stderr) - else: - time.sleep(snooze) - - -# ------------------------------------------------------------------------------ -def use_mirror(branch): - """ - Catch to work out if this is running as part of a suite using an - FCM mirror, if it is then redirect the request to the mirror. - If using the mirror then fcm calls can sometimes fail so specify a number - of retries for other routines to use. - - Returns updated branch URL and a number of retries - """ - - mirror_key = "SOURCE_UM_MIRROR" - if mirror_key in os.environ: - branch = os.environ[mirror_key] - retries = 2 - print(f"[INFO] Switching branch used for fcm command to: {branch}") - else: - retries = 0 - return branch, retries - - -# ------------------------------------------------------------------------------ -def get_repository_root(branch_info): - """ - Given the raw output from an fcm binfo command - which can be retrieved by - calling get_branch_info() - returns the Repository Root field - """ - repos_root = re.search( - r"^Repository Root:\s*(?P.*)\s*$", branch_info, flags=re.MULTILINE - ) - if repos_root: - repos_root = repos_root.group("url") - else: - raise Exception("Could not find Repository Root field") - return repos_root - - -# ------------------------------------------------------------------------------ -def get_branch_parent(branch_info): - """ - Given the raw output from an fcm binfo command - which can be retrieved by - calling get_branch_info() - returns the Branch Parent Field - """ - parent = re.search( - r"^Branch Parent:\s*(?P.*)$", branch_info, flags=re.MULTILINE - ) - if parent: - parent = parent.group("parent") - else: - raise Exception("Could not find Branch Parent field") - return parent - - -# ------------------------------------------------------------------------------ -def get_url(branch_info): - """ - Given the raw output from an fcm binfo command - which can be retrieved by - calling get_branch_info() - returns the URL field - """ - url = re.search(r"^URL:\s*(?P.*)$", branch_info, flags=re.MULTILINE) - if url: - url = url.group("url") - else: - raise Exception("Could not find URL field") - return url diff --git a/fcm_bdiff/__init__.py b/fcm_bdiff/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fcm_bdiff/fcm_bdiff.py b/fcm_bdiff/fcm_bdiff.py new file mode 100644 index 00000000..5e764e1f --- /dev/null +++ b/fcm_bdiff/fcm_bdiff.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python3 +# *********************************COPYRIGHT************************************ +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file COPYRIGHT.txt +# which you should have received as part of this distribution. +# *********************************COPYRIGHT************************************ +""" +This module provides the functionality to return a list of local files to +run tests on based on the branch-difference (to allow checking of only files +which a developer has actually modified on their branch) +""" + +import os +import re +import subprocess +import time +from pathlib import Path + + +# ------------------------------------------------------------------------------ +class FCMError(Exception): + """ + Exception class for FCM commands + """ + + def __str__(self): + return '\nFCM command: "{0:s}"\nFailed with error: "{1:s}"'.format( + " ".join(self.args[0]), self.args[1].strip() + ) + + +class FCMBase: + """Class which generates a branch diff.""" + + """ + This a modified (mangled) copy of the one Sam made in + bdiff/git_bdiff.py, to allow current scripts to try and migrate to + getting information from an instance of the same class. + Note that the version for Git has a small handful of methods, mostly + internal and some propeties. These are kept as close as possible to + version in git_bdiff.py. + Attributes used to navigate the horros of FCM and thus used in this + package are therefore preceded with an '_' and shouldn't be what is + being referred to outwith this class. Nor should the original + 'functions'... + """ + + # Name of primary branch - default is ~~main~~ Trunk, + # Not sure this will be required/used. It's just the git version defines it. + primary_branch = "trunk" + + def __init__(self, parent=None, repo=None): + """ + The 'git' version of this gets to assume 'repo' is a directory, + presumably containing a local 'clone' (of a fork of a repos). That + is not how we have worked previously with FCM, to which you could + give a path to a working copy, or a URL to a branch or the trunk on + the remote server. So, much of the initial stages here replicate the + kind of 'discovery' that was necessary for FCM that is hoped to + become outdated with Git. + """ + # use_mirror checks for SOURCE_UM_MIRROR env var, and if set + # redirects the branch to that value and sets retries. + # Otherwise it returns the branch unchanged and retries=0 + # Implies suite usage... + # _branch is the URL of the branch which, after the call to use_mirror, + # is the branch that was taken from the trunk (to avoid test branches etc) + self._branch, self._retries = self.use_mirror(repo or Path(".")) + self._branch_info = self.get_branch_info(retries=self._retries) + self._branch_url = self.get_url() + self._parent = self.get_branch_parent() + + # The branch parent(ancestor in git_bdiff) should be the trunk(main); if it isn't assume this is a + # branch-of-branch (a test branch), and redirect the request to point at + # the parent branch + while not self.is_trunk_test(self._parent): + self._branch = self._parent + self._branch_info = self.get_branch_info(retries=self._retries) + self._branch_url = self.get_url() + self._parent = self.get_branch_parent() + + def get_branch_name(self): + """ + Get the branch name from the branch URL. + Not sure how useful this will be in FCM world. + For now, define it to be the contants of the URL after .*/main/ has been + stripped off. i.e. it will start with trunk/... or branches/... + """ + pattern = rf"{self.get_repository_root()}/main/(.*)$" + match = re.match(pattern, self._branch_url) + if match: + result = match.group(1) + else: + raise FCMError("unable to get branch name") + return result + + def run_fcm_command(self, command, max_retries, snooze): + """ + Run an fcm command, optionally retrying on failure. + """ + retries = 0 + while True: + result = subprocess.run( + command, + capture_output=True, + text=True, + timeout=120, + shell=False, + check=False, + ) + if result.returncode == 0: + return result.stdout + else: + retries += 1 + if retries > max_retries: + raise FCMError(command, result.stderr) + else: + time.sleep(snooze) + + def use_mirror(self, branch): + """ + Catch to work out if this is running as part of a suite using an + FCM mirror, if it is then redirect the request to the mirror. + If using the mirror then fcm calls can sometimes fail so specify a number + of retries for other routines to use. + + Returns updated branch URL and a number of retries + """ + + mirror_key = "SOURCE_UM_MIRROR" + if mirror_key in os.environ: + branch = os.environ[mirror_key] + retries = 2 + print(f"[INFO] Switching branch used for fcm command to: {branch}") + else: + retries = 0 + return branch, retries + + def get_branch_info(self, snooze=300, retries=0): + """ + Extract the output of the branch info command + (if the branch is the mirror, allow for a few retries in case + it hasn't picked up the latest commit yet) + """ + + command = ["fcm", "binfo", self._branch] + branch_info = self.run_fcm_command(command, retries, snooze) + return branch_info + + def get_branch_parent(self): + """ + Given the raw output from an fcm binfo command - which can be retrieved by + calling get_branch_info() - returns the Branch Parent Field + """ + parent = re.search( + r"^Branch Parent:\s*(?P.*)$", + self._branch_info, + flags=re.MULTILINE, + ) + if parent: + parent = parent.group("parent") + else: + # Will end up here if _branch is the trunk. In which case we shold possibly return _branch? + parent = re.search( + r"^URL:\s*(?P.*)$", + self._branch_info, + flags=re.MULTILINE, + ) + if parent: + parent = parent.group("parent") + else: + raise Exception("Could not find Branch Parent field") + return parent + + def get_url(self): + """ + Given the raw output from an fcm binfo command - which can be retrieved + by calling get_branch_info() - returns the URL field + """ + url = re.search(r"^URL:\s*(?P.*)$", self._branch_info, flags=re.MULTILINE) + if url: + url = url.group("url") + else: + raise Exception("Could not find URL field") + return url + + def is_trunk_test(self, url): + """ + Given an FCM url, returns True if it appears to be pointing to the + UM main trunk + """ + search = re.search( + r""" + (svn://fcm\d+/\w+_svn/\w+/trunk| + .*/svn/[\w\.]+/\w+/trunk| + ..*_svn/\w+/trunk) + """, + url, + flags=re.VERBOSE, + ) + return search is not None + + def get_repository_root(self): + """ + Given the raw output from an fcm binfo command - which can be retrieved by + calling get_branch_info() - returns the Repository Root field + """ + repos_root = re.search( + r"^Repository Root:\s*(?P.*)\s*$", + self._branch_info, + flags=re.MULTILINE, + ) + if repos_root: + repos_root = repos_root.group("url") + else: + raise Exception("Could not find Repository Root field") + return repos_root + + def get_latest_commit(self): + """ + Given the raw output from an fcm binfo command - which can be retrieved by + calling get_branch_info() - returns the Last Changed Rev + """ + repos_rev = re.search( + r"^Last Changed Rev:\s*(?P.*)\s*$", + self._branch_info, + flags=re.MULTILINE, + ) + if repos_rev: + repos_rev = repos_rev.group("rev") + else: + raise Exception("Could not find Last Changed Rev field") + return repos_rev + + +# -------------------------------------------------------------------- +class FCMBDiff(FCMBase): + """Class which generates a branch diff.""" + + def __init__(self, parent=None, repo=None): + super().__init__(parent, repo) + self.parent = parent or self._parent + self.ancestor = self.get_branch_parent() + self.current = self.get_latest_commit() + self.branch = self.get_branch_name() + self.is_trunk = self.is_trunk_test(self._branch_url) + self.is_branch = not self.is_trunk + self.repos_root = self.get_repository_root() + + @property + def has_diverged(self): + """Whether the branch has diverged from its parent. + Bit vague here, so we're going to check to see if 'parent' had + an '@' in it denoting it's a branch of """ + match = re.match(r".*@(\d+)$", self.parent) + if match: + return True + else: + return False + + def files(self): + """Iterate over files changed on the branch.""" + dem_danged_files = self._get_files() + for line in dem_danged_files: + if line != "": + yield line + + def _get_files(self, path_override=None): + # The command `fcm bdiff --summarize ` returns a different + # format if the branch has been reversed off the trunk. The expected format + # is svn://fcm1/um.xm_svn/main/trunk/rose-stem/bin/suite_report.py + # but if it has been reversed then we get + # svn://fcm1/um.xm_svn/main/branches/dev/USER/BRANCH_NAME/PATH + # This results in an invalid path provided by relative_paths + bdiff = self.get_bdiff_summarize(retries=self._retries) + + # Extract files from the bdiff that have been modified (M) or added (A). + # Strip whitespace, and remove blank lines while turning the output into + # a list of strings. + bdiff_files = [x.strip() for x in bdiff.split("\n") if x.strip()] + bdiff_files = [ + bfile.split()[1] + for bfile in bdiff_files + if bfile.split()[0].strip() == "M" or bfile.split()[0].strip() == "A" + ] + + # Convert the file paths to be relative to the current URL; to do this + # construct the base path of the trunk URL and compare it to the results + # of the bdiff command above + repos_root = self.repos_root + relative_paths = [ + os.path.relpath(bfile, os.path.join(repos_root, "main", "trunk")) + for bfile in bdiff_files + ] + + # These relative paths can be joined to an appropriate base to complete + # the filenames to return + base_source_key = "SOURCE_UM_BASE" + if path_override is not None: + # Allows for 'user directed' path reconstruction. + # Particularly useful in rose stem. + base = path_override + bdiff_files = [os.path.join(base, bfile) for bfile in relative_paths] + elif base_source_key in os.environ: + # If running as a suite, the base path to the working copy can be used + # However, unless the suite task is running on a machine with the same + # path to the working copy, the task can't really make much use of + # this. + base = os.environ[base_source_key] + bdiff_files = [os.path.join(base, bfile) for bfile in relative_paths] + else: + # Otherwise stick to the original path/URL to the branch + bdiff_files = [ + os.path.join(self._branch, bfile) for bfile in relative_paths + ] + + return bdiff_files + + def get_bdiff_summarize(self, snooze=300, retries=0): + """ + Extract the output of the branch diff command + (if the branch is the mirror, allow for a few retries in case + it hasn't picked up the latest commit yet) + """ + command = ["fcm", "bdiff", "--summarize", self._branch] + return self.run_fcm_command(command, retries, snooze) + + +class FCMInfo(FCMBase): + """Class to hold FCM branch information. Mirroring the functionality + in the git_bdiff.GitBranchInfo class.""" + + def __init__(self, branch_info: str): + super().__init__(self, repo=None) + self.branch_name = self.get_branch_name() + + def is_main(self) -> bool: + """Return True if the branch is the main trunk.""" + return self.is_trunk_test(self._branch_url) diff --git a/script_umdp3_checker/__init__.py b/script_umdp3_checker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/script_umdp3_checker/bin/UMDP3.pm b/script_umdp3_checker/bin/UMDP3.pm deleted file mode 100644 index 1c6fcffd..00000000 --- a/script_umdp3_checker/bin/UMDP3.pm +++ /dev/null @@ -1,1769 +0,0 @@ -# *****************************COPYRIGHT******************************* -# (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file LICENSE -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* - -package UMDP3; - -# Package to contain subroutines which test for UMDP3 compliance. - -# Each subroutine has a standard interface: -# Input: Array of lines to test -# Output: Scalar value 0=pass, >0 = fail - -# Subroutines which don't obey this interface: -# get_include_number - returns the value of a variable scoped to this file -# (number of files using includes for variable declarations) -# remove_quoted - returns the input string having removed any quoted -# substrings (single or double). - -# Standard modules -use strict; -use warnings; -use 5.010; -use Text::Balanced qw(extract_quotelike extract_multiple); - -# Declare version - this is the last UM version this script was updated for: -our $VERSION = '13.5.0'; - -# Global variables - -my $number_of_files_with_variable_declarations_in_includes = 0; - -sub get_include_number { - return $number_of_files_with_variable_declarations_in_includes; -} - -sub remove_quoted { - my $line = shift; - - # Replace quoted strings with a blessed reference: - my @strings = extract_multiple( - $line, - [ - { - Quoted => sub { extract_quotelike( $_[0] ) } - }, - ] - ); - - # Stitch the non-quoted fields back together into a single string: - my $remainder = ""; - foreach my $string (@strings) { - $remainder .= $string if not( $string =~ /^Quoted=SCALAR/sxm ); - } - return $remainder; -} - -my @fortran_keywords = ( - 'ABORT', 'ABS', - 'ABSTRACT', 'ACCESS', - 'ACHAR', 'ACOS', - 'ACOSD', 'ACOSH', - 'ACTION', 'ADJUSTL', - 'ADJUSTR', 'ADVANCE', - 'AIMAG', 'AINT', - 'ALARM', 'ALGAMA', - 'ALL', 'ALLOCATABLE', - 'ALLOCATE', 'ALLOCATED', - 'ALOG', 'ALOG10', - 'AMAX0', 'AMAX1', - 'AMIN0', 'AMIN1', - 'AMOD', 'AND', - 'ANINT', 'ANY', - 'ASIN', 'ASIND', - 'ASINH', 'ASSIGN', - 'ASSIGNMENT', 'ASSOCIATE', - 'ASSOCIATED', 'ASYNCHRONOUS', - 'ATAN', 'ATAN2', - 'ATAN2D', 'ATAND', - 'ATANH', 'ATOMIC_ADD', - 'ATOMIC_AND', 'ATOMIC_CAS', - 'ATOMIC_DEFINE', 'ATOMIC_FETCH_ADD', - 'ATOMIC_FETCH_AND', 'ATOMIC_FETCH_OR', - 'ATOMIC_FETCH_XOR', 'ATOMIC_INT_KIND', - 'ATOMIC_LOGICAL_KIND', 'ATOMIC_OR', - 'ATOMIC_REF', 'ATOMIC_XOR', - 'BACKSPACE', 'BACKTRACE', - 'BESJ0', 'BESJ1', - 'BESJN', 'BESSEL_J0', - 'BESSEL_J1', 'BESSEL_JN', - 'BESSEL_Y0', 'BESSEL_Y1', - 'BESSEL_YN', 'BESY0', - 'BESY1', 'BESYN', - 'BGE', 'BGT', - 'BIND', 'BIT_SIZE', - 'BLANK', 'BLE', - 'BLOCK', 'BLT', - 'BTEST', 'CABS', - 'CALL', 'CASE', - 'CCOS', 'CDABS', - 'CDCOS', 'CDEXP', - 'CDLOG', 'CDSIN', - 'CDSQRT', 'CEILING', - 'CEXP', 'CHAR', - 'CHARACTER', 'CHARACTER_KINDS', - 'CHARACTER_STORAGE_SIZE', 'CHDIR', - 'CHMOD', 'CLASS', - 'CLOG', 'CLOSE', - 'CMPLX', 'CODIMENSION', - 'COMMAND_ARGUMENT_COUNT', 'COMMON', - 'COMPILER_OPTIONS', 'COMPILER_VERSION', - 'COMPLEX', 'CONCURRENT', - 'CONJG', 'CONTAINS', - 'CONTIGUOUS', 'CONTINUE', - 'CONVERT', 'COS', - 'COSD', 'COSH', - 'COTAN', 'COTAND', - 'COUNT', 'CO_BROADCAST', - 'CO_MAX', 'CO_MIN', - 'CO_REDUCE', 'CO_SUM', - 'CPP', 'CPU_TIME', - 'CQABS', 'CQCOS', - 'CQEXP', 'CQLOG', - 'CQSIN', 'CQSQRT', - 'CSHIFT', 'CSIN', - 'CSQRT', 'CTIME', - 'CYCLE', 'C_ALERT', - 'C_ASSOCIATED', 'C_BACKSPACE', - 'C_BOOL', 'C_CARRIAGE_RETURN', - 'C_CHAR', 'C_DOUBLE', - 'C_DOUBLE_COMPLEX', 'C_FLOAT', - 'C_FLOAT128', 'C_FLOAT128_COMPLEX', - 'C_FLOAT_COMPLEX', 'C_FORM_FEED', - 'C_FUNLOC', 'C_FUNPTR', - 'C_F_POINTER', 'C_F_PROCPOINTER', - 'C_HORIZONTAL_TAB', 'C_INT', - 'C_INT128_T', 'C_INT16_T', - 'C_INT32_T', 'C_INT64_T', - 'C_INT8_T', 'C_INTMAX_T', - 'C_INTPTR_T', 'C_INT_FAST128_T', - 'C_INT_FAST16_T', 'C_INT_FAST32_T', - 'C_INT_FAST64_T', 'C_INT_FAST8_T', - 'C_INT_LEAST128_T', 'C_INT_LEAST16_T', - 'C_INT_LEAST32_T', 'C_INT_LEAST64_T', - 'C_INT_LEAST8_T', 'C_LOC', - 'C_LONG', 'C_LONG_DOUBLE', - 'C_LONG_DOUBLE_COMPLEX', 'C_LONG_LONG', - 'C_NEW_LINE', 'C_NULL_CHAR', - 'C_NULL_FUNPTR', 'C_NULL_PTR', - 'C_PTR', 'C_PTRDIFF_T', - 'C_SHORT', 'C_SIGNED_CHAR', - 'C_SIZEOF', 'C_SIZE_T', - 'C_VERTICAL_TAB', 'DABS', - 'DACOS', 'DACOSH', - 'DASIN', 'DASINH', - 'DATA', 'DATAN', - 'DATAN2', 'DATANH', - 'DATE_AND_TIME', 'DBESJ0', - 'DBESJ1', 'DBESJN', - 'DBESY0', 'DBESY1', - 'DBESYN', 'DBLE', - 'DCMPLX', 'DCONJG', - 'DCOS', 'DCOSH', - 'DDIM', 'DEALLOCATE', - 'DECODE', 'DEFERRED', - 'DELIM', 'DERF', - 'DERFC', 'DEXP', - 'DFLOAT', 'DGAMMA', - 'DIGITS', 'DIM', - 'DIMAG', 'DIMENSION', - 'DINT', 'DIRECT', - 'DLGAMA', 'DLOG', - 'DLOG10', 'DMAX1', - 'DMIN1', 'DMOD', - 'DNINT', 'DO', - 'DOT_PRODUCT', 'DOUBLE', - 'DPROD', 'DREAL', - 'DSHIFTL', 'DSHIFTR', - 'DSIGN', 'DSIN', - 'DSINH', 'DSQRT', - 'DTAN', 'DTANH', - 'DTIME', 'ELEMENTAL', - 'ELSE', 'ENCODE', - 'END', 'ENTRY', - 'ENUM', 'ENUMERATOR', - 'EOR', 'EOSHIFT', - 'EPSILON', 'EQ', - 'EQUIVALENCE', 'EQV', - 'ERF', 'ERFC', - 'ERFC_SCALED', 'ERRMSG', - 'ERROR', 'ERROR_UNIT', - 'ETIME', 'EVENT_QUERY', - 'EXECUTE_COMMAND_LINE', 'EXIST', - 'EXIT', 'EXP', - 'EXPONENT', 'EXTENDS', - 'EXTENDS_TYPE_OF', 'EXTERNAL', - 'FALSE', 'FDATE', - 'FGET', 'FGETC', - 'FILE', 'FILE_STORAGE_SIZE', - 'FINAL', 'FLOAT', - 'FLOOR', 'FLUSH', - 'FMT', 'FNUM', - 'FORALL', 'FORM', - 'FORMAT', 'FORMATTED', - 'FPP', 'FPUT', - 'FPUTC', 'FRACTION', - 'FREE', 'FSEEK', - 'FSTAT', 'FTELL', - 'FUNCTION', 'GAMMA', - 'GE', 'GENERIC', - 'GERROR', 'GETARG', - 'GETCWD', 'GETENV', - 'GETGID', 'GETLOG', - 'GETPID', 'GETUID', - 'GET_COMMAND', 'GET_COMMAND_ARGUMENT', - 'GET_ENVIRONMENT_VARIABLE', 'GMTIME', - 'GO', 'GT', - 'HOSTNM', 'HUGE', - 'HYPOT', 'IABS', - 'IACHAR', 'IALL', - 'IAND', 'IANY', - 'IARGC', 'IBCLR', - 'IBITS', 'IBSET', - 'ICHAR', 'IDATE', - 'IDIM', 'IDINT', - 'IDNINT', 'IEEE_CLASS', - 'IEEE_CLASS_TYPE', 'IEEE_COPY_SIGN', - 'IEEE_IS_FINITE', 'IEEE_IS_NAN', - 'IEEE_IS_NEGATIVE', 'IEEE_IS_NORMAL', - 'IEEE_LOGB', 'IEEE_NEGATIVE_DENORMAL', - 'IEEE_NEGATIVE_INF', 'IEEE_NEGATIVE_NORMAL', - 'IEEE_NEGATIVE_ZERO', 'IEEE_NEXT_AFTER', - 'IEEE_POSITIVE_DENORMAL', 'IEEE_POSITIVE_INF', - 'IEEE_POSITIVE_NORMAL', 'IEEE_POSITIVE_ZERO', - 'IEEE_QUIET_NAN', 'IEEE_REM', - 'IEEE_RINT', 'IEEE_SCALB', - 'IEEE_SELECTED_REAL_KIND', 'IEEE_SIGNALING_NAN', - 'IEEE_SUPPORT_DATATYPE', 'IEEE_SUPPORT_DENORMAL', - 'IEEE_SUPPORT_DIVIDE', 'IEEE_SUPPORT_INF', - 'IEEE_SUPPORT_NAN', 'IEEE_SUPPORT_SQRT', - 'IEEE_SUPPORT_STANDARD', 'IEEE_UNORDERED', - 'IEEE_VALUE', 'IEOR', - 'IERRNO', 'IF', - 'IFIX', 'IMAG', - 'IMAGES', 'IMAGE_INDEX', - 'IMAGPART', 'IMPLICIT', - 'IMPORT', 'IN', - 'INCLUDE', 'INDEX', - 'INPUT_UNIT', 'INQUIRE', - 'INT', 'INT16', - 'INT2', 'INT32', - 'INT64', 'INT8', - 'INTEGER', 'INTEGER_KINDS', - 'INTENT', 'INTERFACE', - 'INTRINSIC', 'IOMSG', - 'IOR', 'IOSTAT', - 'IOSTAT_END', 'IOSTAT_EOR', - 'IOSTAT_INQUIRE_INTERNAL_UNIT', 'IPARITY', - 'IQINT', 'IRAND', - 'IS', 'ISATTY', - 'ISHFT', 'ISHFTC', - 'ISIGN', 'ISNAN', - 'ISO_C_BINDING', 'ISO_FORTRAN_ENV', - 'IS_IOSTAT_END', 'IS_IOSTAT_EOR', - 'ITIME', 'KILL', - 'KIND', 'LBOUND', - 'LCOBOUND', 'LE', - 'LEADZ', 'LEN', - 'LEN_TRIM', 'LGAMMA', - 'LGE', 'LGT', - 'LINK', 'LLE', - 'LLT', 'LNBLNK', - 'LOC', 'LOCK', - 'LOCK_TYPE', 'LOG', - 'LOG10', 'LOGICAL', - 'LOGICAL_KINDS', 'LOG_GAMMA', - 'LONG', 'LSHIFT', - 'LSTAT', 'LT', - 'LTIME', 'MALLOC', - 'MASKL', 'MASKR', - 'MATMUL', 'MAX', - 'MAX0', 'MAX1', - 'MAXEXPONENT', 'MAXLOC', - 'MAXVAL', 'MCLOCK', - 'MCLOCK8', 'MEMORY', - 'MERGE', 'MERGE_BITS', - 'MIN', 'MIN0', - 'MIN1', 'MINEXPONENT', - 'MINLOC', 'MINVAL', - 'MOD', 'MODULE', - 'MODULO', 'MOVE_ALLOC', - 'MVBITS', 'NAME', - 'NAMED', 'NAMELIST', - 'NE', 'NEAREST', - 'NEQV', 'NEW_LINE', - 'NEXTREC', 'NINT', - 'NML', 'NONE', - 'NON_INTRINSIC', 'NON_OVERRIDABLE', - 'NOPASS', 'NORM2', - 'NOT', 'NULL', - 'NULLIFY', 'NUMBER', - 'NUMERIC_STORAGE_SIZE', 'NUM_IMAGES', - 'ONLY', 'OPEN', - 'OPENED', 'OPERATOR', - 'OPTIONAL', 'OR', - 'OUT', 'OUTPUT_UNIT', - 'PACK', 'PAD', - 'PARAMETER', 'PARITY', - 'PASS', 'PERROR', - 'POINTER', 'POPCNT', - 'POPPAR', 'POSITION', - 'PRECISION', 'PRESENT', - 'PRINT', 'PRIVATE', - 'PROCEDURE', 'PRODUCT', - 'PROGRAM', 'PROTECTED', - 'PUBLIC', 'PURE', - 'QABS', 'QACOS', - 'QASIN', 'QATAN', - 'QATAN2', 'QCMPLX', - 'QCONJG', 'QCOS', - 'QCOSH', 'QDIM', - 'QERF', 'QERFC', - 'QEXP', 'QGAMMA', - 'QIMAG', 'QLGAMA', - 'QLOG', 'QLOG10', - 'QMAX1', 'QMIN1', - 'QMOD', 'QNINT', - 'QSIGN', 'QSIN', - 'QSINH', 'QSQRT', - 'QTAN', 'QTANH', - 'RADIX', 'RAN', - 'RAND', 'RANDOM_NUMBER', - 'RANDOM_SEED', 'RANGE', - 'RANK', 'READ', - 'READWRITE', 'REAL', - 'REAL128', 'REAL32', - 'REAL64', 'REALPART', - 'REAL_KINDS', 'REC', - 'RECL', 'RECORD', - 'RECURSIVE', 'RENAME', - 'REPEAT', 'RESHAPE', - 'RESULT', 'RETURN', - 'REWIND', 'REWRITE', - 'RRSPACING', 'RSHIFT', - 'SAME_TYPE_AS', 'SAVE', - 'SCALE', 'SCAN', - 'SECNDS', 'SECOND', - 'SELECT', 'SELECTED_CHAR_KIND', - 'SELECTED_INT_KIND', 'SELECTED_REAL_KIND', - 'SEQUENCE', 'SEQUENTIAL', - 'SET_EXPONENT', 'SHAPE', - 'SHIFTA', 'SHIFTL', - 'SHIFTR', 'SHORT', - 'SIGN', 'SIGNAL', - 'SIN', 'SIND', - 'SINH', 'SIZE', - 'SIZEOF', 'SLEEP', - 'SNGL', 'SOURCE', - 'SPACING', 'SPREAD', - 'SQRT', 'SRAND', - 'STAT', 'STATUS', - 'STAT_FAILED_IMAGE', 'STAT_LOCKED', - 'STAT_LOCKED_OTHER_IMAGE', 'STAT_STOPPED_IMAGE', - 'STAT_UNLOCKED', 'STOP', - 'STORAGE_SIZE', 'STRUCTURE', - 'SUBMODULE', 'SUBROUTINE', - 'SUM', 'SYMLNK', - 'SYNC', 'SYSTEM', - 'SYSTEM_CLOCK', 'TAN', - 'TAND', 'TANH', - 'TARGET', 'THEN', - 'THIS_IMAGE', 'TIME', - 'TIME8', 'TINY', - 'TO', 'TRAILZ', - 'TRANSFER', 'TRANSPOSE', - 'TRIM', 'TRUE', - 'TTYNAM', 'TYPE', - 'UBOUND', 'UCOBOUND', - 'UMASK', 'UNFORMATTED', - 'UNIT', 'UNLINK', - 'UNLOCK', 'UNPACK', - 'USE', 'VALUE', - 'VERIF', 'VERIFY', - 'VOLATILE', 'WAIT', - 'WHERE', 'WHILE', - 'WRITE', 'XOR', - 'ZABS', 'ZCOS', - 'ZEXP', 'ZLOG', - 'ZSIN', 'ZSQRT', - '\.AND\.', '\.EQV\.', - '\.EQ\.', '\.FALSE\.', - '\.GE\.', '\.GT\.', - '\.LE\.', '\.LT\.', - '\.NEQV\.', '\.NE\.', - '\.NOT\.', '\.OR\.', - '\.TRUE\.', '\.XOR\.', -); - -my @archaic_fortran_keywords = ( - 'ALOG', 'ALOG10', 'AMAX0', 'AMAX1', 'AMIN0', 'AMIN1', - 'AMOD', 'CABS', 'CCOS', 'CEXP', 'CLOG', 'CSIN', - 'CSQRT', 'DABS', 'DACOS', 'DASIN', 'DATAN', 'DATAN2', - 'DBESJ0', 'DBESJ1', 'DBESJN', 'DBESY0', 'DBESY1', 'DBESYN', - 'DCOS', 'DCOSH', 'DDIM', 'DERF', 'DERFC', 'DEXP', - 'DINT', 'DLOG', 'DLOG10', 'DMAX1', 'DMIN1', 'DMOD', - 'DNINT', 'DSIGN', 'DSIN', 'DSINH', 'DSQRT', 'DTAN', - 'DTANH', 'FLOAT', 'IABS', 'IDIM', 'IDINT', 'IDNINT', - 'IFIX', 'ISIGN', 'LONG', 'MAX0', 'MAX1', 'MIN0', - 'MIN1', 'SNGL', 'ZABS', 'ZCOS', 'ZEXP', 'ZLOG', - 'ZSIN', 'ZSQRT', -); - -my @openmp_keywords = ( - 'PARALLEL', 'MASTER', 'CRITICAL', 'ATOMIC', - 'SECTIONS', 'WORKSHARE', 'TASK', 'BARRIER', - 'TASKWAIT', 'FLUSH', 'ORDERED', 'THREADPRIVATE', - 'SHARED', 'DEFAULT', 'FIRSTPRIVATE', 'LASTPRIVATE', - 'COPYIN', 'COPYPRIVATE', 'REDUCTION', -); - -my @fortran_types = ( - 'TYPE', 'CLASS', 'INTEGER', 'REAL', - 'DOUBLE PRECISION', 'CHARACTER', 'LOGICAL', 'COMPLEX', - 'ENUMERATOR', -); - -my @unseparated_keywords = ( - 'BLOCKDATA', 'DOUBLEPRECISION', 'ELSEIF', 'ELSEWHERE', - 'ENDASSOCIATE', 'ENDBLOCK', 'ENDBLOCKDATA', 'ENDCRITICAL', - 'ENDDO', 'ENDENUM', 'ENDFILE', 'ENDFORALL', - 'ENDFUNCTION', 'ENDIF', 'ENDINTERFACE', 'ENDMODULE', - 'ENDPARALLEL', 'ENDPARALLELDO', 'ENDPROCEDURE', 'ENDPROGRAM', - 'ENDSELECT', 'ENDSUBROUTINE', 'ENDTYPE', 'ENDWHERE', - 'GOTO', 'INOUT', 'PARALLELDO', 'SELECTCASE', - 'SELECTTYPE', -); - -my @intrinsic_modules_keywords = ( - 'ISO_C_BINDING', 'ISO_FORTRAN_ENV', - 'IEEE_ARITHMETIC', 'IEEE_EXCEPTIONS', - 'IEEE_FEATURES', -); - -sub get_fortran_keywords { - return @fortran_keywords; -} - -sub get_openmp_keywords { - return @openmp_keywords; -} - -sub get_archaic_fortran_keywords { - return @archaic_fortran_keywords; -} - -sub get_unseparated_keywords { - return @unseparated_keywords; -} - -sub get_intrinsic_modules_keywords { - return @intrinsic_modules_keywords; -} - -# List of uncapitalised keywords present in most recently tested file -my %extra_error_information = (); - -sub get_extra_error_information { - return %extra_error_information; -} - -sub reset_extra_error_information { - %extra_error_information = (); -} -################################# UMDP3 tests ################################# - -# Check for uncapitalised keywords -sub capitalised_keywords { - my @lines = @_; - my $failed = 0; - - # Iterate over lines and keywords - foreach my $line (@lines) { - my @keywords_to_check = get_fortran_keywords(); - - $line = remove_quoted($line); - - next unless $line; - next unless $line =~ /\S/sxm; # If line empty, try the next - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - if ( $line =~ /^!\$/sxm ) { - push @keywords_to_check, get_openmp_keywords(); - } - - foreach my $keyword (@keywords_to_check) { - - # If the keyword is present on the line - if ( $line =~ /(^|\W)$keyword(\W|$)/sxmi ) { - - if ( $line =~ /\(\s*kind\s*=.*::/sxm ) { - $extra_error_information{'KIND'}++; - $failed++; - } - - # Ignore cases such as RESHAPE(len=something) where 'len' would - # otherwise be triggered - next if ( $line =~ /,\s*$keyword\s*=/sxmi ); - next if ( $line =~ /\(\s*$keyword\s*=/sxmi ); - - # Ignore CPP - next if ( $line =~ /^\s*\#/sxm ); - - # Fail if the keyword occurance(s) are not uppercase - while ( $line =~ s/(^|\W)($keyword)(\W|$)/ /sxmi ) { - unless ( $2 =~ /$keyword/sxm ) { - $extra_error_information{$keyword}++; - $failed++; - } - } - - } - } - } - - return $failed; -} - -# OpenMP sentinels must be in column one -sub openmp_sentinels_in_column_one { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - # Check for one or more spaces before !$ - $failed++ if ( $line =~ /\s+!\$/sxm ); - } - - return $failed; -} - -# ENDIF, etc should be END IF -sub unseparated_keywords { - my @lines = @_; - - my @keywords = get_unseparated_keywords(); - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Check for frequent ones - should rewrite as a loop - unless ( $line =~ /^\s*\#/sxm ) { # Ignore CPP - foreach my $keyword (@keywords) { - if ( $line =~ /(^|\W)$keyword(\W|$)/sxmi ) { - $failed++; - $extra_error_information{$keyword}++; - } - } - } - } - - return $failed; -} - -# PAUSE and EQUIVALENCE are forbidden -sub forbidden_keywords { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - $failed++ if ( $line =~ /(^|\W)EQUIVALENCE(\W|$)/sxmi ); - $failed++ if ( $line =~ /(^|\W)PAUSE(\W|$)/sxmi ); - } - - return $failed; -} - -# Older forms of relational operators are forbidden -sub forbidden_operators { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - $failed++ if ( $line =~ /\.GT\./sxmi ); - $failed++ if ( $line =~ /\.GE\./sxmi ); - $failed++ if ( $line =~ /\.LT\./sxmi ); - $failed++ if ( $line =~ /\.LE\./sxmi ); - $failed++ if ( $line =~ /\.EQ\./sxmi ); - $failed++ if ( $line =~ /\.NE\./sxmi ); - } - - return $failed; -} - -# Any GO TO must go to 9999 -sub go_to_other_than_9999 { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Find lines matching GO TO - if ( $line =~ /GO\s*TO/sxmi ) { - - # If the line number isn't 9999 - unless ( $line =~ /GO\s*TO\s*9999/sxmi ) { - $failed++; - } - } - - } - - return $failed; -} - -# WRITE must specify a proper format -sub write_using_default_format { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Check for WRITE(...*) - if ( $line =~ /WRITE\s*\(.*\*\)/sxmi ) { - $failed++; - } - - } - - return $failed; -} - -sub lowercase_variable_names { - my @lines = @_; - - my $failed = 0; - my @variables; - - # Make a list of variables - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - if ( $line =~ /^\s*REAL/sxmi - or $line =~ /^\s*INTEGER/sxmi - or $line =~ /^\s*LOGICAL/sxm - or $line =~ /^\s*CHARACTER/sxm ) - { - if ( $line =~ /::/sxm ) { - $line =~ /::\s*(\w+)/sxm; - my $variable = $1; - next unless ($variable); - push @variables, $variable; - } - } - } - - # Search the code for these variables - foreach my $line (@lines) { - - # Ignore CPP defs: - next if ( $line =~ /^\s*\#/sxm ); - - $line = remove_quoted($line); - - foreach my $variable (@variables) { - if ( $line =~ /\b($variable)\b/sxmi ) { - my $instance_of_variable = $1; - -# If the variable is 4 or more characters and is uppercase in the declaration fail the test -# The length test is because some short scientific quantities could legitimately be uppercase. - next if ( length $variable < 4 ); - - if ( $instance_of_variable eq "\U$instance_of_variable" ) { - $failed++; - $extra_error_information{$instance_of_variable}++; - } - } - } - } - - return $failed; -} - -sub include_files_for_variable_declarations { - my @lines = @_; - - my $failed = 0; - - my $found_dr_hook = 0; - foreach my $line (@lines) { - $found_dr_hook++ if ( $line =~ /CALL\s+dr_hook/sxmi ); - } - - # File which don't have directly executable code automatically pass this - return 0 unless $found_dr_hook; - - foreach my $line (@lines) { - $failed++ if ( $line =~ /^\s*\#include/sxm ); - last if ( $line =~ /CALL\s+dr_hook/sxmi ); - } - - $number_of_files_with_variable_declarations_in_includes++ if $failed; - return $failed; -} - -sub dimension_forbidden { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - next unless $line; - $failed++ if ( $line =~ /(^|\W)DIMENSION\W/sxmi ); - } - - return $failed; -} - -sub forbidden_stop { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - $failed++ if ( $line =~ /^\s*STOP\s/sxmi ); - $failed++ if ( $line =~ /^\s*CALL\s*abort\W/sxmi ); - } - - return $failed; -} - -sub ampersand_continuation { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - $failed++ if ( $line =~ /^\s*&/sxmi ); - $failed++ if ( $line =~ /^\s*!\$\s*&/sxmi ); - } - - return $failed; -} - -sub implicit_none { - my @lines = @_; - - my $failed = 0; - my $foundit = 0; - my $modules = 0; - my @lines_to_test; - - my $in_interface = 0; - foreach my $input_line (@lines) { - - $input_line = remove_quoted($input_line); - - # Remove comments unless they're OpenMP commands - if ( $input_line =~ /![^\$]/sxm ) { - $input_line =~ s/![^\$].*?$//sxmg; - } - - # MODULEs etc in INTERFACEs don't have implicit none, so ignore these - if ( $input_line =~ /^\s*INTERFACE\s/sxmi ) { - $in_interface = 1; - } - push @lines_to_test, $input_line unless $in_interface; - if ( $input_line =~ /^\s*END\s*INTERFACE/sxmi ) { - $in_interface = 0; - } - } - - foreach my $line (@lines_to_test) { - - $foundit++ if ( $line =~ /^\s*IMPLICIT\s+NONE/sxmi ); - $modules++ - if ( $line =~ /^\s*SUBROUTINE\W/sxmi - or $line =~ /^\s*MODULE\W/sxmi - or $line =~ /^\s*FUNCTION\W/sxmi - or $line =~ /^\s*REAL\s*FUNCTION\W/sxmi - or $line =~ /^\s*LOGICAL\s*FUNCTION\W/sxmi - or $line =~ /^\s*INTEGER\s*FUNCTION\W/sxmi - or $line =~ /^\s*PROGRAM\W/sxmi ); - } - - $failed = 1 unless ( $foundit >= $modules ); - - return $failed; -} - -sub intrinsic_as_variable { - my @lines = @_; - my $failed = 0; - my @keywords = get_fortran_keywords(); - - my @fixed_lines = (); - - push @keywords, get_openmp_keywords(); - - # Steps: - # i) sanitise lines - # ii) look for match - # iii) check if match is a declaration (which must start with a type) - # iv) exclude any false positives from initialisation. - - # i) sanitise lines - - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Remove pre-processing directives - if ( $line =~ /^\s*\#/sxm ) { - $line = ""; - } - - push @fixed_lines, $line; - } - - my $entire = join( "", @fixed_lines ); - - # Sort out continuation lines - $entire =~ s/&\s*\n//sxmg; - - @fixed_lines = split /\n/sxm, $entire; - - foreach my $line (@fixed_lines) { - - next unless $line; - next unless $line =~ /\S/sxm; - - my $oline = $line; - - foreach my $keyword (@keywords) { - my $decl_match = 0; - $line = $oline; - - # ii) look for match - if ( $line =~ /(^|\W)$keyword($|\W)/sxmi ) { - foreach my $type (@fortran_types) { - my $type_r = $type; - $type_r =~ s/\s/\\s/sxm; - -# iii) check if match is a variable declaration (which always starts with a type): - if ( $line =~ /^\s*$type_r(\W.*\W|\W)$keyword/sxmi ) { - if ( $type =~ 'CLASS' - and $keyword =~ /(IS|DEFAULT)/sxm ) - { - - # statments within SELECT TYPE constructs are not declarations - unless ( $line =~ /^\s*CLASS\s+(IS|DEFAULT)/sxmi ) { - $decl_match = 1; - } - } - elsif ( $type =~ 'TYPE' and $keyword =~ 'IS' ) { - - # statments within SELECT TYPE constructs are not declarations - unless ( $line =~ /^\s*TYPE\s+IS/sxmi ) { - $decl_match = 1; - } - } - else { - $decl_match = 1; - } - last; - } - } - } - - # iv) exclude any false positives from initialisation. - if ($decl_match) { - - # This is a variable declaration with a matching keyword - # make sure this is not because of initialising to the result of a function - # (i.e. the keyword is the RHS of the = in this variable initialisation). - - # remove any type attributes which may match the keyword - $line =~ s/^.*:://sxm; - - # If we have a function declaration of the form - # FUNCTION foo() RESULT(bar) - # We need to strip the RESULT keyword out. - if ( $line =~ /^(.*?\bFUNCTION\s.*?\b)RESULT(\s*\()/sxm ) { - my $grp1 = quotemeta($1); - my $grp2 = quotemeta($2); - $line =~ s/($grp1)RESULT($grp2)/$grp1$grp2/sxm; - } - - # at this point, things in brackets aren't relevant because they can - # only be attributes of a variable, not the definition of a variable - # itself - while ( $line =~ /\(.*\)/sxm ) { - $line =~ s/\([^()]*\)//sxm; - } - - # At this point, remove array initialisations, as they mess with - # breaking on commas. "[]" and "(/ /)" forms may exist. We assume - # other checks enforce the "[]" -> "(/ /)" conversion, so here we - # simply deals with "[]". They may also be nested, so first we - # flatten to a single array initialiser. - - # The following matches a "[", followed by a capture group, then - # a pair of "[" and "]" enclosing a capture group which does not - # contain either a "[" or "]", followed by a capture group and a - # "]". It repeatedly removes the innermost pair of "[" and "]" - # in a nest until no more exist. - - while ( $line =~ /\[(.*?)\[([^\[]*?)\](.*?)\]/sxm ) { - $line =~ s/\[(.*?)\[([^\[]*?)\](.*?)\]/[$1$2$3]/sxm; - } - - # The following removes the actual array initialisations, which - # must be flattened and of the "[]" form following an "=" sign. - - while ( $line =~ /\=\s*\[.*?\]/sxm ) { - $line =~ s/\=\s*\[.*?\]//sxm; - } - - # split on commas, in case there are multiple variable declarations - my @decls = split /,/sxm, $line; - - foreach my $decl (@decls) { - - # As anything to the right of '=' signs are not variable definitions - # (they are instead initialiser etc.) we're not interested in them. - $decl =~ s/=.*$//sxm; - - # Remove function declarations - $decl =~ s/^.*?\bFUNCTION\s//sxmi; - - # If we get this far any matches are fails - if ( $decl =~ /(^|\W)$keyword(\W|$)/sxmi ) { - $line = "\n $keyword"; - $failed++; - $extra_error_information{$line}++; - } - } - } - } - } - - return $failed; -} - -sub line_over_80chars { - my @lines = @_; - my $failed = 0; - - foreach my $line (@lines) { - - # This needs to be 81, as Perl counts the newline as having length 1 - if ( length $line > 81 ) { - $failed++; - - # Reformat line so it prints the offending line neatly - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - } - return $failed; -} - -sub tab_detection { - my @lines = @_; - my $failed = 0; - foreach my $line (@lines) { - - # If any line contains a tab character - if ( $line =~ /\t/sxm ) { - $failed++; - - # Reformat line so it prints the offending line neatly - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - } - return $failed; -} - -sub check_crown_copyright { - my @lines = @_; - my $failed = 1; - my @valid_agreements = ( - 'L0195', 'NERC', - 'SC0138', 'UKCA', - 'SC0171', 'ACCESS', - 'SC0237', 'JULES', - 'IBM', 'of Bath', - 'Centre National', 'Lawrence Livermore', - 'Roger Marchand, ', 'of Colorado', - 'of Reading', - ); - - foreach my $line (@lines) { - $failed = 0 if ( $line =~ /^\s*(!|\/\*).*Crown\s*copyright/sxmi ); - foreach my $agreement (@valid_agreements) { - my $agreement_r = $agreement; - $agreement_r =~ s/\s/\\s/sxm; - $failed = 0 if ( $line =~ /^\s*(!|\/\*).*$agreement_r/sxmi ); - } - } - - return $failed; -} - -sub check_code_owner { - my @lines = @_; - my $failed = 1; - my $failed_co = 0; - my $failed_bi = 0; - my $is_shumlib = 0; - - foreach my $line (@lines) { - $is_shumlib = 1 - if ( - $line =~ / ^\s*(!|\/\*)\s*This\s*file\s* - is\s*part\s*of\s*the\s* - UM\s*Shared\s*Library\s*project /sxmi - ); - } - - if ( $is_shumlib == 1 ) { - $failed = 0; - } - else { - foreach my $line (@lines) { - $failed_co++ - if ( - $line =~ / ^\s*(!|\/\*)\s*Code\s*Owner:\s* - Please\s*refer\s*to\s*the\s*UM\s*file\s* - CodeOwners\.txt /sxmi - ); - $failed_bi++ - if ( - $line =~ / ^\s*(!|\/\*)\s* - This\s*file\s*belongs\s*in\s* - section: /sxmi - ); - } - - if ( $failed_co > 1 or $failed_bi > 1 ) { - $extra_error_information{"(multiple statements found)"}++; - } - - if ( $failed_co == 1 and $failed_bi == 1 ) { - $failed = 0; - } - } - - return $failed; -} - -sub array_init_form { - my @lines = @_; - my $failed = 0; - - my @fixed_lines = (); - - # First we clean up the lines by removing string contents and comments - foreach my $line (@lines) { - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Remove pre-processing directives - if ( $line =~ /^\s*\#/sxm ) { - $line = ""; - } - - push @fixed_lines, $line; - } - - my $entire = join( "", @fixed_lines ); - - # Sort out continuation lines - $entire =~ s/&\s*\n//sxmg; - - @fixed_lines = split /\n/sxm, $entire; - - # Now check for the existence of lines containing (/ /) - foreach my $line (@fixed_lines) { - - next unless $line; - next unless $line =~ /\S/sxm; - - $failed = 1 if ( $line =~ /\(\/.*\/\)/sxm ); - } - - return $failed; -} - -sub retire_if_def { - my @lines = @_; - my @ifdefs = ( 'VATPOLES', 'A12_4A', 'A12_3A', 'UM_JULES', 'A12_2A', ); - - # Sort out C continuation lines - my $entire = join( "", @lines ); - $entire =~ s/\\\s*\n//sxmg; - @lines = split /\n/sxm, $entire; - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - foreach my $ifdef (@ifdefs) { - -# matches #if defined(), #elif defined(), #ifdef , and #ifndef - if ( $line =~ /^\s*\#(el)?if.*\W$ifdef/sxm ) { - $failed++; - $extra_error_information{$ifdef}++; - } - } - } - - return $failed; -} - -sub c_deprecated { - my @lines = @_; - my %deprecateds = ( - 'strcpy' => "(): please use strncpy() instead", - 'sprintf' => "(): please use snprintf() instead", - 'usleep' => "(): please use nanosleep() instead", - '_BSD_SOURCE' => -": please find alternative functionality, or an equivalent feature test macro (if possible)", - '_DEFAULT_SOURCE' => -": please find alternative functionality, or an equivalent feature test macro (if possible)", - ); - - my $entire = join( "", @lines ); - - #remove commented sections - $entire =~ s/\/\*(.|\n)+?(\*\/)//sxmg; - - # Sort out continuation lines - $entire =~ s/\\\s*\n//sxmg; - - #remove #pragmas - $entire =~ s/(^|\n)\s*\#pragma.+?\n/\n/sxmg; - - @lines = split /\n/sxm, $entire; - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - foreach my $dep ( keys %deprecateds ) { - if ( $line =~ /$dep/sxm ) { - my $extra_msg = "$dep$deprecateds{$dep}"; - $failed++; - $extra_error_information{$extra_msg}++; - } - } - } - - return $failed; -} - -sub printstatus_mod { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - $failed++ if ( $line =~ /^\s*USE\s*printstatus_mod/sxmi ); - } - return $failed; -} - -sub write6 { - my @lines = @_; - my $failed = 0; - - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - if ( $lines[$i] =~ /^\s*WRITE/sxmi ) { - if ( $lines[$i] =~ /^\s*WRITE\s*\(\s*6/sxm ) { - $failed++; - } - } - - } - - return $failed; -} - -sub printstar { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - $failed++ if ( $line =~ /^\s*PRINT\s*\*/sxmi ); - } - return $failed; -} - -sub um_fort_flush { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - $failed++ if ( $line =~ /^\s*CALL\s*UM_FORT_FLUSH/sxmi ); - } - return $failed; -} - -sub svn_keyword_subst { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - $failed++ if ( $line =~ /\$Date\$/sxm ); - $failed++ if ( $line =~ /\$LastChangedDate\$/sxm ); - $failed++ if ( $line =~ /\$Revision\$/sxm ); - $failed++ if ( $line =~ /\$Rev\$/sxm ); - $failed++ if ( $line =~ /\$LastChangedRevision\$/sxm ); - $failed++ if ( $line =~ /\$Author\$/sxm ); - $failed++ if ( $line =~ /\$LastChangedBy\$/sxm ); - $failed++ if ( $line =~ /\$HeadURL\$/sxm ); - $failed++ if ( $line =~ /\$URL\$/sxm ); - $failed++ if ( $line =~ /\$Id\$/sxm ); - $failed++ if ( $line =~ /\$Header\$/sxm ); - } - - return $failed; - -} - -sub omp_missing_dollar { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - if ( $line =~ /^\s*!OMP/sxm ) { - $failed = 1; - } - } - - return $failed; - -} - -sub intrinsic_modules { - my @lines = @_; - - my $failed = 0; - - foreach my $line (@lines) { - my @keywords_to_check = get_intrinsic_modules_keywords(); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - next unless $line; - next unless $line =~ /\S/sxm; # If line empty, try the next - - foreach my $keyword (@keywords_to_check) { - if ( $line =~ /^\s*USE\s*$keyword/sxmi ) { - $extra_error_information{$keyword}++; - $failed++; - } - } - } - return $failed; -} - -sub cpp_ifdef { - - # ifdefs should be of the form "#if defined(MY_IFDEF)" - # rather than "#ifdef(MY_IFDEF)" - my @lines = @_; - my $failed = 0; - foreach my $line (@lines) { - if ( $line =~ /^\s*\#ifdef/sxm ) { - $failed++; - } - elsif ( $line =~ /^\s*\#ifndef/sxm ) { - $failed++; - } - } - return $failed; -} - -sub cpp_comment { - - # C pre-processor directives should not be intermingled with - # fortran style comments - my @lines = @_; - my $failed = 0; - my @comments = (); - foreach my $line (@lines) { - - # is this an #if statement? - if ( ( $line =~ m/^\s*\#if\s/sxm ) || ( $line =~ m/^\s*\#elif\s/sxm ) ) - { - - # does this ifdef have a ! in it? - if ( $line =~ /!/sxm ) { - - # split the possible regions (ignoring the 0th) - # and loop over to check each one in turn - @comments = split /!/sxm, $line, -1; - splice( @comments, 0, 1 ); - foreach my $comment (@comments) { - - # must be a recognisable CPP directive - if ( $comment !~ /(^\s*\(?\s*defined)|(^=\s*[0-9])/sxm ) { - $failed++; - } - } - } - } - - # is this an #else? - elsif ( $line =~ /^\s*\#else\s*!/sxm ) { - $failed++; - } - - # is this an #endif? - elsif ( $line =~ /^\s*\#endif\s*!/sxm ) { - $failed++; - } - - # is this an #include? - elsif ( $line =~ /^\s*\#include[^!]+!/sxm ) { - $failed++; - } - } - return $failed; -} - -sub obsolescent_fortran_intrinsic { - my @lines = @_; - - my $failed = 0; - - # Iterate over lines and keywords - foreach my $line (@lines) { - my @keywords_to_check = get_archaic_fortran_keywords(); - - $line = remove_quoted($line); - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - next unless $line; - next unless $line =~ /\S/sxm; # If line empty, try the next - - # Ignore CPP - next if ( $line =~ /^\s*\#/sxm ); - - foreach my $keyword (@keywords_to_check) { - - # If the keyword is present on the line - if ( $line =~ /(^|\W)$keyword(\W|$)/sxmi ) { - $extra_error_information{$keyword}++; - $failed++; - } - } - } - return $failed; -} - -sub exit_stmt_label { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Find if the line appears to contain a solitary EXIT - if ( $line =~ /\bEXIT\b/sxm ) { - - # fail if that EXIT is not followed by a label - $failed++ if ( $line =~ /EXIT\s*$/sxm ); - } - } - - return $failed; - -} - -sub read_unit_args { - my @lines = @_; - - my $failed = 0; - foreach my $line (@lines) { - - # Remove comments unless they're OpenMP commands - if ( $line =~ /![^\$]/sxm ) { - $line =~ s/![^\$].*?$//sxmg; - } - - # Find if the line appears to be a READ statement - if ( $line =~ /^\s*READ\s*\(/sxm ) { - - # fail if that READ does not have UNIT= as the first argument - $failed++ if ( !( $line =~ /^\s*READ\s*\(\s*UNIT\s*=/sxm ) ); - } - } - - return $failed; - -} - -sub c_openmp_define_pair_thread_utils { - my @lines = c_sanitise_lines(@_); - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - # match ifdef and defined style for _OPENMP - if ( $line =~ /^\s*\#(el)?if.*defined\(_OPENMP\)/sxm ) { - - # fail if _OPENMP is not the first defined() test, or it is not - # followed by SHUM_USE_C_OPENMP_VIA_THREAD_UTILS - if ( - $line !~ / ^\s*\#(el)?if\s*!? - defined\(_OPENMP\)\s* - &&\s*!? - defined\(SHUM_USE_C_OPENMP_VIA_THREAD_UTILS\) /sxm - ) - { - $failed++; - } - } - } - - return $failed; -} - -sub c_openmp_define_no_combine { - my @lines = c_sanitise_lines(@_); - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - # fail if we match defined(_OPENMP) + at least two other defined() - if ( $line =~ - /^\s*\#(el)?if\s*defined\(_OPENMP\)(.*?!?defined\(\w+\)){2,}/sxm ) - { - $failed++; - } - } - - return $failed; -} - -sub c_openmp_define_not { - my @lines = c_sanitise_lines(@_); - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - # fail if we match !defined(_OPENMP) - if ( $line =~ /^\s*\#(el)?if.*!defined\(_OPENMP\)/sxm ) { - $failed++; - } - } - - return $failed; -} - -sub c_ifdef_defines { - my @lines = c_sanitise_lines(@_); - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - # fail if we match #ifdef or #ifndef - if ( $line =~ /^\s*\#if(n)?def/sxm ) { - $failed++; - } - } - - return $failed; -} - -sub c_protect_omp_pragma { - my @lines = c_sanitise_lines(@_); - - # remove _OPENMP if-def protected lines. - # As #ifs may be nested, successivly remove all the #if blocks until - # none are remaining. - for ( my $i = scalar @lines ; $i > 0 ; $i-- ) { - my $line = $lines[ $i - 1 ]; - - # as we are going from the bottom, the first #if will be an - # innermost one. - if ( $line =~ /^\s*\#if/sxm ) { - - splice @lines, $i - 1, 1, ''; - - my $whipe = 0; - - if ( $line =~ /defined\(_OPENMP\)/sxm ) { - $whipe = 1; - } - - for ( my $j = $i - 1 ; $j < scalar @lines ; $j++ ) { - my $jline = $lines[$j]; - - if ( $whipe == 1 ) { - splice @lines, $j, 1, ''; - } - - if ( $jline =~ /\#else/sxm ) { - if ( $whipe == 1 ) { - $whipe = 0; - } - splice @lines, $j, 1, ''; - } - - if ( $jline =~ /\#elif/sxm ) { - if ( $whipe == 1 ) { - $whipe = 0; - } - if ( $jline =~ /defined\(_OPENMP\)/sxm ) { - $whipe = 1; - } - splice @lines, $j, 1, ''; - } - - if ( $jline =~ /\#endif/sxm ) { - splice @lines, $j, 1, ''; - last; - } - - } - } - } - - my $failed = 0; - for ( my $i = 0 ; $i < scalar @lines ; $i++ ) { - my $line = $lines[$i]; - - # as we have removed all lines protected by _OPENMP, - # any remaining pragma lines are a fail. - if ( $line =~ /\#pragma\s+omp/sxm ) { - $failed++; - } - - # as are omp includes - if ( $line =~ /\#include\s+(<|")omp.h(>|")/sxm ) { - $failed++; - } - } - - return $failed; -} - -sub c_sanitise_lines { - my @lines = @_; - - my $entire = join( "", @lines ); - - #remove commented sections - $entire =~ s/\/\*(.|\n)+?(\*\/)//sxmg; - - # Sort out continuation lines - $entire =~ s/\\\s*\n//sxmg; - - # standardise format for defined() style tests - $entire =~ s/ defined\s*? # start with "defined", - # optionally followed by space(s) - \(?\s*?(\w+)[^\S\n]*\)? # Form capture group one from - # the following 'word' - # characters contained within - # (the optional) parantheses - - # i.e. the name of the macro being - # tested for. - ([|&><*+%^$()\/\-\s]) # Form capture group two from - # the space, linebreak, or - # operator immediately - # following the macro name - - /defined($1) $2/sxmg; # Standardise the form to - # include parantheses and a - # following white-space - - @lines = split /\n/sxm, $entire; - - return @lines; -} - -sub line_trail_whitespace { - my @lines = @_; - my $failed = 0; - - foreach my $line (@lines) { - - $line =~ s/\n//sxmg; - - # Fail if there are whitespace characters at the end of a line. - if ( $line =~ /\s+$/sxm ) { - $failed++; - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - } - return $failed; -} - -sub c_integral_format_specifiers { - my @lines = @_; - my $failed = 0; - - my @fixed_width_size = ( '8', '16', '32', '64' ); - - my @fixed_width_type = ( 'MAX', 'PTR' ); - - my @fixed_prefix = ( 'PRI', 'SCN' ); - - my @fixed_suffix = ( '', 'FAST', 'LEAST' ); - - my @print_style = ( 'd', 'i', 'u', 'o', 'x', 'X' ); - - # Exact numerical width style (e.g. PRIdFAST64) - foreach my $line (@lines) { - foreach my $fwpre (@fixed_prefix) { - foreach my $fwps (@print_style) { - foreach my $fwsz (@fixed_width_size) { - foreach my $fwsfx (@fixed_suffix) { - - # Fail if format specifier immediately follows or proceeds a " character - if ( $line =~ /"${fwpre}${fwps}${fwsfx}${fwsz}/sxm ) { - $failed++; - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - elsif ( $line =~ /${fwpre}${fwps}${fwsfx}${fwsz}"/sxm ) - { - $failed++; - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - } - } - } - } - } - - # Style defining the width by type (e.g. SCNuMAX) - foreach my $line (@lines) { - foreach my $fwpre (@fixed_prefix) { - foreach my $fwps (@print_style) { - foreach my $fwt (@fixed_width_type) { - - # Fail if format specifier immediately follows or proceeds a " character - if ( $line =~ /"${fwpre}${fwps}${fwt}/sxm ) { - $failed++; - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - elsif ( $line =~ /${fwpre}${fwps}${fwt}"/sxm ) { - $failed++; - chomp($line); - $line = "\n '$line'"; - $extra_error_information{$line}++; - } - } - } - } - } - - return $failed; -} - -sub c_final_newline { - my @lines = @_; - my $failed = 0; - - my $line = $lines[-1]; - my $fchar = substr $line, -1; - - # Fail if the final line does not end with a newline character - if ( ord $fchar != 10 ) { - $failed++; - } - - return $failed; -} - -1; diff --git a/script_umdp3_checker/bin/UMDP3CriticPolicy.pm b/script_umdp3_checker/bin/UMDP3CriticPolicy.pm deleted file mode 100644 index 43ab1f6f..00000000 --- a/script_umdp3_checker/bin/UMDP3CriticPolicy.pm +++ /dev/null @@ -1,43 +0,0 @@ -# *****************************COPYRIGHT******************************* -# (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file LICENSE -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* - -package UMDP3CriticPolicy; -use strict; -use warnings; -use 5.010; - -use Perl::Critic; - -# Declare version - this is the last UM version this script was updated for: -our $VERSION = '13.2.0'; - -my @allowed_spellings = qw(CreateBC FCM UMUI NEMO CICE); - -sub get_umdp3_critic_policy { - - # define overriden policies - my %overriden_policies = (); - $overriden_policies{'Documentation::PodSpelling'} = - { 'stop_words' => join q{ }, @allowed_spellings }; - - # Construct the critic policy with overriden policies omitted - my @default_policies = - Perl::Critic->new( -severity => 1, -exclude => keys %overriden_policies ) - ->policies(); - my $policy_file = Perl::Critic->new( -include => \@default_policies ); - - # add back in the overridden policies with new settings - for ( keys %overriden_policies ) { - $policy_file->add_policy( - -policy => $_, - -params => $overriden_policies{$_} - ); - } - - return $policy_file; -} - -1; diff --git a/script_umdp3_checker/bin/UMDP3DispatchTables.pm b/script_umdp3_checker/bin/UMDP3DispatchTables.pm deleted file mode 100644 index 5af7367f..00000000 --- a/script_umdp3_checker/bin/UMDP3DispatchTables.pm +++ /dev/null @@ -1,126 +0,0 @@ -# *****************************COPYRIGHT******************************* -# (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file LICENSE -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* - -# Standalone version of the dispatch tables from UDMP3Job - -package UMDP3DispatchTables; -use strict; -use warnings; -use 5.010; - -# Declare version - this is the last UM version this script was updated for: -our $VERSION = '13.5.0'; - -my %dispatch_table_diff_fortran = ( - 'Lowercase Fortran keywords not permitted' => \&UMDP3::capitalised_keywords, - 'OpenMP sentinels not in column one' => - \&UMDP3::openmp_sentinels_in_column_one, - 'Omitted optional space in keywords' => \&UMDP3::unseparated_keywords, - 'GO TO other than 9999' => \&UMDP3::go_to_other_than_9999, - 'WRITE without format' => \&UMDP3::write_using_default_format, - 'Lowercase or CamelCase variable names only' => - \&UMDP3::lowercase_variable_names, - 'Use of dimension attribute' => \&UMDP3::dimension_forbidden, - 'Continuation lines shouldn\'t start with &' => - \&UMDP3::ampersand_continuation, - 'Use of EQUIVALENCE or PAUSE' => \&UMDP3::forbidden_keywords, - 'Use of older form of relational operator (.GT. etc.)' => - \&UMDP3::forbidden_operators, - 'Line longer than 80 characters' => \&UMDP3::line_over_80chars, - 'Line includes tab character' => \&UMDP3::tab_detection, - 'USEd printstatus_mod instead of umPrintMgr' => \&UMDP3::printstatus_mod, - 'Used PRINT rather than umMessage and umPrint' => \&UMDP3::printstar, - 'Used WRITE(6) rather than umMessage and umPrint' => \&UMDP3::write6, - 'Used um_fort_flush rather than umPrintFlush' => \&UMDP3::um_fort_flush, - 'Used Subversion keyword substitution which is prohibited' => - \&UMDP3::svn_keyword_subst, - 'Used !OMP instead of !$OMP' => \&UMDP3::omp_missing_dollar, - 'Used #ifdef or #ifndef rather than #if defined() or #if !defined()' => - \&UMDP3::cpp_ifdef, - 'Presence of fortran comment in CPP directive' => \&UMDP3::cpp_comment, - 'Used an archaic fortran intrinsic function' => - \&UMDP3::obsolescent_fortran_intrinsic, - 'EXIT statements should be labelled' => \&UMDP3::exit_stmt_label, - 'Intrinsic modules must be USEd with an INTRINSIC keyword specifier' => - \&UMDP3::intrinsic_modules, - 'READ statements should have an explicit UNIT= as their first argument' => - \&UMDP3::read_unit_args, -); - -my %dispatch_table_file_fortran = ( - 'Warning - used an if-def due for retirement' => \&UMDP3::retire_if_def, - 'File is missing at least one IMPLICIT NONE' => \&UMDP3::implicit_none, - 'Never use STOP or CALL abort' => \&UMDP3::forbidden_stop, - 'Use of Fortran function as a variable name' => - \&UMDP3::intrinsic_as_variable, - 'File missing crown copyright statement or agreement reference' => - \&UMDP3::check_crown_copyright, - 'File missing correct code owner comment' => \&UMDP3::check_code_owner, - 'Used (/ 1,2,3 /) form of array initialisation, rather than [1,2,3] form' - => \&UMDP3::array_init_form, -); - -my %dispatch_table_diff_c = ( - 'Line longer than 80 characters' => \&UMDP3::line_over_80chars, - 'Line includes tab character' => \&UMDP3::tab_detection, -'Fixed-width Integer format specifiers must have a space between themselves and the string delimiter (the " character)' - => \&UMDP3::c_integral_format_specifiers, -); - -my %dispatch_table_file_c = ( - 'Warning - used an if-def due for retirement' => \&UMDP3::retire_if_def, - 'Used a deprecated C identifier' => \&UMDP3::c_deprecated, - 'File missing crown copyright statement or agreement reference' => - \&UMDP3::check_crown_copyright, - 'File missing correct code owner comment' => \&UMDP3::check_code_owner, -'Used an _OPENMP if-def without also testing against SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does not come first in the test.)' - => \&UMDP3::c_openmp_define_pair_thread_utils, -'Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def test in a logical combination with a third macro' - => \&UMDP3::c_openmp_define_no_combine, - 'Used !defined(_OPENMP) rather than defined(_OPENMP) with #else branch' => - \&UMDP3::c_openmp_define_not, -'Used an omp #pragma (or #include ) without protecting it with an _OPENMP if-def' - => \&UMDP3::c_protect_omp_pragma, - 'Used the #ifdef style of if-def, rather than the #if defined() style' => - \&UMDP3::c_ifdef_defines, - 'C Unit does not end with a final newline character' => - \&UMDP3::c_final_newline, -); - -my %dispatch_table_file_all = - ( 'Line includes trailing whitespace character(s)' => - \&UMDP3::line_trail_whitespace, ); - -sub get_diff_dispatch_table_fortran { - return %dispatch_table_diff_fortran; -} - -sub get_file_dispatch_table_fortran { - my $modified_file = shift; - my %dispatch_table_file_fortran_custom = %dispatch_table_file_fortran; - - if ( $modified_file =~ /um_abort_mod.F90$/sxm ) { - delete( - $dispatch_table_file_fortran_custom{'Never use STOP or CALL abort'} - ); - } - - return %dispatch_table_file_fortran_custom; -} - -sub get_diff_dispatch_table_c { - return %dispatch_table_diff_c; -} - -sub get_file_dispatch_table_c { - return %dispatch_table_file_c; -} - -sub get_file_dispatch_table_all { - return %dispatch_table_file_all; -} - -1; diff --git a/script_umdp3_checker/bin/umdp3_check.pl b/script_umdp3_checker/bin/umdp3_check.pl deleted file mode 100755 index c1521a42..00000000 --- a/script_umdp3_checker/bin/umdp3_check.pl +++ /dev/null @@ -1,1194 +0,0 @@ -#!/usr/bin/env perl -# *****************************COPYRIGHT******************************* -# (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file LICENSE -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* - -# Script to check whether a code change complies with UMDP 003 -# Basically the 'guts' of the UMDP3.pm class's perform_task method without -# the UTF-isms such as status objects and the OO stuff - -use strict; -use warnings; -use 5.010; -use Cwd 'abs_path'; -use threads; -use threads::shared; -use List::Util qw(max); -use File::MimeInfo::Magic; -use IO::ScalarArray; - -use IPC::Run qw(run); - -# Set the location of the UMDP3 package. -use FindBin; -use lib "$FindBin::Bin"; - -use UMDP3; -use UMDP3CriticPolicy; - -# This is a standalone version of the dispatch tables from UMDP3Job, generated -# by script automatically -use UMDP3DispatchTables; - -# Declare version - this is the last UM version this script was updated for: -our $VERSION = '13.5.0'; - -# Declare variables -my $fcm = '/etc/profile'; # File to source to access 'fcm' commands -my $exit = 0; # Exit code of this script == number of failing files -my %additions : shared; # Hash of added code -my %deletions; # Hash of deleted files -my $filename = ''; # Current filename being tested -my $snooze = 120; # Time to wait before retrying -my $max_snooze = 10; # Maximum number of retries before aborting - -# Shared variables for multi-threading -# Variables with the "shared" attribute become shared memory variables - -# i.e. they are accessable by all threads. -# Other variables are private to each thread. -my @branchls_threads; -my @add_keys_threads; -my @output_threads : shared; -my @exit_threads : shared; - -# Get argument from command line, or assume '.' if not defined -my $branch = shift // '.'; - -# Cope with UTF-style working copy syntax just in case -$branch =~ s/wc://sxm; - -# Read text file of whitelisted include files -my $whitelist_includes_file = shift; - -unless ( $whitelist_includes_file and -f $whitelist_includes_file ) { - die "Whitelist filename not provided.\n"; -} - -# Read in retired if-defs -my @includes = read_file($whitelist_includes_file); - -my $suite_mode = 0; -if ( $ENV{SOURCE_UM_MIRROR} ) { - print "Detected SOURCE_UM_MIRROR environment variable.\n"; - $branch = $ENV{SOURCE_UM_MIRROR}; - print "Redirecting branch to $branch\n"; - $suite_mode = 1; -} - -# Set up threads - -# Determin the number of threads to use. Default to 1 (i.e. run serially), but override this -# if the UMDP_CHECKER_THREADS environment variable is set. - -my $num_threads = 1; -if ( $ENV{UMDP_CHECKER_THREADS} ) { - $num_threads = $ENV{UMDP_CHECKER_THREADS}; - if ( $num_threads < 1 ) { - print - "UMDP_CHECKER_THREADS environment variable is invalid: overriding\n"; - $num_threads = 1; - } - print "Using $num_threads threads\n"; -} - -# Determine cylc logging -my $log_cylc = 0; -if ( $ENV{CYLC_TASK_LOG_ROOT} ) { - $log_cylc = $ENV{CYLC_TASK_LOG_ROOT}; - print "Using cylc logging directory: $log_cylc\n"; -} - -# Now we have the number of threads required, set up a threads array, with one entry -# for each thread. This will be used later to hold the control information for each -# thread in use. - -my @threads_array; - -for ( my $i = 1 ; $i <= $num_threads ; $i++ ) { - push( @threads_array, $i ); -} - -my %dispatch_table_diff_fortran = - UMDP3DispatchTables::get_diff_dispatch_table_fortran(); -my %dispatch_table_diff_c = UMDP3DispatchTables::get_diff_dispatch_table_c(); -my %dispatch_table_file_c = UMDP3DispatchTables::get_file_dispatch_table_c(); -my %dispatch_table_file_all = - UMDP3DispatchTables::get_file_dispatch_table_all(); - -my @binfo; -my $binfocode; - -my $trunkmode = 0; -my $error_trunk = 0; - -start_branch_checking: - -# Check this is a branch rather than the trunk -@binfo = `. $fcm; fcm binfo $branch 2>&1`; -$binfocode = $?; - -unless ( $binfocode == 0 ) { - if ( grep( /svn\sinfo\s--xml/sxm, @binfo ) ) { - if ($suite_mode) { - for ( my $i = 1 ; $i <= $max_snooze ; $i++ ) { - print -"Revision probably doesn't exist yet - waiting $snooze seconds for mirror to update (Snooze $i of $max_snooze).\n"; - sleep $snooze; - @binfo = `. $fcm; fcm binfo $branch 2>&1`; - $binfocode = $?; - last if ( $binfocode == 0 ); - } - } - } - if ( $binfocode != 0 ) { - print "Error running fcm binfo:\n"; - print @binfo; - die "FCM error"; - } -} - -if ( grep( /URL:\ssvn:\/\/[^\/]+\/(\w|\.)+_svn\/\w+\/trunk/sxm, @binfo ) - or grep( /URL:\shttps:\/\/[^\/]+\/svn\/[\w\.]+\/\w+\/trunk/sxm, @binfo ) - or grep( /URL:.*\/svn\/\w+\/main\/trunk/sxm, @binfo ) - or grep( /URL:..*_svn\/main\/trunk/sxm, @binfo ) - or grep( /URL:\sfile:\/\/.*\/trunk/sxm, @binfo ) ) -{ - print "Detected trunk: checking full source tree\n"; - $branch =~ s/@.*$//sxm; - $trunkmode = 1; - if ( $ENV{UMDP_CHECKER_TRUNK_ERROR} ) { - $error_trunk = $ENV{UMDP_CHECKER_TRUNK_ERROR}; - if ( $error_trunk == 1 ) { - print -"UMDP_CHECKER_TRUNK_ERROR environment variable is set to 1: failures will be fatal\n"; - } - elsif ( $error_trunk == -1 ) { - print -"UMDP_CHECKER_TRUNK_ERROR environment variable is set to -1: skipping UMPD3 checks for the trunk\n"; - exit 0; - } - else { - print -"UMDP_CHECKER_TRUNK_ERROR environment variable is set to $error_trunk: failures will be ignored\n"; - print -"Set this to 1 to cause the checker script to exit with an error code on finding failures.\n"; - print -"Alternatively, set this to -1 to cause the checker script to exit without checking the trunk.\n"; - } - } - else { - print "UMDP_CHECKER_TRUNK_ERROR environment variable is not present.\n"; - print -"Set this to 1 to cause the checker script to exit with an error code on finding failures.\n"; - print -"Alternatively, set this to -1 to cause the checker script to exit without checking the trunk.\n"; - } -} - -foreach my $line (@binfo) { - if ( $line =~ m{Branch\sParent:.*/trunk@.*}sxm ) { - last; - } - elsif ( $line =~ m/Branch\sParent:\s*(.*)/sxm ) { - print "This branch is a branch-of-branch - testing parent ($1)\n"; - $branch = $1; - goto start_branch_checking; - } -} - -my @info; - -# Get fcm info for branch -@info = `. $fcm; fcm info $branch 2>&1`; -$binfocode = $?; - -if ( $binfocode != 0 ) { - print "Error running fcm info:\n"; - print @info; - die "FCM error"; -} - -my $repository_branch_path; -my $repository_working_path; -my $repository_relative_path; - -foreach my $line (@binfo) { - if ( $line =~ /^URL:\s*(.*)/sxm ) { - $repository_branch_path = $1; - last; - } -} - -foreach my $line (@info) { - if ( $line =~ /^URL:\s*(.*)/sxm ) { - $repository_working_path = $1; - last; - } -} - -$repository_relative_path = $repository_working_path; -$repository_relative_path =~ s/$repository_branch_path//sxm; -$repository_relative_path =~ s/\n//sxm; - -# replace relative branch paths with absolute paths -if ( grep( /Working\sCopy\sRoot\sPath:/sxm, @info ) ) { - $branch = abs_path($branch); -} - -# trim trailing "/" -$branch =~ s{/$}{}sxm; - -print "Testing branch $branch\n"; - -if ( $trunkmode == 0 ) { - - if ($repository_relative_path) { - print -"\n[WARN] The relative path between the root of the branch and the script working path ($repository_relative_path) is not empty\n"; - print " - you are not running from the root of the branch\n\n"; - if ($suite_mode) { - die "Error - re-run from the root of the branch\n"; - } - } - - # Get the diff - my @diff = `. $fcm; fcm bdiff $branch 2>&1`; - my $diffcode = $?; - - # Check the bdiff worked correctly - unless ( $diffcode == 0 ) { - die "Error running 'fcm bdiff $branch':\n@diff\n"; - } - -# We will need to know empty and deleted files - use the bdiff summary to identify these. - my @summary = `. $fcm; fcm bdiff --summarise $branch 2>&1`; - $diffcode = $?; - - # Check the second bdiff worked correctly - unless ( $diffcode == 0 ) { - die "Error running 'fcm bdiff --summarise $branch':\n@summary\n"; - } - - foreach my $line (@summary) { - - # Reset captures to undefined with a trivial successful match. - "a" =~ /a/sxm; - - # Add hash entries for added or modified files: - # These are files which are newly added; or which add or remove lines. - $line =~ /^(A|M+)\s*(?\S+)$/sxm; - my $modified_file = $+{filename}; - if ($modified_file) { - - #normalise the path - $modified_file =~ s/$repository_working_path\///sxm; - $modified_file =~ s/.*trunk$repository_relative_path\///sxm; - - my @share_arr = []; - $additions{$modified_file} = share(@share_arr); - } - - # Reset captures to undefined with a trivial successful match. - "a" =~ /a/sxm; - - # Add has entries for deleted files - $line =~ /^D\s*(?\S+)$/sxm; - my $deleted_file = $+{filename}; - if ($deleted_file) { - - #normalise the path - $deleted_file =~ s/$repository_working_path\///sxm; - $deleted_file =~ s/.*trunk$repository_relative_path\///sxm; - $deletions{$deleted_file} = []; - } - } - - my $store_line = 0; - - # Store the lines added in a hash with the filename as the key, i.e. - # %additions = ( 'filename' => [ 'added line 1', 'added line 2'] ) - foreach my $line (@diff) { - - if ( $line =~ /^\+\+\+/sxm ) { - - # Find if the filename is in our additions hash, - # and set the subsequent lines to be stored if it is. - $line =~ /^\+\+\+\s+(?\S+)/sxm; - $filename = $+{filename}; - unless ( ( $branch eq "." ) || ( $filename eq $branch ) ) { - $filename =~ s/.*$branch\///sxm; - } - $store_line = exists( $additions{$filename} ); - - if ( $store_line == 0 ) { - - # if we don't recognise the file as deleted, - # or as marking an SVN property change, - # something has gone wrong. - if ( !exists( $deletions{$filename} ) ) { - if ( - !( - grep( /^Property\schanges\son:\s$filename$/sxm, - @diff ) - ) - ) - { - print "Something has failed parsing line '$line'\n"; - die -"Filename '$filename' is not contained in the output from fcm bdiff --summarise!\n"; - } - } - } - - } - elsif ( $line =~ /^\+/sxm ) { - if ($store_line) { - - # Add the diff to %additions hash - $line =~ s/^\+//sxm; - push @{ $additions{$filename} }, $line; - } - } - } - -} - -# The @external_checks array contains the names of all the non-UM repositories -# extracted by the UM which should also be checked. -my @external_checks = ( "shumlib", "meta", "ukca" ); -my %filepath_mapping = ( 'meta' => 'um_meta' ); -my @extracts = (); - -if ( $trunkmode == 0 ) { - if ($suite_mode) { - - # enable trunkmode for specific repositories if the environment does - # not match rose-stem/rose-suite.conf - - my $ss_env = $ENV{SCRIPT_SOURCE}; - my @suite_conf = cat_file( $ss_env . "/um/rose-stem/rose-suite.conf" ); - my @host_sources = grep /^HOST_SOURCE_.*=/, @suite_conf; - - print "Detected HOST_SOURCE variables:\n"; - print join( "", @host_sources ); - - foreach (@external_checks) { - my $repo = $_; - my $o_repo = $repo; - if ( exists $filepath_mapping{$repo} ) { - $repo = $filepath_mapping{$repo}; - } - my $host_var_name = "HOST_SOURCE_" . uc($repo); - my $env_var_res = $ENV{$host_var_name}; - if ( !grep /^$host_var_name=(\"|\')$env_var_res(\"|\')/, - @host_sources ) - { - print $host_var_name - . " modified in environment." - . " Running full check on this repository\n"; - push @extracts, $o_repo; - } - } - - } - - # enable trunkmode for specific repositories if rose-stem/rose-suite.conf - # is modified - if ( exists $additions{"rose-stem/rose-suite.conf"} ) { - print "rose-stem/rose-suite.conf modified:" - . " checking for external repository updates\n"; - my $added_lines_ref = $additions{"rose-stem/rose-suite.conf"}; - my @added_lines = @$added_lines_ref; - foreach (@external_checks) { - my $repo = $_; - my $o_repo = $repo; - if ( exists $filepath_mapping{$repo} ) { - $repo = $filepath_mapping{$repo}; - } - my $host_var_name = "HOST_SOURCE_" . uc($repo); - if ( grep /^$host_var_name=/, @added_lines ) { - print $host_var_name - . " modified in rose-suite.conf." - . " Running full check on this repository\n"; - push @extracts, $o_repo; - } - } - } - - # remove any duplicates - my %unique_extracts = map { $_ => 1 } @extracts; - @extracts = keys %unique_extracts; - - # If we captured any changes, enable trunk-mode for those repositories. - if ( scalar(@extracts) > 0 ) { - $trunkmode = 1; - $error_trunk = 1; - unshift @extracts, ""; - } -} -else { - @extracts = ( "", "um" ); - push @extracts, @external_checks; -} - -if ( $trunkmode == 1 ) { - - #trunk mode: cat all the source files to %additions - - my @branchls; - my $returncode; - - if ($suite_mode) { - - # If we are in suite mode, we need to generate the ls from the extracted - # sources, not from FCM. - - my $ss_env = $ENV{SCRIPT_SOURCE}; - my $extracts_path = join( " $ss_env/", @extracts ); - - print "Using extracted source from path(s) : $extracts_path\n"; - - my @exract_source = - `find $extracts_path -type f -exec readlink -f {} \\; 2>&1`; - $returncode = $?; - - if ( $returncode != 0 ) { - die "Error running 'find $extracts_path':\n@exract_source\n"; - } - - my $cs_env = $ENV{CYLC_SUITE_SHARE_DIR}; - - $cs_env = `readlink -f $cs_env`; - chomp $cs_env; - - my @script_source = -`find $cs_env/imported_github_scripts -type f -not -ipath "*/.git/*" -exec readlink -f {} \\; 2>&1`; - $returncode = $?; - - if ( $returncode != 0 ) { - die -"Error running 'find $cs_env/imported_github_scripts':\n@script_source\n"; - } - - push( @branchls, @exract_source ); - push( @branchls, @script_source ); - - # convert the realtive paths to be relative to the extract location - - if ( $#exract_source >= 0 ) { - $repository_working_path = $exract_source[0]; - } - else { - $repository_working_path = "[ ]"; - } - - $repository_working_path =~ s{/um/.*$}{}sxm; - $repository_working_path = - "(" . $cs_env . "|" . $repository_working_path . ")"; - $repository_relative_path = ""; - - } - else { - - @branchls = `. $fcm; fcm ls -R $branch 2>&1`; - $returncode = $?; - - unless ( $returncode == 0 ) { - die "Error running ' fcm ls -R $branch':\n@branchls\n"; - } - - } - - # check there are some files availible to test! - unless ( $#branchls >= 0 ) { - die "Error: no files in $branch\n"; - } - - # because the work done by each thread will be unbalanced, - # we should take a guided approach - therefore split into - # multiple branchls blocks - - # reduce the number of threads if are too few files - # (prevent empty threads) - if ( $#branchls < $num_threads - 1 ) { - $num_threads = $#branchls + 1; - } - -# Set up the size of the chunk of work each thread will do. -# Each thread should process at least one key (i.e. at least one element of -# the array @branchls, which in turn are used as the keys of the hash %additions) -# However, we also want to balance the work. -# We will start with a chunk size equivalent to a third of the keys divided -# equally across the threads, then progrssively re-use each thread with smaller -# chunks until the entire work pool has been exhausted. - - my $thread_branchls_len; - - $thread_branchls_len = max( 1, ( $#branchls + 1 ) / ( 3 * $num_threads ) ); - - # fork the threads to execute trunk_files_parse - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - $branchls_threads[$i] = []; - -# Store the work (in this case the list of files to process) for the i'th thread, -# by taking a chunk from the original branchls array. The chunk will be of size -# thread_branchls_len. - push @{ $branchls_threads[$i] }, splice @branchls, - $#branchls - $thread_branchls_len + 1; - - # fork the thread - # This will create a new thread which will execute the trunk_files_parse sub. - # Its thread id will be stored in the threads array. - $threads_array[$i] = threads->create( \&trunk_files_parse, $i ); - } - - my @th_l; - - # add the currently running threads to the list - @th_l = threads->list(threads::running); - - # add the threads which have run work, but have already finished it. - push @th_l, threads->list(threads::joinable); - - # re-join (and possibly re-fork) all the threads. - # By doing this we will recycle all the threads until the entire work - # pool is executed and completed. - while ( $#branchls >= 0 or $#th_l >= 0 ) { - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - - # Check if any of the threads in out list is done with its work chunk. - # If it is, we can re-join it, then recycle it by issuing a new work chunk. - if ( $threads_array[$i]->is_joinable() ) { - my $return_code = $threads_array[$i]->join(); - if ( !defined $return_code ) { - print "thread ", $threads_array[$i]->tid(), - ": terminated abnormally [A]\n"; - $exit += 1; - $error_trunk = 1; - } - - # Calculate a new work chunk. - # This chunk size will get progressivly smaller as the work pool is exhausted. - $thread_branchls_len = - max( 1, ( $#branchls + 1 ) / ( 3 * $num_threads ) ); - if ( $#branchls >= 0 ) { - $branchls_threads[$i] = []; - - # Give the thread a new chunk of work - if ( $thread_branchls_len > $#branchls + 1 ) { - push @{ $branchls_threads[$i] }, splice @branchls, 0; - } - else { - push @{ $branchls_threads[$i] }, splice @branchls, - $#branchls - $thread_branchls_len + 1; - } - $threads_array[$i] = - threads->create( \&trunk_files_parse, $i ); - } - } - } - - # Update the list of threads. - @th_l = threads->list(threads::running); - push @th_l, threads->list(threads::joinable); - } - - # By this point we have allocated all the work pool to the threads. - # Check all threads are re-joined - this will finalise the threads, - # and will block execution for any threads still processing work - # until they have completed it. - - foreach my $thread ( threads->list() ) { - my $return_code = $thread->join(); - if ( !defined $return_code ) { - print "thread ", $thread->tid(), ": terminated abnormally [B]\n"; - $exit += 1; - $error_trunk = 1; - } - } - -} - -# Set up the error message string to empty -my $message = ''; - -# set up known includes whitelist -my %includes_hash; -@includes_hash{@includes} = (); - -my @add_keys = keys %additions; - -# only run checks if there is at least one file to check -if ( $#add_keys >= 0 ) { - - # reduce the number of threads if are too few keys - # (prevent empty threads) - if ( $#add_keys < $num_threads - 1 ) { - $num_threads = $#add_keys + 1; - } - - # Set up the size of the chunk of work each thread will do. - # Each thread should process at least one key (i.e. at least one element of - # the array @add_keys, which in turn are the keys of the hash %additions) - # However, we also want to balance the work. - # We will start with a chunk size equivalent to a third of the keys divided - # equally across the threads, then progrssively re-use each thread with smaller - # chunks until the entire work pool has been exhausted. - - my $thread_add_keys_len; - - $thread_add_keys_len = max( 1, ( $#add_keys + 1 ) / ( 3 * $num_threads ) ); - - # fork the threads to execute run_checks - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - - # Initialise a shared memory space to store the output from each thread. - # This is shared so the main thread will be able to retrieve the output. - my @share_arr = []; - $output_threads[$i] = share(@share_arr); - - $add_keys_threads[$i] = []; - -# Store the work (in this case the list of added keys to process) for the i'th thread, -# by taking a chunk from the original add_keys array. The chunk will be of size -# thread_add_keys_len. - push @{ $add_keys_threads[$i] }, splice @add_keys, - $#add_keys - $thread_add_keys_len + 1; - - $exit_threads[$i] = 0; - - # fork the thread - # This will create a new thread which will execute the run_checks sub. - # Its thread id will be stored in the threads array. - $threads_array[$i] = threads->create( \&run_checks, $i ); - } - - # Create a list of threads - th_l - which contains those threads that are - # currently doing, or have done, some work. These are the threads which - # we will have to untimately finalise, possibly after waiting for them to - # complete. - - my @th_l; - - # add the currently running threads to the list - @th_l = threads->list(threads::running); - - # add the threads which have run work, but have already finished it. - push @th_l, threads->list(threads::joinable); - - # re-join (and possibly re-fork) all the threads. - # By doing this we will recycle all the threads until the entire work - # pool is executed and completed. - while ( $#add_keys >= 0 and $#th_l >= 0 ) { - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - - # Check if any of the threads in out list is done with its work chunk. - # If it is, we can re-join it, then recycle it by issuing a new work chunk. - if ( $threads_array[$i]->is_joinable() ) { - my $return_code = $threads_array[$i]->join(); - if ( !defined $return_code ) { - print "thread ", $threads_array[$i]->tid(), - ": terminated abnormally [C]\n"; - $exit += 1; - $error_trunk = 1; - } - - # Calculate a new work chunk. - # This chunk size will get progressivly smaller as the work pool is exhausted. - $thread_add_keys_len = - max( 1, ( $#add_keys + 1 ) / ( 3 * $num_threads ) ); - $exit += $exit_threads[$i]; - $exit_threads[$i] = 0; - if ( $#add_keys >= 0 ) { - $add_keys_threads[$i] = []; - - # Give the thread a new chunk of work - if ( $thread_add_keys_len > $#add_keys + 1 ) { - push @{ $add_keys_threads[$i] }, splice @add_keys, 0; - } - else { - push @{ $add_keys_threads[$i] }, splice @add_keys, - $#add_keys - $thread_add_keys_len + 1; - } - $threads_array[$i] = threads->create( \&run_checks, $i ); - } - } - } - - # Update the list of threads. - @th_l = threads->list(threads::running); - push @th_l, threads->list(threads::joinable); - } - - # By this point we have allocated all the work pool to the threads. - # Check all threads are re-joined - this will finalise the threads, - # and will block execution for any threads still processing work - # until they have completed it. - - foreach my $thread ( threads->list() ) { - my $return_code = $thread->join(); - if ( !defined $return_code ) { - print "thread ", $thread->tid(), ": terminated abnormally [D]\n"; - $exit += 1; - $error_trunk = 1; - } - } - - # Include any previously uncounted failures. - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - $exit += $exit_threads[$i]; - } - - if ( $exit > 0 ) { - - # This section prints failure messages for each file after each file is tested - print "The following files have failed the UMDP3 compliance tests:\n"; - for ( my $i = 0 ; $i < $num_threads ; $i++ ) { - - # Print the output from each thread in turn. - print @{ $output_threads[$i] }; - } - } - -} - -# Print message for a success if no files have failed -if ( $exit == 0 ) { - print "No modified files appear to have failed the compliance tests\n"; -} -else { - print "\n[ERROR] There were a total of $exit compliance tests failures\n"; -} - -# Exit with an exit code dependant on the options chosen. -if ( ( $error_trunk == 1 ) or ( $trunkmode == 0 ) ) { - - # Exit with number of fails, if it's zero (a UNIX success) it passed - exit( $exit > 0 ); -} -else { - # We are in trunkmode but error_trunk is not set: exit with success - exit 0; -} - -############################### SUBROUTINES ################################### - -sub trunk_files_parse { - - foreach my $line ( @{ $branchls_threads[ $_[0] ] } ) { - - #strip newline character - $line =~ s/\R$//sxm; - - # ignore non-source files - if ( $line !~ /\/$/sxm ) { - - # Add hash entries for added or modified files: - my $modified_file = $line; - - #normalise the path - $modified_file =~ s/$repository_working_path\///sxm; - $modified_file =~ s/.*trunk$repository_relative_path\///sxm; - - my @share_arr = []; - $additions{$modified_file} = share(@share_arr); - - my $file_url; - - if ($suite_mode) { - $file_url = $line; - } - else { - $file_url = "$branch/$modified_file"; - } - - my @file_lines = cat_file($file_url); - - # Store the lines added in a hash with the filename as the key, i.e. - # %additions = ( 'filename' => [ 'added line 1', 'added line 2'] ) - push @{ $additions{$modified_file} }, @file_lines; - } - } - - # empty return is important for thread return code checking - return 0; -} - -sub run_checks { - - # Loop over modified files - - foreach my $modified_file ( @{ $add_keys_threads[ $_[0] ] } ) { - - # Initialise variables - my $failed = 0; - my @failed_tests; - my $is_c_file = 0; - my $is_fortran_include_file = 0; - - # If it's an include file, fail unless it's on the include whitelist - # (e.g. its a C header or a Fortran include for reducing code duplication). - if ( $modified_file =~ /\.h$/sxm ) { - - if ( exists( $includes_hash{$modified_file} ) ) { - my @components = split( "/", $modified_file ); - if ( $components[0] =~ /src/sxm - and $components[-2] =~ /include/sxm - and not $components[1] =~ /include/sxm ) - { - $is_fortran_include_file = 1; - } - elsif ( $components[0] =~ /src/sxm - and $components[1] =~ /include/sxm ) - { - $is_c_file = 1; - } - else { - push @failed_tests, -"Added an include file outside of a recognised 'include' directory"; - } - } - else { - push @failed_tests, -"Modified or created non-whitelisted include file rather than using a module"; - $failed++; - } - } - - if ( $modified_file =~ /\.c$/sxm ) { - $is_c_file = 1; - } - - # if it's Fortran or C apply all the tests - if ( $modified_file =~ /\.F90$/sxm - or $modified_file =~ /\.f90$/sxm - or $is_c_file - or $is_fortran_include_file ) - { - - my $dispatch_table_diff; - my $dispatch_table_file; - - if ($is_c_file) { - $dispatch_table_diff = \%dispatch_table_diff_c; - $dispatch_table_file = \%dispatch_table_file_c; - } - else { - $dispatch_table_diff = \%dispatch_table_diff_fortran; - my %dispatch_table_file_fortran = - UMDP3DispatchTables::get_file_dispatch_table_fortran( - $modified_file); - $dispatch_table_file = \%dispatch_table_file_fortran; - } - - # Get the diff for this file out of the hash - my $added_lines_ref = $additions{$modified_file}; - my @added_lines = @$added_lines_ref; - - # Loop over each test which works on a diff - foreach my $testname ( keys %$dispatch_table_diff ) { - UMDP3::reset_extra_error_information(); - - # Get the subroutine reference from the tables at the top of this file - my $subroutine_ref = ${$dispatch_table_diff}{$testname}; - - # Run the test - my $answer = &$subroutine_ref(@added_lines); - - my %extra_error = UMDP3::get_extra_error_information(); - if ( scalar keys %extra_error > 0 ) { - my @extra_error = keys %extra_error; - my $extra_text = join( ", ", @extra_error ); - $testname .= ": $extra_text"; - } - - # If the test fails, increase the number of failures and add the testname - # to the array containing the list of problems with this file - if ($answer) { - $failed++; - push @failed_tests, $testname; - } - } - - # Get the whole file contents - # (if we are in trunk mode, @added_lines is already the full file) - my @file_lines; - - if ( $trunkmode == 1 ) { - @file_lines = @added_lines; - } - else { - # Analyse the command line argument to work out how to access the whole file - my $file_url; - my $url_revision; - my $short_branch = $branch; - if ( $short_branch =~ /@/sxm ) { - $short_branch =~ s/(@.*)//sxm; - $url_revision = $1; - } - - # The $url_revision variable is only present if the URL is a branch - if ($url_revision) { - $file_url = "$short_branch/$modified_file$url_revision"; - } - else { - $file_url = "$short_branch/$modified_file"; - } - - @file_lines = cat_file($file_url); - } - - # Perform each test which checks the whole file in a similar method to - # tests which work on a diff - foreach my $testname ( keys %$dispatch_table_file ) { - UMDP3::reset_extra_error_information(); - my $subroutine_ref = ${$dispatch_table_file}{$testname}; - my $answer = &$subroutine_ref(@file_lines); - my %extra_error = UMDP3::get_extra_error_information(); - if ( scalar keys %extra_error > 0 ) { - my @extra_error = keys %extra_error; - my $extra_text = join( ", ", @extra_error ); - $testname .= ": $extra_text"; - } - if ($answer) { - $failed++; - push @failed_tests, $testname; - } - } - - # Perform universal tests - foreach my $testname ( keys %dispatch_table_file_all ) { - UMDP3::reset_extra_error_information(); - my $subroutine_ref = $dispatch_table_file_all{$testname}; - my $answer = &$subroutine_ref(@file_lines); - my %extra_error = UMDP3::get_extra_error_information(); - if ( scalar keys %extra_error > 0 ) { - my @extra_error = keys %extra_error; - my $extra_text = join( ", ", @extra_error ); - $testname .= ": $extra_text"; - } - if ($answer) { - $failed++; - push @failed_tests, $testname; - } - } - - # end Filename matches F90/f90/c - } - else { - # Get the whole file contents - # (if we are in trunk mode, @added_lines is already the full file) - my @file_lines; - - # Get the diff for this file out of the hash - my $added_lines_ref = $additions{$modified_file}; - my @added_lines = @$added_lines_ref; - - if ( $trunkmode == 1 ) { - @file_lines = @added_lines; - } - else { - # Analyse the command line argument to work out how to access the whole file - my $file_url; - my $url_revision; - my $short_branch = $branch; - if ( $short_branch =~ /@/sxm ) { - $short_branch =~ s/(@.*)//sxm; - $url_revision = $1; - } - - # The $url_revision variable is only present if the URL is a branch - if ($url_revision) { - $file_url = "$short_branch/$modified_file$url_revision"; - } - else { - $file_url = "$short_branch/$modified_file"; - } - - @file_lines = cat_file($file_url); - } - - # read in data from file to $data, then - my $io_array = IO::ScalarArray->new(\@file_lines); - my $mimetype = mimetype($io_array); - - # if we can't detect a mime type, try some tricks to aid detection - if ( $mimetype =~ /text\/plain/sxm ) { - my @mime_file_lines = grep !/^\s*\#/sxm, @file_lines; - $io_array = IO::ScalarArray->new(\@mime_file_lines); - $mimetype = mimetype($io_array); - } - - # The binary files array contains all the binary mime types - # present in the UM. - my @binary_files = ( - 'application/x-tar', 'application/octet-stream', - 'image/gif', 'image/png', - ); - - # Exclude binary formats from universal tests - if ( !( $mimetype ~~ @binary_files ) ) { - - # Perform universal tests - foreach my $testname ( keys %dispatch_table_file_all ) { - UMDP3::reset_extra_error_information(); - my $subroutine_ref = $dispatch_table_file_all{$testname}; - my $answer = &$subroutine_ref(@file_lines); - my %extra_error = UMDP3::get_extra_error_information(); - if ( scalar keys %extra_error > 0 ) { - my @extra_error = keys %extra_error; - my $extra_text = join( ", ", @extra_error ); - $testname .= ": $extra_text"; - } - if ($answer) { - $failed++; - push @failed_tests, $testname; - } - } - } - - my $is_python = 0; - my $is_perl = 0; - my $is_shell = 0; - - if ( $mimetype =~ /text\/x-python/sxm - or $modified_file =~ /\.py$/sxm ) - { - $is_python = 1; - } - - if ( $mimetype =~ /application\/x-shellscript/sxm ) { - $is_shell = 1; - } - - if ( $mimetype =~ /application\/x-perl/sxm - or $modified_file =~ /\.pl$/sxm - or $modified_file =~ /\.pm$/sxm ) - { - $is_perl = 1; - } - - if ($is_python) { - my $in = join "", @file_lines; - my $out = ""; - my $err = ""; - - my $shellcheck = run [ 'pycodestyle', '-' ], \$in, \$out, \$err; - - if ( !$shellcheck ) { - $failed++; - my $shellcheck_fails = $out . $err; - $shellcheck_fails =~ s{\n?\n}{\n }sxmg; - $shellcheck_fails =~ s/stdin:/line /sxmg; - push @failed_tests, $shellcheck_fails; - } - } - - if ($is_perl) { - my $critic = UMDP3CriticPolicy::get_umdp3_critic_policy(); - my $in = join "", @file_lines; - my @violations = $critic->critique( \$in ); - - if (@violations) { - $failed++; - my $testname = join " ", @violations; - push @failed_tests, $testname; - } - } - - if ($is_shell) { - my $in = join "", @file_lines; - my $out = ""; - my $err = ""; - - my $shellcheck = run [ 'shellcheck', '-' ], \$in, \$out, \$err; - - if ( !$shellcheck ) { - $failed++; - my $shellcheck_fails = $out . $err; - $shellcheck_fails =~ s{\n?\n}{\n }sxmg; - $shellcheck_fails =~ s/\s\sIn\s-\s/ /sxmg; - push @failed_tests, $shellcheck_fails; - } - } - - } - - # If any tests failed, print the failure message - if ( $failed > 0 ) { - my $failure_text = join( "\n ", @failed_tests ); - - # The space before the colon makes the filename easier to cut and paste - $message .= "File $modified_file :\n $failure_text\n"; - push @{ $output_threads[ $_[0] ] }, $message; - $exit_threads[ $_[0] ] += $failed; - if ($log_cylc) { - my $filename = $modified_file; - $filename =~ s/\//+/sxmg; - if ( index( $filename, "." ) != -1 ) { - $filename .= "_"; - } - else { - $filename .= "."; - } - $filename = $log_cylc . "." . $filename . "report"; - my $fileres = open( my $fh, '>', $filename ); - if ( !defined $fileres ) { - die "ERR: $filename\n"; - } - print $fh $failure_text; - close($fh); - } - } - $message = ''; - - } # Loop over files - - # empty return is important for thread return code checking - return 0; -} - -# Cat a file, either from fcm (if the URL contains a colon) or from disk -sub cat_file { - my $url = shift; - my @lines; - my $error = 0; - - # If the URL contains a colon treat it as an fcm, else treat as a regular file - if ( $url =~ /:/sxm ) { - @lines = `. $fcm; fcm cat $url 2>&1`; - $error = $?; - } - else { - @lines = `cat $url 2>&1`; - $error = $?; - } - - # If there is an error, check if this is not due to the 'node kind' - # being innappropriate - if ( $error != 0 ) { - @lines = `. $fcm; fcm info $url 2>&1`; - if ( $? == 0 ) { - if ( ( join "\n", @lines ) !~ /Node\sKind:\sfile/sxmgi ) { - @lines = (''); - $error = 0; - } - } - } - - if ($error) { - die "Error cating file $url\n"; - } - - return @lines; -} - -sub read_file { - my $file = shift; - open( my $fh, '<', $file ) or die "Cannot read $file: $!\n"; - chomp( my @lines = <$fh> ); - close $fh; - return @lines; -} diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py new file mode 100644 index 00000000..7862188c --- /dev/null +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -0,0 +1,99 @@ +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file LICENSE +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* + +""" +Standalone version of the dispatch tables from UMDP3Job +Python translation of the original Perl module +""" +""" +ToDo : This list was checked to ensure it had something for each + test in the original. +""" +from typing import Dict, Callable +from umdp3_checker_rules import UMDP3Checker + +# Declare version +VERSION = "13.5.0" + + +class CheckerDispatchTables: + """Class containing dispatch tables for UMDP3 tests""" + + def __init__(self): + self.umdp3_checker = UMDP3Checker() + + def get_diff_dispatch_table_fortran(self) -> Dict[str, Callable]: + """Get dispatch table for Fortran diff tests""" + return { + # 'Captain Daves doomed test of destruction': self.umdp3_checker.capitulated_keywords, + "Lowercase Fortran keywords not permitted": self.umdp3_checker.capitalised_keywords, + "OpenMP sentinels not in column one": self.umdp3_checker.openmp_sentinels_in_column_one, + "Omitted optional space in keywords": self.umdp3_checker.unseparated_keywords, + "GO TO other than 9999": self.umdp3_checker.go_to_other_than_9999, + "WRITE without format": self.umdp3_checker.write_using_default_format, + "Lowercase or CamelCase variable names only": self.umdp3_checker.lowercase_variable_names, + "Use of dimension attribute": self.umdp3_checker.dimension_forbidden, + "Continuation lines shouldn't start with &": self.umdp3_checker.ampersand_continuation, + "Use of EQUIVALENCE or PAUSE": self.umdp3_checker.forbidden_keywords, + "Use of older form of relational operator (.GT. etc.)": self.umdp3_checker.forbidden_operators, + "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, + "Line includes tab character": self.umdp3_checker.tab_detection, + "USEd printstatus_mod instead of umPrintMgr": self.umdp3_checker.printstatus_mod, + "Used PRINT rather than umMessage and umPrint": self.umdp3_checker.printstar, + "Used WRITE(6) rather than umMessage and umPrint": self.umdp3_checker.write6, + "Used um_fort_flush rather than umPrintFlush": self.umdp3_checker.um_fort_flush, + "Used Subversion keyword substitution which is prohibited": self.umdp3_checker.svn_keyword_subst, + "Used !OMP instead of !$OMP": self.umdp3_checker.omp_missing_dollar, + "Used #ifdef or #ifndef rather than #if defined() or #if !defined()": self.umdp3_checker.cpp_ifdef, + "Presence of fortran comment in CPP directive": self.umdp3_checker.cpp_comment, + "Used an archaic fortran intrinsic function": self.umdp3_checker.obsolescent_fortran_intrinsic, + "EXIT statements should be labelled": self.umdp3_checker.exit_stmt_label, + "Intrinsic modules must be USEd with an INTRINSIC keyword specifier": self.umdp3_checker.intrinsic_modules, + "READ statements should have an explicit UNIT= as their first argument": self.umdp3_checker.read_unit_args, + } + + def get_file_dispatch_table_fortran( + self, filename: str = "" + ) -> Dict[str, Callable]: + """Get dispatch table for Fortran file tests""" + return { + "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, + "File is missing at least one IMPLICIT NONE": self.umdp3_checker.implicit_none, + "Never use STOP or CALL abort": self.umdp3_checker.forbidden_stop, + "Use of Fortran function as a variable name": self.umdp3_checker.intrinsic_as_variable, + "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, + "File missing correct code owner comment": self.umdp3_checker.check_code_owner, + "Used (/ 1,2,3 /) form of array initialisation, rather than [1,2,3] form": self.umdp3_checker.array_init_form, + } + + def get_diff_dispatch_table_c(self) -> Dict[str, Callable]: + """Get dispatch table for C diff tests""" + return { + "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, + "Line includes tab character": self.umdp3_checker.tab_detection, + 'Fixed-width Integer format specifiers must have a space between themselves and the string delimiter (the " character)': self.umdp3_checker.c_integral_format_specifiers, + } + + def get_file_dispatch_table_c(self) -> Dict[str, Callable]: + """Get dispatch table for C file tests""" + return { + "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, + "Used a deprecated C identifier": self.umdp3_checker.c_deprecated, + "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, + "File missing correct code owner comment": self.umdp3_checker.check_code_owner, + "Used an _OPENMP if-def without also testing against SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does not come first in the test.)": self.umdp3_checker.c_openmp_define_pair_thread_utils, + "Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def test in a logical combination with a third macro": self.umdp3_checker.c_openmp_define_no_combine, + "Used !defined(_OPENMP) rather than defined(_OPENMP) with #else branch": self.umdp3_checker.c_openmp_define_not, + "Used an omp #pragma (or #include ) without protecting it with an _OPENMP if-def": self.umdp3_checker.c_protect_omp_pragma, + "Used the #ifdef style of if-def, rather than the #if defined() style": self.umdp3_checker.c_ifdef_defines, + "C Unit does not end with a final newline character": self.umdp3_checker.c_final_newline, + } + + def get_file_dispatch_table_all(self) -> Dict[str, Callable]: + """Get dispatch table for universal file tests""" + return { + "Line includes trailing whitespace character(s)": self.umdp3_checker.line_trail_whitespace, + } diff --git a/script_umdp3_checker/file/whitelist_includes.txt b/script_umdp3_checker/file/whitelist_includes.txt deleted file mode 100644 index 7cf7d4c5..00000000 --- a/script_umdp3_checker/file/whitelist_includes.txt +++ /dev/null @@ -1,82 +0,0 @@ -src/atmosphere/atmosphere_service/include/qsat_mod_qsat.h -src/atmosphere/dynamics_solver/include/eg_calc_ax.h -src/atmosphere/dynamics_solver/include/eg_inner_prod.h -src/atmosphere/dynamics_solver/include/gmres1.h -src/atmosphere/dynamics_solver/include/tri_sor_vl.h -src/atmosphere/dynamics_solver/include/tri_sor.h -src/atmosphere/free_tracers/include/wtrac_all_phase_chg.h -src/atmosphere/free_tracers/include/wtrac_calc_ratio_fn.h -src/atmosphere/free_tracers/include/wtrac_move_phase.h -src/control/mpp/include/extended_halo_exchange_mod_swap_bounds_ext.h -src/control/mpp/include/tools_halo_exchange_mod_extended_copy_ns_halo_to_buffer.h -src/control/mpp/include/tools_halo_exchange_mod_extended_copy_ew_halo_to_buffer.h -src/control/mpp/include/tools_halo_exchange_mod_extended_copy_buffer_to_ns_halo.h -src/control/mpp/include/tools_halo_exchange_mod_extended_copy_buffer_to_ew_halo.h -src/control/mpp/include/halo_exchange_os_mod_swap_bounds_osput_nsew_wa.h -src/control/mpp/include/halo_exchange_os_mod_begin_swap_bounds_osput_nsew_wa.h -src/control/mpp/include/halo_exchange_os_mod_end_swap_bounds_osput_nsew_wa.h -src/control/mpp/include/non_blocking_halo_exchange_mod_begin_swap_bounds_hub.h -src/control/mpp/include/non_blocking_halo_exchange_mod_end_swap_bounds_hub.h -src/control/mpp/include/halo_exchange_mod_swap_bounds_hub_4D_to_3D.h -src/control/mpp/include/halo_exchange_mod_swap_bounds_hub_2D.h -src/control/mpp/include/halo_exchange_mod_swap_bounds_hub.h -src/control/mpp/include/halo_exchange_mod_swap_bounds_RB_hub.h -src/control/mpp/include/halo_exchange_mpi_mod_begin_swap_bounds_mpi_aao.h -src/control/mpp/include/halo_exchange_mpi_mod_begin_swap_bounds_mpi_nsew_wa.h -src/control/mpp/include/halo_exchange_mpi_mod_end_swap_bounds_mpi_aao.h -src/control/mpp/include/halo_exchange_mpi_mod_end_swap_bounds_mpi_nsew_wa.h -src/control/mpp/include/halo_exchange_mpi_mod_swap_bounds_mpi_aao.h -src/control/mpp/include/halo_exchange_mpi_mod_swap_bounds_mpi_nsew_wa.h -src/control/mpp/include/halo_exchange_mpi_mod_swap_bounds_mpi_rb.h -src/control/mpp/include/fill_external_halos.h -src/control/mpp/include/halo_exchange_ddt_mod_begin_swap_bounds_ddt_nsew_wa.h -src/control/mpp/include/halo_exchange_ddt_mod_end_swap_bounds_ddt_nsew_wa.h -src/control/mpp/include/halo_exchange_ddt_mod_swap_bounds_ddt_nsew_wa.h -src/control/mpp/include/halo_exchange_ddt_mod_begin_swap_bounds_ddt_aao.h -src/control/mpp/include/halo_exchange_ddt_mod_end_swap_bounds_ddt_aao.h -src/control/mpp/include/halo_exchange_ddt_mod_swap_bounds_ddt_aao.h -src/control/mpp/include/tools_halo_exchange_mod_copy_edge_to_ns_halo.h -src/control/mpp/include/tools_halo_exchange_mod_copy_ns_halo_single.h -src/control/mpp/include/tools_halo_exchange_mod_copy_buffer_to_cr_halo.h -src/control/mpp/include/tools_halo_exchange_mod_copy_buffer_to_ns_halo.h -src/control/mpp/include/tools_halo_exchange_mod_copy_buffer_to_ew_halo.h -src/control/mpp/include/tools_halo_exchange_mod_copy_cr_halo_to_buffer.h -src/control/mpp/include/tools_halo_exchange_mod_copy_ns_halo_to_buffer.h -src/control/mpp/include/tools_halo_exchange_mod_copy_ew_halo_to_buffer.h -src/include/other/c_memprof_routines.h -src/include/other/c_pio_timer.h -src/include/other/c_io_errcodes.h -src/include/other/c_io_lustreapi.h -src/include/other/c_io_libc.h -src/include/other/c_io_nextlayer.h -src/include/other/pio_umprint.h -src/include/other/c_io_internal.h -src/include/other/c_fort2c_prototypes.h -src/include/other/c_io.h -src/include/other/c_io_unix.h -src/include/other/c_portio.h -src/include/other/c_io_timing.h -src/include/other/c_io_rbuffering.h -src/include/other/read_wgdos_header.h -src/include/other/c_io_trace.h -src/include/other/c_io_byteswap.h -src/include/other/c_io_wbuffering.h -src/include/other/c_io_layers.h -src/include/other/exceptions-generic.h -src/include/other/exceptions-libunwind.h -src/include/other/exceptions.h -src/include/other/exceptions-ibm.h -src/include/other/exceptions-linux.h -src/include/other/sstpert.h -src/include/other/wafccb.h -src/include/other/um_compile_diag_suspend.h -src/include/other/c_io_blackhole.h -src/include/other/c_io_throttle.h -src/include/other/portutils.h -src/include/other/portio_api.h -src/include/other/io_timing_interfaces.h -src/atmosphere/large_scale_precipitation/include/lsp_moments.h -src/atmosphere/large_scale_precipitation/include/lsp_subgrid_lsp_qclear.h -src/include/other/c_io_lustreapi_pool.h -src/include/other/c_lustre_control.h -src/atmosphere/boundary_layer/include/buoy_tq.h diff --git a/script_umdp3_checker/fortran_keywords.py b/script_umdp3_checker/fortran_keywords.py new file mode 100644 index 00000000..06a92945 --- /dev/null +++ b/script_umdp3_checker/fortran_keywords.py @@ -0,0 +1,691 @@ +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file LICENSE +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* + +""" +List of the Fortran Keywords for the UMDP3 checker to check against. +Hopefully complete, but may need to be updated from time to time. +These have been 'ordered' in terms of frequency of occurrence within the UM codebase in order to improve efficiency. +ToDo: Current order may not be perfect, and could possibly be reviewed. However, it is probably 'good enough' for now. +""" + +fortran_keywords = ( + "IF", + "END", + "DO", + "CALL", + "THEN", + "USE", + "INTEGER", + "PARAMETER", + "ELSE", + "SUBROUTINE", + "IMPLICIT", + "NONE", + ".AND.", + "REAL", + "MODULE", + ".OR.", + "LOGICAL", + ".FALSE.", + "CASE", + "ALLOCATABLE", + "RETURN", + "PRIVATE", + ".TRUE.", + "CONTAINS", + "TO", + "POINTER", + "ALLOCATE", + "IN", + "TYPE", + "SELECT", + "CHARACTER", + "NOT", + "IS", + ".NOT.", + "FUNCTION", + "SAVE", + "GO", + "DATA", + "DEALLOCATE", + "WRITE", + "PUBLIC", + "INTERFACE", + "TARGET", + "INTENT", + "EXIT", + "AND", + "WHERE", + "FILE", + "OPTIONAL", + "NAMELIST", + "ERROR", + "PROCEDURE", + "READ", + "TIME", + "WHILE", + "OR", + "VALUE", + "PASS", + "CYCLE", + "NUMBER", + "SIZE", + "UNIT", + "CONTINUE", + "SEQUENCE", + "NAME", + "OUT", + "ONLY", + "MAX", + "INTRINSIC", + "SOURCE", + "FRACTION", + "IMPORT", + "ALL", + "RECORD", + "DIMENSION", + "DIM", + "OPEN", + "ANY", + "KIND", + "MIN", + "ALLOCATED", + "LOG", + "C_INT64_T", + "NULLIFY", + "PROGRAM", + "SCALE", + "INDEX", + ".EQV.", + "CLOSE", + "ERF", + "FALSE", + "RANGE", + "COUNT", + "SQRT", + "SYNC", + "LONG", + "EXP", + "INCLUDE", + "PROTECTED", + "FMT", + "MEMORY", + "RESULT", + "SHAPE", + "CLASS", + "ELEMENTAL", + "ABS", + "POSITION", + "PRESENT", + "SECOND", + "ASSOCIATED", + "C_F_POINTER", + "SUM", + "TRIM", + "IOSTAT", + "LEN", + "MOD", + "INT", + "PRECISION", + "COMPLEX", + "C_CHAR", + "FORMAT", + "BLOCK", + "CPP", + "C_PTR", + "ENTRY", + "PURE", + "SIN", + "CONVERT", + "EXIST", + "FREE", + "PRINT", + "RECURSIVE", + "SPACING", + "TRUE", + ".NEQV.", + "ACTION", + "COMMON", + "INQUIRE", + "NINT", + "NULL", + "RANK", + "TRANSFER", + "ASYNCHRONOUS", + "BACKSPACE", + "C_BOOL", + "DOUBLE", + "STATUS", + "STOP", + "SYSTEM", + "ABSTRACT", + "ATAN2", + "C_INTPTR_T", + "C_LOC", + "ERROR_UNIT", + "FINAL", + "IOSTAT_END", + "OPENED", + "RANDOM_SEED", + "WAIT", + "ACCESS", + "ASSIGN", + "C_INT", + "C_SIZE_T", + "ENUM", + "ENUMERATOR", + "GT", + "LOC", + "NEAREST", + "REWIND", + "STRUCTURE", + "UNPACK", + "CONTIGUOUS", + "COS", + "C_SIZEOF", + "EXTERNAL", + "GAMMA", + "IOMSG", + "OUTPUT_UNIT", + "ABORT", + "C_DOUBLE", + "C_FLOAT", + "C_INT16_T", + "FLUSH", + "FORM", + "ISO_C_BINDING", + "LE", + "NAMED", + "PRODUCT", + "RANDOM_NUMBER", + "SHORT", + "TAN", + "VOLATILE", + "ALARM", + "CHAR", + "CHMOD", + "C_FUNLOC", + "C_FUNPTR", + "C_INT8_T", + "DIRECT", + "EXTENDS", + "GENERIC", + "HUGE", + "INPUT_UNIT", + "LOCK", + "PACK", + "RESHAPE", + "SIGN", + "SYSTEM_CLOCK", + "ACHAR", + "ACOS", + "ACOSD", + "ACOSH", + "ADJUSTL", + "ADJUSTR", + "ADVANCE", + "AIMAG", + "AINT", + "ALGAMA", + "ALOG", + "ALOG10", + "AMAX0", + "AMAX1", + "AMIN0", + "AMIN1", + "AMOD", + "ANINT", + "ASIN", + "ASIND", + "ASINH", + "ASSIGNMENT", + "ASSOCIATE", + "ATAN", + "ATAN2D", + "ATAND", + "ATANH", + "ATOMIC_ADD", + "ATOMIC_AND", + "ATOMIC_CAS", + "ATOMIC_DEFINE", + "ATOMIC_FETCH_ADD", + "ATOMIC_FETCH_AND", + "ATOMIC_FETCH_OR", + "ATOMIC_FETCH_XOR", + "ATOMIC_INT_KIND", + "ATOMIC_LOGICAL_KIND", + "ATOMIC_OR", + "ATOMIC_REF", + "ATOMIC_XOR", + "BACKTRACE", + "BESJ0", + "BESJ1", + "BESJN", + "BESSEL_J0", + "BESSEL_J1", + "BESSEL_JN", + "BESSEL_Y0", + "BESSEL_Y1", + "BESSEL_YN", + "BESY0", + "BESY1", + "BESYN", + "BGE", + "BGT", + "BIND", + "BIT_SIZE", + "BLANK", + "BLE", + "BLT", + "BTEST", + "CABS", + "CCOS", + "CDABS", + "CDCOS", + "CDEXP", + "CDLOG", + "CDSIN", + "CDSQRT", + "CEILING", + "CEXP", + "CHARACTER_KINDS", + "CHARACTER_STORAGE_SIZE", + "CHDIR", + "CLOG", + "CMPLX", + "CODIMENSION", + "COMMAND_ARGUMENT_COUNT", + "COMPILER_OPTIONS", + "COMPILER_VERSION", + "CONCURRENT", + "CONJG", + "COSD", + "COSH", + "COTAN", + "COTAND", + "CO_BROADCAST", + "CO_MAX", + "CO_MIN", + "CO_REDUCE", + "CO_SUM", + "CPU_TIME", + "CQABS", + "CQCOS", + "CQEXP", + "CQLOG", + "CQSIN", + "CQSQRT", + "CSHIFT", + "CSIN", + "CSQRT", + "CTIME", + "C_ALERT", + "C_ASSOCIATED", + "C_BACKSPACE", + "C_CARRIAGE_RETURN", + "C_DOUBLE_COMPLEX", + "C_FLOAT128", + "C_FLOAT128_COMPLEX", + "C_FLOAT_COMPLEX", + "C_FORM_FEED", + "C_F_PROCPOINTER", + "C_HORIZONTAL_TAB", + "C_INT128_T", + "C_INT32_T", + "C_INTMAX_T", + "C_INT_FAST128_T", + "C_INT_FAST16_T", + "C_INT_FAST32_T", + "C_INT_FAST64_T", + "C_INT_FAST8_T", + "C_INT_LEAST128_T", + "C_INT_LEAST16_T", + "C_INT_LEAST32_T", + "C_INT_LEAST64_T", + "C_INT_LEAST8_T", + "C_LONG", + "C_LONG_DOUBLE", + "C_LONG_DOUBLE_COMPLEX", + "C_LONG_LONG", + "C_NEW_LINE", + "C_NULL_CHAR", + "C_NULL_FUNPTR", + "C_NULL_PTR", + "C_PTRDIFF_T", + "C_SHORT", + "C_SIGNED_CHAR", + "C_VERTICAL_TAB", + "DABS", + "DACOS", + "DACOSH", + "DASIN", + "DASINH", + "DATAN", + "DATAN2", + "DATANH", + "DATE_AND_TIME", + "DBESJ0", + "DBESJ1", + "DBESJN", + "DBESY0", + "DBESY1", + "DBESYN", + "DBLE", + "DCMPLX", + "DCONJG", + "DCOS", + "DCOSH", + "DDIM", + "DECODE", + "DEFERRED", + "DELIM", + "DERF", + "DERFC", + "DEXP", + "DFLOAT", + "DGAMMA", + "DIGITS", + "DIMAG", + "DINT", + "DLGAMA", + "DLOG", + "DLOG10", + "DMAX1", + "DMIN1", + "DMOD", + "DNINT", + "DOT_PRODUCT", + "DPROD", + "DREAL", + "DSHIFTL", + "DSHIFTR", + "DSIGN", + "DSIN", + "DSINH", + "DSQRT", + "DTAN", + "DTANH", + "DTIME", + "ENCODE", + "EOR", + "EOSHIFT", + "EPSILON", + "EQ", + "EQUIVALENCE", + "EQV", + "ERFC", + "ERFC_SCALED", + "ERRMSG", + "ETIME", + "EVENT_QUERY", + "EXECUTE_COMMAND_LINE", + "EXPONENT", + "EXTENDS_TYPE_OF", + "FDATE", + "FGET", + "FGETC", + "FILE_STORAGE_SIZE", + "FLOAT", + "FLOOR", + "FNUM", + "FORALL", + "FORMATTED", + "FPP", + "FPUT", + "FPUTC", + "FSEEK", + "FSTAT", + "FTELL", + "GE", + "GERROR", + "GETARG", + "GETCWD", + "GETENV", + "GETGID", + "GETLOG", + "GETPID", + "GETUID", + "GET_COMMAND", + "GET_COMMAND_ARGUMENT", + "GET_ENVIRONMENT_VARIABLE", + "GMTIME", + "HOSTNM", + "HYPOT", + "IABS", + "IACHAR", + "IALL", + "IAND", + "IANY", + "IARGC", + "IBCLR", + "IBITS", + "IBSET", + "ICHAR", + "IDATE", + "IDIM", + "IDINT", + "IDNINT", + "IEEE_CLASS", + "IEEE_CLASS_TYPE", + "IEEE_COPY_SIGN", + "IEEE_IS_FINITE", + "IEEE_IS_NAN", + "IEEE_IS_NEGATIVE", + "IEEE_IS_NORMAL", + "IEEE_LOGB", + "IEEE_NEGATIVE_DENORMAL", + "IEEE_NEGATIVE_INF", + "IEEE_NEGATIVE_NORMAL", + "IEEE_NEGATIVE_ZERO", + "IEEE_NEXT_AFTER", + "IEEE_POSITIVE_DENORMAL", + "IEEE_POSITIVE_INF", + "IEEE_POSITIVE_NORMAL", + "IEEE_POSITIVE_ZERO", + "IEEE_QUIET_NAN", + "IEEE_REM", + "IEEE_RINT", + "IEEE_SCALB", + "IEEE_SELECTED_REAL_KIND", + "IEEE_SIGNALING_NAN", + "IEEE_SUPPORT_DATATYPE", + "IEEE_SUPPORT_DENORMAL", + "IEEE_SUPPORT_DIVIDE", + "IEEE_SUPPORT_INF", + "IEEE_SUPPORT_NAN", + "IEEE_SUPPORT_SQRT", + "IEEE_SUPPORT_STANDARD", + "IEEE_UNORDERED", + "IEEE_VALUE", + "IEOR", + "IERRNO", + "IFIX", + "IMAG", + "IMAGES", + "IMAGE_INDEX", + "IMAGPART", + "INT16", + "INT2", + "INT32", + "INT64", + "INT8", + "INTEGER_KINDS", + "IOR", + "IOSTAT_EOR", + "IOSTAT_INQUIRE_INTERNAL_UNIT", + "IPARITY", + "IQINT", + "IRAND", + "ISATTY", + "ISHFT", + "ISHFTC", + "ISIGN", + "ISNAN", + "ISO_FORTRAN_ENV", + "IS_IOSTAT_END", + "IS_IOSTAT_EOR", + "ITIME", + "KILL", + "LBOUND", + "LCOBOUND", + "LEADZ", + "LEN_TRIM", + "LGAMMA", + "LGE", + "LGT", + "LINK", + "LLE", + "LLT", + "LNBLNK", + "LOCK_TYPE", + "LOG10", + "LOGICAL_KINDS", + "LOG_GAMMA", + "LSHIFT", + "LSTAT", + "LT", + "LTIME", + "MALLOC", + "MASKL", + "MASKR", + "MATMUL", + "MAX0", + "MAX1", + "MAXEXPONENT", + "MAXLOC", + "MAXVAL", + "MCLOCK", + "MCLOCK8", + "MERGE", + "MERGE_BITS", + "MIN0", + "MIN1", + "MINEXPONENT", + "MINLOC", + "MINVAL", + "MODULO", + "MOVE_ALLOC", + "MVBITS", + "NE", + "NEQV", + "NEW_LINE", + "NEXTREC", + "NML", + "NON_INTRINSIC", + "NON_OVERRIDABLE", + "NOPASS", + "NORM2", + "NUMERIC_STORAGE_SIZE", + "NUM_IMAGES", + "OPERATOR", + "PAD", + "PARITY", + "PERROR", + "POPCNT", + "POPPAR", + "QABS", + "QACOS", + "QASIN", + "QATAN", + "QATAN2", + "QCMPLX", + "QCONJG", + "QCOS", + "QCOSH", + "QDIM", + "QERF", + "QERFC", + "QEXP", + "QGAMMA", + "QIMAG", + "QLGAMA", + "QLOG", + "QLOG10", + "QMAX1", + "QMIN1", + "QMOD", + "QNINT", + "QSIGN", + "QSIN", + "QSINH", + "QSQRT", + "QTAN", + "QTANH", + "RADIX", + "RAN", + "RAND", + "READWRITE", + "REAL128", + "REAL32", + "REAL64", + "REALPART", + "REAL_KINDS", + "REC", + "RECL", + "RENAME", + "REPEAT", + "REWRITE", + "RRSPACING", + "RSHIFT", + "SAME_TYPE_AS", + "SCAN", + "SECNDS", + "SELECTED_CHAR_KIND", + "SELECTED_INT_KIND", + "SELECTED_REAL_KIND", + "SEQUENTIAL", + "SET_EXPONENT", + "SHIFTA", + "SHIFTL", + "SHIFTR", + "SIGNAL", + "SIND", + "SINH", + "SIZEOF", + "SLEEP", + "SNGL", + "SPREAD", + "SRAND", + "STAT", + "STAT_FAILED_IMAGE", + "STAT_LOCKED", + "STAT_LOCKED_OTHER_IMAGE", + "STAT_STOPPED_IMAGE", + "STAT_UNLOCKED", + "STORAGE_SIZE", + "SUBMODULE", + "SYMLNK", + "TAND", + "TANH", + "THIS_IMAGE", + "TIME8", + "TINY", + "TRAILZ", + "TRANSPOSE", + "TTYNAM", + "UBOUND", + "UCOBOUND", + "UMASK", + "UNFORMATTED", + "UNLINK", + "UNLOCK", + "VERIF", + "VERIFY", + "XOR", + "ZABS", + "ZCOS", + "ZEXP", + "ZLOG", + "ZSIN", + "ZSQRT", + ".EQ.", + ".GE.", + ".GT.", + ".LE.", + ".LT.", + ".NE.", + ".XOR.", +) diff --git a/script_umdp3_checker/search_lists.py b/script_umdp3_checker/search_lists.py new file mode 100644 index 00000000..05d4aaca --- /dev/null +++ b/script_umdp3_checker/search_lists.py @@ -0,0 +1,158 @@ +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file LICENSE +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* + +""" +Lists of words for Fortran checks. Some to confirm they are found in the approved form, some to test for as the intention is that they should no longer appear in the code. +""" + +# Obsolescent Fortran intrinsics : These should not be used in new code and +# their use in existing code should be reviewed. +obsolescent_intrinsics = { + "ALOG", + "ALOG10", + "AMAX0", + "AMAX1", + "AMIN0", + "AMIN1", + "AMOD", + "CABS", + "CCOS", + "CEXP", + "CLOG", + "CSIN", + "CSQRT", + "DABS", + "DACOS", + "DASIN", + "DATAN", + "DATAN2", + "DBESJ0", + "DBESJ1", + "DBESJN", + "DBESY0", + "DBESY1", + "DBESYN", + "DCOS", + "DCOSH", + "DDIM", + "DERF", + "DERFC", + "DEXP", + "DINT", + "DLOG", + "DLOG10", + "DMAX1", + "DMIN1", + "DMOD", + "DNINT", + "DSIGN", + "DSIN", + "DSINH", + "DSQRT", + "DTAN", + "DTANH", + "FLOAT", + "IABS", + "IDIM", + "IDINT", + "IDNINT", + "IFIX", + "ISIGN", + "LONG", + "MAX0", + "MAX1", + "MIN0", + "MIN1", + "SNGL", + "ZABS", + "ZCOS", + "ZEXP", + "ZLOG", + "ZSIN", + "ZSQRT", +} + +openmp_keywords = { + "PARALLEL", + "MASTER", + "CRITICAL", + "ATOMIC", + "SECTIONS", + "WORKSHARE", + "TASK", + "BARRIER", + "TASKWAIT", + "FLUSH", + "ORDERED", + "THREADPRIVATE", + "SHARED", + "DEFAULT", + "FIRSTPRIVATE", + "LASTPRIVATE", + "COPYIN", + "COPYPRIVATE", + "REDUCTION", +} + +fortran_types = { + "TYPE", + "CLASS", + "INTEGER", + "REAL", + "DOUBLE PRECISION", + "CHARACTER", + "LOGICAL", + "COMPLEX", + "ENUMERATOR", +} + +# These keywords should all appear with the requisite spaces in them +# (i.e. not 'ENDIF' but 'END IF') +unseparated_keywords_list = { + "BLOCKDATA", + "DOUBLEPRECISION", + "ELSEIF", + "ELSEWHERE", + "ENDASSOCIATE", + "ENDBLOCK", + "ENDBLOCKDATA", + "ENDCRITICAL", + "ENDDO", + "ENDENUM", + "ENDFILE", + "ENDFORALL", + "ENDFUNCTION", + "ENDIF", + "ENDINTERFACE", + "ENDMODULE", + "ENDPARALLEL", + "ENDPARALLELDO", + "ENDPROCEDURE", + "ENDPROGRAM", + "ENDSELECT", + "ENDSUBROUTINE", + "ENDTYPE", + "ENDWHERE", + "GOTO", + "INOUT", + "PARALLELDO", + "SELECTCASE", + "SELECTTYPE", +} + +# Retired if-defs (placeholder - would be loaded from configuration) +retired_ifdefs = set( + [ + "VATPOLES", + "A12_4A", + "A12_3A", + "UM_JULES", + "A12_2A", + ] +) + +# Deprecated C identifiers +deprecated_c_identifiers = {"gets", "tmpnam", "tempnam", "mktemp"} diff --git a/script_umdp3_checker/tests/test_fortran_checks.py b/script_umdp3_checker/tests/test_fortran_checks.py new file mode 100644 index 00000000..63facc57 --- /dev/null +++ b/script_umdp3_checker/tests/test_fortran_checks.py @@ -0,0 +1,719 @@ +import pytest +import sys +import os +from pathlib import Path + +# Add the current directory to Python path +sys.path.insert(0, str(Path(__file__).parent.parent)) +from umdp3_checker_rules import UMDP3Checker, TestResult + + +# Prevent pytest from trying to collect TestResult as more tests: +TestResult.__test__ = False +""" +ToDo : + THere has been a LOT of refactoring in the umdp3 module since these + tests were written. To persuade them to 'work' for now, only two + attributes of the TestResult class are used - failure_count and + errors. + Something more rigorous should be done to bring these tests up to + date. Especially as errors is (I think) only checking for the presence + of a given string in the keys of the error log dict. +""" +keyword_data = [ + ("IF THEN END", 0, {}, "All UPPERCASE keywords"), + ("if then end", 3, {"lowercase keyword: if"}, "All lowercase keywords"), + ("If Then End", 3, {"lowercase keyword: If"}, "All mixed case keywords"), + ("foo bar baz", 0, {}, "No keywords"), + ("! if then end", 0, {}, "Commented keywords"), +] +keyword_test_parameters = [data[:3] for data in keyword_data] +keyword_test_ids = [data[3] for data in keyword_data] + + +@pytest.mark.parametrize( + "lines, expected_result, expected_errors", + keyword_test_parameters, + ids=keyword_test_ids, +) +def test_keywords(lines, expected_result, expected_errors): + checker = UMDP3Checker() + result = checker.capitalised_keywords([lines]) + assert result.failure_count == expected_result + for error in expected_errors: + assert error in result.errors + + +fake_code_block = ["PROGRAM test", "IMPLICIT NONE", "INTEGER :: i", "END PROGRAM"] +implicit_none_paramters = [ + ( + [line for line in fake_code_block if line != "IMPLICIT NONE"], + 1, + "Missing IMPLICIT NONE", + ), + (fake_code_block, 0, "With IMPLICIT NONE"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in implicit_none_paramters], + ids=[data[2] for data in implicit_none_paramters], +) +def test_implicit_none(lines, expected_result): + checker = UMDP3Checker() + result = checker.implicit_none(lines) + assert result.failure_count == expected_result + + +openmp_sentinels_parameters = [ + (["!$OMP PARALLEL"], 0, "OpenMP sentinel in column one"), + ([" !$OMP PARALLEL"], 1, "OpenMP sentinel not in column one"), + ( + ["!$OMP PARALLEL", " !$OMP END PARALLEL"], + 1, + "One sentinel in column one, one not", + ), + ([" !$OMP PARALLEL", " !$OMP END PARALLEL"], 2, "No sentinels in column one"), + ( + ["! This is a comment", " !$OMP PARALLEL"], + 1, + "Comment line and sentinel not in column one", + ), + (["!$OMP PARALLEL", "!$OMP END PARALLEL"], 0, "Both sentinels in column one"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in openmp_sentinels_parameters], + ids=[data[2] for data in openmp_sentinels_parameters], +) +def test_openmp_sentinels_in_column_one(lines, expected_result): + checker = UMDP3Checker() + result = checker.openmp_sentinels_in_column_one(lines) + assert result.failure_count == expected_result + + +unseparated_keywords_parameters = [ + (["ELSEIF", "ENDDO", "ENDSUBROUTINE"], 3, "All keywords unseparated"), + (["ELSE IF", "ENDMODULE", "ENDSUBROUTINE"], 2, "One keyword separated"), + (["ELSE IF", "END PARRALEL DO", "END IF"], 0, "All keywords separated"), + (["i=0", "i=i+1", "PRINT*,i"], 0, "No keywords"), + (["PROGRAM test", "i=0", "ENDIF"], 1, "One keyword unseparated"), + (["i=0", "ENDPARALLELDO", "END DO"], 1, "One keyword unseparated in middle"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in unseparated_keywords_parameters], + ids=[data[2] for data in unseparated_keywords_parameters], +) +def test_unseparated_keywords(lines, expected_result): + checker = UMDP3Checker() + result = checker.unseparated_keywords(lines) + assert result.failure_count == expected_result + + +go_to_other_than_9999_parameters = [ + ( + [" GO TO 1000", " GO TO 2000"], + 2, + "All GO TO statements to labels other than 9999", + ), + ( + [" GO TO 9999", " GO TO 2000"], + 1, + "One GO TO statement to label other than 9999", + ), + ([" GO TO 9999", " GO TO 9999"], 0, "All GO TO statements to label 9999"), + ([" PRINT *, 'Hello, World!'", " i = i + 1"], 0, "No GO TO statements"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in go_to_other_than_9999_parameters], + ids=[data[2] for data in go_to_other_than_9999_parameters], +) +def test_go_to_other_than_9999(lines, expected_result): + checker = UMDP3Checker() + result = checker.go_to_other_than_9999(lines) + assert result.failure_count == expected_result + + +write_using_default_format_parameters = [ + ([" WRITE(*,*) 'Hello, World!'"], 1, "WRITE using default format"), + ([" WRITE(6,*) 'Hello, World!'"], 0, "WRITE using correct format"), + ([" PRINT *, 'Hello, World!'"], 0, "PRINT statement"), + ([" i = i + 1"], 0, "No WRITE statements"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in write_using_default_format_parameters], + ids=[data[2] for data in write_using_default_format_parameters], +) +def test_write_using_default_format(lines, expected_result): + checker = UMDP3Checker() + result = checker.write_using_default_format(lines) + assert result.failure_count == expected_result + + +test_lowercase_variable_names_parameters = [ + (["INTEGER :: lowercase_variable"], 0, "Lowercase variable name"), + (["REAL :: Lowercase_Variable"], 0, "Pascal case variable name"), + (["CHARACTER :: LOWERCASE_VARIABLE"], 1, "Uppercase variable name"), + ( + [" REAL :: lowercase_variable"], + 0, + "Lowercase variable name with leading whitespace", + ), + ( + [" CHARACTER :: Lowercase_Variable"], + 0, + "Pascal case variable name with leading whitespace", + ), + ( + [" INTEGER :: LOWERCASE_VARIABLE"], + 1, + "Uppercase variable name with leading whitespace", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_lowercase_variable_names_parameters], + ids=[data[2] for data in test_lowercase_variable_names_parameters], +) +def test_lowercase_variable_names(lines, expected_result): + checker = UMDP3Checker() + result = checker.lowercase_variable_names(lines) + assert result.failure_count == expected_result + + +test_dimension_forbidden_parameters = [ + (["REAL :: array(ARR_LEN)"], 0, "Dimension specified in variable declaration"), + (["REAL :: array"], 0, "No dimension specified in variable declaration"), + (["DIMENSION matrix(5,5)"], 1, "Dimension specified for declared variable"), + ( + ["INTEGER, DIMENSION(10) :: array"], + 1, + "Dimension specified in variable declaration with attributes", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_dimension_forbidden_parameters], + ids=[data[2] for data in test_dimension_forbidden_parameters], +) +def test_dimension_forbidden(lines, expected_result): + checker = UMDP3Checker() + result = checker.dimension_forbidden(lines) + assert result.failure_count == expected_result + + +test_ampersand_continuation_parameters = [ + ( + [" PRINT *, 'This is a long line &", " & that continues here'"], + 1, + "Ampersand continuation on both lines", + ), + ( + [" PRINT *, 'This is a long line &", " that continues here'"], + 0, + "Correct ampersand continuation", + ), + ( + [" PRINT *, 'This is a long line", "& that continues here'"], + 1, + "Incorrect ampersand continuation", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_ampersand_continuation_parameters], + ids=[data[2] for data in test_ampersand_continuation_parameters], +) +def test_ampersand_continuation(lines, expected_result): + checker = UMDP3Checker() + result = checker.ampersand_continuation(lines) + assert result.failure_count == expected_result + + +test_forbidden_keywords_parameters = [ + (["COMMON /BLOCK/ var1, var2"], 0, "Use of COMMON block"), + (["EQUIVALENCE (var1, var2)"], 1, "Use of EQUIVALENCE"), + (["PAUSE 1"], 1, "Use of PAUSE statement"), + (["REAL :: var1"], 0, "No forbidden keywords"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_forbidden_keywords_parameters], + ids=[data[2] for data in test_forbidden_keywords_parameters], +) +def test_forbidden_keywords(lines, expected_result): + checker = UMDP3Checker() + result = checker.forbidden_keywords(lines) + assert result.failure_count == expected_result + + +test_forbidden_operators_parameters = [ + (["IF (x .GT. y) THEN"], 1, "Use of .GT. operator"), + (["IF (x > y) THEN"], 0, "Use of > operator"), + (["IF (x .GE. y) THEN"], 1, "Use of .GE. operator"), + (["IF (x .EQ. y) THEN"], 1, "Use of .EQ. operator"), + (["IF (x .NE. y) THEN"], 1, "Use of .NE. operator"), + (["IF (x >= y) THEN"], 0, "Use of >= operator"), + (["IF (x == y) THEN"], 0, "Use of == operator"), + (["IF (x >= y) .AND. (y <= z) THEN"], 0, "Use of >= operator"), + (["IF (x == y) .OR. (y .LE. z) THEN"], 1, "Use of .LE. operator"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_forbidden_operators_parameters], + ids=[data[2] for data in test_forbidden_operators_parameters], +) +def test_forbidden_operators(lines, expected_result): + checker = UMDP3Checker() + result = checker.forbidden_operators(lines) + assert result.failure_count == expected_result + + +test_line_over_80chars_parameters = [ + ( + [ + " PRINT *, 'This line is definitely way over the eighty character limit set by the UM coding standards'" + ], + 1, + "Line over 80 characters", + ), + ([" PRINT *, 'This line is within the limit'"], 0, "Line within 80 characters"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_line_over_80chars_parameters], + ids=[data[2] for data in test_line_over_80chars_parameters], +) +def test_line_over_80chars(lines, expected_result): + checker = UMDP3Checker() + result = checker.line_over_80chars(lines) + assert result.failure_count == expected_result + + +test_tab_detection_parameters = [ + ([" PRINT *, 'This line has no tabs'"], 0, "No tabs"), + ([" PRINT *, 'This line has a tab\tcharacter'"], 1, "Line with tab character"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_tab_detection_parameters], + ids=[data[2] for data in test_tab_detection_parameters], +) +def test_tab_detection(lines, expected_result): + checker = UMDP3Checker() + result = checker.tab_detection(lines) + assert result.failure_count == expected_result + + +test_printstatus_mod_parameters = [ + ([" USE PrintStatus_mod"], 1, "Use of PRINTSTATUS_Mod"), + ([" USE umPrintMgr_mod"], 0, "Use of umPrintMgr_Mod"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_printstatus_mod_parameters], + ids=[data[2] for data in test_printstatus_mod_parameters], +) +def test_printstatus_mod(lines, expected_result): + checker = UMDP3Checker() + result = checker.printstatus_mod(lines) + assert result.failure_count == expected_result + + +test_printstar_parameters = [ + ([" PRINT *, 'Hello, World!'"], 1, "Use of PRINT *"), + ([" PRINT '(A)', 'Hello, World!'"], 0, "Use of PRINT with format"), + ([" umMessage = 'Hello, World!'"], 0, "Use of umMessage"), + ([" umPrint(umMessage)"], 0, "Use of umPrint"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_printstar_parameters], + ids=[data[2] for data in test_printstar_parameters], +) +def test_printstar(lines, expected_result): + checker = UMDP3Checker() + result = checker.printstar(lines) + assert result.failure_count == expected_result + + +test_write6_parameters = [ + ([" WRITE(6,*) 'Hello, World!'"], 1, "Use of WRITE(6,*)"), + ([" umPrint(umMessage)"], 0, "Use of umPrint"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_write6_parameters], + ids=[data[2] for data in test_write6_parameters], +) +def test_write6(lines, expected_result): + checker = UMDP3Checker() + result = checker.write6(lines) + assert result.failure_count == expected_result + + +test_um_fort_flush_parameters = [ + ([" CALL um_fort_flush()"], 1, "Use of um_fort_flush"), + ([" CALL umPrintFlush()"], 0, "No use of um_fort_flush"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_um_fort_flush_parameters], + ids=[data[2] for data in test_um_fort_flush_parameters], +) +def test_um_fort_flush(lines, expected_result): + checker = UMDP3Checker() + result = checker.um_fort_flush(lines) + assert result.failure_count == expected_result + + +test_svn_keyword_subst_parameters = [ + ([" ! $Id$"], 1, "Use of SVN keyword substitution"), + ([" ! This is a comment"], 0, "No SVN keyword substitution"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_svn_keyword_subst_parameters], + ids=[data[2] for data in test_svn_keyword_subst_parameters], +) +def test_svn_keyword_subst(lines, expected_result): + checker = UMDP3Checker() + result = checker.svn_keyword_subst(lines) + assert result.failure_count == expected_result + + +test_omp_missing_dollar_parameters = [ + (["!$OMP PARALLEL"], 0, "Correct OpenMP sentinel"), + (["!OMP PARALLEL"], 1, "Missing $ in OpenMP sentinel"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_omp_missing_dollar_parameters], + ids=[data[2] for data in test_omp_missing_dollar_parameters], +) +def test_omp_missing_dollar(lines, expected_result): + checker = UMDP3Checker() + result = checker.omp_missing_dollar(lines) + assert result.failure_count == expected_result + + +test_cpp_ifdef_parameters = [ + (["#ifndef DEBUG"], 1, "Incorrect #ifndef"), + (["#if defined(DEBUG)"], 0, "Correct #if defined"), + (["#if !defined(DEBUG)"], 0, "Correct #if !defined"), + (["#ifdef DEBUG"], 1, "Incorrect #ifdef"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_cpp_ifdef_parameters], + ids=[data[2] for data in test_cpp_ifdef_parameters], +) +def test_cpp_ifdef(lines, expected_result): + checker = UMDP3Checker() + result = checker.cpp_ifdef(lines) + assert result.failure_count == expected_result + + +test_cpp_comment_parameters = [ + # This test fails because the test is wrong - it needs fixing + (["#if !defined(cpp)"], 0, "cpp directive without comment"), + (["! This is a comment"], 0, "Fortran style comment"), + (["#if defined(cpp) ! some comment"], 1, "Fortran comment after cpp directive"), + (["#else ! another comment"], 1, "Fortran comment after #else directive"), + (["#else"], 0, "#else directive without comment"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_cpp_comment_parameters], + ids=[data[2] for data in test_cpp_comment_parameters], +) +def test_cpp_comment(lines, expected_result): + checker = UMDP3Checker() + result = checker.cpp_comment(lines) + assert result.failure_count == expected_result + + +test_obsolescent_fortran_intrinsic_parameters = [ + ([" x = ALOG(2.0)"], 1, "Use of obsolescent intrinsic ALOG"), + ([" y = DSIN(x)"], 1, "Use of obsolescent intrinsic DSIN"), + ([" z = SIN(x)"], 0, "Use of non-obsolescent intrinsic SIN"), + ([" x = ALOG10(2.0)", " y = DACOS(x)"], 2, "Use of two obsolescent intrinsics"), + ([" x = FLOAT(2)", " z = SIN(x)"], 1, "Use of one obsolescent intrinsic"), + ([" y = DMAX1(x)", " z = SIN(x)"], 1, "Use of one obsolescent intrinsic"), + ( + [" a = DATAN2(2.0)", " b = DSIN(a)", " c = SIN(b)"], + 2, + "Use of two obsolescent intrinsics", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_obsolescent_fortran_intrinsic_parameters], + ids=[data[2] for data in test_obsolescent_fortran_intrinsic_parameters], +) +def test_obsolescent_fortran_intrinsic(lines, expected_result): + checker = UMDP3Checker() + result = checker.obsolescent_fortran_intrinsic(lines) + assert result.failure_count == expected_result + + +test_exit_stmt_label_parameters = [ + ([" EXIT 10"], 0, "EXIT statement with label"), + ([" EXIT"], 1, "EXIT statement without label"), + ([" i = i + 1"], 0, "No EXIT statement"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_exit_stmt_label_parameters], + ids=[data[2] for data in test_exit_stmt_label_parameters], +) +def test_exit_stmt_label(lines, expected_result): + checker = UMDP3Checker() + result = checker.exit_stmt_label(lines) + assert result.failure_count == expected_result + + +test_intrinsic_modules_parameters = [ + ([" USE ISO_C_BINDING"], 1, "Incorrect Use of ISO_C_BINDING module"), + ( + [" USE, INTRINSIC :: ISO_FORTRAN_ENV"], + 0, + "Correct Use of ISO_FORTRAN_ENV module", + ), + ([" USE :: ISO_FORTRAN_ENV"], 1, "Incorrect Use of ISO_FORTRAN_ENV module"), + ([" USE, INTRINSIC :: ISO_C_BINDING"], 0, "Correct Use of ISO_C_BINDING module"), + ( + [" USE SOME_OTHER_MODULE"], + 0, + "Use of non-intrinsic module without INTRINSIC keyword", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_intrinsic_modules_parameters], + ids=[data[2] for data in test_intrinsic_modules_parameters], +) +def test_intrinsic_modules(lines, expected_result): + checker = UMDP3Checker() + result = checker.intrinsic_modules(lines) + assert result.failure_count == expected_result + + +test_read_unit_args_parameters = [ + ([" READ(5,*) var"], 1, "READ without explicit UNIT="), + ([" READ(UNIT=10) var"], 0, "READ with explicit UNIT="), + ( + [" READ(UNIT=unit_in, NML=lustre_control_custom_files) var"], + 0, + "READ with UNIT=variable", + ), + ([" READ(unit_in,*) var"], 1, "READ unit as variable, no UNIT="), + ([" READ(*,*) var"], 1, "READ from default unit"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_read_unit_args_parameters], + ids=[data[2] for data in test_read_unit_args_parameters], +) +def test_read_unit_args(lines, expected_result): + checker = UMDP3Checker() + result = checker.read_unit_args(lines) + assert result.failure_count == expected_result + + +test_retire_if_def_parameters = [ + (["#ifdef DEBUG"], 0, "Correct Use of #ifdef"), + (["#ifndef DEBUG"], 0, "Correct Use of #ifndef"), + (["#if defined(DEBUG)"], 0, "Correct Use of #if defined"), + (["#if !defined(DEBUG)"], 0, "Correct Use of #if !defined"), + (["#elif defined(DEBUG)"], 0, "Correct Use of #elif defined"), + (["#else"], 0, "Correct Use of #else"), + (["#ifdef VATPOLES"], 1, "Incorrect Use of VATPOLES"), + (["#ifndef A12_3A"], 1, "Incorrect Use of A12_3A"), + (["#if defined(A12_4A)"], 1, "Incorrect Use of A12_4A"), + (["#if !defined(UM_JULES)"], 1, "Incorrect Use of UM_JULES"), + (["#elif defined(VATPOLES)"], 1, "Incorrect Use of VATPOLES"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_retire_if_def_parameters], + ids=[data[2] for data in test_retire_if_def_parameters], +) +def test_retire_if_def(lines, expected_result): + checker = UMDP3Checker() + result = checker.retire_if_def(lines) + assert result.failure_count == expected_result + + +test_forbidden_stop_parameters = [ + ([" STOP 0"], 1, "Use of STOP statement"), + (["STOP"], 1, "Use of STOP statement without code"), + ([" PRINT *, 'Hello, World!'"], 0, "No STOP statement"), + (["CALL ABORT"], 1, "Use of call abort statement"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_forbidden_stop_parameters], + ids=[data[2] for data in test_forbidden_stop_parameters], +) +def test_forbidden_stop(lines, expected_result): + checker = UMDP3Checker() + result = checker.forbidden_stop(lines) + assert result.failure_count == expected_result + + +test_intrinsic_as_variable_parameters = [ + ([" INTEGER :: SIN"], 1, "Use of intrinsic name as variable"), + ([" REAL :: COS"], 1, "Use of intrinsic name as variable"), + ([" REAL :: MYVAR"], 0, "No use of intrinsic name as variable"), + ([" INTEGER :: TAN, MYVAR"], 1, "One intrinsic name as variable"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_intrinsic_as_variable_parameters], + ids=[data[2] for data in test_intrinsic_as_variable_parameters], +) +def test_intrinsic_as_variable(lines, expected_result): + checker = UMDP3Checker() + result = checker.intrinsic_as_variable(lines) + assert result.failure_count == expected_result + + +test_check_crown_copyright_parameters = [ + (["! Crown copyright 2024"], 0, "Correct crown copyright statement"), + (["! Copyright 2024"], 0, "A copyright statement"), + (["! This is a comment"], 1, "No crown copyright statement"), + (["! This is a Crown"], 1, "No crown copyright statement"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_check_crown_copyright_parameters], + ids=[data[2] for data in test_check_crown_copyright_parameters], +) +def test_check_crown_copyright(lines, expected_result): + checker = UMDP3Checker() + result = checker.check_crown_copyright(lines) + assert result.failure_count == expected_result + + +test_check_code_owner_parameters = [ + (["! Code Owner: John Doe"], 0, "code owner statement"), + (["! Code Owner : John Doe"], 0, "Another code owner statement"), + (["! This is a comment"], 1, "No code owner statement"), + (["! Code Owner: "], 0, "Code owner statement with no name"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_check_code_owner_parameters], + ids=[data[2] for data in test_check_code_owner_parameters], +) +def test_check_code_owner(lines, expected_result): + checker = UMDP3Checker() + result = checker.check_code_owner(lines) + assert result.failure_count == expected_result + + +test_array_init_form_parameters = [ + ([" INTEGER, DIMENSION(10) :: array = 0"], 0, "Array initialized using '='"), + ([" INTEGER, DIMENSION(10) :: array"], 0, "Array declared without initialization"), + ( + [" INTEGER, DIMENSION(10) :: array = (/ (i, i=1,10) /)"], + 1, + "Array initialized using array constructor", + ), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_array_init_form_parameters], + ids=[data[2] for data in test_array_init_form_parameters], +) +def test_array_init_form(lines, expected_result): + checker = UMDP3Checker() + result = checker.array_init_form(lines) + assert result.failure_count == expected_result + + +test_line_trail_whitespace_parameters = [ + ([" PRINT *, 'Hello, World! '"], 0, "Line 1 without trailing whitespace"), + ([" PRINT *, 'Hello, World!'"], 0, "Line 2 without trailing whitespace"), + ([" PRINT *, 'Hello, World! ' "], 1, "Line 1 with trailing whitespace"), + ( + [" something = sin(coeff /2.0_rdef) + & "], + 1, + "Line 2 with trailing whitespace", + ), + (["MODULE some_mod "], 1, "Line 3 with trailing whitespace"), +] + + +@pytest.mark.parametrize( + "lines, expected_result", + [data[:2] for data in test_line_trail_whitespace_parameters], + ids=[data[2] for data in test_line_trail_whitespace_parameters], +) +def test_line_trail_whitespace(lines, expected_result): + checker = UMDP3Checker() + result = checker.line_trail_whitespace(lines) + assert result.failure_count == expected_result diff --git a/script_umdp3_checker/tests/test_umdp3.py b/script_umdp3_checker/tests/test_umdp3.py new file mode 100644 index 00000000..fe81c669 --- /dev/null +++ b/script_umdp3_checker/tests/test_umdp3.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python3 +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file LICENSE +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* + +""" +Test script for the Python UMDP3 checker +""" + +import sys +import tempfile +from pathlib import Path + +# Add the current directory to Python path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from umdp3_checker_rules import UMDP3Checker, TestResult +from checker_dispatch_tables import CheckerDispatchTables +from typing import Callable, Iterable, List, Dict, Set +from dataclasses import dataclass, field + +# Prevent pytest from trying to collect TestResult as more tests: +TestResult.__test__ = False + + +def test_basic_functionality(): + """Test basic UMDP3 functionality""" + print("Testing basic UMDP3 functionality...") + + # Initialize UMDP3Checker + umdp3_checker = UMDP3Checker() + + # Test line length check + test_lines = [ + "This is a short line", + "This is a very long line that exceeds eighty characters and should trigger a failure in the line length test", + ] + expected = TestResult( + checker_name="Line Length Check", failure_count=1, passed=False + ) + result = umdp3_checker.line_over_80chars(test_lines) + print( + f"Line length test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + # Test tab detection + test_lines_tabs = ["Normal line", "Line with\ttab"] + + result = umdp3_checker.tab_detection(test_lines_tabs) + print( + f"Tab detection test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + # Test trailing whitespace + test_lines_whitespace = ["Normal line", "Line with trailing spaces "] + + result = umdp3_checker.line_trail_whitespace(test_lines_whitespace) + print( + f"Trailing whitespace test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + # Test IMPLICIT NONE check + fortran_without_implicit = ["PROGRAM test", "INTEGER :: i", "END PROGRAM"] + + result = umdp3_checker.implicit_none(fortran_without_implicit) + print( + f"IMPLICIT NONE test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + fortran_with_implicit = [ + "PROGRAM test", + "IMPLICIT NONE", + "INTEGER :: i", + "END PROGRAM", + ] + + result = umdp3_checker.implicit_none(fortran_with_implicit) + print( + f"IMPLICIT NONE test (good): {'PASS' if result.failure_count == 0 else 'FAIL'} (expected pass)" + ) + + +def test_dispatch_tables(): + """Test dispatch tables""" + print("\nTesting dispatch tables...") + + dispatch = CheckerDispatchTables() + + # Test getting dispatch tables + fortran_diff = dispatch.get_diff_dispatch_table_fortran() + print(f"Fortran diff tests available: {len(fortran_diff)}") + + fortran_file = dispatch.get_file_dispatch_table_fortran() + print(f"Fortran file tests available: {len(fortran_file)}") + + c_diff = dispatch.get_diff_dispatch_table_c() + print(f"C diff tests available: {len(c_diff)}") + + c_file = dispatch.get_file_dispatch_table_c() + print(f"C file tests available: {len(c_file)}") + + all_tests = dispatch.get_file_dispatch_table_all() + print(f"Universal tests available: {len(all_tests)}") + + +def test_fortran_specific(): + """Test Fortran-specific checks""" + print("\nTesting Fortran-specific checks...") + + umdp3_checker = UMDP3Checker() + + # Test obsolescent intrinsics + fortran_old_intrinsics = ["REAL :: x", "x = ALOG(2.0)", "y = DBLE(x)"] + + result = umdp3_checker.obsolescent_fortran_intrinsic(fortran_old_intrinsics) + print( + f"Obsolescent intrinsics test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + # Test forbidden operators + fortran_old_operators = ["IF (x .GT. y) THEN", " PRINT *, 'x is greater'"] + + result = umdp3_checker.forbidden_operators(fortran_old_operators) + print( + f"Forbidden operators test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + # Test PRINT statement + fortran_print = ["PRINT *, 'Hello world'"] + + result = umdp3_checker.printstar(fortran_print) + print( + f"PRINT statement test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" + ) + + +def test_c_specific(): + """Test C-specific checks""" + print("\nTesting C-specific checks...") + + umdp3_checker = UMDP3Checker() + + # Test deprecated C identifiers + c_deprecated = [ + "#include ", + "char buffer[100];", + "gets(buffer);", # deprecated function + ] + + result = umdp3_checker.c_deprecated(c_deprecated) + print( + f"Deprecated C identifiers test: {'PASS' if result > 0 else 'FAIL'} (expected failure)" + ) + + # Test format specifiers + c_format = ['printf("%10d", value);'] # missing space + + result = umdp3_checker.c_integral_format_specifiers(c_format) + print( + f"C format specifiers test: {'PASS' if result > 0 else 'FAIL'} (expected failure)" + ) + + +def create_test_files(): + """Create test files for full integration test""" + print("\nCreating test files...") + + # Create temporary directory + test_dir = tempfile.mkdtemp(prefix="umdp3_test_") + print(f"Test directory: {test_dir}") + + # Create a test Fortran file with issues + fortran_file = Path(test_dir) / "test.F90" + fortran_content = """! Test Fortran file with various issues +program test + ! Missing IMPLICIT NONE + integer :: i + real :: x + + ! Line that is too long and exceeds the eighty character limit which should trigger a failure + x = alog(2.0) ! obsolescent intrinsic + + if (x .gt. 1.0) then ! old operator + print *, 'Value is greater than 1' ! should use umPrint + endif + +end program test +""" + + with open(fortran_file, "w") as f: + f.write(fortran_content) + + # Create a test C file with issues + c_file = Path(test_dir) / "test.c" + c_content = """/* Test C file with various issues */ +#include + +int main() { + char buffer[100]; + + // Line that is way too long and exceeds the eighty character limit set by UMDP3 standards + gets(buffer); /* deprecated function */ + printf("%10d", 42); /* missing space in format specifier */ + + return 0; +} +""" + + with open(c_file, "w") as f: + f.write(c_content) + + # Create a test Python file + python_file = Path(test_dir) / "test.py" + python_content = """#!/usr/bin/env python3 +# Test Python file + +def test_function(): + # Line that is way too long and exceeds the eighty character limit which should be caught + x=1+2 # missing spaces around operators + return x + +if __name__ == "__main__": + test_function() +""" + + with open(python_file, "w") as f: + f.write(python_content) + + return test_dir + + +def main(): + """Main test function""" + print("UMDP3 Python Translation Test Suite") + print("=" * 40) + + try: + test_basic_functionality() + test_dispatch_tables() + test_fortran_specific() + test_c_specific() + + # Create test files for demonstration + test_dir = create_test_files() + print(f"\nTest files created in: {test_dir}") + print("You can now run the main checker on these files to see it in action.") + + print("\n" + "=" * 40) + print("All tests completed successfully!") + + except Exception as e: + print(f"Error during testing: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py new file mode 100644 index 00000000..31bd4b7a --- /dev/null +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -0,0 +1,1045 @@ +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file LICENSE +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* + +""" +Package to contain functions which test for UMDP3 compliance. +Python translation of the original Perl UMDP3.pm module. +""" +""" +ToDo : Several of the test functions are poor shadows of the original + Perl versions. They would benefit from improving to catch more + cases. + Equally, there could probably be more consistancly in how things like comments are stripped from the ends of lines + and/or full comment lines are skipped. +""" +import re +import threading +from typing import List, Dict, Set +from fortran_keywords import fortran_keywords +from search_lists import ( + obsolescent_intrinsics, + openmp_keywords, + fortran_types, + unseparated_keywords_list, + retired_ifdefs, + deprecated_c_identifiers, +) +from dataclasses import dataclass, field + +# Declare version +VERSION = "13.5.0" + + +@dataclass +class TestResult: + """Result from running a single style checker test on a file.""" + + """ToDo : unsure if both output and errors are required. + They make a bit more sense in the 'external_checkers' where + they hold stdout and stderr.""" + checker_name: str = "Unnamed Checker" + failure_count: int = 0 + passed: bool = False + output: str = "" + errors: Dict = field(default_factory=dict) + + +class UMDP3Checker: + """UMDP3 compliance checker class""" + + """ToDO : This class could possibly be abandoned, or replaced + by a similar class at a different level. Presently only one + instance is created in such a way that the original + _extra_error_info can't be used to hold extra information + at a per file level. resulting in the need to pass error_log + back, which feels like a bodge.""" + # precompiled, regularly used search patterns. + comment_line = re.compile(r"!.*$") + word_splitter = re.compile(r"\b\w+\b") + + def __init__(self): + self._extra_error_info = {} + self._lock = threading.Lock() + """ToDo: The Perl version had a dodgy looking subroutine to calculate + this, but I can't find where it was called from within the files in + 'bin'. It used all args as a 'list' - searched them for '#include' and + then returned the count as well as adding 1 to this global var if any were found. + This is either redundant and needs removing, or needs implementing properly.""" + self._number_of_files_with_variable_declarations_in_includes = 0 + + def reset_extra_error_information(self): + """Reset extra error information : + Appears to be used 'between' blocks of tests such as those on diffs and those on full files. + """ + with self._lock: + self._extra_error_info = {} + + def get_extra_error_information(self) -> Dict: + """Get extra error information. Dictionary with file names as the keys.""" + """ToDo: I presume this is what's used when creating the report of the actual failures and not just the count. However, this information doesn't seem to be output as yet and will need implementing.""" + with self._lock: + return self._extra_error_info.copy() + + def add_extra_error(self, key: str, value: str = ""): + """Add extra error information to the dictionary""" + """ToDo: The usefulness of the information added has not been assesed, nor does it appear to be reported as yet.""" + with self._lock: + self._extra_error_info[key] = value + + def add_error_log( + self, error_log: Dict, key: str = "no key", value: int = 0 + ) -> Dict: + """Add extra error information to the dictionary""" + """ToDo: This is a bodge to get more detailed info about + the errors back to the calling program. The info is + useful, but is currently presented on a per-test basis + rather than a per-file which would be easier to read + and make use of.""" + if key not in error_log: + error_log[key] = [] + error_log[key].append(value) + return error_log + + def get_include_number(self) -> int: + """Get number of files with variable declarations in includes""" + """ToDo: At present, this is hardwired to zero and I don't think anything alters it along the way. Plus it doesn't seem to be called from anywhere..... So this getter is probably very redundant.""" + return self._number_of_files_with_variable_declarations_in_includes + + def remove_quoted(self, line: str) -> str: + """Remove quoted strings from a line""" + """ToDo: The original version replaced the quoted sections with a "blessed reference", presumably becuase they were 're-inserted' at some stage. No idea if that capability is still required.""" + # Simple implementation - remove single and double quoted strings + result = line + + # Remove double quoted strings + result = re.sub(r'"[^"]*"', "", result) + + # Remove single quoted strings + result = re.sub(r"'[^']*'", "", result) + + return result + + """Test functions : + Each accepts a list of 'lines' to search and returns a + TestResult object containing all the information.""" + """ToDo: One thought here is each test should also be told whether it's being passed the contents of a full file, or just a selection of lines involved in a change as some of the tests appear to really only be useful if run on a full file (e.g. the Implicit none checker). Thus if only passed a selection of lines, these tests could be skipped/return 'pass' regardless. + Although, a brief look seems to imply that there are two 'dispatch tables' one for full files and one for changed lines.""" + + def capitulated_keywords(self, lines: List[str]) -> TestResult: + """A fake test, put in for testing purposes. + Probably not needed any more, but left in case.""" + failures = 0 + line_count = 0 + error_log = {} + # print("Debug: In capitulated_keywords test") + for line in lines: + line_count += 1 + # Remove quoted strings and comments + if line.lstrip(" ").startswith("!"): + continue + clean_line = self.remove_quoted(line) + clean_line = self.comment_line.sub("", clean_line) # Remove comments + + # Check for lowercase keywords + for word in self.word_splitter.findall(clean_line): + upcase = word.upper() + if upcase in fortran_keywords and word != upcase: + self.add_extra_error(f"lowercase keyword: {word}") + error_log = self.add_error_log( + error_log, f"capitulated keyword: {word}", line_count + ) + failures += 1 + + return TestResult( + checker_name="Capitulated Keywords", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {line_count} lines, found {failures} failures.", + # errors=self.get_extra_error_information() + errors=error_log, + ) + + def capitalised_keywords(self, lines: List[str]) -> TestResult: + """Check for the presence of lowercase Fortran keywords, which are + taken from an imported list 'fortran_keywords'.""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + # Remove quoted strings and comments + if line.lstrip(" ").startswith("!"): + continue + clean_line = self.remove_quoted(line) + clean_line = self.comment_line.sub("", clean_line) # Remove comments + + # Check for lowercase keywords + for word in self.word_splitter.findall(clean_line): + upcase = word.upper() + if upcase in fortran_keywords and word != upcase: + self.add_extra_error(f"lowercase keyword: {word}") + failures += 1 + error_log = self.add_error_log( + error_log, f"lowercase keyword: {word}", count + 1 + ) + + return TestResult( + checker_name="Capitalised Keywords", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def openmp_sentinels_in_column_one(self, lines: List[str]) -> TestResult: + """Check OpenMP sentinels are in column one""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"^\s+!\$OMP", line): + self.add_extra_error("OpenMP sentinel not in column 1") + failures += 1 + error_log = self.add_error_log( + error_log, f"OpenMP sentinel not in column 1:", count + 1 + ) + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="Capitalised Keywords", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def unseparated_keywords(self, lines: List[str]) -> TestResult: + """Check for omitted optional spaces in keywords""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if line.lstrip(" ").startswith("!"): + continue + clean_line = self.remove_quoted(line) + for pattern in [f"\\b{kw}\\b" for kw in unseparated_keywords_list]: + if re.search(pattern, clean_line, re.IGNORECASE): + self.add_extra_error(f"unseparated keyword in line: {line.strip()}") + failures += 1 + error_log = self.add_error_log( + error_log, + f"unseparated keyword in line: {line.strip()}", + count + 1, + ) + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="Unseparated Keywords", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def go_to_other_than_9999(self, lines: List[str]) -> TestResult: + """Check for GO TO statements other than 9999""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if match := re.search(r"\bGO\s*TO\s+(\d+)", clean_line, re.IGNORECASE): + label = match.group(1) + if label != "9999": + self.add_extra_error(f"GO TO {label}") + failures += 1 + error_log = self.add_error_log( + error_log, f"GO TO {label}", count + 1 + ) + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="GO TO other than 9999", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def write_using_default_format(self, lines: List[str]) -> TestResult: + """Check for WRITE without format""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\bWRITE\s*\(\s*\*\s*,\s*\*\s*\)", clean_line, re.IGNORECASE): + self.add_extra_error("WRITE(*,*) found") + failures += 1 + error_log = self.add_error_log(error_log, "WRITE(*,*) found", count + 1) + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="WRITE using default format", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def lowercase_variable_names(self, lines: List[str]) -> TestResult: + """Check for lowercase or CamelCase variable names only""" + """ToDo: This is a very simplistic check and will not detect many + cases which break UMDP3. I suspect the Perl Predeccessor concattenated continuation lines prior to 'cleaning' and checking. Having identified a declaration, it also then scanned the rest of the file for that variable name in any case.""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + # Simple check for UPPERCASE variable declarations + if re.search( + r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*[A-Z_]+", + clean_line, + re.IGNORECASE, + ): + # print(f"Debug: Found variable declaration line: {clean_line}") + clean_line = re.sub( + r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", + "", + clean_line, + ) + if re.search(r"[A-Z]{2,}", clean_line): + # print(f"Debug: Found UPPERCASE variable name: {clean_line}") + self.add_extra_error("UPPERCASE variable name") + failures += 1 + error_log = self.add_error_log( + error_log, "UPPERCASE variable name", count + 1 + ) + + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="Lowercase or CamelCase variable names only", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def dimension_forbidden(self, lines: List[str]) -> TestResult: + """Check for use of dimension attribute""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\bDIMENSION\b", clean_line, re.IGNORECASE): + self.add_extra_error("DIMENSION attribute used") + failures += 1 + error_log = self.add_error_log( + error_log, "DIMENSION attribute used", count + 1 + ) + + output = f"Checked {count+1} lines, found {failures} failures." + return TestResult( + checker_name="Use of dimension attribute", + failure_count=failures, + passed=(failures == 0), + output=output, + errors=error_log, + ) + + def ampersand_continuation(self, lines: List[str]) -> TestResult: + """Check continuation lines shouldn't start with &""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"^\s*&", line): + self.add_extra_error("continuation line starts with &") + failures += 1 + error_log = self.add_error_log( + error_log, "continuation line starts with &", count + 1 + ) + + return TestResult( + checker_name="Continuation lines shouldn't start with &", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def forbidden_keywords(self, lines: List[str]) -> TestResult: + """Check for use of EQUIVALENCE or PAUSE""" + """ToDo: Can't believe this will allow a COMMON BLOCK.... + Need to check against what the original did..""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\b(EQUIVALENCE|PAUSE)\b", clean_line, re.IGNORECASE): + self.add_extra_error("forbidden keyword") + failures += 1 + error_log = self.add_error_log( + error_log, "forbidden keyword", count + 1 + ) + + return TestResult( + checker_name="Use of forbidden keywords EQUIVALENCE or PAUSE", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def forbidden_operators(self, lines: List[str]) -> TestResult: + """Check for older form of relational operators""" + failures = 0 + error_log = {} + count = -1 + old_operators = [".GT.", ".GE.", ".LT.", ".LE.", ".EQ.", ".NE."] + + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + for op in old_operators: + if op in clean_line.upper(): + self.add_extra_error(f"old operator {op}") + failures += 1 + error_log = self.add_error_log( + error_log, f"old operator {op}", count + 1 + ) + + return TestResult( + checker_name="Use of older form of relational operator (.GT. etc.)", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def line_over_80chars(self, lines: List[str]) -> TestResult: + """Check for lines longer than 80 characters""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if len(line.rstrip()) > 80: + self.add_extra_error("line too long") + failures += 1 + error_log = self.add_error_log(error_log, "line too long", count + 1) + + return TestResult( + checker_name="Line longer than 80 characters", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def tab_detection(self, lines: List[str]) -> TestResult: + """Check for tab characters""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if "\t" in line: + self.add_extra_error("tab character found") + failures += 1 + error_log = self.add_error_log( + error_log, "tab character found", count + 1 + ) + + return TestResult( + checker_name="Line includes tab character", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def printstatus_mod(self, lines: List[str]) -> TestResult: + """Check for use of printstatus_mod instead of umPrintMgr""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"\bUSE\s+printstatus_mod\b", line, re.IGNORECASE): + self.add_extra_error("printstatus_mod used") + failures += 1 + error_log = self.add_error_log( + error_log, "printstatus_mod used", count + 1 + ) + + return TestResult( + checker_name="Use of printstatus_mod instead of umPrintMgr", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def printstar(self, lines: List[str]) -> TestResult: + """Check for PRINT rather than umMessage and umPrint""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\bPRINT\s*\*", clean_line, re.IGNORECASE): + self.add_extra_error("PRINT * used") + failures += 1 + error_log = self.add_error_log(error_log, "PRINT * used", count + 1) + + return TestResult( + checker_name="Use of PRINT rather than umMessage and umPrint", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def write6(self, lines: List[str]) -> TestResult: + """Check for WRITE(6) rather than umMessage and umPrint""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\bWRITE\s*\(\s*6\s*,", clean_line, re.IGNORECASE): + self.add_extra_error("WRITE(6) used") + failures += 1 + error_log = self.add_error_log(error_log, "WRITE(6) used", count + 1) + + return TestResult( + checker_name="Use of WRITE(6) rather than umMessage and umPrint", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def um_fort_flush(self, lines: List[str]) -> TestResult: + """Check for um_fort_flush rather than umPrintFlush""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"\bum_fort_flush\b", line): + self.add_extra_error("um_fort_flush used") + failures += 1 + error_log = self.add_error_log( + error_log, "um_fort_flush used", count + 1 + ) + return TestResult( + checker_name="Use of um_fort_flush rather than umPrintFlush", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def svn_keyword_subst(self, lines: List[str]) -> TestResult: + """Check for Subversion keyword substitution""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"\$\w+\$", line): + self.add_extra_error("SVN keyword substitution") + failures += 1 + error_log = self.add_error_log( + error_log, "SVN keyword substitution", count + 1 + ) + return TestResult( + checker_name="Subversion keyword substitution", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def omp_missing_dollar(self, lines: List[str]) -> TestResult: + """Check for !OMP instead of !$OMP""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"!\s*OMP\b", line) and not re.search(r"!\$OMP", line): + self.add_extra_error("!OMP without $") + failures += 1 + error_log = self.add_error_log(error_log, "!OMP without $", count + 1) + + return TestResult( + checker_name="!OMP without $", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def cpp_ifdef(self, lines: List[str]) -> TestResult: + """Check for #ifdef/#ifndef rather than #if defined()""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + if re.search(r"^\s*#\s*if(n)?def\b", line): + self.add_extra_error("#ifdef/#ifndef used") + failures += 1 + error_log = self.add_error_log( + error_log, "#ifdef/#ifndef used", count + 1 + ) + + return TestResult( + checker_name="#ifdef/#ifndef used", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def cpp_comment(self, lines: List[str]) -> TestResult: + """Check for Fortran comments in CPP directives""" + """Todo: This looks like it will incorrectly fail # if !defined(X) + How did the original do this test?""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + match = re.search( + r"^\s*#if *(!)?defined\s*\(\s*\w+\s*\)(.*)", line + ) or re.search(r"^\s*#(else) *(.*)", line) + if match: + if re.search(r".*!", match.group(2)): + self.add_extra_error("Fortran comment in CPP directive") + failures += 1 + error_log = self.add_error_log( + error_log, "Fortran comment in CPP directive", count + 1 + ) + + return TestResult( + checker_name="Fortran comment in CPP directive", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def obsolescent_fortran_intrinsic(self, lines: List[str]) -> TestResult: + """Check for archaic Fortran intrinsic functions""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + for intrinsic in obsolescent_intrinsics: + if re.search(rf"\b{intrinsic}\b", clean_line, re.IGNORECASE): + self.add_extra_error(f"obsolescent intrinsic: {intrinsic}") + failures += 1 + error_log = self.add_error_log( + error_log, f"obsolescent intrinsic: {intrinsic}", count + 1 + ) + + return TestResult( + checker_name="obsolescent intrinsic", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def exit_stmt_label(self, lines: List[str]) -> TestResult: + """Check that EXIT statements are labelled""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\bEXIT\s*$", clean_line, re.IGNORECASE): + self.add_extra_error("unlabelled EXIT statement") + failures += 1 + error_log = self.add_error_log( + error_log, "unlabelled EXIT statement", count + 1 + ) + + return TestResult( + checker_name="unlabelled EXIT statement", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def intrinsic_modules(self, lines: List[str]) -> TestResult: + """Check intrinsic modules are USEd with INTRINSIC keyword""" + failures = 0 + intrinsic_modules = ["ISO_C_BINDING", "ISO_FORTRAN_ENV"] + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + for module in intrinsic_modules: + if re.search( + rf"\bUSE\s+(::)*\s*{module}\b", clean_line, re.IGNORECASE + ) and not re.search(r"\bINTRINSIC\b", clean_line, re.IGNORECASE): + self.add_extra_error(f"intrinsic module {module} without INTRINSIC") + failures += 1 + error_log = self.add_error_log( + error_log, + f"intrinsic module {module} without INTRINSIC", + count + 1, + ) + + return TestResult( + checker_name="intrinsic modules", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def read_unit_args(self, lines: List[str]) -> TestResult: + """Check READ statements have explicit UNIT= as first argument""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if match := re.search(r"\bREAD\s*\(\s*([^,)]+)", clean_line, re.IGNORECASE): + first_arg = match.group(1).strip() + if not first_arg.upper().startswith("UNIT="): + self.add_extra_error("READ without explicit UNIT=") + failures += 1 + error_log = self.add_error_log( + error_log, "READ without explicit UNIT=", count + 1 + ) + + return TestResult( + checker_name="read unit args", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def retire_if_def(self, lines: List[str]) -> TestResult: + """Check for if-defs due for retirement""" + # retired_ifdefs = ['VATPOLES', 'A12_4A', 'A12_3A', 'UM_JULES', 'A12_2A',] + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + if match := re.search( + r"^#(?:(?:ifn?def|" # ifdef/ifndef + r"(?:el)?if\s*\S*?defined\s*\()" # elif/if defined( + r"\s*([^\)\s]*)\)?)", # SYMBOL + line, + re.IGNORECASE, + ): + # # The above match either returns [None, SYMBOL] or [SYMBOL, None] + # SYMBOL = [x for x in match.groups() if x] # reduce to a list of 1 element + if match.group(1) in retired_ifdefs: + self.add_extra_error(f"retired if-def: {match.group(1)}") + failures += 1 + error_log = self.add_error_log( + error_log, f"retired if-def: {match.group(1)}", count + 1 + ) + return TestResult( + checker_name="retired if-def", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def implicit_none(self, lines: List[str]) -> TestResult: + """Check file has at least one IMPLICIT NONE""" + error_log = {} + no_implicit_none = True + for line in lines: + if re.search(r"\bIMPLICIT\s+NONE\b", line, re.IGNORECASE): + no_implicit_none = False + break + + if no_implicit_none: + self.add_extra_error("missing IMPLICIT NONE") + error_log = self.add_error_log( + error_log, "No IMPLICIT NONE found in file", 0 + ) + + return TestResult( + checker_name="implicit none", + failure_count=1 if no_implicit_none else 0, + passed=not no_implicit_none, + output="Checked for IMPLICIT NONE statement.", + errors=error_log, + ) + + def forbidden_stop(self, lines: List[str]) -> TestResult: + """Check for STOP or CALL abort""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + clean_line = re.sub(r"!.*$", "", clean_line) + + if re.search(r"\b(STOP|CALL\s+abort)\b", clean_line, re.IGNORECASE): + self.add_extra_error("STOP or CALL abort used") + failures += 1 + error_log = self.add_error_log( + error_log, "STOP or CALL abort used", count + 1 + ) + + return TestResult( + checker_name="forbidden stop", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def intrinsic_as_variable(self, lines: List[str]) -> TestResult: + """Check for Fortran function used as variable name""" + failures = 0 + error_log = {} + count = -1 + # This would check for intrinsic function names used as variables + # Simplified implementation + # The AI said that - This needs to be compared to the Perl + # as I doubt this does anything near what that did... + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + if re.search( + r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER)\s*.*::\s*(SIN|COS|LOG|EXP|TAN)\b", + clean_line, + re.IGNORECASE, + ): + self.add_extra_error("intrinsic function used as variable") + failures += 1 + error_log = self.add_error_log( + error_log, "intrinsic function used as variable", count + 1 + ) + + return TestResult( + checker_name="intrinsic as variable", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count+1} lines, found {failures} failures.", + errors=error_log, + ) + + def check_crown_copyright(self, lines: List[str]) -> TestResult: + """Check for crown copyright statement""" + """ToDo: This is a very simplistic check and will not detect many + cases which break UMDP3. I suspect the Perl Predeccessor + did much more convoluted tests""" + comment_lines = [ + line.upper() for line in lines if line.lstrip(" ").startswith("!") + ] + file_content = "\n".join(comment_lines) + error_log = {} + found_copyright = False + if "CROWN COPYRIGHT" in file_content or "COPYRIGHT" in file_content: + found_copyright = True + + if not found_copyright: + self.add_extra_error("missing copyright or crown copyright statement") + error_log = self.add_error_log( + error_log, "missing copyright or crown copyright statement", 0 + ) + return TestResult( + checker_name="Crown Copyright Statement", + failure_count=0 if found_copyright else 1, + passed=found_copyright, + output="Checked for crown copyright statement.", + errors=error_log, + ) + + def check_code_owner(self, lines: List[str]) -> TestResult: + """Check for correct code owner comment""" + """ToDo: oh wow is this test worthless. We don't even guarentee to put the wrds "code owner" in a file. Plus, that's before you take into account both returns were '0' - so it couldn't possibly fail (picard.gif) + The Perl looks to have been designed to check the whole file, and turns various logicals on/off dependent on previously processed lines.""" + # Simplified check for code owner information + file_content = "\n".join(lines) + found_code_owner = False + error_log = {} + if "Code Owner:" in file_content or "code owner" in file_content.lower(): + # print(f"Debug: Found {file_content.lower()}") + found_code_owner = True + + # This is often a warning rather than an error + if not found_code_owner: + self.add_extra_error("missing code owner comment") + error_log = self.add_error_log(error_log, "missing code owner comment", 0) + return TestResult( + checker_name="Code Owner Comment", + failure_count=0 if found_code_owner else 1, + passed=found_code_owner, + output="Checked for code owner comment.", + errors=error_log, + ) + + def array_init_form(self, lines: List[str]) -> TestResult: + """Check for old array initialization form""" + """ToDo: Another instance that assumes continuation lines are concatenated prior to executing the actual test to ensure both forward slashes are on the same line.""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + clean_line = self.remove_quoted(line) + if re.search(r"\(/.*?\/\)", clean_line): + self.add_extra_error("old array initialization form (/ /)") + failures += 1 + error_log = self.add_error_log( + error_log, "old array initialization form (/ /)", count + 1 + ) + + return TestResult( + checker_name="Old Array Initialization Form", + failure_count=failures, + passed=(failures == 0), + output="Checked for old array initialization form (/ /).", + errors=error_log, + ) + + def line_trail_whitespace(self, lines: List[str]) -> TestResult: + """Check for trailing whitespace""" + failures = 0 + error_log = {} + for count, line in enumerate(lines): + if re.search(r"\s+$", line): + self.add_extra_error("trailing whitespace") + failures += 1 + error_log = self.add_error_log( + error_log, "trailing whitespace", count + 1 + ) + return TestResult( + checker_name="Trailing Whitespace", + failure_count=failures, + passed=(failures == 0), + output="Checked for trailing whitespace.", + errors=error_log, + ) + + # C-specific tests + + def c_integral_format_specifiers(self, lines: List[str]) -> int: + """Check C integral format specifiers have space""" + failures = 0 + for line in lines: + if re.search(r'%\d+[dioxX]"', line): + self.add_extra_error("missing space in format specifier") + failures += 1 + + return failures + + def c_deprecated(self, lines: List[str]) -> int: + """Check for deprecated C identifiers""" + failures = 0 + for line in lines: + for identifier in deprecated_c_identifiers: + if re.search(rf"\b{identifier}\b", line): + self.add_extra_error(f"deprecated C identifier: {identifier}") + failures += 1 + + return failures + + def c_openmp_define_pair_thread_utils(self, lines: List[str]) -> int: + """Check C OpenMP define pairing with thread utils""" + failures = 0 + for line in lines: + if re.search(r"#\s*if.*_OPENMP", line): + if not re.search(r"SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line): + self.add_extra_error( + "_OPENMP without SHUM_USE_C_OPENMP_VIA_THREAD_UTILS" + ) + failures += 1 + + return failures + + def c_openmp_define_no_combine(self, lines: List[str]) -> int: + """Check C OpenMP defines not combined with third macro""" + failures = 0 + for line in lines: + if re.search( + r"_OPENMP.*&&.*SHUM_USE_C_OPENMP_VIA_THREAD_UTILS.*&&", line + ) or re.search( + r"&&.*_OPENMP.*&&.*SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line + ): + self.add_extra_error("OpenMP defines combined with third macro") + failures += 1 + + return failures + + def c_openmp_define_not(self, lines: List[str]) -> int: + """Check for !defined(_OPENMP) usage""" + failures = 0 + for line in lines: + if re.search(r"!\s*defined\s*\(\s*_OPENMP\s*\)", line): + self.add_extra_error("!defined(_OPENMP) used") + failures += 1 + + return failures + + def c_protect_omp_pragma(self, lines: List[str]) -> int: + """Check OMP pragma is protected with ifdef""" + failures = 0 + in_openmp_block = False + + for line in lines: + if re.search(r"#\s*if.*_OPENMP", line): + in_openmp_block = True + elif re.search(r"#\s*endif", line): + in_openmp_block = False + elif re.search(r"#\s*pragma\s+omp", line) or re.search( + r"#\s*include\s*", line + ): + if not in_openmp_block: + self.add_extra_error("unprotected OMP pragma/include") + failures += 1 + + return failures + + def c_ifdef_defines(self, lines: List[str]) -> int: + """Check for #ifdef style rather than #if defined()""" + failures = 0 + for line in lines: + if re.search(r"^\s*#\s*ifdef\b", line): + self.add_extra_error("#ifdef used instead of #if defined()") + failures += 1 + + return failures + + def c_final_newline(self, lines: List[str]) -> int: + """Check C unit ends with final newline""" + if lines and not lines[-1].endswith("\n"): + self.add_extra_error("missing final newline") + return 1 + + return 0 diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py new file mode 100644 index 00000000..b6b72abc --- /dev/null +++ b/script_umdp3_checker/umdp3_conformance.py @@ -0,0 +1,517 @@ +import subprocess +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Callable, Iterable, List, Dict, Set +from dataclasses import dataclass, field +import argparse + +# Add custom modules to Python path if needed +# Add the repository root to access fcm_bdiff and git_bdiff packages +import sys + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from github_scripts import git_bdiff +import fcm_bdiff +from checker_dispatch_tables import CheckerDispatchTables +from umdp3_checker_rules import TestResult +import concurrent.futures + +""" +Framework and Classes to generate a list of files to check for style +conformance, and to run relevant style checkers on those files. +""" + + +@dataclass +class CheckResult: + """Result from running a style checker on a file.""" + + file_path: str = "No file provided" + tests_failed: int = 0 + all_passed: bool = False + test_results: List[TestResult] = field(default_factory=list) + + +class CMSSystem(ABC): + """Abstract base class for CMS systems like git or FCM.""" + + @abstractmethod + def get_changed_files(self) -> List[Path]: + """Get list of files changed between base_branch and branch.""" + pass + + @abstractmethod + def is_branch(self) -> bool: + """Check if we're looking at a branch""" + pass + + @abstractmethod + def get_branch_name(self) -> str: + """Get the current branch name.""" + pass + + +class GitBdiffWrapper(CMSSystem): + """Wrapper around git_bdiff to get changed files.""" + + def __init__(self, repo_path: Path = Path(".")): + self.repo_path = repo_path + self.bdiff_obj = git_bdiff.GitBDiff(repo=self.repo_path) + self.info_obj = git_bdiff.GitInfo(repo=self.repo_path) + + def get_changed_files(self) -> List[Path]: + """Get list of files changed between base_branch and branch.""" + return [Path(f) for f in self.bdiff_obj.files()] + + def is_branch(self) -> bool: + """Check if we're looking at a branch""" + is_a_branch = not self.info_obj.is_main() + return is_a_branch + + def get_branch_name(self) -> str: + """Get the current branch name.""" + return self.info_obj.branch + + +class FCMBdiffWrapper(CMSSystem): + """Wrapper around fcm_bdiff to get changed files.""" + + def __init__(self, repo_path: Path = Path(".")): + self.repo_path = repo_path + self.bdiff_obj = fcm_bdiff.FCMBDiff(repo=self.repo_path) + + def get_changed_files(self) -> List[Path]: + """Get list of files changed between base_branch and branch.""" + return [Path(f) for f in self.bdiff_obj.files()] + + def is_branch(self) -> bool: + """Check if we're looking at a branch""" + return self.bdiff_obj.is_branch + + def get_branch_name(self) -> str: + """Get the current branch name.""" + return self.bdiff_obj.branch + + +class StyleChecker(ABC): + """Abstract base class for style checkers.""" + + """ ToDo: This is where it might be good to set up a threadsafe + class instance to hold the 'expanded' check outputs. + One for each file being checked in parallel. + Curently the UMDP3 class holds "_extra_error_info" which + was used to provide more detailed error logging. + However, this is not threadsafe, so in a multithreaded + environment, the extra error info could get mixed up between + different files being checked in parallel. + For now, I've modified the UMDP3 class methods to return + a TestResult object directly, which includes the extra error + info, so that each thread can work independently.""" + name: str + file_extensions: Set[str] + check_functions: Dict[str, Callable] + files_to_check: List[Path] + + @abstractmethod + def get_name(self) -> str: + """Return the name of this checker.""" + pass + + @abstractmethod + def check(self, file_path: Path) -> CheckResult: + """Run the style checker on a file.""" + pass + + @classmethod + def from_full_list( + cls, + name: str, + file_extensions: Set[str], + check_functions: Dict[str, Callable], + all_files: List[Path], + ) -> "StyleChecker": + """Create a StyleChecker instance filtering files from a full list.""" + filtered_files = cls.filter_files(all_files, file_extensions) + return cls(name, file_extensions, check_functions, filtered_files) + + @staticmethod + def filter_files( + files: List[Path], file_extensions: Set[str] = set() + ) -> List[Path]: + """Filter files based on the checker's file extensions.""" + if not file_extensions: + return files + return [f for f in files if f.suffix in file_extensions] + + +class UMDP3_checker(StyleChecker): + """UMDP3 built-in style checker.""" + + files_to_check: List[Path] + + def __init__( + self, + name: str, + file_extensions: Set[str], + check_functions: Dict[str, Callable], + changed_files: List[Path] = [], + ): + self.name = name + self.file_extensions = file_extensions or set() + self.check_functions = check_functions or {} + self.files_to_check = ( + super().filter_files(changed_files, self.file_extensions) + if changed_files + else [] + ) + # Should wrap the following in some kind of verbosity control + # print(f"UMDP3_checker initialized :\n" + # f" Name : {self.name}\n" + # f" Has {len(self.check_functions)} check functions\n" + # f" Using {len(self.file_extensions)} file extensions\n" + # f" Gives {len(self.files_to_check)} files to check.") + + def get_name(self) -> str: + return self.name + + def check(self, file_path: Path) -> CheckResult: + """Run UMDP3 check function on file.""" + lines = file_path.read_text().splitlines() + file_results = [] # list of TestResult objects + for check_name, check_function in self.check_functions.items(): + file_results.append(check_function(lines)) + + tests_failed = sum([0 if result.passed else 1 for result in file_results]) + return CheckResult( + file_path=str(file_path), + tests_failed=tests_failed, + all_passed=tests_failed == 0, + test_results=file_results, + ) + + +class ExternalChecker(StyleChecker): + """Wrapper for external style checking tools.""" + + """ToDo : This is overriding the 'syle type hint from the base class. As we're currently passing in a list of strings to pass to 'subcommand'. Ideally we should be making callable functions for each check, but that would require more refactoring of the code. + Is that a 'factory' method?""" + check_commands: Dict[str, List[str]] + + def __init__( + self, + name: str, + file_extensions: Set[str], + check_functions: Dict[str, List[str]], + changed_files: List[Path], + ): + self.name = name + self.file_extensions = file_extensions or set() + self.check_commands = check_functions or {} + self.files_to_check = ( + super().filter_files(changed_files, self.file_extensions) + if changed_files + else [] + ) + # Should wrap the following in some kind of verbosity control + # print(f"ExternalChecker initialized :\n" + # f" Name : {self.name}\n" + # f" Has {len(self.check_commands)} check commands\n" + # f" Using {len(self.file_extensions)} file extensions\n" + # f" Gives {len(self.files_to_check)} files to check.") + + def get_name(self) -> str: + return self.name + + def check(self, file_path: Path) -> CheckResult: + """Run external checker commands on file.""" + file_results = [] + tests_failed = 0 + for test_name, command in self.check_commands.items(): + try: + cmd = command + [str(file_path)] + result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + except subprocess.TimeoutExpired: + file_results.append( + TestResult( + checker_name=test_name, + failure_count=1, + passed=False, + output=f"Checker {test_name} timed out", + errors={test_name: "TimeoutExpired"}, + ) + ) + tests_failed += 1 + except Exception as e: + file_results.append( + TestResult( + checker_name=test_name, + failure_count=1, + passed=False, + output=str(e), + errors={test_name: str(e)}, + ) + ) + tests_failed += 1 + else: + error_text = result.stderr if result.stderr else "" + file_results.append( + TestResult( + checker_name=test_name, + failure_count=0 if result.returncode == 0 else 1, + passed=result.returncode == 0, + output=result.stdout, + errors={test_name: error_text} if error_text else {}, + ) + ) + if result.returncode != 0: + tests_failed += 1 + return CheckResult( + file_path=str(file_path), + tests_failed=tests_failed, + all_passed=tests_failed == 0, + test_results=file_results, + ) + + +class ConformanceChecker: + """Main framework for running style checks in parallel.""" + + def __init__( + self, + cms: CMSSystem, + checkers: List[StyleChecker], + max_workers: int = 8, + changed_files: List[Path] = [], + results: List[CheckResult] = [], + ): + self.checkers = checkers + self.max_workers = max_workers + self.changed_files = changed_files + self.results = results + + def check_files(self) -> None: + """Run all checkers on given files in parallel. + ======================================================== + Note : + Each checker runs on its own set of files, and has a list of + appropriate checkers for that file type. + The loop to create the threads currently creates a thread for each + (checker, file) pair, which may not be optimal. + However, given that the number of files is likely to be small, + and the number of checkers is also small, this should be acceptable + for now. + ToDo : Might be good to have a threadsafe object for each file and + allow multiple checks to be run at once on that file.""" + results = [] + + with concurrent.futures.ThreadPoolExecutor( + max_workers=self.max_workers + ) as executor: + future_to_task = { + executor.submit(checker.check, file_path): file_path + for checker in self.checkers + for file_path in checker.files_to_check + } + + for future in concurrent.futures.as_completed(future_to_task): + result = future.result() + results.append(result) + self.results = results + return + + def print_results(self, print_volume: int = 3) -> bool: + """Print results and return True if all checks passed. + ======================================================== + ToDo: If an object encapsulating the data for each file is created" + it should contain the "in depth" printing method for file data. + With this method presenting the summary and then looping over + each file object to print its details at the desired verbosity.""" + all_passed = True + for result in self.results: + file_status = "✓ PASS" if result.all_passed else "✗ FAIL" + # Lousy variable names here - 'result' is the CheckResult for a file + # which had multiple tests, so result.all_passed is for that file. + all_passed = all_passed and result.all_passed + if print_volume >= 2: + print(f"{file_status:7s} file : {result.file_path:50s}") + if print_volume < 4 and result.all_passed: + continue + for test_result in result.test_results: + """ToDo : The output logic here is a bit of a mess.""" + if print_volume < 5 and test_result.passed: + continue + if print_volume >= 4: + print( + " " * 5 + + "-" * 50 + + " " * 5 + + f"\n {test_result.checker_name} Output :\n" + + " " * 5 + + f"{test_result.output}\n" + + " " * 5 + + "-" * 50 + ) + if test_result.errors: + print(" " * 5 + "-=-" * 30) + print(" " * 5 + f" Std Error :") + for count, (title, info) in enumerate( + test_result.errors.items() + ): + print(f" {count:2} : {title} : {info}") + print(" " * 5 + "-=-" * 30) + elif print_volume > 2: + print(f" {test_result.checker_name:60s} : ✗ FAIL") + return all_passed + + +def process_arguments(): + """Process command line arguments. + Somewhat a work in progress, but it's going to be needed eventually.""" + parser = argparse.ArgumentParser( + prog="umdp3_conformance.py", + description="""UMDP3 Conformance Checker""", + epilog="T-T-T-T-That's all folks !!", + ) + parser.add_argument( + "-f", + "--file-types", + type=str, + nargs="+", + choices=["Fortran", "Python"], + default=["Fortran"], + help="File types to check, comma-separated", + ) + """ ToDo : I /think/ the old version also checked '.h' files as Fortran. + Not sure if that is still needed.""" + parser.add_argument( + "-p", "--path", type=str, default="./", help="path to repository" + ) + parser.add_argument( + "--max-workers", type=int, default=8, help="Maximum number of parallel workers" + ) + parser.add_argument( + "-v", "--verbose", action="count", default=0, help="Increase output verbosity" + ) + parser.add_argument( + "-q", "--quiet", action="count", default=0, help="Decrease output verbosity" + ) + # The following are not yet implemented, but may become useful + # branch and base branch could be used to configure the CMS diff + # if/when git_bdiff is changed to override those values. + # parser.add_argument("--branch", type=str, default="HEAD", + # help="Branch to check") + # parser.add_argument("--base-branch", type=str, default="main", + # help="Base branch for comparison") + # parser.add_argument("--checker-configs", type=str, default=None, + # help="Checker configuration file") + args = parser.parse_args() + # Determine output verbosity level + args.volume = 3 + args.verbose - args.quiet + return args + + +def which_cms_is_it(path: str) -> CMSSystem: + """Determine which CMS is in use based on the presence of certain files.""" + repo_path = Path(path) + if (repo_path / ".git").is_dir(): + return GitBdiffWrapper(repo_path) + elif (repo_path / ".svn").is_dir(): + """ToDo : If we still want this to work reliably with FCM, it will need + to also accept URLs and not just local paths.""" + return FCMBdiffWrapper(repo_path) + else: + raise RuntimeError("Unknown CMS type at path: " + str(path)) + + +def create_style_checkers( + file_types: List[str], changed_files: List[Path] +) -> List[StyleChecker]: + """Create style checkers based on requested file types.""" + dispatch_tables = CheckerDispatchTables() + checkers = [] + if "Fortran" in file_types: + file_extensions = {".f", ".for", ".f90", ".f95", ".f03", ".f08", ".F90"} + fortran_diff_table = dispatch_tables.get_diff_dispatch_table_fortran() + fortran_file_table = dispatch_tables.get_file_dispatch_table_fortran() + print("Configuring Fortran checkers:") + combined_checkers = fortran_diff_table | fortran_file_table + fortran_file_checker = UMDP3_checker.from_full_list( + "Fortran Checker", file_extensions, combined_checkers, changed_files + ) + checkers.append(fortran_file_checker) + if "Python" in file_types: + print("Setting up Python external checkers.") + file_extensions = {".py"} + python_checkers = { + "flake 8": ["flake8", "-q"], + "black": ["black", "--check"], + "pylint": ["pylint", "-E"], + # "ruff" : ["ruff", "check"], + } + python_file_checker = ExternalChecker( + "Python External Checkers", file_extensions, python_checkers, changed_files + ) + checkers.append(python_file_checker) + + """ ToDo : Puting this here, with no file type filtering, + means it will always run on all changed files. + It might be better to add the dispatch table to all the other + checkers so it's only running on 'code' files.""" + all_file_dispatch_table = dispatch_tables.get_file_dispatch_table_all() + generic_checker = UMDP3_checker( + "Generic File Checker", set(), all_file_dispatch_table, changed_files + ) + checkers.append(generic_checker) + + return checkers + + +# Example usage +if __name__ == "__main__": + args = process_arguments() + + # Configure CMS, and check we've been passed a branch + cms = which_cms_is_it(args.path) + branch_name = cms.get_branch_name() + if not cms.is_branch(): + print( + f"The path {args.path} is not a branch." + f"\nReported branch name is : {branch_name}" + "\nThe meaning of differences is unclear, and so" + " checking is aborted." + ) + exit(1) + else: + print(f"The branch, {branch_name}, at path {args.path} is a branch.") + if args.volume >= 5: + print("The files changed on this branch are:") + for changed_file in cms.get_changed_files(): + print(f" {changed_file}") + + # Configure checkers + """ ToDo : Uncertain as to how flexible this needs to be. + For now, just configure checkers based on file type requested. + Later, could add configuration files to specify which + checkers to use for each file type.""" + checkers = [] + + active_checkers = create_style_checkers(args.file_types, cms.get_changed_files()) + + # ToDo : Could create a conformance checker for each + # file type. + # Currently, just create a single conformance checker + # with all active checkers. + checker = ConformanceChecker( + cms, + active_checkers, + max_workers=args.max_workers, + changed_files=[Path(f) for f in cms.get_changed_files()], + ) + + checker.check_files() + + all_passed = checker.print_results(print_volume=args.volume) + print(f"Total files checked: {len(checker.results)}") + print(f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}") + + exit(0 if all_passed else 1)