From 09510e08e0b8e8fc47c5c708bdbee46e3d245b4c Mon Sep 17 00:00:00 2001 From: r-sharp Date: Tue, 24 Mar 2026 17:36:18 +0000 Subject: [PATCH 01/27] Some initial drafts --- script_umdp3_checker/tests/conftest.py | 34 ++++++ .../tests/example_fortran_code.F90 | 113 ++++++++++++++++++ .../tests/test_umdp3_rules_S3.py | 34 ++++++ script_umdp3_checker/umdp3_rules_S3.py | 27 +++++ 4 files changed, 208 insertions(+) create mode 100644 script_umdp3_checker/tests/conftest.py create mode 100644 script_umdp3_checker/tests/example_fortran_code.F90 create mode 100644 script_umdp3_checker/tests/test_umdp3_rules_S3.py create mode 100644 script_umdp3_checker/umdp3_rules_S3.py diff --git a/script_umdp3_checker/tests/conftest.py b/script_umdp3_checker/tests/conftest.py new file mode 100644 index 00000000..fdb80f95 --- /dev/null +++ b/script_umdp3_checker/tests/conftest.py @@ -0,0 +1,34 @@ +from pathlib import Path +import pytest + +@pytest.fixture(scope="session") +def example_fortran_lines() -> list[str]: + """Return the example Fortran source as a list of lines for tests.""" + test_dir = Path(__file__).resolve().parent + return (test_dir / "example_fortran_code.F90").read_text().splitlines(True) + + +@pytest.fixture +def modified_fortran_lines(request: pytest.FixtureRequest, example_fortran_lines: list[str]) -> list[str]: + """Return a copy of example_fortran_lines with changes applied. + + ``request.param`` is a list of operation dicts, each with: + - ``{"operation": "replace", "line": N, "text": "..."}`` – replace line N + - ``{"operation": "delete", "line": N}`` – remove line N + - ``{"operation": "add", "line": N, "text": "..."}`` – insert before line N + + Operations are applied in descending line order so that earlier + line numbers are not shifted by later mutations. + """ + lines = list(example_fortran_lines) + for change in sorted(request.param, key=lambda o: o["line"], reverse=True): + idx = change["line"] - 1 + if change["operation"] == "replace": + lines[idx] = change["text"] + elif change["operation"] == "delete": + del lines[idx] + elif change["operation"] == "add": + lines.insert(idx, change["text"]) + else: + raise ValueError(f"Unknown operation: {change['operation']}") + return lines diff --git a/script_umdp3_checker/tests/example_fortran_code.F90 b/script_umdp3_checker/tests/example_fortran_code.F90 new file mode 100644 index 00000000..40ceb6f7 --- /dev/null +++ b/script_umdp3_checker/tests/example_fortran_code.F90 @@ -0,0 +1,113 @@ +! *****************************COPYRIGHT******************************* +! (C) Crown copyright Met Office. All rights reserved. +! For further details please refer to the file COPYRIGHT.txt +! which you should have received as part of this distribution. +! *****************************COPYRIGHT******************************* +! +! An example routine depicting how one should construct new code +! to meet the UMDP3 coding standards. +! +MODULE example_mod +IMPLICIT NONE +! Description: +! A noddy routine that illustrates the way to apply the UMDP3 +! coding standards to new code to help code developers +! pass code reviews. +! +! Method: +! In this routine we apply many of the UMDP3 features +! to construct a simple routine. The references on the RHS take the reader +! to the appropriate section of the UMDP3 guide with further details. +! +! Code Owner: Please refer to the UM file CodeOwners.txt +! This file belongs in section: Control +! +! Code description: +! Language: Fortran 2003. +! This code is written to UMDP3 standards. +CHARACTER(LEN=*), PARAMETER, PRIVATE :: ModuleName="EXAMPLE_MOD" +CONTAINS +! Subroutine Interface: +SUBROUTINE example (xlen,ylen,l_unscale,input1,input2, & + output, l_loud_opt) +! Description: +! Nothing further to add to module description. +USE atmos_constants_mod, ONLY: r +USE ereport_mod, ONLY: ereport +USE parkind1, ONLY: jpim, jprb +USE umprintMgr, ONLY: umprint,ummessage,PrNorm +USE errormessagelength_mod, ONLY: errormessagelength +USE yomhook, ONLY: lhook, dr_hook +IMPLICIT NONE +! Subroutine arguments +INTEGER, INTENT(IN) :: xlen !Length of first dimension of the arrays. +INTEGER, INTENT(IN) :: ylen !Length of second dimension of the arrays. +LOGICAL, INTENT(IN) :: l_unscale ! switch scaling off. +REAL, INTENT(IN) :: input1(xlen, ylen) !First input array +REAL, INTENT(IN OUT) :: input2(xlen, ylen) !Second input array +REAL, INTENT(OUT) :: output(xlen, ylen) !Contains the result +LOGICAL, INTENT(IN), OPTIONAL :: l_loud_opt !optional debug flag +! Local variables +INTEGER(KIND=jpim), PARAMETER :: zhook_in = 0 ! DrHook tracing entry +INTEGER(KIND=jpim), PARAMETER :: zhook_out = 1 ! DrHook tracing exit +INTEGER :: i ! Loop counter +INTEGER :: j ! Loop counter +INTEGER :: icode ! error code for EReport +LOGICAL :: l_loud ! debug flag (default false unless l_loud_opt is used) +REAL, ALLOCATABLE :: field(:,:) ! Scaling array to fill. +REAL(KIND=jprb) :: zhook_handle ! DrHook tracing +CHARACTER(LEN=*), PARAMETER :: RoutineName="EXAMPLE" +CHARACTER(LEN=errormessagelength) :: Cmessage ! used for EReport +CHARACTER(LEN=256) :: my_char ! string for output +! End of header +IF (lhook) CALL dr_hook(ModuleName//":"//RoutineName,zhook_in,zhook_handle) +! Set debug flag if argument is present +l_loud = .FALSE. +IF (PRESENT(l_loud_opt)) THEN + l_loud = l_loud_opt +END IF +my_char & + = "This is a very very very very very very very " & + // "loud character assignment" ! A pointless long character example. +icode=0 +! verbosity choice, output some numbers to aid with debugging +! protected by printstatus>=PrNorm and pe=0 +WRITE(ummessage,"(A,I0)")"xlen=",xlen +CALL umprint(ummessage,level=PrNorm,pe=0,src="example_mod") +WRITE(ummessage,"(A,I0)")"ylen=",ylen +CALL umprint(ummessage,level=PrNorm,pe=0,src="example_mod") +IF (l_loud) CALL umprint(my_char,level=PrNormal,src="example_mod") +! Allocate and initialise scaling array +! Noddy code warns user when scalling is not employed. +IF ( l_unscale ) THEN + icode = -100 ! set up WARNING message + ALLOCATE(field( 1,1 ) ) + cmessage="Scaling is switched off in run!" + CALL ereport(RoutineName,icode,cmessage) +ELSE + ALLOCATE(field( xlen, ylen ) ) + DO j=1,ylen + DO i=1,xlen + field(i, j) = (1.0*i) + (2.0*j) + input2(i, j) = input2(i, j) * field(i, j) + END DO + END DO +END IF +! The main calculation of the routine, using OpenMP. +!$OMP PARALLEL DEFAULT(NONE) & +!$OMP SHARED(xlen,ylen,input1,input2,field,output) & +!$OMP PRIVATE(i, j ) +!$OMP DO SCHEDULE(STATIC) +DO j = 1, ylen + i_loop: DO i = 1, xlen + ! Calculate the Output value: + output(i, j) = (input1(i, j) * input2(i, j)) + END DO i_loop +END DO ! j loop +!$OMP END DO +!$OMP END PARALLEL +DEALLOCATE (field) +IF (lhook) CALL dr_hook(ModuleName//":"//RoutineName,zhook_out,zhook_handle) +RETURN +END SUBROUTINE example +END MODULE example_mod \ No newline at end of file diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py new file mode 100644 index 00000000..925ee31a --- /dev/null +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -0,0 +1,34 @@ +import pytest + +from umdp3_rules_S3 import rule_S3_1 +from umdp3_checker_rules import TestResult + + +def test_example_fortran_lines_fixture(example_fortran_lines): + assert example_fortran_lines + + +@pytest.mark.parametrize( + "modified_fortran_lines, expected_fragment", + [ + # replace: swap the module name + ([{"operation": "replace", "line": 10, "text": "MODULE bad_mod\n"}], "bad_mod"), + # delete: remove the IMPLICIT NONE line + ([{"operation": "delete", "line": 11}], "MODULE example_mod"), + # add: insert a comment before the module line + ([{"operation": "add", "line": 10, "text": "! inserted comment\n"}], "! inserted comment"), + # combined: replace module name and add a comment above it + ( + [ + {"operation": "replace", "line": 10, "text": "MODULE renamed_mod\n"}, + {"operation": "add", "line": 10, "text": "! renamed module\n"}, + ], + "renamed_mod", + ), + ], + indirect=["modified_fortran_lines"], +) +def test_example_fortran_lines_parametrized( + modified_fortran_lines, expected_fragment +): + assert any(expected_fragment in line for line in modified_fortran_lines) diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py new file mode 100644 index 00000000..da6d68e1 --- /dev/null +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -0,0 +1,27 @@ +""" This is the storage location for rules based on what's defined as the standard in the UMDP: 003 document. These rules are not necessarily the same as the rules defined in the UMDP: 003 document, but they are based on them. The rules in this file are used by the umdp3_checker script to check for compliance with the UMDP: 003 standard. +For now, the document has been copied into this file as a placeholder for the rules as they appear.""" +import re +from typing import List, Dict +# from fortran_keywords import fortran_keywords +# from search_lists import ( +# obsolescent_intrinsics, +# unseparated_keywords_list, +# retired_ifdefs, +# deprecated_c_identifiers, +# ) +from dataclasses import dataclass, field +from umdp3_checker_rules import TestResult +""" +3.1 Source files should only contain a single program unit + +* Modules may be used to group related variables, subroutines and functions. Each separate file within the source tree should be uniquely named. +* The name of the file should reflect the name of the programming unit. Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). For example: + .F90 when writing a + .F90 with writing a + .F90 with only if upgrading existing subroutine since Subversion does not handle renaming of files very well and this allows history of the file to be easily retrieved. + This makes it easier to navigate the UM code source tree for given routines. +* You should avoid naming your program units and variables with names that match an intrinsic FUNCTION, SUBROUTINE or MODULE. We recommend the use of unique names within a program unit. +* You should also avoid naming your program units and variables with names that match a keyword in a Fortran statement. +* Avoid giving program units names that are likely to be used as variable names elsewhere in the code, e.g. field or string. This makes searching the code difficult and can cause the code browser to make erroneous connections between unrelated routines. +* Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work +""" \ No newline at end of file From 7784b5fb624a3308b611517611d849cdf0db6ade Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 25 Mar 2026 16:12:02 +0000 Subject: [PATCH 02/27] some working, but very messy examples --- script_umdp3_checker/tests/conftest.py | 12 +-- .../tests/test_umdp3_rules_S3.py | 82 ++++++++++++++++++- script_umdp3_checker/umdp3_rules_S3.py | 5 +- 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/script_umdp3_checker/tests/conftest.py b/script_umdp3_checker/tests/conftest.py index fdb80f95..205d65d6 100644 --- a/script_umdp3_checker/tests/conftest.py +++ b/script_umdp3_checker/tests/conftest.py @@ -5,7 +5,7 @@ def example_fortran_lines() -> list[str]: """Return the example Fortran source as a list of lines for tests.""" test_dir = Path(__file__).resolve().parent - return (test_dir / "example_fortran_code.F90").read_text().splitlines(True) + return (test_dir / "example_fortran_code.F90").read_text().splitlines() @pytest.fixture @@ -13,14 +13,14 @@ def modified_fortran_lines(request: pytest.FixtureRequest, example_fortran_lines """Return a copy of example_fortran_lines with changes applied. ``request.param`` is a list of operation dicts, each with: - - ``{"operation": "replace", "line": N, "text": "..."}`` – replace line N - - ``{"operation": "delete", "line": N}`` – remove line N - - ``{"operation": "add", "line": N, "text": "..."}`` – insert before line N + - ``{"operation": "replace", "line": N, "text": "..."}`` : replace line N + - ``{"operation": "delete", "line": N}`` : remove line N + - ``{"operation": "add", "line": N, "text": "..."}`` : insert before line N Operations are applied in descending line order so that earlier line numbers are not shifted by later mutations. """ - lines = list(example_fortran_lines) + lines = example_fortran_lines.copy() for change in sorted(request.param, key=lambda o: o["line"], reverse=True): idx = change["line"] - 1 if change["operation"] == "replace": @@ -31,4 +31,6 @@ def modified_fortran_lines(request: pytest.FixtureRequest, example_fortran_lines lines.insert(idx, change["text"]) else: raise ValueError(f"Unknown operation: {change['operation']}") + # for line in lines: + # print(line) return lines diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 925ee31a..98866060 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -1,13 +1,91 @@ import pytest +import sys +from pathlib import Path -from umdp3_rules_S3 import rule_S3_1 -from umdp3_checker_rules import TestResult +# from umdp3_rules_S3 import rule_S3_1 +# Add the current directory to Python path +sys.path.insert(0, str(Path(__file__).parent.parent)) +from umdp3_checker_rules import TestResult, UMDP3Checker +def modified_fortran_lines_fnc(lines: list[str], changes: list[dict]) -> list[str]: + """Return a copy of example_fortran_lines with changes applied. + + ``request.param`` is a list of operation dicts, each with: + - ``{"operation": "replace", "line": N, "text": "..."}`` : replace line N + - ``{"operation": "delete", "line": N}`` : remove line N + - ``{"operation": "add", "line": N, "text": "..."}`` : insert before line N + + Operations are applied in descending line order so that earlier + line numbers are not shifted by later mutations. + """ + # lines = example_fortran_lines.copy() + for change in sorted(changes, key=lambda o: o["line"], reverse=True): + idx = change["line"] - 1 + if change["operation"] == "replace": + lines[idx] = change["text"] + elif change["operation"] == "delete": + del lines[idx] + elif change["operation"] == "add": + lines.insert(idx, change["text"]) + # if change["operation"] == "replace": + # lines[idx:idx+1] = change["text"] + # elif change["operation"] == "delete": + # del lines[idx] + # elif change["operation"] == "add": + # lines[idx:idx], change["text"]) + else: + raise ValueError(f"Unknown operation: {change['operation']}") + for count, line in enumerate(lines): + print(f"line [{count}]: {line}") + return lines + + + +# ================================================================= def test_example_fortran_lines_fixture(example_fortran_lines): assert example_fortran_lines +# ================================================================= + +@pytest.mark.parametrize( + "modified_fortran_lines, expected_result, expected_errors", + [([{"operation": "add", "line": 10, "text": ""}], 0, []), # No changes, expect no errors + ([{"operation": "replace", "line": 10, "text": "Module example_mod\n"}], 1, {'lowercase keyword: Module': [10]}) + ], + indirect=["modified_fortran_lines"], +) +def test_keywords(modified_fortran_lines, expected_result, expected_errors): + checker = UMDP3Checker() + for line in modified_fortran_lines: + assert isinstance(line, str) # Ensure all lines are strings for debugging + result = checker.capitalised_keywords(modified_fortran_lines) + assert result.failure_count == expected_result + for error in expected_errors: + assert error in result.errors + + +# ================================================================= + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [([{"operation": "add", "line": 10, "text": ""}], 0, []), # No changes, expect no errors + ([{"operation": "replace", "line": 10, "text": "Module example_mod\n"}], 1, {'lowercase keyword: Module': [10]}) + ], +) +def test_keywords_II(example_fortran_lines, changes_list, expected_result, expected_errors): + checker = UMDP3Checker() + modified_fortran_lines = modified_fortran_lines_fnc(example_fortran_lines, changes_list) + result = checker.capitalised_keywords(modified_fortran_lines) + assert result.failure_count == expected_result + for error in expected_errors: + assert error in result.errors + + + +# ================================================================= + @pytest.mark.parametrize( "modified_fortran_lines, expected_fragment", [ diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index da6d68e1..886ba178 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -10,10 +10,13 @@ # deprecated_c_identifiers, # ) from dataclasses import dataclass, field -from umdp3_checker_rules import TestResult +from umdp3_checker_rules import UMDP3Checker, TestResult """ 3.1 Source files should only contain a single program unit +""" + +""" * Modules may be used to group related variables, subroutines and functions. Each separate file within the source tree should be uniquely named. * The name of the file should reflect the name of the programming unit. Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). For example: .F90 when writing a From e626a32ead4e5453c28478b16abe92200ca4aa6d Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 25 Mar 2026 18:02:34 +0000 Subject: [PATCH 03/27] Some tidying, and some questions for tomorrow. --- script_umdp3_checker/tests/conftest.py | 28 ----- script_umdp3_checker/tests/test_umdp3.py | 2 +- .../tests/test_umdp3_rules_S3.py | 119 +++++++----------- 3 files changed, 49 insertions(+), 100 deletions(-) diff --git a/script_umdp3_checker/tests/conftest.py b/script_umdp3_checker/tests/conftest.py index 205d65d6..9bb0cb02 100644 --- a/script_umdp3_checker/tests/conftest.py +++ b/script_umdp3_checker/tests/conftest.py @@ -6,31 +6,3 @@ def example_fortran_lines() -> list[str]: """Return the example Fortran source as a list of lines for tests.""" test_dir = Path(__file__).resolve().parent return (test_dir / "example_fortran_code.F90").read_text().splitlines() - - -@pytest.fixture -def modified_fortran_lines(request: pytest.FixtureRequest, example_fortran_lines: list[str]) -> list[str]: - """Return a copy of example_fortran_lines with changes applied. - - ``request.param`` is a list of operation dicts, each with: - - ``{"operation": "replace", "line": N, "text": "..."}`` : replace line N - - ``{"operation": "delete", "line": N}`` : remove line N - - ``{"operation": "add", "line": N, "text": "..."}`` : insert before line N - - Operations are applied in descending line order so that earlier - line numbers are not shifted by later mutations. - """ - lines = example_fortran_lines.copy() - for change in sorted(request.param, key=lambda o: o["line"], reverse=True): - idx = change["line"] - 1 - if change["operation"] == "replace": - lines[idx] = change["text"] - elif change["operation"] == "delete": - del lines[idx] - elif change["operation"] == "add": - lines.insert(idx, change["text"]) - else: - raise ValueError(f"Unknown operation: {change['operation']}") - # for line in lines: - # print(line) - return lines diff --git a/script_umdp3_checker/tests/test_umdp3.py b/script_umdp3_checker/tests/test_umdp3.py index 2f4a5db8..4cb99faf 100644 --- a/script_umdp3_checker/tests/test_umdp3.py +++ b/script_umdp3_checker/tests/test_umdp3.py @@ -20,7 +20,7 @@ from checker_dispatch_tables import CheckerDispatchTables # Prevent pytest from trying to collect TestResult as more tests: -TestResult.__test__ = False +TestResult.__test__ = False # type: ignore def test_basic_functionality(): diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 98866060..67b60cee 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -7,7 +7,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from umdp3_checker_rules import TestResult, UMDP3Checker -def modified_fortran_lines_fnc(lines: list[str], changes: list[dict]) -> list[str]: + +def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: """Return a copy of example_fortran_lines with changes applied. ``request.param`` is a list of operation dicts, each with: @@ -18,95 +19,71 @@ def modified_fortran_lines_fnc(lines: list[str], changes: list[dict]) -> list[st Operations are applied in descending line order so that earlier line numbers are not shifted by later mutations. """ - # lines = example_fortran_lines.copy() - for change in sorted(changes, key=lambda o: o["line"], reverse=True): - idx = change["line"] - 1 - if change["operation"] == "replace": - lines[idx] = change["text"] - elif change["operation"] == "delete": + lines = lines_in.copy() + for change in sorted(changes, key=lambda o: o[1], reverse=True): + idx = int(change[1]) - 1 + if change[0] == "replace": + lines[idx] = change[2] + elif change[0] == "delete": del lines[idx] - elif change["operation"] == "add": - lines.insert(idx, change["text"]) - # if change["operation"] == "replace": + elif change[0] == "add": + lines.insert(idx, change[2]) + # if change[0] == "replace": # lines[idx:idx+1] = change["text"] - # elif change["operation"] == "delete": + # elif change[0] == "delete": # del lines[idx] - # elif change["operation"] == "add": + # elif change[0] == "add": # lines[idx:idx], change["text"]) else: - raise ValueError(f"Unknown operation: {change['operation']}") - for count, line in enumerate(lines): - print(f"line [{count}]: {line}") + raise ValueError(f"Unknown operation: {change[0]}") + # for count, line in enumerate(lines, 1): + # print(f"line [{count}]: {line}") return lines - # ================================================================= -def test_example_fortran_lines_fixture(example_fortran_lines): - assert example_fortran_lines +"""This example hopefully demonstrates some of the complexity available... + Setting up a test, may involve multiple changes to the demo Fortran file, hence the complex entries in the parametrization. + Each error found is recorded with the line number(s) it was found on using the Error text as a dict key, and the line no(s) as a list, which means finding 'use' and 'Use' in the code would generate 2 different keys in the dict. + Then a single value recording the total number of failures is also included in the 'TestResult' object. + So for the dictionary of errors returned, we have to check they match, which at present involves checking it's 'len' but also that all the keys in one are in the other, i.e. youre not accidentally getting a matching count but different errors. + Then for each error(key) in the dict, you need to check how many lines it occured on, and that the line numbers match, i.e. the list of lines numbers is a match with the expected list of line numbers. -# ================================================================= - -@pytest.mark.parametrize( - "modified_fortran_lines, expected_result, expected_errors", - [([{"operation": "add", "line": 10, "text": ""}], 0, []), # No changes, expect no errors - ([{"operation": "replace", "line": 10, "text": "Module example_mod\n"}], 1, {'lowercase keyword: Module': [10]}) - ], - indirect=["modified_fortran_lines"], -) -def test_keywords(modified_fortran_lines, expected_result, expected_errors): - checker = UMDP3Checker() - for line in modified_fortran_lines: - assert isinstance(line, str) # Ensure all lines are strings for debugging - result = checker.capitalised_keywords(modified_fortran_lines) - assert result.failure_count == expected_result - for error in expected_errors: - assert error in result.errors - - -# ================================================================= - + There has to be a better way..... + As a side note, might it be better to write a function to compare 2 'TestResult' objects, which would be more robust to changes in the structure of the 'TestResult' object, and also make the test code more readable? If so, would it's place be here in the testing, or as a method in the 'TestResult' class itself? + """ @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", - [([{"operation": "add", "line": 10, "text": ""}], 0, []), # No changes, expect no errors - ([{"operation": "replace", "line": 10, "text": "Module example_mod\n"}], 1, {'lowercase keyword: Module': [10]}) - ], -) -def test_keywords_II(example_fortran_lines, changes_list, expected_result, expected_errors): - checker = UMDP3Checker() - modified_fortran_lines = modified_fortran_lines_fnc(example_fortran_lines, changes_list) - result = checker.capitalised_keywords(modified_fortran_lines) - assert result.failure_count == expected_result - for error in expected_errors: - assert error in result.errors - - - -# ================================================================= - -@pytest.mark.parametrize( - "modified_fortran_lines, expected_fragment", [ - # replace: swap the module name - ([{"operation": "replace", "line": 10, "text": "MODULE bad_mod\n"}], "bad_mod"), - # delete: remove the IMPLICIT NONE line - ([{"operation": "delete", "line": 11}], "MODULE example_mod"), - # add: insert a comment before the module line - ([{"operation": "add", "line": 10, "text": "! inserted comment\n"}], "! inserted comment"), - # combined: replace module name and add a comment above it ( [ - {"operation": "replace", "line": 10, "text": "MODULE renamed_mod\n"}, - {"operation": "add", "line": 10, "text": "! renamed module\n"}, + ["replace", 10, "Module example_mod"], + ["replace", 37, "use parkind1, ONLY: jpim, jprb"], + ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] ], - "renamed_mod", + 3, + {"lowercase keyword: Module": [10], "lowercase keyword: use": [37, 40]}, ), + ([["add", 10, ""]], 0, []), # No changes, expect no errors ], - indirect=["modified_fortran_lines"], ) -def test_example_fortran_lines_parametrized( - modified_fortran_lines, expected_fragment +def test_keywords( + example_fortran_lines, changes_list, expected_result, expected_errors ): - assert any(expected_fragment in line for line in modified_fortran_lines) + checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = checker.capitalised_keywords(modified_fortran_lines) + failure_count = result.failure_count + assert failure_count == expected_result + errors = result.errors + assert len(errors) == len(expected_errors) + for error, lines_list in errors.items(): + assert error in expected_errors + assert len(lines_list) == len(expected_errors[error]) + for line_no in lines_list: + assert line_no in expected_errors[error] + + +# ================================================================= From f71e30df3c3acd4ce730fc7bddbbd37330a2e612 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 27 Mar 2026 12:31:48 +0000 Subject: [PATCH 04/27] Twiddling things, it works, but is "useless" --- .../tests/test_umdp3_rules_S3.py | 43 ++++- script_umdp3_checker/umdp3_conformance.py | 4 + script_umdp3_checker/umdp3_rules_S3.py | 153 ++++++++++++++++-- 3 files changed, 185 insertions(+), 15 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 67b60cee..c281ccdb 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -2,10 +2,10 @@ import sys from pathlib import Path -# from umdp3_rules_S3 import rule_S3_1 # Add the current directory to Python path sys.path.insert(0, str(Path(__file__).parent.parent)) -from umdp3_checker_rules import TestResult, UMDP3Checker +from umdp3_rules_S3 import capitulated_keywords, r3_4_1_capitalised_keywords +# from umdp3_checker_rules import TestResult, UMDP3Checker def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: @@ -68,13 +68,14 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: ), ([["add", 10, ""]], 0, []), # No changes, expect no errors ], + ids = ["3 Errors", "No Errors"] ) -def test_keywords( +def test_r3_4_1_capitalised_keywords( example_fortran_lines, changes_list, expected_result, expected_errors ): - checker = UMDP3Checker() + # checker = UMDP3Checker() modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) - result = checker.capitalised_keywords(modified_fortran_lines) + result = r3_4_1_capitalised_keywords(modified_fortran_lines) failure_count = result.failure_count assert failure_count == expected_result errors = result.errors @@ -87,3 +88,35 @@ def test_keywords( # ================================================================= + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + ["replace", 10, "Module example_mod"], + ["replace", 37, "use parkind1, ONLY: jpim, jprb"], + ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] + ], + 3, + {"capitulated keyword: Module": [10], "capitulated keyword: use": [37, 40]}, + ), + ([["add", 10, ""]], 0, []), # No changes, expect no errors + ], +) +def test_keywords_II( + example_fortran_lines, changes_list, expected_result, expected_errors +): +# checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = capitulated_keywords(modified_fortran_lines) + failure_count = result.failure_count + assert failure_count == expected_result + errors = result.errors + assert len(errors) == len(expected_errors) + for error, lines_list in errors.items(): + assert error in expected_errors + assert len(lines_list) == len(expected_errors[error]) + for line_no in lines_list: + assert line_no in expected_errors[error] + diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 856b51f9..5a8dfe82 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -276,6 +276,9 @@ def check(self, file_path: Path) -> CheckResult: class ConformanceChecker: """Main framework for running style checks in parallel.""" + checkers: List[StyleChecker] + max_workers: int + results: List[CheckResult] def __init__( self, @@ -284,6 +287,7 @@ def __init__( ): self.checkers = checkers self.max_workers = max_workers + self.results = [] def check_files(self) -> None: """Run all checkers on given files in parallel. diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 886ba178..78bc83d0 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -9,22 +9,155 @@ # retired_ifdefs, # deprecated_c_identifiers, # ) -from dataclasses import dataclass, field -from umdp3_checker_rules import UMDP3Checker, TestResult +# from dataclasses import dataclass, field +# from script_umdp3_checker import fortran_keywords +from umdp3_checker_rules import TestResult +from fortran_keywords import fortran_keywords + +comment_line = re.compile(r"!.*$") +word_splitter = re.compile(r"\b\w+\b") + +def add_error_log( + error_log: Dict, key: str = "no key", value: int = 0 +) -> Dict: + """Add extra error information to the dictionary""" + """ +TODO: This is a bodge to get more detailed info about + the errors back to the calling program. The info is + useful, but is currently presented on a per-test basis + rather than a per-file which would be easier to read + and make use of.""" + if key not in error_log: + error_log[key] = [] + error_log[key].append(value) + return error_log + +def remove_quoted(line: str) -> str: + """Remove quoted strings from a line""" + """ +TODO: The original version replaced the quoted sections with a + "blessed reference", presumably becuase they were 're-inserted' at some + stage. No idea if that capability is still required.""" + # Simple implementation - remove single and double quoted strings + result = line + + # Remove double quoted strings + result = re.sub(r'"[^"]*"', "", result) + + # Remove single quoted strings + result = re.sub(r"'[^']*'", "", result) + + return result + """ 3.1 Source files should only contain a single program unit """ - +"""TODO: routine to identify the first program unit in the file and store it's name. +Then check it's the same name as the last thing closed.""" """ -* Modules may be used to group related variables, subroutines and functions. Each separate file within the source tree should be uniquely named. -* The name of the file should reflect the name of the programming unit. Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). For example: - .F90 when writing a - .F90 with writing a - .F90 with only if upgrading existing subroutine since Subversion does not handle renaming of files very well and this allows history of the file to be easily retrieved. - This makes it easier to navigate the UM code source tree for given routines. +* Modules may be used to group related variables, subroutines and functions. +* Each separate file within the source tree should be uniquely named. + - Not sure that's possible/easy within the current framework to process one file at a time... +* The name of the file should reflect the name of the programming unit. +* Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). * You should avoid naming your program units and variables with names that match an intrinsic FUNCTION, SUBROUTINE or MODULE. We recommend the use of unique names within a program unit. * You should also avoid naming your program units and variables with names that match a keyword in a Fortran statement. * Avoid giving program units names that are likely to be used as variable names elsewhere in the code, e.g. field or string. This makes searching the code difficult and can cause the code browser to make erroneous connections between unrelated routines. * Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work -""" \ No newline at end of file +""" + +"""3.4 Fortran style +* To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" +def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: + """Check for the presence of lowercase Fortran keywords, which are + taken from an imported list 'fortran_keywords'.""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + # Remove quoted strings and comments + if line.lstrip(" ").startswith("!"): + continue + clean_line = remove_quoted(line) + clean_line = comment_line.sub("", clean_line) + # Check for lowercase keywords + for word in word_splitter.findall(clean_line): + upcase = word.upper() + if upcase in fortran_keywords and word != upcase: + # self.add_extra_error(f"lowercase keyword: {word}") + failures += 1 + error_log = add_error_log( + error_log, f"lowercase keyword: {word}", count + 1 + ) + + return TestResult( + checker_name="Capitalised Keywords", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count + 1} lines, found {failures} failures.", + errors=error_log, + ) +""" +* The rest of the code may be written in either lower-case with underscores or CamelCase. +• To improve readability, you should always use the optional space to separate the Fortran keywords. The +full list of Fortran keywords with an optional spaces is: +ELSE IF END DO END FORALL END FUNCTION +END IF END INTERFACE END MODULE END PROGRAM +END SELECT END SUBROUTINE END TYPE END WHERE +SELECT CASE ELSE WHERE DOUBLE PRECISION END ASSOCIATE +END BLOCK END BLOCK DATA END ENUM END FILE +END PROCEDURE GO TO IN OUT SELECT TYPE +Note that not all of these are approved or appropriate for use in UM code. This rule also applies to OpenMP +keywords. (See: 3.15) +• The full version of END should be used at all times, eg END SUBROUTINE and END FUNCTION +• New code should be written using Fortran 95/2003 features. Avoid non-portable vendor/compiler exten- +sions. +• When writing a REAL literal with an integer value, put a 0 after the decimal point (i.e. 1.0 as opposed to 1.) +to improve readability. +• Avoid using obsolescent features of the Fortran language, instead make use of F95/2003 alternatives. For +example, statement functions are among the list of deprecated features in the F95 standard and these can +be replaced by FUNCTIONs within appropriate MODULEs. +• Do not use archaic forms of intrinsic functions. For example, LOG() should be used in place of ALOG(), +MAX() instead of AMAX1(), REAL() instead of FLOAT() etc. +• Never use the PAUSE statement. +• Never use the STOP statement, see 3.19 +• The standard delimiter for namelists is /. In particular, note that &END is non-standard and should be +avoided. For further information on namelists please refer to 4.1 +• Only use the generic names of intrinsic functions, avoid the use of ’hardware’ specific intrinsic functions. +Use the latter if an only if there is an optimisation benefit and then it must be protected by a platform +specific CPP flag 3.17""" + + +def capitulated_keywords(lines: List[str]) -> TestResult: + """A fake test, put in for testing purposes. + Probably not needed any more, but left in case.""" + failures = 0 + line_count = 0 + error_log = {} + # print("Debug: In capitulated_keywords test") + for line in lines: + line_count += 1 + # Remove quoted strings and comments + if line.lstrip(" ").startswith("!"): + continue + clean_line = remove_quoted(line) + clean_line = comment_line.sub("", clean_line) + + # Check for lowercase keywords + for word in word_splitter.findall(clean_line): + upcase = word.upper() + if upcase in fortran_keywords and word != upcase: + # add_extra_error(f"lowercase keyword: {word}") + error_log = add_error_log( + error_log, f"capitulated keyword: {word}", line_count + ) + failures += 1 + + return TestResult( + checker_name="Capitulated Keywords", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {line_count} lines, found {failures} failures.", + errors=error_log, + ) From 7134e817c66e68ee5314be5660dbde64e33a5ce8 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Mon, 30 Mar 2026 17:51:39 +0100 Subject: [PATCH 05/27] Adding copyright checker. --- .../tests/test_umdp3_rules_S3.py | 53 +++++++++++++++---- script_umdp3_checker/umdp3_rules_S3.py | 47 ++++++++++++++++ 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index c281ccdb..025377b8 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -4,21 +4,23 @@ # Add the current directory to Python path sys.path.insert(0, str(Path(__file__).parent.parent)) -from umdp3_rules_S3 import capitulated_keywords, r3_4_1_capitalised_keywords +from umdp3_rules_S3 import capitulated_keywords, r3_2_1_check_crown_copyright, r3_4_1_capitalised_keywords # from umdp3_checker_rules import TestResult, UMDP3Checker def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: """Return a copy of example_fortran_lines with changes applied. - ``request.param`` is a list of operation dicts, each with: - - ``{"operation": "replace", "line": N, "text": "..."}`` : replace line N - - ``{"operation": "delete", "line": N}`` : remove line N - - ``{"operation": "add", "line": N, "text": "..."}`` : insert before line N + ``changes`` is a list of operation list, each with: + - ``[, , []]`` + - where : = "replace" would replace line N with the new line(s) + - = delete" would remove line N + - and ="add" would insert the new line(s) before line N. Operations are applied in descending line order so that earlier line numbers are not shifted by later mutations. """ + # TODO: Currently is a string, but should become a list of strings, allowing multiple lines to be used in addition and as replacements. lines = lines_in.copy() for change in sorted(changes, key=lambda o: o[1], reverse=True): idx = int(change[1]) - 1 @@ -29,21 +31,19 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: elif change[0] == "add": lines.insert(idx, change[2]) # if change[0] == "replace": - # lines[idx:idx+1] = change["text"] + # lines[idx:idx+1] = change[2] # elif change[0] == "delete": # del lines[idx] # elif change[0] == "add": - # lines[idx:idx], change["text"]) + # lines[idx:idx], change[2]) else: raise ValueError(f"Unknown operation: {change[0]}") # for count, line in enumerate(lines, 1): # print(f"line [{count}]: {line}") return lines - # ================================================================= - """This example hopefully demonstrates some of the complexity available... Setting up a test, may involve multiple changes to the demo Fortran file, hence the complex entries in the parametrization. Each error found is recorded with the line number(s) it was found on using the Error text as a dict key, and the line no(s) as a list, which means finding 'use' and 'Use' in the code would generate 2 different keys in the dict. @@ -86,6 +86,41 @@ def test_r3_4_1_capitalised_keywords( for line_no in lines_list: assert line_no in expected_errors[error] +# ================================================================= + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + ["delete", 1, None], + ["delete", 2, None], + ["delete", 3, None], + ["delete", 4, None], + ["delete", 5, None] + ], + 1, + {"missing copyright or crown copyright statement": [0]}, + ), + ([["add", 10, ""]], 0, []), # No changes, expect no errors + ], + ids = ["Missing copyright statement", "No Errors"] +) +def test_r3_2_1_check_crown_copyright( + example_fortran_lines, changes_list, expected_result, expected_errors +): + # checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_2_1_check_crown_copyright(modified_fortran_lines) + failure_count = result.failure_count + assert failure_count == expected_result + errors = result.errors + assert len(errors) == len(expected_errors) + for error, lines_list in errors.items(): + assert error in expected_errors + assert len(lines_list) == len(expected_errors[error]) + for line_no in lines_list: + assert line_no in expected_errors[error] # ================================================================= diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 78bc83d0..3856c303 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -67,6 +67,53 @@ def remove_quoted(line: str) -> str: * Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work """ +"""3.2 Headers +* All programming units require a suitable copyright header. Met Office derived code should use the stan-dard UM copyright header as depicted in the good example code. Collaborative UM developed codemay require alternative headers as agreed in the collaborative agreements. e.g. UKCA code. The IPR(intelectual property rights) of UM code is important and needs to be protected appropriately.""" + +def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: + """Check for crown copyright statement""" + """ +TODO: This is a very simplistic check and will not detect many cases which break UMDP3. + It will pass if the word copyright appears on a commented out line. This could + include passing : + ! I should put a copyright statement here... + + I suspect the Perl Predeccessor + did much more convoluted tests""" + comment_lines = [ + line.upper() for line in lines if line.lstrip(" ").startswith("!") + ] + file_content = "\n".join(comment_lines) + error_log = {} + found_copyright = False + if "CROWN COPYRIGHT" in file_content or "COPYRIGHT" in file_content: + found_copyright = True + + if not found_copyright: + # add_extra_error("missing copyright or crown copyright statement") + error_log = add_error_log( + error_log, "missing copyright or crown copyright statement", 0 + ) + return TestResult( + checker_name="Crown Copyright Statement", + failure_count=0 if found_copyright else 1, + passed=found_copyright, + output="Checked for crown copyright statement.", + errors=error_log, + ) + +""" +* Headers are an immensely important part of any code as they document what it does, and how it does it. +* The description of the MODULE and its contained SUBROUTINE may be the same and thus it need not berepeated in the latter. If a MODULE contains more than one subroutine then further descriptions are required. +* History comments should not be included in the header or routine code. FCM TRAC provides the history of our codes. +* Code author names should NOT be included explicitly within the code as they quickly become out of date and are sometimes misleading. Instead we reference a single maintainable text file which is included within the UM code repository. + +! Code Owner: Please refer to the UM file CodeOwners.txt +! This file belongs in section: + +* Example UM templates are provided with the source of this document; subroutine, function and module +templates""" + """3.4 Fortran style * To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: From 4ba0e8d9e5240a0bbebd93864842f85cdbf59732 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 1 Apr 2026 17:38:03 +0100 Subject: [PATCH 06/27] improve error messages --- script_umdp3_checker/tests/test_umdp3_rules_S3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 025377b8..1dce08b4 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -68,7 +68,7 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: ), ([["add", 10, ""]], 0, []), # No changes, expect no errors ], - ids = ["3 Errors", "No Errors"] + ids = ["3 Errors", "No lowercase keyword Errors"] ) def test_r3_4_1_capitalised_keywords( example_fortran_lines, changes_list, expected_result, expected_errors @@ -104,7 +104,7 @@ def test_r3_4_1_capitalised_keywords( ), ([["add", 10, ""]], 0, []), # No changes, expect no errors ], - ids = ["Missing copyright statement", "No Errors"] + ids = ["Missing copyright statement", "copyright statemrent present"] ) def test_r3_2_1_check_crown_copyright( example_fortran_lines, changes_list, expected_result, expected_errors From 89db21af6b809cd95717733c2bfb2bedf225812d Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 31 Mar 2026 14:40:38 +0100 Subject: [PATCH 07/27] Minor fixes for missing config owner (#218) --- github_scripts/get_git_sources.py | 21 +++++++++++++++++---- github_scripts/merge_sources.py | 1 + github_scripts/suite_report_git.py | 2 +- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index f9fe168e..ffee9652 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -19,6 +19,19 @@ logger = logging.getLogger(__name__) +class SubprocessRunError(Exception): + def __init__(self, command, returncode, stdout, stderr): + self.command = command + self.returncode = returncode + self.stdout = stdout + self.stderr = stderr + self.message = ( + f"Errorcode {returncode} raised when running command '{command}\n\n" + f"stdout:\n{stdout}\n\nstderr:\n{stderr}" + ) + super().__init__(self.message) + + def run_command( command: str, check: bool = True, capture: bool = True, timeout: int = 600 ) -> Optional[subprocess.CompletedProcess]: @@ -44,8 +57,8 @@ def run_command( if check and result.returncode != 0: err_msg = (result.stderr or "").strip() logger.error(f"[FAIL] Command failed: {command}\nError: {err_msg}") - raise subprocess.CalledProcessError( - result.returncode, args, output=result.stdout, stderr=result.stderr + raise SubprocessRunError( + result.returncode, command, result.stdout, result.stderr ) return result @@ -198,14 +211,14 @@ def merge_source( run_command(f"git -C {dest} fetch local {fetch}") - command = f"git -C {dest} merge --no-gpg-sign FETCH_HEAD" + command = f"git -C {dest} merge --no-gpg-sign --no-edit FETCH_HEAD" result = run_command(command, check=False) if result.returncode: unmerged_files = get_unmerged(dest) if unmerged_files: handle_merge_conflicts(source, ref, dest, dest.name) else: - raise subprocess.CalledProcessError( + raise SubprocessRunError( result.returncode, command, result.stdout, result.stderr ) diff --git a/github_scripts/merge_sources.py b/github_scripts/merge_sources.py index acb63c22..9f884911 100755 --- a/github_scripts/merge_sources.py +++ b/github_scripts/merge_sources.py @@ -32,6 +32,7 @@ def parse_args(): parser.add_argument( "-p", "--path", + type=Path, default=None, help="The path to extract the sources to. If part of a cylc suite, it will " "default to $CYLC_WORKFLOW_SHARE_DIR/source, otherwise __file__/source", diff --git a/github_scripts/suite_report_git.py b/github_scripts/suite_report_git.py index 5bf32254..340110c4 100755 --- a/github_scripts/suite_report_git.py +++ b/github_scripts/suite_report_git.py @@ -238,7 +238,7 @@ def create_um_config_owner_table(self, owners: Dict) -> None: # Create a dict with owners as the key table_dict = defaultdict(list) for config in failed_configs: - owner, others = owners[config] + owner, others = owners.get(config, "UNKNOWN") if others != "--": config = f"{config} ({others})" table_dict[owner].append(config) From 7bbecffa1553396a4f6acdd2ce774d97c3acbde5 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Tue, 7 Apr 2026 17:38:52 +0100 Subject: [PATCH 08/27] Aaaarrrrrrrrgle!!!!!!! --- script_umdp3_checker/tests/test_fortran_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script_umdp3_checker/tests/test_fortran_checks.py b/script_umdp3_checker/tests/test_fortran_checks.py index d631c602..92844f48 100644 --- a/script_umdp3_checker/tests/test_fortran_checks.py +++ b/script_umdp3_checker/tests/test_fortran_checks.py @@ -8,7 +8,7 @@ # Prevent pytest from trying to collect TestResult as more tests: -TestResult.__test__ = False +TestResult.__test__ = False # type: ignore """ ToDo : THere has been a LOT of refactoring in the umdp3 module since these From de71e6c1e712f1cdd3d3d719eb30b4b2a1212068 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 15 Apr 2026 17:33:58 +0100 Subject: [PATCH 09/27] testing built in helper functions and trying to concattenate lines --- .../tests/example_fortran_code.F90 | 7 +- .../tests/test_umdp3_rules_S3.py | 66 ++++++++++++++++++- script_umdp3_checker/umdp3_rules_S3.py | 34 +++++++++- 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/script_umdp3_checker/tests/example_fortran_code.F90 b/script_umdp3_checker/tests/example_fortran_code.F90 index 40ceb6f7..316dbe3e 100644 --- a/script_umdp3_checker/tests/example_fortran_code.F90 +++ b/script_umdp3_checker/tests/example_fortran_code.F90 @@ -55,6 +55,7 @@ SUBROUTINE example (xlen,ylen,l_unscale,input1,input2, & INTEGER :: icode ! error code for EReport LOGICAL :: l_loud ! debug flag (default false unless l_loud_opt is used) REAL, ALLOCATABLE :: field(:,:) ! Scaling array to fill. +REAL, ALLOCATABLE :: field2(:,:) ! Scaling array to fill. REAL(KIND=jprb) :: zhook_handle ! DrHook tracing CHARACTER(LEN=*), PARAMETER :: RoutineName="EXAMPLE" CHARACTER(LEN=errormessagelength) :: Cmessage ! used for EReport @@ -68,7 +69,7 @@ SUBROUTINE example (xlen,ylen,l_unscale,input1,input2, & END IF my_char & = "This is a very very very very very very very " & - // "loud character assignment" ! A pointless long character example. + // "long character assignment" ! A pointless long character example. icode=0 ! verbosity choice, output some numbers to aid with debugging ! protected by printstatus>=PrNorm and pe=0 @@ -82,14 +83,18 @@ SUBROUTINE example (xlen,ylen,l_unscale,input1,input2, & IF ( l_unscale ) THEN icode = -100 ! set up WARNING message ALLOCATE(field( 1,1 ) ) + ALLOCATE(field2( 1,1 ) ) cmessage="Scaling is switched off in run!" CALL ereport(RoutineName,icode,cmessage) ELSE ALLOCATE(field( xlen, ylen ) ) + ALLOCATE(field2( xlen, ylen ) ) DO j=1,ylen DO i=1,xlen field(i, j) = (1.0*i) + (2.0*j) input2(i, j) = input2(i, j) * field(i, j) + field2(i, j) = (1.0*i) - (2.0*j) & + + (3.0*i*j) + (4.0*i**2) + field(i, j)*2 END DO END DO END IF diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 1dce08b4..a3919241 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -1,10 +1,19 @@ +# ----------------------------------------------------------------------------- +# (C) Crown copyright Met Office. All rights reserved. +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- + +# from pyparsing import remove_quotes import pytest import sys from pathlib import Path # Add the current directory to Python path sys.path.insert(0, str(Path(__file__).parent.parent)) -from umdp3_rules_S3 import capitulated_keywords, r3_2_1_check_crown_copyright, r3_4_1_capitalised_keywords +from umdp3_rules_S3 import remove_comments, remove_quoted, \ + concatenate_lines, capitulated_keywords, r3_2_1_check_crown_copyright, \ + r3_4_1_capitalised_keywords # from umdp3_checker_rules import TestResult, UMDP3Checker @@ -42,6 +51,61 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: # print(f"line [{count}]: {line}") return lines +# ================================================================= +"""First : test the helper functions in umdp3_rules_S3.py.""" +def test_modify_fortran_lines(example_fortran_lines): + """TODO: Does this need to be more rigorous ?""" + changes_list = [ + ["replace", 10, "This is a replacement line."], + ["delete", 20, None], + ["add", 30, "This is an added line."] + ] + modified_lines = modify_fortran_lines(example_fortran_lines, changes_list) + # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added aobve in the file and -1 for every line deleted above in the file. + # for line_no, line in enumerate(modified_lines, 1): + # print(f"line [{line_no:04}]: {line}") + assert modified_lines[9] == "This is a replacement line." + assert len(modified_lines) == len(example_fortran_lines) # One line deleted, One added + # Added @ 30, so 29, but one line deleted above, so 28 - seeemples + assert modified_lines[28] == "This is an added line." + +def test_remove_quoted(example_fortran_lines): + for line in example_fortran_lines.copy(): + uncommented_line = remove_quoted(line) + assert '"' not in uncommented_line + assert "'" not in uncommented_line + +def test_remove_comments(example_fortran_lines): + for line in example_fortran_lines.copy(): + line = remove_quoted(line) + uncommented_line = remove_comments(line) + comment_location = line.find("!") + if comment_location != -1: + assert uncommented_line == line[:comment_location].rstrip() + assert "!" not in uncommented_line + +def test_concatenate_lines(example_fortran_lines): + for line_no, line in enumerate(example_fortran_lines.copy(), 1): + if line.find("&") >= 0: + concatenated_line = concatenate_lines(example_fortran_lines.copy(), line_no) + assert "&" not in concatenated_line + # This bit has to be a bit hard-wired and is going to break every time the example Fortran file line numbers are changed. If you can think of a better method - go ahead. + if line_no == 31: + expected_line = ("SUBROUTINE example (xlen,ylen,l_unscale,input1," + + "input2, output, l_loud_opt)") + assert concatenated_line == expected_line + elif line_no == 70: + # This will look odd as the quoted text has been removed. + expected_line = ( + "my_char " + + " = // ") + assert concatenated_line == expected_line + elif line_no == 96: + expected_line = (" field2(i, j) = (1.0*i) - (2.0*j) " + + " + (3.0*i*j) + " + + "(4.0*i**2) + field(i, j)*2") + assert concatenated_line == expected_line + # ================================================================= """This example hopefully demonstrates some of the complexity available... diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 3856c303..617c6118 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -1,7 +1,16 @@ +# ----------------------------------------------------------------------------- +# (C) Crown copyright Met Office. All rights reserved. +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- + """ This is the storage location for rules based on what's defined as the standard in the UMDP: 003 document. These rules are not necessarily the same as the rules defined in the UMDP: 003 document, but they are based on them. The rules in this file are used by the umdp3_checker script to check for compliance with the UMDP: 003 standard. For now, the document has been copied into this file as a placeholder for the rules as they appear.""" import re from typing import List, Dict +from umdp3_checker_rules import TestResult +from fortran_keywords import fortran_keywords +"""ToDo: This lot will need putting back as and when the tests that use them get imported/re-created""" # from fortran_keywords import fortran_keywords # from search_lists import ( # obsolescent_intrinsics, @@ -11,8 +20,6 @@ # ) # from dataclasses import dataclass, field # from script_umdp3_checker import fortran_keywords -from umdp3_checker_rules import TestResult -from fortran_keywords import fortran_keywords comment_line = re.compile(r"!.*$") word_splitter = re.compile(r"\b\w+\b") @@ -49,6 +56,29 @@ def remove_quoted(line: str) -> str: return result +def remove_comments(line: str) -> str: + """Remove comments from the lines : + There is a bit of an assumption here that quoted text has already been removed, so that we don't accidentally remove text after an "!" in a string.""" + return comment_line.sub("", line).rstrip() + +def concatenate_lines(lines: List[str], line_no: int) -> str: + """Concatenate the continuation lines into a single string""" + # Find first line and check for continuation character. + line = lines[line_no - 1] + line = remove_comments(line) + line = remove_quoted(line) + while line.rstrip().endswith("&"): + line = line.rstrip()[:-1] # Remove the continuation character + line_no += 1 + if line_no > len(lines): + break # Avoid going out of bounds + next_line = lines[line_no - 1] + next_line = remove_comments(next_line) + next_line = remove_quoted(next_line) + line += next_line.lstrip() # Concatenate with the next line, removing leading whitespace + return line + + """ 3.1 Source files should only contain a single program unit """ From 1c7c589151c76e4af62ad9a9686041034521ccbe Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 16 Apr 2026 10:39:34 +0100 Subject: [PATCH 10/27] Adding line length check --- .../tests/test_umdp3_rules_S3.py | 42 ++++++++++++++++++- script_umdp3_checker/umdp3_rules_S3.py | 31 +++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index a3919241..1d57a2d2 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -13,7 +13,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from umdp3_rules_S3 import remove_comments, remove_quoted, \ concatenate_lines, capitulated_keywords, r3_2_1_check_crown_copyright, \ - r3_4_1_capitalised_keywords + r3_3_2line_too_long, r3_4_1_capitalised_keywords # from umdp3_checker_rules import TestResult, UMDP3Checker @@ -132,7 +132,7 @@ def test_concatenate_lines(example_fortran_lines): ), ([["add", 10, ""]], 0, []), # No changes, expect no errors ], - ids = ["3 Errors", "No lowercase keyword Errors"] + ids = ["3 Lowercase Errors", "No Lowercase Errors"] ) def test_r3_4_1_capitalised_keywords( example_fortran_lines, changes_list, expected_result, expected_errors @@ -188,6 +188,44 @@ def test_r3_2_1_check_crown_copyright( # ================================================================= +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + ["replace", 71, + " = \"This is a very very very very very very very \"" + + " &"], + ["replace",115, "IF (lhook) CALL dr_hook(ModuleName// " + + "\":\" // RoutineName, zhook_out, zhook_handle) ! extra comment"], + ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] + ], + 2, + {"line too long": [71, 115]}, + ), + ([], 0, {}), # No changes, expect no errors + ], + ids = ["3 line too long Errors", "No line too long Errors"] +) +def test_r3_3_2line_too_long( + example_fortran_lines, changes_list, expected_result, expected_errors +): + # checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_3_2line_too_long(modified_fortran_lines) + failure_count = result.failure_count + assert failure_count == expected_result + errors = result.errors + assert len(errors) == len(expected_errors) + for error, lines_list in errors.items(): + assert error in expected_errors + assert len(lines_list) == len(expected_errors[error]) + for line_no in lines_list: + assert line_no in expected_errors[error] + + +# ================================================================= + @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", [ diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 617c6118..11309989 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -98,7 +98,7 @@ def concatenate_lines(lines: List[str], line_no: int) -> str: """ """3.2 Headers -* All programming units require a suitable copyright header. Met Office derived code should use the stan-dard UM copyright header as depicted in the good example code. Collaborative UM developed codemay require alternative headers as agreed in the collaborative agreements. e.g. UKCA code. The IPR(intelectual property rights) of UM code is important and needs to be protected appropriately.""" +* All programming units require a suitable copyright header.""" def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: """Check for crown copyright statement""" @@ -144,6 +144,35 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: * Example UM templates are provided with the source of this document; subroutine, function and module templates""" +"""3.3 Free source form +* All code should be written using the free source form. +* Please restrict code to 80 columns, so that your code can be easily viewed on any editor and screen and +can be printed easily on A4 paper. Note that CreateBC uses a limit of 100 columns, due to the nature of +the object-orientated code. +""" +def r3_3_2line_too_long(lines: List[str]) -> TestResult: + """Check for lines longer than 80 characters""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines, 1): + if len(line.rstrip()) > 80: + # self.add_extra_error("line too long") + failures += 1 + error_log = add_error_log(error_log, "line too long", count) + + return TestResult( + checker_name="Line longer than 80 characters", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count + 1} lines, found {failures} failures.", + errors=error_log, + ) + +""" +* Never put more than one statement on a line. +* Write your program in UK English, unless you have a very good reason for not doing so.""" + """3.4 Fortran style * To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: From cd2b864e13ccfeaa9a0f8b99be5021a790b62e92 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 16 Apr 2026 16:32:10 +0100 Subject: [PATCH 11/27] Adding re-worked tests to checker so they get run. --- script_umdp3_checker/umdp3_conformance.py | 32 ++++++++++++++++++----- script_umdp3_checker/umdp3_rules_S3.py | 16 +++++++++--- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 5a8dfe82..5a180818 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -6,6 +6,7 @@ import argparse from checker_dispatch_tables import CheckerDispatchTables from umdp3_checker_rules import TestResult +from umdp3_rules_S3 import list_O_tests import concurrent.futures # Add custom modules to Python path if needed @@ -161,6 +162,14 @@ def check(self, file_path: Path) -> CheckResult: test_results=file_results, ) + def report(self, print_volume: int = 3) -> None: + """Print summary of results for this checker.""" + if print_volume >= 3: + print( + f"Checker \"{self.name}\" checking {len(self.files_to_check)} files " + f"with {len(self.check_functions)} checks." + ) + @classmethod def from_full_list( cls, @@ -536,6 +545,7 @@ def create_style_checkers( if print_volume >= 3: print("Configuring Fortran checkers:") combined_checkers = fortran_diff_table | fortran_file_table | generic_file_table + combined_checkers = combined_checkers | list_O_tests fortran_file_checker = StyleChecker.from_full_list( "Fortran Checker", file_extensions, combined_checkers, changed_files ) @@ -593,18 +603,26 @@ def get_files_to_check( all_files = [f.relative_to(path) for f in repo_path.rglob("*") if f.is_file()] if print_volume >= 1: print("Full check override enabled.") - if print_volume >= 3: + if print_volume >= 4: print( - f" Found {len(all_files)} files to " - f"check in repository at path: {path}" + f" Found a total of {len(all_files)} files " + f"in directory at path: {path}\n" + " These will be filtered by the checkers based on their file" + " extension" ) - return all_files else: # Configure CMS, and check we've been passed a branch if print_volume >= 1: print("Using a CMS to determine changed files.") cms = which_cms_is_it(path, print_volume) - changed_files = cms.get_changed_files() - return changed_files + all_files = cms.get_changed_files() + if print_volume >= 4: + print( + f" CMS indicates {len(all_files)} files " + f"changed in repository at path: {path}\n" + " These will be filtered by the checkers based on their file" + " extension" + ) + return all_files # Usage when run from command line. @@ -625,6 +643,8 @@ def get_files_to_check( checkers to use for each file type.""" active_checkers = create_style_checkers(args.file_types, full_file_paths) + for checker in active_checkers: + checker.report(log_volume) # TODO : Could create a conformance checker for each # file type. diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 11309989..923bce75 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -125,7 +125,7 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: error_log, "missing copyright or crown copyright statement", 0 ) return TestResult( - checker_name="Crown Copyright Statement", + checker_name="Test 2.1 Check Copyright", failure_count=0 if found_copyright else 1, passed=found_copyright, output="Checked for crown copyright statement.", @@ -162,7 +162,7 @@ def r3_3_2line_too_long(lines: List[str]) -> TestResult: error_log = add_error_log(error_log, "line too long", count) return TestResult( - checker_name="Line longer than 80 characters", + checker_name="Test 3.2 Line Length", failure_count=failures, passed=(failures == 0), output=f"Checked {count + 1} lines, found {failures} failures.", @@ -198,7 +198,7 @@ def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: ) return TestResult( - checker_name="Capitalised Keywords", + checker_name="Test 4.1 Capitalised Keywords", failure_count=failures, passed=(failures == 0), output=f"Checked {count + 1} lines, found {failures} failures.", @@ -267,3 +267,13 @@ def capitulated_keywords(lines: List[str]) -> TestResult: output=f"Checked {line_count} lines, found {failures} failures.", errors=error_log, ) + +"""TODO: The following dictionary can be converted to a list of executables, once all +the tests above are in place. The keys are not referenced anywhere as each callable +(test) defines it's own name and stores that in the result.""" +list_O_tests = { + "Test 2.1 Check Copyright": r3_2_1_check_crown_copyright, + "Test 3.2 Line Length": r3_3_2line_too_long, + "Test 4.1 Capitalised Keywords": r3_4_1_capitalised_keywords, + # "A": None, # TODO: add test + } From 5e014906707fa9700f28ea0730f1b7ac56bcd4bb Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 16 Apr 2026 17:22:23 +0100 Subject: [PATCH 12/27] converting dictionaries of callable tests to lists --- .../checker_dispatch_tables.py | 134 ++++++++---------- script_umdp3_checker/umdp3_conformance.py | 27 ++-- script_umdp3_checker/umdp3_rules_S3.py | 14 +- 3 files changed, 81 insertions(+), 94 deletions(-) diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index 51aac77f..f4af2ddc 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -9,7 +9,7 @@ Python translation of the original Perl module """ -from typing import Dict, Callable +from typing import Callable from umdp3_checker_rules import UMDP3Checker """ @@ -30,86 +30,74 @@ class CheckerDispatchTables: def __init__(self): self.umdp3_checker = UMDP3Checker() - def get_diff_dispatch_table_fortran(self) -> Dict[str, Callable]: + def get_diff_dispatch_table_fortran(self) -> list[Callable]: """Get dispatch table for Fortran diff tests""" - return { - # 'Captain Daves doomed test of destruction': - # self.umdp3_checker.capitulated_keywords, - "Lowercase Fortran keywords not permitted": self.umdp3_checker.capitalised_keywords, - "OpenMP sentinels not in column one": self.umdp3_checker.openmp_sentinels_in_column_one, - "Omitted optional space in keywords": self.umdp3_checker.unseparated_keywords, - "GO TO other than 9999": self.umdp3_checker.go_to_other_than_9999, - "WRITE without format": self.umdp3_checker.write_using_default_format, - "Lowercase or CamelCase variable names only": self.umdp3_checker.lowercase_variable_names, - "Use of dimension attribute": self.umdp3_checker.dimension_forbidden, - "Continuation lines shouldn't start with &": self.umdp3_checker.ampersand_continuation, - "Use of EQUIVALENCE or PAUSE": self.umdp3_checker.forbidden_keywords, - "Use of older form of relational operator (.GT. etc.)": self.umdp3_checker.forbidden_operators, - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, - "Line includes tab character": self.umdp3_checker.tab_detection, - "USEd printstatus_mod instead of umPrintMgr": self.umdp3_checker.printstatus_mod, - "Used PRINT rather than umMessage and umPrint": self.umdp3_checker.printstar, - "Used WRITE(6) rather than umMessage and umPrint": self.umdp3_checker.write6, - "Used um_fort_flush rather than umPrintFlush": self.umdp3_checker.um_fort_flush, - "Used Subversion keyword substitution which is prohibited": self.umdp3_checker.svn_keyword_subst, - "Used !OMP instead of !$OMP": self.umdp3_checker.omp_missing_dollar, - "Used #ifdef/#ifndef rather than #if defined() or #if !defined()": self.umdp3_checker.cpp_ifdef, - "Presence of fortran comment in CPP directive": self.umdp3_checker.cpp_comment, - "Used an archaic fortran intrinsic function": self.umdp3_checker.obsolescent_fortran_intrinsic, - "EXIT statements should be labelled": self.umdp3_checker.exit_stmt_label, - "Intrinsic modules must be USEd with an INTRINSIC " - + "keyword specifier": self.umdp3_checker.intrinsic_modules, - "READ statements should have an explicit UNIT= as " - + "their first argument": self.umdp3_checker.read_unit_args, - } + return [ + self.umdp3_checker.capitalised_keywords, + self.umdp3_checker.openmp_sentinels_in_column_one, + self.umdp3_checker.unseparated_keywords, + self.umdp3_checker.go_to_other_than_9999, + self.umdp3_checker.write_using_default_format, + self.umdp3_checker.lowercase_variable_names, + self.umdp3_checker.dimension_forbidden, + self.umdp3_checker.ampersand_continuation, + self.umdp3_checker.forbidden_keywords, + self.umdp3_checker.forbidden_operators, + self.umdp3_checker.line_over_80chars, + self.umdp3_checker.tab_detection, + self.umdp3_checker.printstatus_mod, + self.umdp3_checker.printstar, + self.umdp3_checker.write6, + self.umdp3_checker.um_fort_flush, + self.umdp3_checker.svn_keyword_subst, + self.umdp3_checker.omp_missing_dollar, + self.umdp3_checker.cpp_ifdef, + self.umdp3_checker.cpp_comment, + self.umdp3_checker.obsolescent_fortran_intrinsic, + self.umdp3_checker.exit_stmt_label, + self.umdp3_checker.intrinsic_modules, + self.umdp3_checker.read_unit_args, + ] def get_file_dispatch_table_fortran( self, filename: str = "" - ) -> Dict[str, Callable]: + ) -> list[Callable]: """Get dispatch table for Fortran file tests""" - return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, - "File is missing at least one IMPLICIT NONE": self.umdp3_checker.implicit_none, - "Never use STOP or CALL abort": self.umdp3_checker.forbidden_stop, - "Use of Fortran function as a variable name": self.umdp3_checker.intrinsic_as_variable, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used (/ 1,2,3 /) form of array initialisation, rather than " - + "[1,2,3] form": self.umdp3_checker.array_init_form, - } + return [ + self.umdp3_checker.retire_if_def, + self.umdp3_checker.implicit_none, + self.umdp3_checker.forbidden_stop, + self.umdp3_checker.intrinsic_as_variable, + self.umdp3_checker.check_crown_copyright, + self.umdp3_checker.check_code_owner, + self.umdp3_checker.array_init_form, + ] - def get_diff_dispatch_table_c(self) -> Dict[str, Callable]: + def get_diff_dispatch_table_c(self) -> list[Callable]: """Get dispatch table for C diff tests""" - return { - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, - "Line includes tab character": self.umdp3_checker.tab_detection, - "Fixed-width Integer format specifiers must have a space " - + 'between themselves and the string delimiter (the " character)': self.umdp3_checker.c_integral_format_specifiers, - } + return [ + self.umdp3_checker.line_over_80chars, + self.umdp3_checker.tab_detection, + self.umdp3_checker.c_integral_format_specifiers, + ] - def get_file_dispatch_table_c(self) -> Dict[str, Callable]: + def get_file_dispatch_table_c(self) -> list[Callable]: """Get dispatch table for C file tests""" - return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, - "Used a deprecated C identifier": self.umdp3_checker.c_deprecated, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used an _OPENMP if-def without also testing against " - + "SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does " - + "not come first in the test.)": self.umdp3_checker.c_openmp_define_pair_thread_utils, - "Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def " - + "test in a logical combination with a third macro": self.umdp3_checker.c_openmp_define_no_combine, - "Used !defined(_OPENMP) rather than defined(_OPENMP) " - + "with #else branch": self.umdp3_checker.c_openmp_define_not, - "Used an omp #pragma (or #include ) without " - + "protecting it with an _OPENMP if-def": self.umdp3_checker.c_protect_omp_pragma, - "Used the #ifdef style of if-def, rather than the #if " - + "defined() style": self.umdp3_checker.c_ifdef_defines, - "C Unit does not end with a final newline character": self.umdp3_checker.c_final_newline, - } + return [ + self.umdp3_checker.retire_if_def, + self.umdp3_checker.c_deprecated, + self.umdp3_checker.check_crown_copyright, + self.umdp3_checker.check_code_owner, + self.umdp3_checker.c_openmp_define_pair_thread_utils, + self.umdp3_checker.c_openmp_define_no_combine, + self.umdp3_checker.c_openmp_define_not, + self.umdp3_checker.c_protect_omp_pragma, + self.umdp3_checker.c_ifdef_defines, + self.umdp3_checker.c_final_newline, + ] - def get_file_dispatch_table_all(self) -> Dict[str, Callable]: + def get_file_dispatch_table_all(self) -> list[Callable]: """Get dispatch table for universal file tests""" - return { - "Line includes trailing whitespace character(s)": self.umdp3_checker.line_trail_whitespace, - } + return [ + self.umdp3_checker.line_trail_whitespace, + ] diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 5a180818..3eaee203 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -1,7 +1,7 @@ import subprocess from abc import ABC, abstractmethod from pathlib import Path -from typing import Callable, List, Dict, Set, Optional +from typing import Callable, List, Set, Optional from dataclasses import dataclass, field import argparse from checker_dispatch_tables import CheckerDispatchTables @@ -131,17 +131,17 @@ class instance to hold the 'expanded' check outputs. a TestResult object directly, which includes the extra error info, so that each thread can work independently.""" name: str - check_functions: Dict[str, Callable] + check_functions: list[Callable] files_to_check: List[Path] def __init__( self, name: str, - check_functions: Dict[str, Callable], + check_functions: list[Callable], changed_files: List[Path], ): self.name = name - self.check_functions = check_functions or {} + self.check_functions = check_functions or [] self.files_to_check = changed_files or [] def get_name(self) -> str: @@ -151,7 +151,7 @@ def check(self, file_path: Path) -> CheckResult: """Run UMDP3 check function on file.""" lines = file_path.read_text().splitlines() file_results = [] # list of TestResult objects - for check_name, check_function in self.check_functions.items(): + for check_function in self.check_functions: file_results.append(check_function(lines)) tests_failed = sum([0 if result.passed else 1 for result in file_results]) @@ -175,7 +175,7 @@ def from_full_list( cls, name: str, file_extensions: Set[str], - check_functions: Dict[str, Callable], + check_functions: list[Callable], changed_files: List[Path], print_volume: int = 3, ): @@ -211,11 +211,11 @@ def create_external_runners( ) -> "StyleChecker": """Create a StyleChecker instance filtering files from a full list.""" filtered_files = cls.filter_files(all_files, file_extensions) - check_functions = {} + check_functions = [] for command in commands: - external_opname = f"External_operation_{command[0]}" + external_opname = f"{command[0]}" free_runner = cls.create_free_runner(command, external_opname) - check_functions[external_opname] = free_runner + check_functions.append(free_runner) return cls(name, check_functions, filtered_files) @staticmethod @@ -272,7 +272,7 @@ class Check_Runner(StyleChecker): def check(self, file_path: Path) -> CheckResult: """Run external command on file.""" file_results = [] # list of TestResult objects - for check_name, check_function in self.check_functions.items(): + for check_function in self.check_functions: file_results.append(check_function(file_path)) tests_failed = sum([0 if result.passed else 1 for result in file_results]) return CheckResult( @@ -544,8 +544,11 @@ def create_style_checkers( generic_file_table = dispatch_tables.get_file_dispatch_table_all() if print_volume >= 3: print("Configuring Fortran checkers:") - combined_checkers = fortran_diff_table | fortran_file_table | generic_file_table - combined_checkers = combined_checkers | list_O_tests + combined_checkers = [] + combined_checkers.extend(fortran_diff_table) + combined_checkers.extend(fortran_file_table) + combined_checkers.extend(generic_file_table) + combined_checkers.extend(list_O_tests) fortran_file_checker = StyleChecker.from_full_list( "Fortran Checker", file_extensions, combined_checkers, changed_files ) diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 923bce75..2dbd0c4f 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -268,12 +268,8 @@ def capitulated_keywords(lines: List[str]) -> TestResult: errors=error_log, ) -"""TODO: The following dictionary can be converted to a list of executables, once all -the tests above are in place. The keys are not referenced anywhere as each callable -(test) defines it's own name and stores that in the result.""" -list_O_tests = { - "Test 2.1 Check Copyright": r3_2_1_check_crown_copyright, - "Test 3.2 Line Length": r3_3_2line_too_long, - "Test 4.1 Capitalised Keywords": r3_4_1_capitalised_keywords, - # "A": None, # TODO: add test - } +list_O_tests = [ + r3_2_1_check_crown_copyright, + r3_3_2line_too_long, + r3_4_1_capitalised_keywords, +] From d2de828138b79cdbe0bc1c5e37b3a91ea64672c1 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 17 Apr 2026 17:31:28 +0100 Subject: [PATCH 13/27] Bit of a TODO and redundant code tidy --- .../checker_dispatch_tables.py | 15 +-- script_umdp3_checker/fortran_keywords.py | 2 + script_umdp3_checker/umdp3_checker_rules.py | 121 ++---------------- script_umdp3_checker/umdp3_rules_S3.py | 28 ++-- 4 files changed, 38 insertions(+), 128 deletions(-) diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index f4af2ddc..77c11aab 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -13,17 +13,12 @@ from umdp3_checker_rules import UMDP3Checker """ -TODO : This list was checked to ensure it had something for each - test in the original. -TODO : This needs to be re-checked. -TODO : And the tests need to be compared to the original Perl tests - to ensure they are equivalent. -""" - -# Declare version -VERSION = "13.5.0" - +TODO : This module has lost it's way. I uses a class to define methods which just + return lists of functions. I don't think a class is required for this. + As the functions themselves are shifted, renamed and hopefully improved, this + class should eventually get emptied out and the file removed. +""" class CheckerDispatchTables: """Class containing dispatch tables for UMDP3 tests""" diff --git a/script_umdp3_checker/fortran_keywords.py b/script_umdp3_checker/fortran_keywords.py index d9b6be82..a9d5e4b4 100644 --- a/script_umdp3_checker/fortran_keywords.py +++ b/script_umdp3_checker/fortran_keywords.py @@ -14,6 +14,8 @@ """ TODO: Current order may not be perfect, and could possibly be reviewed. However, it is probably 'good enough' for now. +TODO: Document method used to decide on order. I think it was a grep and count for the +frequency the keywords appear in the source code. """ fortran_keywords = ( diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py index 5aea535b..66f76de8 100644 --- a/script_umdp3_checker/umdp3_checker_rules.py +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -27,20 +27,18 @@ Equally, there could probably be more consistency in how things like comments are stripped from the ends of lines and/or full comment lines are skipped. +TODO : This list was checked to ensure it had something for each + test in the original. +TODO : This needs to be re-checked. +TODO : And the tests need to be compared to the original Perl tests + to ensure they are equivalent. +TODO : The functions themselves are being re-named and re-created in umdp3_rules_S3.py, + The new names starte with r2_x_y, where x and y refer to the subsections of the + UMDP3 standards document that they are checking conformance to. """ - -# Declare version -VERSION = "13.5.0" - - @dataclass class TestResult: """Result from running a single style checker test on a file.""" - - """ - TODO : unsure if both output and errors are required. - They make a bit more sense in the 'external_checkers' where - they hold stdout and stderr.""" checker_name: str = "Unnamed Checker" failure_count: int = 0 passed: bool = False @@ -52,18 +50,13 @@ class UMDP3Checker: """UMDP3 compliance checker class""" """ - TODO : This class could possibly be abandoned, or replaced - by a similar class at a different level. Presently only one - instance is created in such a way that the original - _extra_error_info can't be used to hold extra information - at a per file level. resulting in the need to pass error_log - back, which feels like a bodge.""" + TODO : This class should probably be abandoned. Presently only one + instance is created by checker_dispatch_tables (also for the chop) to give access to the dispatch tables, a list of the checks to run grouped by filetype. The checks themselves are being 'shifted' and hopefully improved to umdp3_rules_S3.py.""" # precompiled, regularly used search patterns. comment_line = re.compile(r"!.*$") word_splitter = re.compile(r"\b\w+\b") def __init__(self): - self._extra_error_info = {} """ TODO: The Perl version had a dodgy looking subroutine to calculate this, but I can't find where it was called from within the files in @@ -74,42 +67,10 @@ def __init__(self): properly.""" self._number_of_files_with_variable_declarations_in_includes = 0 - def reset_extra_error_information(self): - """Reset extra error information : - Appears to be used 'between' blocks of tests such as those on diffs and - those on full files. - """ - self._extra_error_info = {} - - def get_extra_error_information(self) -> Dict: - """ - Get extra error information. Dictionary with file names as the keys. - """ - """ - - TODO: I presume this is what's used when creating the report of the - actual failures and not just the count. However, this information - doesn't seem to be output as yet and will need implementing. - """ - return self._extra_error_info.copy() - - def add_extra_error(self, key: str, value: str = ""): - """Add extra error information to the dictionary""" - """ - TODO: The usefulness of the information added has not been assessed, - nor does it appear to be reported as yet.""" - self._extra_error_info[key] = value - def add_error_log( self, error_log: Dict, key: str = "no key", value: int = 0 ) -> Dict: - """Add extra error information to the dictionary""" - """ - TODO: This is a bodge to get more detailed info about - the errors back to the calling program. The info is - useful, but is currently presented on a per-test basis - rather than a per-file which would be easier to read - and make use of.""" + """Add error information to the error log dictionary""" if key not in error_log: error_log[key] = [] error_log[key].append(value) @@ -125,11 +86,7 @@ def get_include_number(self) -> int: def remove_quoted(self, line: str) -> str: """Remove quoted strings from a line""" - """ - TODO: The original version replaced the quoted sections with a - "blessed reference", presumably becuase they were 're-inserted' at some - stage. No idea if that capability is still required.""" - # Simple implementation - remove single and double quoted strings +# # Simple implementation - remove single and double quoted strings result = line # Remove double quoted strings @@ -142,16 +99,9 @@ def remove_quoted(self, line: str) -> str: """Test functions : Each accepts a list of 'lines' to search and returns a - TestResult object containing all the information.""" + TestResult object containing all the information. + The list of lines is often assumed to be the whole file. """ - TODO: One thought here is each test should also be told whether it's - being passed the contents of a full file, or just a selection of lines - involved in a change as some of the tests appear to really only be useful - if run on a full file (e.g. the Implicit none checker). Thus if only passed - a selection of lines, these tests could be skipped/return 'pass' - regardless. - Although, a brief look seems to imply that there are two 'dispatch tables' - one for full files and one for changed lines.""" def capitulated_keywords(self, lines: List[str]) -> TestResult: """A fake test, put in for testing purposes. @@ -172,7 +122,6 @@ def capitulated_keywords(self, lines: List[str]) -> TestResult: for word in self.word_splitter.findall(clean_line): upcase = word.upper() if upcase in fortran_keywords and word != upcase: - self.add_extra_error(f"lowercase keyword: {word}") error_log = self.add_error_log( error_log, f"capitulated keyword: {word}", line_count ) @@ -202,7 +151,6 @@ def capitalised_keywords(self, lines: List[str]) -> TestResult: for word in self.word_splitter.findall(clean_line): upcase = word.upper() if upcase in fortran_keywords and word != upcase: - self.add_extra_error(f"lowercase keyword: {word}") failures += 1 error_log = self.add_error_log( error_log, f"lowercase keyword: {word}", count + 1 @@ -223,7 +171,6 @@ def openmp_sentinels_in_column_one(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s+!\$OMP", line): - self.add_extra_error("OpenMP sentinel not in column 1") failures += 1 error_log = self.add_error_log( error_log, "OpenMP sentinel not in column 1:", count + 1 @@ -248,7 +195,6 @@ def unseparated_keywords(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) for pattern in [f"\\b{kw}\\b" for kw in unseparated_keywords_list]: if re.search(pattern, clean_line, re.IGNORECASE): - self.add_extra_error(f"unseparated keyword in line: {line.strip()}") failures += 1 error_log = self.add_error_log( error_log, @@ -276,7 +222,6 @@ def go_to_other_than_9999(self, lines: List[str]) -> TestResult: if match := re.search(r"\bGO\s*TO\s+(\d+)", clean_line, re.IGNORECASE): label = match.group(1) if label != "9999": - self.add_extra_error(f"GO TO {label}") failures += 1 error_log = self.add_error_log( error_log, f"GO TO {label}", count + 1 @@ -300,7 +245,6 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bWRITE\s*\(\s*\*\s*,\s*\*\s*\)", clean_line, re.IGNORECASE): - self.add_extra_error("WRITE(*,*) found") failures += 1 error_log = self.add_error_log(error_log, "WRITE(*,*) found", count + 1) output = f"Checked {count + 1} lines, found {failures} failures." @@ -339,7 +283,6 @@ def lowercase_variable_names(self, lines: List[str]) -> TestResult: clean_line, ) if match := re.search(r"([A-Z]{2,})", clean_line): - self.add_extra_error(f"UPPERCASE variable name : {match[1]}") failures += 1 error_log = self.add_error_log( error_log, f"UPPERCASE variable name {match[1]}", count @@ -364,7 +307,6 @@ def dimension_forbidden(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bDIMENSION\b", clean_line, re.IGNORECASE): - self.add_extra_error("DIMENSION attribute used") failures += 1 error_log = self.add_error_log( error_log, "DIMENSION attribute used", count + 1 @@ -386,7 +328,6 @@ def ampersand_continuation(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s*&", line): - self.add_extra_error("continuation line starts with &") failures += 1 error_log = self.add_error_log( error_log, "continuation line starts with &", count + 1 @@ -413,7 +354,6 @@ def forbidden_keywords(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\b(EQUIVALENCE|PAUSE)\b", clean_line, re.IGNORECASE): - self.add_extra_error("forbidden keyword") failures += 1 error_log = self.add_error_log( error_log, "forbidden keyword", count + 1 @@ -440,7 +380,6 @@ def forbidden_operators(self, lines: List[str]) -> TestResult: for op in old_operators: if op in clean_line.upper(): - self.add_extra_error(f"old operator {op}") failures += 1 error_log = self.add_error_log( error_log, f"old operator {op}", count + 1 @@ -461,7 +400,6 @@ def line_over_80chars(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if len(line.rstrip()) > 80: - self.add_extra_error("line too long") failures += 1 error_log = self.add_error_log(error_log, "line too long", count + 1) @@ -480,7 +418,6 @@ def tab_detection(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if "\t" in line: - self.add_extra_error("tab character found") failures += 1 error_log = self.add_error_log( error_log, "tab character found", count + 1 @@ -501,7 +438,6 @@ def printstatus_mod(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\bUSE\s+printstatus_mod\b", line, re.IGNORECASE): - self.add_extra_error("printstatus_mod used") failures += 1 error_log = self.add_error_log( error_log, "printstatus_mod used", count + 1 @@ -525,7 +461,6 @@ def printstar(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bPRINT\s*\*", clean_line, re.IGNORECASE): - self.add_extra_error("PRINT * used") failures += 1 error_log = self.add_error_log(error_log, "PRINT * used", count + 1) @@ -547,7 +482,6 @@ def write6(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bWRITE\s*\(\s*6\s*,", clean_line, re.IGNORECASE): - self.add_extra_error("WRITE(6) used") failures += 1 error_log = self.add_error_log(error_log, "WRITE(6) used", count + 1) @@ -566,7 +500,6 @@ def um_fort_flush(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\bum_fort_flush\b", line): - self.add_extra_error("um_fort_flush used") failures += 1 error_log = self.add_error_log( error_log, "um_fort_flush used", count + 1 @@ -586,7 +519,6 @@ def svn_keyword_subst(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\$\w+\$", line): - self.add_extra_error("SVN keyword substitution") failures += 1 error_log = self.add_error_log( error_log, "SVN keyword substitution", count + 1 @@ -606,7 +538,6 @@ def omp_missing_dollar(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"!\s*OMP\b", line) and not re.search(r"!\$OMP", line): - self.add_extra_error("!OMP without $") failures += 1 error_log = self.add_error_log(error_log, "!OMP without $", count + 1) @@ -625,7 +556,6 @@ def cpp_ifdef(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s*#\s*if(n)?def\b", line): - self.add_extra_error("#ifdef/#ifndef used") failures += 1 error_log = self.add_error_log( error_log, "#ifdef/#ifndef used", count + 1 @@ -653,7 +583,6 @@ def cpp_comment(self, lines: List[str]) -> TestResult: ) or re.search(r"^\s*#(else) *(.*)", line) if match: if re.search(r".*!", match.group(2)): - self.add_extra_error("Fortran comment in CPP directive") failures += 1 error_log = self.add_error_log( error_log, "Fortran comment in CPP directive", count + 1 @@ -678,7 +607,6 @@ def obsolescent_fortran_intrinsic(self, lines: List[str]) -> TestResult: for intrinsic in obsolescent_intrinsics: if re.search(rf"\b{intrinsic}\b", clean_line, re.IGNORECASE): - self.add_extra_error(f"obsolescent intrinsic: {intrinsic}") failures += 1 error_log = self.add_error_log( error_log, f"obsolescent intrinsic: {intrinsic}", count + 1 @@ -702,7 +630,6 @@ def exit_stmt_label(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bEXIT\s*$", clean_line, re.IGNORECASE): - self.add_extra_error("unlabelled EXIT statement") failures += 1 error_log = self.add_error_log( error_log, "unlabelled EXIT statement", count + 1 @@ -730,7 +657,6 @@ def intrinsic_modules(self, lines: List[str]) -> TestResult: if re.search( rf"\bUSE\s+(::)*\s*{module}\b", clean_line, re.IGNORECASE ) and not re.search(r"\bINTRINSIC\b", clean_line, re.IGNORECASE): - self.add_extra_error(f"intrinsic module {module} without INTRINSIC") failures += 1 error_log = self.add_error_log( error_log, @@ -758,7 +684,6 @@ def read_unit_args(self, lines: List[str]) -> TestResult: if match := re.search(r"\bREAD\s*\(\s*([^,)]+)", clean_line, re.IGNORECASE): first_arg = match.group(1).strip() if not first_arg.upper().startswith("UNIT="): - self.add_extra_error("READ without explicit UNIT=") failures += 1 error_log = self.add_error_log( error_log, "READ without explicit UNIT=", count + 1 @@ -794,7 +719,6 @@ def retire_if_def(self, lines: List[str]) -> TestResult: # SYMBOL = [x for x in match.groups() if x] # reduce to a # list of 1 element if match.group(1) in retired_ifdefs: - self.add_extra_error(f"retired if-def: {match.group(1)}") failures += 1 error_log = self.add_error_log( error_log, f"retired if-def: {match.group(1)}", count + 1 @@ -817,7 +741,6 @@ def implicit_none(self, lines: List[str]) -> TestResult: break if no_implicit_none: - self.add_extra_error("missing IMPLICIT NONE") error_log = self.add_error_log( error_log, "No IMPLICIT NONE found in file", 0 ) @@ -840,7 +763,6 @@ def forbidden_stop(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\b(STOP|CALL\s+abort)\b", clean_line, re.IGNORECASE): - self.add_extra_error("STOP or CALL abort used") failures += 1 error_log = self.add_error_log( error_log, "STOP or CALL abort used", count + 1 @@ -874,7 +796,6 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: clean_line, re.IGNORECASE, ): - self.add_extra_error("intrinsic function used as variable") failures += 1 error_log = self.add_error_log( error_log, "intrinsic function used as variable", count + 1 @@ -904,7 +825,6 @@ def check_crown_copyright(self, lines: List[str]) -> TestResult: found_copyright = True if not found_copyright: - self.add_extra_error("missing copyright or crown copyright statement") error_log = self.add_error_log( error_log, "missing copyright or crown copyright statement", 0 ) @@ -935,7 +855,6 @@ def check_code_owner(self, lines: List[str]) -> TestResult: # This is often a warning rather than an error if not found_code_owner: - self.add_extra_error("missing code owner comment") error_log = self.add_error_log(error_log, "missing code owner comment", 0) return TestResult( checker_name="Code Owner Comment", @@ -957,7 +876,6 @@ def array_init_form(self, lines: List[str]) -> TestResult: for count, line in enumerate(lines): clean_line = self.remove_quoted(line) if re.search(r"\(/.*?\/\)", clean_line): - self.add_extra_error("old array initialization form (/ /)") failures += 1 error_log = self.add_error_log( error_log, "old array initialization form (/ /)", count + 1 @@ -977,7 +895,6 @@ def line_trail_whitespace(self, lines: List[str]) -> TestResult: error_log = {} for count, line in enumerate(lines): if re.search(r"\s+$", line): - self.add_extra_error("trailing whitespace") failures += 1 error_log = self.add_error_log( error_log, "trailing whitespace", count + 1 @@ -997,7 +914,6 @@ def c_integral_format_specifiers(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r'%\d+[dioxX]"', line): - self.add_extra_error("missing space in format specifier") failures += 1 return failures @@ -1008,7 +924,6 @@ def c_deprecated(self, lines: List[str]) -> int: for line in lines: for identifier in deprecated_c_identifiers: if re.search(rf"\b{identifier}\b", line): - self.add_extra_error(f"deprecated C identifier: {identifier}") failures += 1 return failures @@ -1019,9 +934,6 @@ def c_openmp_define_pair_thread_utils(self, lines: List[str]) -> int: for line in lines: if re.search(r"#\s*if.*_OPENMP", line): if not re.search(r"SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line): - self.add_extra_error( - "_OPENMP without SHUM_USE_C_OPENMP_VIA_THREAD_UTILS" - ) failures += 1 return failures @@ -1035,7 +947,6 @@ def c_openmp_define_no_combine(self, lines: List[str]) -> int: ) or re.search( r"&&.*_OPENMP.*&&.*SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line ): - self.add_extra_error("OpenMP defines combined with third macro") failures += 1 return failures @@ -1045,7 +956,6 @@ def c_openmp_define_not(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r"!\s*defined\s*\(\s*_OPENMP\s*\)", line): - self.add_extra_error("!defined(_OPENMP) used") failures += 1 return failures @@ -1064,7 +974,6 @@ def c_protect_omp_pragma(self, lines: List[str]) -> int: r"#\s*include\s*", line ): if not in_openmp_block: - self.add_extra_error("unprotected OMP pragma/include") failures += 1 return failures @@ -1074,7 +983,6 @@ def c_ifdef_defines(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r"^\s*#\s*ifdef\b", line): - self.add_extra_error("#ifdef used instead of #if defined()") failures += 1 return failures @@ -1082,7 +990,6 @@ def c_ifdef_defines(self, lines: List[str]) -> int: def c_final_newline(self, lines: List[str]) -> int: """Check C unit ends with final newline""" if lines and not lines[-1].endswith("\n"): - self.add_extra_error("missing final newline") return 1 return 0 diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 2dbd0c4f..fdab4de5 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -27,13 +27,10 @@ def add_error_log( error_log: Dict, key: str = "no key", value: int = 0 ) -> Dict: - """Add extra error information to the dictionary""" - """ -TODO: This is a bodge to get more detailed info about - the errors back to the calling program. The info is - useful, but is currently presented on a per-test basis - rather than a per-file which would be easier to read - and make use of.""" + """Add error information to the dictionary. + This adds the error statement, some are more unique than others, along with the + line number the error occurs on. Line number(s) are stored in a list in case the + same error occurs multiple times in a file.""" if key not in error_log: error_log[key] = [] error_log[key].append(value) @@ -56,6 +53,19 @@ def remove_quoted(line: str) -> str: return result +"""This is in case I return to the idea of replacing the quoted sections from above +with something unique and storing them in order to put them back at some stage...""" +def create_unique_random_string(storage_set, length: int = 7) -> str: + """Create a unique random string of a given length. + Creates a random string of given length and ensures it hasn't been used before.""" + import random + import string + new_value = "".join(random.choices(string.ascii_letters + string.digits, k=length)) + while new_value in storage_set: + new_value = "".join(random.choices(string.ascii_letters + string.digits, k=length)) + storage_set.add(new_value) + return new_value + def remove_comments(line: str) -> str: """Remove comments from the lines : There is a bit of an assumption here that quoted text has already been removed, so that we don't accidentally remove text after an "!" in a string.""" @@ -120,7 +130,6 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: found_copyright = True if not found_copyright: - # add_extra_error("missing copyright or crown copyright statement") error_log = add_error_log( error_log, "missing copyright or crown copyright statement", 0 ) @@ -157,7 +166,6 @@ def r3_3_2line_too_long(lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines, 1): if len(line.rstrip()) > 80: - # self.add_extra_error("line too long") failures += 1 error_log = add_error_log(error_log, "line too long", count) @@ -191,7 +199,6 @@ def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: for word in word_splitter.findall(clean_line): upcase = word.upper() if upcase in fortran_keywords and word != upcase: - # self.add_extra_error(f"lowercase keyword: {word}") failures += 1 error_log = add_error_log( error_log, f"lowercase keyword: {word}", count + 1 @@ -254,7 +261,6 @@ def capitulated_keywords(lines: List[str]) -> TestResult: for word in word_splitter.findall(clean_line): upcase = word.upper() if upcase in fortran_keywords and word != upcase: - # add_extra_error(f"lowercase keyword: {word}") error_log = add_error_log( error_log, f"capitulated keyword: {word}", line_count ) From 1713ac1d6b8d05e27f37caa3d89a2aa3e957e872 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 17 Apr 2026 17:31:49 +0100 Subject: [PATCH 14/27] missed a bit... --- script_umdp3_checker/umdp3_conformance.py | 106 ++++++++++------------ 1 file changed, 47 insertions(+), 59 deletions(-) diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 3eaee203..8e9c05c8 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -20,13 +20,11 @@ conformance, and to run relevant style checkers on those files. """ - -ALLOWABLE_FILE_TYPES = ["Fortran", "Python", "Generic"] +ALLOWABLE_FILE_TYPES = ["Fortran", "Python", "AnyFile"] GROUP_FILE_TYPES = { "CI": {"Fortran", "Python"}, "ALL": set(ALLOWABLE_FILE_TYPES), } -# TODO: Generic /probably/ needs renaming. @dataclass @@ -118,18 +116,6 @@ class StyleChecker: The checks are a dictionary of named callable routines. The files are a list of file paths to check.""" - """ - TODO: This is where it might be good to set up a threadsafe - class instance to hold the 'expanded' check outputs. - One for each file being checked in parallel. - Curently the UMDP3 class holds "_extra_error_info" which - was used to provide more detailed error logging. - However, this is not threadsafe, so in a multithreaded - environment, the extra error info could get mixed up between - different files being checked in parallel. - For now, I've modified the UMDP3 class methods to return - a TestResult object directly, which includes the extra error - info, so that each thread can work independently.""" name: str check_functions: list[Callable] files_to_check: List[Path] @@ -311,10 +297,6 @@ def check_files(self) -> None: for now. """ """ - TODO : Might be good to have a threadsafe object for each file and - allow multiple checks to be run at once on that file. - """ - """ TODO : Poor terminology makes discerning what is actually happening here hard work. A 'checker' is an instance of the StyleChecker class. Each of which has a list of checks to perform and a list of @@ -325,13 +307,6 @@ def check_files(self) -> None: filename will appear multiple times in the output. Not Good! """ results = [] - - """TODO : The current implementation creates a thread for each - (checker, file), however a chat with Ollie suggests a better approach - would be to switch to multiprocessing and create a pool of workers, and - then have each worker run all the checks for a given file. This would - reduce the overhead of creating threads and allow for better use of - resources.""" with concurrent.futures.ProcessPoolExecutor( max_workers=self.max_workers ) as executor: @@ -340,8 +315,9 @@ def check_files(self) -> None: for checker in self.checkers for file_path in checker.files_to_check } - # TODO : This next loop could be used to process the individual results as - # they come in, rather than waiting for all to complete. + """TODO : This next loop could be used to process the individual results as + they come in, rather than waiting for all to complete. For example, sorting + the error logs into a dictionary with the file_name as the key.""" for future in concurrent.futures.as_completed(future_to_task): result = future.result() results.append(result) @@ -351,11 +327,6 @@ def check_files(self) -> None: def print_results(self, print_volume: int = 3, quiet_pass: bool = True) -> bool: """Print results and return True if all checks passed. ========================================================""" - """ - TODO: If an object encapsulating the data for each file is created - it should contain the "in depth" printing method for file data. - With this method presenting the summary and then looping over - each file object to print its details at the desired verbosity.""" all_passed = True for result in self.results: file_status = "✓ PASS" if result.all_passed else "✗ FAIL" @@ -466,6 +437,31 @@ def line_2(length: int = 80) -> str: """Helper function to print a line for separating output sections.""" return "-" * length +def print_in_box_a(text: list[str], width: int = 80) -> None: + """Helper function to print text in a box.""" + print("+" + "-" * (width - 2) + "+") + for line in text: + print("| " + line.ljust(width - 4) + " |") + print("+" + "-" * (width - 2) + "+" ) + +def print_in_box_b(text: list[str], width: int = 80, justification: str = "left") -> None: + """Another Helper function to print text in a box.""" + print(line_1(width)) + for line in text: + total_padding = width - 6 - len(line) + if justification == "left": + left_padding = 0 + right_padding = total_padding + elif justification == "right": + left_padding = total_padding + right_padding = 0 + elif justification == "center": + left_padding = total_padding // 2 + right_padding = total_padding - left_padding + else: + raise ValueError("Invalid justification: " + justification) + print("## " + " " * left_padding + line + " " * right_padding + " ##") + print(line_1(width) + "\n") def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: """Determine which CMS is in use based on the presence of certain files.""" @@ -481,8 +477,6 @@ def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: raise RuntimeError("Unknown CMS type at path: " + str(path)) branch_name = cms.get_branch_name() if not cms.is_branch(): - # TODO : This /might/ be better as a raise ValueError to allow - # printing the help message, but for now just print and exit. print( f"The path {path} is not a branch." f"\nReported branch name is : {branch_name}" @@ -491,7 +485,7 @@ def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: f"Please try switching on the full check option" ) # Soft exit mainly so nightly testing on main doesn't flag failure. - exit(0) + sys.exit(0) else: if print_volume >= 2: print(f"Found branch, {branch_name}, at path {path}.") @@ -538,7 +532,8 @@ def create_style_checkers( file_extensions = {".f", ".for", ".f90", ".f95", ".f03", ".f08", ".F90"} """ TODO : I /think/ the old version also checked '.h' files as Fortran. - Not sure if that is still needed.""" + Not sure if that is still needed. - Probably, but some of them are C source + files""" fortran_diff_table = dispatch_tables.get_diff_dispatch_table_fortran() fortran_file_table = dispatch_tables.get_file_dispatch_table_fortran() generic_file_table = dispatch_tables.get_file_dispatch_table_all() @@ -571,12 +566,12 @@ def create_style_checkers( file_extensions, ) checkers.append(python_file_checker) - if "Generic" in file_types or file_types == []: + if "AnyFile" in file_types or file_types == []: if print_volume >= 3: - print("Configuring Generic File Checkers:") + print("Configuring AnyFile File Checkers:") all_file_dispatch_table = dispatch_tables.get_file_dispatch_table_all() generic_checker = StyleChecker( - "Generic File Checker", all_file_dispatch_table, changed_files + "AnyFile File Checker", all_file_dispatch_table, changed_files ) checkers.append(generic_checker) @@ -639,20 +634,14 @@ def get_files_to_check( full_file_paths = [Path(args.path) / f for f in file_paths] # Configure checkers - """ - TODO : Uncertain as to how flexible this needs to be. - For now, just configure checkers based on file type requested. - Later, could add configuration files to specify which - checkers to use for each file type.""" - active_checkers = create_style_checkers(args.file_types, full_file_paths) for checker in active_checkers: checker.report(log_volume) - # TODO : Could create a conformance checker for each - # file type. - # Currently, just create a single conformance checker - # with all active checkers. + """TODO : Could create a conformance checker for each + file type. + Currently, just create a single conformance checker + with all active checkers.""" checker = ConformanceChecker( active_checkers, max_workers=args.max_workers, @@ -661,17 +650,16 @@ def get_files_to_check( checker.check_files() if log_volume >= 3: - print(line_1(81)) - print("## Results :" + " " * 67 + "##") - print(line_1(81) + "\n") + print_in_box_b(["Results :"], 81) else: print("Results :") all_passed = checker.print_results(print_volume=log_volume, quiet_pass=quiet_pass) - if log_volume >= 4: - print("\n" + line_1(81)) - print("## Summary :" + " " * 67 + "##") - print(line_1(81)) - print(f"Total files checked: {len(checker.results)}") - print(f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}") + output = [ + "Summary :", + f" Total files checked: {len(checker.results)}", + " " + + f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}", + ] + print_in_box_a(output, width=81) exit(0 if all_passed else 1) From 2a33be3b9184ee178fc61f1d7777c6dcbe1cbfa6 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 22 Apr 2026 17:08:48 +0100 Subject: [PATCH 15/27] Stop ruff from trying to correct the demo Fortran file --- .github/linters/.ruff.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/linters/.ruff.toml b/.github/linters/.ruff.toml index 9cb5f660..02338e42 100644 --- a/.github/linters/.ruff.toml +++ b/.github/linters/.ruff.toml @@ -1,6 +1,7 @@ cache-dir = "/tmp/.ruff_cache" line-length = 88 output-format = "grouped" +exclude = ["*.F90"] [lint] # Allow unused variables when underscore-prefixed. From e8f9b5c3f4d6176b141b16b6876a1a05db16660f Mon Sep 17 00:00:00 2001 From: r-sharp Date: Wed, 22 Apr 2026 18:10:58 +0100 Subject: [PATCH 16/27] Adding single programming module tests and adjusting TODOs a bit more. --- .../tests/test_umdp3_rules_S3.py | 74 ++++-- script_umdp3_checker/umdp3_rules_S3.py | 241 ++++++++++++------ 2 files changed, 205 insertions(+), 110 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 1d57a2d2..41e0e181 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -12,8 +12,8 @@ # Add the current directory to Python path sys.path.insert(0, str(Path(__file__).parent.parent)) from umdp3_rules_S3 import remove_comments, remove_quoted, \ - concatenate_lines, capitulated_keywords, r3_2_1_check_crown_copyright, \ - r3_3_2line_too_long, r3_4_1_capitalised_keywords + concatenate_lines, r3_2_1_check_crown_copyright, \ + r3_3_2line_too_long, r3_4_1_capitalised_keywords, r3_1_1_there_can_be_only_one # from umdp3_checker_rules import TestResult, UMDP3Checker @@ -29,7 +29,9 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: Operations are applied in descending line order so that earlier line numbers are not shifted by later mutations. """ - # TODO: Currently is a string, but should become a list of strings, allowing multiple lines to be used in addition and as replacements. + """ TODO: Currently is a string, but should become a list of strings, + allowing multiple lines to be used in addition and as replacements. Although, + this may make keeping track of line numbers more complex.""" lines = lines_in.copy() for change in sorted(changes, key=lambda o: o[1], reverse=True): idx = int(change[1]) - 1 @@ -108,7 +110,7 @@ def test_concatenate_lines(example_fortran_lines): # ================================================================= -"""This example hopefully demonstrates some of the complexity available... +"""These examples hopefully demonstrate some of the complexity available... Setting up a test, may involve multiple changes to the demo Fortran file, hence the complex entries in the parametrization. Each error found is recorded with the line number(s) it was found on using the Error text as a dict key, and the line no(s) as a list, which means finding 'use' and 'Use' in the code would generate 2 different keys in the dict. Then a single value recording the total number of failures is also included in the 'TestResult' object. @@ -118,33 +120,54 @@ def test_concatenate_lines(example_fortran_lines): There has to be a better way..... As a side note, might it be better to write a function to compare 2 'TestResult' objects, which would be more robust to changes in the structure of the 'TestResult' object, and also make the test code more readable? If so, would it's place be here in the testing, or as a method in the 'TestResult' class itself? """ + +# ================================================================= + @pytest.mark.parametrize( - "changes_list, expected_result, expected_errors", + "changes_list, expected_passed, expected_failure_count, expected_errors", [ + # Valid: MODULE example_mod ... END MODULE example_mod (no changes) + ([], True, 0, {}), + # Invalid: No program unit found (delete MODULE line) ( - [ - ["replace", 10, "Module example_mod"], - ["replace", 37, "use parkind1, ONLY: jpim, jprb"], - ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] - ], - 3, - {"lowercase keyword: Module": [10], "lowercase keyword: use": [37, 40]}, + [["replace", 10, "! No module declaration here"]], + False, + 1, + {"First executable line doesn't define an accepted programming unit :" + " IMPLICIT NONE": [0]}, + ), + # Invalid: Mismatched END statement + ( + [["replace", 118, "END MODULE wrong_mod_name"]], + False, + 1, + {"END statement found for a different program unit: should be example_mod got wrong_mod_name.": [0]}, + ), + # Invalid: No END statement found + ( + [["replace", 118, "! Missing END MODULE"]], + False, + 1, + {"No matching END statement found for the first program unit.": [0]}, ), - ([["add", 10, ""]], 0, []), # No changes, expect no errors ], - ids = ["3 Lowercase Errors", "No Lowercase Errors"] + ids=[ + "Valid module with matching END", + "No program unit found", + "Mismatched END statement", + "No END statement", + ], ) -def test_r3_4_1_capitalised_keywords( - example_fortran_lines, changes_list, expected_result, expected_errors +def test_r3_1_1_there_can_be_only_one( + example_fortran_lines, changes_list, expected_passed, expected_failure_count, expected_errors ): - # checker = UMDP3Checker() modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) - result = r3_4_1_capitalised_keywords(modified_fortran_lines) - failure_count = result.failure_count - assert failure_count == expected_result + result = r3_1_1_there_can_be_only_one(modified_fortran_lines) + assert result.passed == expected_passed + assert result.failure_count == expected_failure_count errors = result.errors assert len(errors) == len(expected_errors) - for error, lines_list in errors.items(): + for error, lines_list in errors.items(): assert error in expected_errors assert len(lines_list) == len(expected_errors[error]) for line_no in lines_list: @@ -236,17 +259,18 @@ def test_r3_3_2line_too_long( ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] ], 3, - {"capitulated keyword: Module": [10], "capitulated keyword: use": [37, 40]}, + {"lowercase keyword: Module": [10], "lowercase keyword: use": [37, 40]}, ), ([["add", 10, ""]], 0, []), # No changes, expect no errors ], + ids = ["3 Lowercase Errors", "No Lowercase Errors"] ) -def test_keywords_II( +def test_r3_4_1_capitalised_keywords( example_fortran_lines, changes_list, expected_result, expected_errors ): -# checker = UMDP3Checker() + # checker = UMDP3Checker() modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) - result = capitulated_keywords(modified_fortran_lines) + result = r3_4_1_capitalised_keywords(modified_fortran_lines) failure_count = result.failure_count assert failure_count == expected_result errors = result.errors diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index fdab4de5..b6c0de13 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -39,7 +39,7 @@ def add_error_log( def remove_quoted(line: str) -> str: """Remove quoted strings from a line""" """ -TODO: The original version replaced the quoted sections with a + TODO: The original version replaced the quoted sections with a "blessed reference", presumably becuase they were 're-inserted' at some stage. No idea if that capability is still required.""" # Simple implementation - remove single and double quoted strings @@ -88,32 +88,85 @@ def concatenate_lines(lines: List[str], line_no: int) -> str: line += next_line.lstrip() # Concatenate with the next line, removing leading whitespace return line - """ 3.1 Source files should only contain a single program unit """ -"""TODO: routine to identify the first program unit in the file and store it's name. -Then check it's the same name as the last thing closed.""" - +def r3_1_1_there_can_be_only_one(lines: List[str]) -> TestResult: # ..one programming unit in a file, that is. + """Check for multiple program units in a file: + Given that a MODULE or a PROGRAM can contain multiples of the other program units, + just going to confirm the first one decalred is the last one ENDed.""" + program_unit_keywords = {"PROGRAM", "MODULE", "SUBROUTINE", "FUNCTION"} + # unsure if "TYPE" should be included. + def find_first(lines: List[str]) -> tuple[bool, str]: + for line in lines: + executable_line = remove_comments(line).strip() + if not executable_line: + continue # Skip empty lines + for keyword in program_unit_keywords: + unit_name_search = re.search(rf"{keyword}\s+(\w+)", executable_line) + if unit_name_search: + unit_type = keyword + unit_name = unit_name_search.group(1) + return (True, f"{unit_type} {unit_name}") + return (False, + "First executable line doesn't define an accepted programming unit" + f" : {line}") + return (False, "No program unit found.") + def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, str]: + for line in reversed(lines): + executable_line = remove_comments(line).strip() + if not executable_line: + continue # Skip empty lines + unit_name_search = re.search(rf"END\s+{unit_type}\s+(\w+)", executable_line) + if unit_name_search: + if unit_name_search.group(1) == unit_name: + return (True, "") + else: + return (False, "END statement found for a different program unit: " + f"should be {unit_name} got {unit_name_search.group(1)}.") + else: + return (False, "No matching END statement found for the first program unit.") + return (False, "No matching END statement found for the first program unit.") + found_first, first_result = find_first(lines) + if not found_first: + failure_count = 1 + found_last = False + error_log = add_error_log({}, first_result, 0) + else: + unit_type, unit_name = first_result.split() + found_last, last_result = find_last(lines, unit_type, unit_name) + if found_last: + failure_count = 0 + error_log = {} + else: + failure_count = 1 + error_log = add_error_log({}, last_result, 0) + return TestResult( + checker_name="Test 3.1 Only One Program Unit", + failure_count=failure_count, + passed=found_first and found_last, + output="Checked for multiple program units.", + errors=error_log, + ) """ -* Modules may be used to group related variables, subroutines and functions. -* Each separate file within the source tree should be uniquely named. +* TODO: Modules may be used to group related variables, subroutines and functions. + - Really unsure as to how to test that's what they're doing... +* TODO: Each separate file within the source tree should be uniquely named. - Not sure that's possible/easy within the current framework to process one file at a time... -* The name of the file should reflect the name of the programming unit. -* Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). -* You should avoid naming your program units and variables with names that match an intrinsic FUNCTION, SUBROUTINE or MODULE. We recommend the use of unique names within a program unit. -* You should also avoid naming your program units and variables with names that match a keyword in a Fortran statement. -* Avoid giving program units names that are likely to be used as variable names elsewhere in the code, e.g. field or string. This makes searching the code difficult and can cause the code browser to make erroneous connections between unrelated routines. -* Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work +* TODO: The name of the file should reflect the name of the programming unit. +* TODO: Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). +* TODO: You should avoid naming your program units and variables with names that match an intrinsic FUNCTION, SUBROUTINE or MODULE. We recommend the use of unique names within a program unit. +* TODO: You should also avoid naming your program units and variables with names that match a keyword in a Fortran statement. +* TODO: Avoid giving program units names that are likely to be used as variable names elsewhere in the code, e.g. field or string. This makes searching the code difficult and can cause the code browser to make erroneous connections between unrelated routines. +* TODO: Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work """ """3.2 Headers * All programming units require a suitable copyright header.""" - def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: """Check for crown copyright statement""" """ -TODO: This is a very simplistic check and will not detect many cases which break UMDP3. + TODO: This is a very simplistic check and will not detect many cases which break UMDP3. It will pass if the word copyright appears on a commented out line. This could include passing : ! I should put a copyright statement here... @@ -142,22 +195,23 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: ) """ -* Headers are an immensely important part of any code as they document what it does, and how it does it. -* The description of the MODULE and its contained SUBROUTINE may be the same and thus it need not berepeated in the latter. If a MODULE contains more than one subroutine then further descriptions are required. -* History comments should not be included in the header or routine code. FCM TRAC provides the history of our codes. -* Code author names should NOT be included explicitly within the code as they quickly become out of date and are sometimes misleading. Instead we reference a single maintainable text file which is included within the UM code repository. +* TODO: Headers are an immensely important part of any code as they document what it does, and how it does it. +* TODO: The description of the MODULE and its contained SUBROUTINE may be the same and thus it need not berepeated in the latter. If a MODULE contains more than one subroutine then further descriptions are required. +* TODO: History comments should not be included in the header or routine code. FCM TRAC provides the history of our codes. +* TODO: Code author names should NOT be included explicitly within the code as they quickly become out of date and are sometimes misleading. Instead we reference a single maintainable text file which is included within the UM code repository. ! Code Owner: Please refer to the UM file CodeOwners.txt ! This file belongs in section: -* Example UM templates are provided with the source of this document; subroutine, function and module +* TODO: Example UM templates are provided with the source of this document; subroutine, function and module templates""" """3.3 Free source form -* All code should be written using the free source form. -* Please restrict code to 80 columns, so that your code can be easily viewed on any editor and screen and -can be printed easily on A4 paper. Note that CreateBC uses a limit of 100 columns, due to the nature of -the object-orientated code. +* TODO: All code should be written using the free source form. + - Could be fun, look for 7 leading spaces regularly. Or only something in col6 and/or only nos in cols 1 to 5 +* Please restrict code to 80 columns + Note that CreateBC uses a limit of 100 columns, due to the nature of + the object-orientated code. """ def r3_3_2line_too_long(lines: List[str]) -> TestResult: """Check for lines longer than 80 characters""" @@ -178,8 +232,8 @@ def r3_3_2line_too_long(lines: List[str]) -> TestResult: ) """ -* Never put more than one statement on a line. -* Write your program in UK English, unless you have a very good reason for not doing so.""" +* TODO: Never put more than one statement on a line. +* TODO: Write your program in UK English, unless you have a very good reason for not doing so.""" """3.4 Fortran style * To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" @@ -212,69 +266,86 @@ def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: errors=error_log, ) """ -* The rest of the code may be written in either lower-case with underscores or CamelCase. -• To improve readability, you should always use the optional space to separate the Fortran keywords. The -full list of Fortran keywords with an optional spaces is: -ELSE IF END DO END FORALL END FUNCTION -END IF END INTERFACE END MODULE END PROGRAM -END SELECT END SUBROUTINE END TYPE END WHERE -SELECT CASE ELSE WHERE DOUBLE PRECISION END ASSOCIATE -END BLOCK END BLOCK DATA END ENUM END FILE -END PROCEDURE GO TO IN OUT SELECT TYPE -Note that not all of these are approved or appropriate for use in UM code. This rule also applies to OpenMP -keywords. (See: 3.15) -• The full version of END should be used at all times, eg END SUBROUTINE and END FUNCTION -• New code should be written using Fortran 95/2003 features. Avoid non-portable vendor/compiler exten- -sions. -• When writing a REAL literal with an integer value, put a 0 after the decimal point (i.e. 1.0 as opposed to 1.) -to improve readability. -• Avoid using obsolescent features of the Fortran language, instead make use of F95/2003 alternatives. For -example, statement functions are among the list of deprecated features in the F95 standard and these can -be replaced by FUNCTIONs within appropriate MODULEs. -• Do not use archaic forms of intrinsic functions. For example, LOG() should be used in place of ALOG(), -MAX() instead of AMAX1(), REAL() instead of FLOAT() etc. -• Never use the PAUSE statement. -• Never use the STOP statement, see 3.19 -• The standard delimiter for namelists is /. In particular, note that &END is non-standard and should be -avoided. For further information on namelists please refer to 4.1 -• Only use the generic names of intrinsic functions, avoid the use of ’hardware’ specific intrinsic functions. -Use the latter if an only if there is an optimisation benefit and then it must be protected by a platform -specific CPP flag 3.17""" - - -def capitulated_keywords(lines: List[str]) -> TestResult: - """A fake test, put in for testing purposes. - Probably not needed any more, but left in case.""" - failures = 0 - line_count = 0 - error_log = {} - # print("Debug: In capitulated_keywords test") - for line in lines: - line_count += 1 - # Remove quoted strings and comments - if line.lstrip(" ").startswith("!"): - continue - clean_line = remove_quoted(line) - clean_line = comment_line.sub("", clean_line) +* TODO: The rest of the code may be written in either lower-case with underscores or + CamelCase. +* TODO: To improve readability, you should always use the optional space to separate + the Fortran keywords. + This rule also applies to OpenMP keywords. (See: 3.15) +* TODO: The full version of END should be used at all times, + eg END SUBROUTINE and END FUNCTION +* TODO: New code should be written using Fortran 95/2003 features. Avoid non-portable + vendor/compiler extensions. +* TODO: When writing a REAL literal with an integer value, put a 0 after the decimal + point (i.e. 1.0 as opposed to 1.) to improve readability. +* TODO: Avoid using obsolescent features of the Fortran language, instead make use of + F95/2003 alternatives. For example, statement functions are among the list of + deprecated features in the F95 standard and these can be replaced by FUNCTIONs + within appropriate MODULEs. +* TODO: Do not use archaic forms of intrinsic functions. For example: + LOG() should be used in place of ALOG(), + MAX() instead of AMAX1(), REAL() instead of FLOAT() etc. +* TODO: Never use the PAUSE statement. +* TODO: Never use the STOP statement, see 3.19 +* TODO: The standard delimiter for namelists is /. In particular, note that &END is + non-standard and should be avoided. For further information on namelists please + refer to 4.1 +* TODO: Only use the generic names of intrinsic functions, avoid the use of ’hardware’ + specific intrinsic functions. + Use the latter if an only if there is an optimisation benefit and then it must + be protected by a platform specific CPP flag 3.17 +3.5 Comments and white spacing : +TODO : Copy in the rules from section 3.5 and add the appropriate tests. - # Check for lowercase keywords - for word in word_splitter.findall(clean_line): - upcase = word.upper() - if upcase in fortran_keywords and word != upcase: - error_log = add_error_log( - error_log, f"capitulated keyword: {word}", line_count - ) - failures += 1 +3.6 The use of modules : +TODO : Copy in the rules from section 3.6 and add the appropriate tests. - return TestResult( - checker_name="Capitulated Keywords", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {line_count} lines, found {failures} failures.", - errors=error_log, - ) +3.7 Argument and variable declaration : +TODO : Copy in the rules from section 3.7 and add the appropriate tests. + +3.8 Allocatables : +TODO : Copy in the rules from section 3.8 and add the appropriate tests. + +3.9 Code IF blocks, DO LOOPs, and other constructs : +TODO : Copy in the rules from section 3.9 and add the appropriate tests. + +3.10 Line continuation : +TODO : Copy in the rules from section 3.10 and add the appropriate tests. + +3.11 Fortran I/O : +TODO : Copy in the rules from section 3.11 and add the appropriate tests. + +3.12 Formatting and output of text : +TODO : Copy in the rules from section 3.12 and add the appropriate tests. + +3.13 PrintStatus : +TODO : Copy in the rules from section 3.13 and add the appropriate tests. + +3.14 DrHook : +TODO : Copy in the rules from section 3.14 and add the appropriate tests. + I /think/ DrHook is being (possibly has been) retired, so this section of the docs needs updating and tests for the new system need to be added. + +3.15 OpenMP : +TODO : Copy in the rules from section 3.15 and add the appropriate tests. + +3.16 MPI : +TODO : Copy in the rules from section 3.16 and add the appropriate tests. + +3.17 Preprocessing : +TODO : Copy in the rules from section 3.17 and add the appropriate tests. + +3.18 Code duplication : +TODO : Copy in the rules from section 3.18 and add the appropriate tests. + +3.19 Error reporting : +TODO : Copy in the rules from section 3.19 and add the appropriate tests. + +TODO : Section 4 has guidelines on "specific standards". + Unsure as yet if they can be tested for, but it needs looking into. +""" +# A list, of all the tests defined above, so it can be imported easily. list_O_tests = [ + r3_1_1_there_can_be_only_one, r3_2_1_check_crown_copyright, r3_3_2line_too_long, r3_4_1_capitalised_keywords, From ae4e58a8cd0c04912cea2b64e7cc326f0c1ed581 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 23 Apr 2026 16:45:11 +0100 Subject: [PATCH 17/27] Quick Tidy of an output failure message. --- script_umdp3_checker/tests/test_umdp3_rules_S3.py | 2 +- script_umdp3_checker/umdp3_rules_S3.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 41e0e181..68ea65ff 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -148,7 +148,7 @@ def test_concatenate_lines(example_fortran_lines): [["replace", 118, "! Missing END MODULE"]], False, 1, - {"No matching END statement found for the first program unit.": [0]}, + {"Last executable line not a matching END statement for the first program unit found.": [0]}, ), ], ids=[ diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index b6c0de13..2a9fb42d 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -125,7 +125,7 @@ def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, s return (False, "END statement found for a different program unit: " f"should be {unit_name} got {unit_name_search.group(1)}.") else: - return (False, "No matching END statement found for the first program unit.") + return (False, "Last executable line not a matching END statement for the first program unit found.") return (False, "No matching END statement found for the first program unit.") found_first, first_result = find_first(lines) if not found_first: From 5f1d9c02a33a207bc7318edc90ed5eb9b38cdfa4 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 24 Apr 2026 16:26:08 +0100 Subject: [PATCH 18/27] Updating the Fortran file editing routine to add or replace with multiple lines, plus accidentally running ruff. --- .../tests/example_fortran_code.F90 | 1 + .../tests/test_umdp3_rules_S3.py | 237 ++++++++++++++---- script_umdp3_checker/umdp3_rules_S3.py | 43 +++- 3 files changed, 231 insertions(+), 50 deletions(-) diff --git a/script_umdp3_checker/tests/example_fortran_code.F90 b/script_umdp3_checker/tests/example_fortran_code.F90 index 316dbe3e..52309dc7 100644 --- a/script_umdp3_checker/tests/example_fortran_code.F90 +++ b/script_umdp3_checker/tests/example_fortran_code.F90 @@ -1,3 +1,4 @@ +! fortitude: ignore file ! *****************************COPYRIGHT******************************* ! (C) Crown copyright Met Office. All rights reserved. ! For further details please refer to the file COPYRIGHT.txt diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 68ea65ff..e993a632 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -11,9 +11,16 @@ # Add the current directory to Python path sys.path.insert(0, str(Path(__file__).parent.parent)) -from umdp3_rules_S3 import remove_comments, remove_quoted, \ - concatenate_lines, r3_2_1_check_crown_copyright, \ - r3_3_2line_too_long, r3_4_1_capitalised_keywords, r3_1_1_there_can_be_only_one +from umdp3_rules_S3 import ( + remove_comments, + remove_quoted, + concatenate_lines, + r3_1_1_there_can_be_only_one, + r3_2_1_check_crown_copyright, + r3_3_2line_too_long, + r3_4_1_capitalised_keywords, + r3_4_2_no_full_uppercase_variable_names, +) # from umdp3_checker_rules import TestResult, UMDP3Checker @@ -36,47 +43,57 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: for change in sorted(changes, key=lambda o: o[1], reverse=True): idx = int(change[1]) - 1 if change[0] == "replace": - lines[idx] = change[2] + del lines[idx] + print(f"Line {idx} : deleting line.") + for new_line in change[2]: + print(f'Line {idx} : adding "{new_line}"') + lines.insert(idx, new_line) + idx += 1 elif change[0] == "delete": + print(f"Line {idx} : deleting line.") del lines[idx] elif change[0] == "add": - lines.insert(idx, change[2]) - # if change[0] == "replace": - # lines[idx:idx+1] = change[2] - # elif change[0] == "delete": - # del lines[idx] - # elif change[0] == "add": - # lines[idx:idx], change[2]) + for new_line in change[2]: + print(f'Line {idx} : adding "{new_line}"') + lines.insert(idx, new_line) + idx += 1 else: raise ValueError(f"Unknown operation: {change[0]}") # for count, line in enumerate(lines, 1): # print(f"line [{count}]: {line}") return lines + # ================================================================= """First : test the helper functions in umdp3_rules_S3.py.""" + + def test_modify_fortran_lines(example_fortran_lines): """TODO: Does this need to be more rigorous ?""" changes_list = [ - ["replace", 10, "This is a replacement line."], + ["replace", 10, ["This is a replacement line."]], ["delete", 20, None], - ["add", 30, "This is an added line."] + ["add", 30, ["This is an added line."]], ] modified_lines = modify_fortran_lines(example_fortran_lines, changes_list) # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added aobve in the file and -1 for every line deleted above in the file. # for line_no, line in enumerate(modified_lines, 1): # print(f"line [{line_no:04}]: {line}") assert modified_lines[9] == "This is a replacement line." - assert len(modified_lines) == len(example_fortran_lines) # One line deleted, One added + assert len(modified_lines) == len( + example_fortran_lines + ) # One line deleted, One added # Added @ 30, so 29, but one line deleted above, so 28 - seeemples assert modified_lines[28] == "This is an added line." + def test_remove_quoted(example_fortran_lines): for line in example_fortran_lines.copy(): uncommented_line = remove_quoted(line) assert '"' not in uncommented_line assert "'" not in uncommented_line + def test_remove_comments(example_fortran_lines): for line in example_fortran_lines.copy(): line = remove_quoted(line) @@ -86,6 +103,7 @@ def test_remove_comments(example_fortran_lines): assert uncommented_line == line[:comment_location].rstrip() assert "!" not in uncommented_line + def test_concatenate_lines(example_fortran_lines): for line_no, line in enumerate(example_fortran_lines.copy(), 1): if line.find("&") >= 0: @@ -93,21 +111,27 @@ def test_concatenate_lines(example_fortran_lines): assert "&" not in concatenated_line # This bit has to be a bit hard-wired and is going to break every time the example Fortran file line numbers are changed. If you can think of a better method - go ahead. if line_no == 31: - expected_line = ("SUBROUTINE example (xlen,ylen,l_unscale,input1," + - "input2, output, l_loud_opt)") + expected_line = ( + "SUBROUTINE example (xlen,ylen,l_unscale,input1," + + "input2, output, l_loud_opt)" + ) assert concatenated_line == expected_line elif line_no == 70: # This will look odd as the quoted text has been removed. expected_line = ( - "my_char " + - " = // ") + "my_char " + + " = // " + ) assert concatenated_line == expected_line elif line_no == 96: - expected_line = (" field2(i, j) = (1.0*i) - (2.0*j) " + - " + (3.0*i*j) + " + - "(4.0*i**2) + field(i, j)*2") + expected_line = ( + " field2(i, j) = (1.0*i) - (2.0*j) " + + " + (3.0*i*j) + " + + "(4.0*i**2) + field(i, j)*2" + ) assert concatenated_line == expected_line + # ================================================================= """These examples hopefully demonstrate some of the complexity available... @@ -123,6 +147,7 @@ def test_concatenate_lines(example_fortran_lines): # ================================================================= + @pytest.mark.parametrize( "changes_list, expected_passed, expected_failure_count, expected_errors", [ @@ -130,25 +155,35 @@ def test_concatenate_lines(example_fortran_lines): ([], True, 0, {}), # Invalid: No program unit found (delete MODULE line) ( - [["replace", 10, "! No module declaration here"]], + [["replace", 10, ["! No module declaration here"]]], False, 1, - {"First executable line doesn't define an accepted programming unit :" - " IMPLICIT NONE": [0]}, + { + "First executable line doesn't define an accepted programming unit :" + " IMPLICIT NONE": [0] + }, ), # Invalid: Mismatched END statement ( - [["replace", 118, "END MODULE wrong_mod_name"]], + [["replace", 118, ["END MODULE wrong_mod_name"]]], False, 1, - {"END statement found for a different program unit: should be example_mod got wrong_mod_name.": [0]}, + { + "END statement found for a different program unit: should be example_mod got wrong_mod_name.": [ + 0 + ] + }, ), # Invalid: No END statement found ( - [["replace", 118, "! Missing END MODULE"]], + [["replace", 118, ["! Missing END MODULE"]]], False, 1, - {"Last executable line not a matching END statement for the first program unit found.": [0]}, + { + "Last executable line not a matching END statement for the first program unit found.": [ + 0 + ] + }, ), ], ids=[ @@ -159,7 +194,11 @@ def test_concatenate_lines(example_fortran_lines): ], ) def test_r3_1_1_there_can_be_only_one( - example_fortran_lines, changes_list, expected_passed, expected_failure_count, expected_errors + example_fortran_lines, + changes_list, + expected_passed, + expected_failure_count, + expected_errors, ): modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) result = r3_1_1_there_can_be_only_one(modified_fortran_lines) @@ -173,8 +212,10 @@ def test_r3_1_1_there_can_be_only_one( for line_no in lines_list: assert line_no in expected_errors[error] + # ================================================================= + @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", [ @@ -184,14 +225,14 @@ def test_r3_1_1_there_can_be_only_one( ["delete", 2, None], ["delete", 3, None], ["delete", 4, None], - ["delete", 5, None] + ["delete", 5, None], ], 1, {"missing copyright or crown copyright statement": [0]}, ), - ([["add", 10, ""]], 0, []), # No changes, expect no errors + ([], 0, []), # No changes, expect no errors ], - ids = ["Missing copyright statement", "copyright statemrent present"] + ids=["Missing copyright statement", "copyright statement present"], ) def test_r3_2_1_check_crown_copyright( example_fortran_lines, changes_list, expected_result, expected_errors @@ -203,12 +244,13 @@ def test_r3_2_1_check_crown_copyright( assert failure_count == expected_result errors = result.errors assert len(errors) == len(expected_errors) - for error, lines_list in errors.items(): + for error, lines_list in errors.items(): assert error in expected_errors assert len(lines_list) == len(expected_errors[error]) for line_no in lines_list: assert line_no in expected_errors[error] + # ================================================================= @pytest.mark.parametrize( @@ -216,19 +258,30 @@ def test_r3_2_1_check_crown_copyright( [ ( [ - ["replace", 71, - " = \"This is a very very very very very very very \"" + - " &"], - ["replace",115, "IF (lhook) CALL dr_hook(ModuleName// " + - "\":\" // RoutineName, zhook_out, zhook_handle) ! extra comment"], - ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] + [ + "replace", + 71, + [ + ' = "This is a very very very very very very very "' + + " &" + ], + ], + [ + "replace", + 115, + [ + "IF (lhook) CALL dr_hook(ModuleName// " + + '":" // RoutineName, zhook_out, zhook_handle) ! extra comment' + ], + ], + ["replace", 40, ["use yomhook, ONLY: lhook, dr_hook"]], ], 2, {"line too long": [71, 115]}, ), ([], 0, {}), # No changes, expect no errors ], - ids = ["3 line too long Errors", "No line too long Errors"] + ids=["3 line too long Errors", "No line too long Errors"], ) def test_r3_3_2line_too_long( example_fortran_lines, changes_list, expected_result, expected_errors @@ -240,7 +293,7 @@ def test_r3_3_2line_too_long( assert failure_count == expected_result errors = result.errors assert len(errors) == len(expected_errors) - for error, lines_list in errors.items(): + for error, lines_list in errors.items(): assert error in expected_errors assert len(lines_list) == len(expected_errors[error]) for line_no in lines_list: @@ -249,35 +302,123 @@ def test_r3_3_2line_too_long( # ================================================================= + @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", [ ( [ - ["replace", 10, "Module example_mod"], - ["replace", 37, "use parkind1, ONLY: jpim, jprb"], - ["replace", 40, "use yomhook, ONLY: lhook, dr_hook"] + ["replace", 10, ["Module example_mod"]], + ["replace", 37, ["use parkind1, ONLY: jpim, jprb"]], + ["replace", 40, ["use yomhook, ONLY: lhook, dr_hook"]], ], 3, {"lowercase keyword: Module": [10], "lowercase keyword: use": [37, 40]}, ), - ([["add", 10, ""]], 0, []), # No changes, expect no errors + ([], 0, []), # No changes, expect no errors ], - ids = ["3 Lowercase Errors", "No Lowercase Errors"] + ids=["3 Lowercase Errors", "No Lowercase Errors"], ) def test_r3_4_1_capitalised_keywords( example_fortran_lines, changes_list, expected_result, expected_errors ): - # checker = UMDP3Checker() modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) result = r3_4_1_capitalised_keywords(modified_fortran_lines) failure_count = result.failure_count assert failure_count == expected_result errors = result.errors assert len(errors) == len(expected_errors) - for error, lines_list in errors.items(): + for error, lines_list in errors.items(): assert error in expected_errors assert len(lines_list) == len(expected_errors[error]) for line_no in lines_list: assert line_no in expected_errors[error] + +# ================================================================= + +# See if you can spot why I hate ruff - it highlights my awful data structures really well. +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + [ + "replace", + 43, + ["INTEGER, INTENT(IN) :: XLEN !Length of first dim of the arrays."], + ], + ["replace", 58, ["REAL :: var1, DAVE_2, HiPPo"]], + ], + 2, + { + "Found UPPERCASE variable name in declaration at line 43: XLEN": [43], + "Found UPPERCASE variable name in declaration at line 58: DAVE_2": [58], + }, + ), + ( + [ + [ + "add", + 58, + [ + "REAL :: VARIaBLE_1, variable_2, &", + " VARIABLE_3, Hot_Potato, Baked Potato &", + " nice_var, good_var, camelCase, & !comment test", + " CAPS_VAR, CASPVAR, the_ghost", + ], + ], + [ + "replace", + 54, + [ + "INTEGER :: j ! Loop counter &", + "INTEGER :: k ! Loop counter &", + "INTEGER :: IJ ! Loop counter", + ], + ], + [ + "replace", + 43, + [ + "INTEGER, INTENT(IN) :: XLEN !Length of first dimension of the" + + " arrays." + ], + ], + ], + 5, + { + "Found UPPERCASE variable name in declaration at line 43: XLEN": [ + 43 + ], + "Found UPPERCASE variable name in declaration at line 56: IJ": [56], + "Found UPPERCASE variable name in declaration at line 60: CASPVAR": [ + 60 + ], + "Found UPPERCASE variable name in declaration at line 60: VARIABLE_3": [ + 60 + ], + "Found UPPERCASE variable name in declaration at line 60: CAPS_VAR": [ + 60 + ], + }, + ), + ([], 0, []), # No changes, expect no errors + ], + ids=["2 UpperCase Var Errors", "5 UpperCase Var Errors on extended lines", + "No UpperCase Var Errors"], +) +def test_r3_4_2_no_full_uppercase_variable_names( + example_fortran_lines, changes_list, expected_result, expected_errors +): + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_4_2_no_full_uppercase_variable_names(modified_fortran_lines) + failure_count = result.failure_count + assert failure_count == expected_result + errors = result.errors + assert len(errors) == len(expected_errors) + for error, lines_list in errors.items(): + assert error in expected_errors + assert len(lines_list) == len(expected_errors[error]) + for line_no in lines_list: + assert line_no in expected_errors[error] diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 2a9fb42d..be24019d 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -266,8 +266,47 @@ def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: errors=error_log, ) """ -* TODO: The rest of the code may be written in either lower-case with underscores or - CamelCase. +* The rest of the code may be written in either lower-case with underscores or + CamelCase. +""" +def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: + """Check for lowercase or CamelCase variable names only""" + """ +TODO: This is a very simplistic check and will not detect many + cases which break UMDP3. I suspect the Perl Predecessor concatenated + continuation lines prior to 'cleaning' and checking. Having identified + a declaration, it also then scanned the rest of the file for that + variable name in any case.""" + failures = 0 + error_log = {} + count = -1 + declaration_search = re.compile( + r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", re.IGNORECASE + ) + for count, line in enumerate(lines, 1): + clean_line = remove_quoted(line) + clean_line = remove_comments(clean_line) + # Simple check for UPPERCASE variable declarations + if declaration_search.search(clean_line): + full_line = concatenate_lines(lines, count) + clean_line = full_line.split("::", 1)[1].strip() + clean_line = re.sub(r"\([^)]*\)", "", clean_line) + variables = [var.strip() for var in clean_line.split(",")] + for var in variables: + #var = var.split("(", 1)[0].strip() # Remove any array dimensions + if var.upper() == var: + failures += 1 + error_log = add_error_log( + error_log, f"Found UPPERCASE variable name in declaration at line {count}: {var}", count + ) + return TestResult( + checker_name="No Full Uppercase variable names", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count} lines, found {failures} UPPERCASE variables.", + errors=error_log, + ) +""" * TODO: To improve readability, you should always use the optional space to separate the Fortran keywords. This rule also applies to OpenMP keywords. (See: 3.15) From 02c663b347a5013be1b979bc8f28d9f516577591 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 24 Apr 2026 16:29:54 +0100 Subject: [PATCH 19/27] skinning the Latte. --- script_umdp3_checker/tests/example_fortran_code.F90 | 1 + 1 file changed, 1 insertion(+) diff --git a/script_umdp3_checker/tests/example_fortran_code.F90 b/script_umdp3_checker/tests/example_fortran_code.F90 index 52309dc7..9525ae5c 100644 --- a/script_umdp3_checker/tests/example_fortran_code.F90 +++ b/script_umdp3_checker/tests/example_fortran_code.F90 @@ -1,3 +1,4 @@ +! fortls: ignore file ! fortitude: ignore file ! *****************************COPYRIGHT******************************* ! (C) Crown copyright Met Office. All rights reserved. From 6c543ea50c4540b110564755996170aaaf858f4b Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 24 Apr 2026 17:16:01 +0100 Subject: [PATCH 20/27] Tidying up redundant tests --- .../checker_dispatch_tables.py | 6 - .../tests/test_fortran_checks.py | 100 ------------ script_umdp3_checker/tests/test_umdp3.py | 6 +- .../tests/test_umdp3_rules_S3.py | 76 ++++----- script_umdp3_checker/umdp3_checker_rules.py | 148 ------------------ script_umdp3_checker/umdp3_rules_S3.py | 5 +- 6 files changed, 44 insertions(+), 297 deletions(-) diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index 77c11aab..38df2f07 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -28,17 +28,14 @@ def __init__(self): def get_diff_dispatch_table_fortran(self) -> list[Callable]: """Get dispatch table for Fortran diff tests""" return [ - self.umdp3_checker.capitalised_keywords, self.umdp3_checker.openmp_sentinels_in_column_one, self.umdp3_checker.unseparated_keywords, self.umdp3_checker.go_to_other_than_9999, self.umdp3_checker.write_using_default_format, - self.umdp3_checker.lowercase_variable_names, self.umdp3_checker.dimension_forbidden, self.umdp3_checker.ampersand_continuation, self.umdp3_checker.forbidden_keywords, self.umdp3_checker.forbidden_operators, - self.umdp3_checker.line_over_80chars, self.umdp3_checker.tab_detection, self.umdp3_checker.printstatus_mod, self.umdp3_checker.printstar, @@ -63,7 +60,6 @@ def get_file_dispatch_table_fortran( self.umdp3_checker.implicit_none, self.umdp3_checker.forbidden_stop, self.umdp3_checker.intrinsic_as_variable, - self.umdp3_checker.check_crown_copyright, self.umdp3_checker.check_code_owner, self.umdp3_checker.array_init_form, ] @@ -71,7 +67,6 @@ def get_file_dispatch_table_fortran( def get_diff_dispatch_table_c(self) -> list[Callable]: """Get dispatch table for C diff tests""" return [ - self.umdp3_checker.line_over_80chars, self.umdp3_checker.tab_detection, self.umdp3_checker.c_integral_format_specifiers, ] @@ -81,7 +76,6 @@ def get_file_dispatch_table_c(self) -> list[Callable]: return [ self.umdp3_checker.retire_if_def, self.umdp3_checker.c_deprecated, - self.umdp3_checker.check_crown_copyright, self.umdp3_checker.check_code_owner, self.umdp3_checker.c_openmp_define_pair_thread_utils, self.umdp3_checker.c_openmp_define_no_combine, diff --git a/script_umdp3_checker/tests/test_fortran_checks.py b/script_umdp3_checker/tests/test_fortran_checks.py index 92844f48..5c4e97c7 100644 --- a/script_umdp3_checker/tests/test_fortran_checks.py +++ b/script_umdp3_checker/tests/test_fortran_checks.py @@ -19,29 +19,6 @@ date. Especially as errors is (I think) only checking for the presence of a given string in the keys of the error log dict. """ -keyword_data = [ - ("IF THEN END", 0, {}, "All UPPERCASE keywords"), - ("if then end", 3, {"lowercase keyword: if"}, "All lowercase keywords"), - ("If Then End", 3, {"lowercase keyword: If"}, "All mixed case keywords"), - ("foo bar baz", 0, {}, "No keywords"), - ("! if then end", 0, {}, "Commented keywords"), -] -keyword_test_parameters = [data[:3] for data in keyword_data] -keyword_test_ids = [data[3] for data in keyword_data] - - -@pytest.mark.parametrize( - "lines, expected_result, expected_errors", - keyword_test_parameters, - ids=keyword_test_ids, -) -def test_keywords(lines, expected_result, expected_errors): - checker = UMDP3Checker() - result = checker.capitalised_keywords([lines]) - assert result.failure_count == expected_result - for error in expected_errors: - assert error in result.errors - fake_code_block = ["PROGRAM test", "IMPLICIT NONE", "INTEGER :: i", "END PROGRAM"] implicit_none_paramters = [ @@ -161,39 +138,6 @@ def test_write_using_default_format(lines, expected_result): assert result.failure_count == expected_result -test_lowercase_variable_names_parameters = [ - (["INTEGER :: lowercase_variable"], 0, "Lowercase variable name"), - (["REAL :: Lowercase_Variable"], 0, "Pascal case variable name"), - (["CHARACTER :: LOWERCASE_VARIABLE"], 1, "Uppercase variable name"), - ( - [" REAL :: lowercase_variable"], - 0, - "Lowercase variable name with leading whitespace", - ), - ( - [" CHARACTER :: Lowercase_Variable"], - 0, - "Pascal case variable name with leading whitespace", - ), - ( - [" INTEGER :: LOWERCASE_VARIABLE"], - 1, - "Uppercase variable name with leading whitespace", - ), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_lowercase_variable_names_parameters], - ids=[data[2] for data in test_lowercase_variable_names_parameters], -) -def test_lowercase_variable_names(lines, expected_result): - checker = UMDP3Checker() - result = checker.lowercase_variable_names(lines) - assert result.failure_count == expected_result - - test_dimension_forbidden_parameters = [ (["REAL :: array(ARR_LEN)"], 0, "Dimension specified in variable declaration"), (["REAL :: array"], 0, "No dimension specified in variable declaration"), @@ -205,7 +149,6 @@ def test_lowercase_variable_names(lines, expected_result): ), ] - @pytest.mark.parametrize( "lines, expected_result", [data[:2] for data in test_dimension_forbidden_parameters], @@ -289,30 +232,6 @@ def test_forbidden_operators(lines, expected_result): result = checker.forbidden_operators(lines) assert result.failure_count == expected_result - -test_line_over_80chars_parameters = [ - ( - [ - " PRINT *, 'This line is definitely way over the eighty character limit set by the UM coding standards'" - ], - 1, - "Line over 80 characters", - ), - ([" PRINT *, 'This line is within the limit'"], 0, "Line within 80 characters"), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_line_over_80chars_parameters], - ids=[data[2] for data in test_line_over_80chars_parameters], -) -def test_line_over_80chars(lines, expected_result): - checker = UMDP3Checker() - result = checker.line_over_80chars(lines) - assert result.failure_count == expected_result - - test_tab_detection_parameters = [ ([" PRINT *, 'This line has no tabs'"], 0, "No tabs"), ([" PRINT *, 'This line has a tab\tcharacter'"], 1, "Line with tab character"), @@ -634,25 +553,6 @@ def test_intrinsic_as_variable(lines, expected_result): assert result.failure_count == expected_result -test_check_crown_copyright_parameters = [ - (["! Crown copyright 2024"], 0, "Correct crown copyright statement"), - (["! Copyright 2024"], 0, "A copyright statement"), - (["! This is a comment"], 1, "No crown copyright statement"), - (["! This is a Crown"], 1, "No crown copyright statement"), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_check_crown_copyright_parameters], - ids=[data[2] for data in test_check_crown_copyright_parameters], -) -def test_check_crown_copyright(lines, expected_result): - checker = UMDP3Checker() - result = checker.check_crown_copyright(lines) - assert result.failure_count == expected_result - - test_check_code_owner_parameters = [ (["! Code Owner: John Doe"], 0, "code owner statement"), (["! Code Owner : John Doe"], 0, "Another code owner statement"), diff --git a/script_umdp3_checker/tests/test_umdp3.py b/script_umdp3_checker/tests/test_umdp3.py index 4cb99faf..dfa05d82 100644 --- a/script_umdp3_checker/tests/test_umdp3.py +++ b/script_umdp3_checker/tests/test_umdp3.py @@ -33,10 +33,10 @@ def test_basic_functionality(): # Test line length check test_lines = [ - "This is a short line", - "This is a very long line that exceeds eighty characters and should trigger a failure in the line length test", + "This is a line with no trailing space", + "This is line with a trailing space ", ] - result = umdp3_checker.line_over_80chars(test_lines) + result = umdp3_checker.line_trail_whitespace(test_lines) print( f"Line length test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" ) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index e993a632..eb6f602a 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -17,7 +17,7 @@ concatenate_lines, r3_1_1_there_can_be_only_one, r3_2_1_check_crown_copyright, - r3_3_2line_too_long, + r3_3_2_line_too_long, r3_4_1_capitalised_keywords, r3_4_2_no_full_uppercase_variable_names, ) @@ -71,20 +71,20 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: def test_modify_fortran_lines(example_fortran_lines): """TODO: Does this need to be more rigorous ?""" changes_list = [ - ["replace", 10, ["This is a replacement line."]], - ["delete", 20, None], - ["add", 30, ["This is an added line."]], + ["replace", 12, ["This is a replacement line."]], + ["delete", 22, None], + ["add", 32, ["This is an added line."]], ] modified_lines = modify_fortran_lines(example_fortran_lines, changes_list) # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added aobve in the file and -1 for every line deleted above in the file. # for line_no, line in enumerate(modified_lines, 1): # print(f"line [{line_no:04}]: {line}") - assert modified_lines[9] == "This is a replacement line." + assert modified_lines[11] == "This is a replacement line." assert len(modified_lines) == len( example_fortran_lines ) # One line deleted, One added - # Added @ 30, so 29, but one line deleted above, so 28 - seeemples - assert modified_lines[28] == "This is an added line." + # Added @ 32, so 31, but one line deleted above, so 30 - seeemples + assert modified_lines[30] == "This is an added line." def test_remove_quoted(example_fortran_lines): @@ -155,7 +155,7 @@ def test_concatenate_lines(example_fortran_lines): ([], True, 0, {}), # Invalid: No program unit found (delete MODULE line) ( - [["replace", 10, ["! No module declaration here"]]], + [["replace", 12, ["! No module declaration here"]]], False, 1, { @@ -165,7 +165,7 @@ def test_concatenate_lines(example_fortran_lines): ), # Invalid: Mismatched END statement ( - [["replace", 118, ["END MODULE wrong_mod_name"]]], + [["replace", 120, ["END MODULE wrong_mod_name"]]], False, 1, { @@ -176,7 +176,7 @@ def test_concatenate_lines(example_fortran_lines): ), # Invalid: No END statement found ( - [["replace", 118, ["! Missing END MODULE"]]], + [["replace", 120, ["! Missing END MODULE"]]], False, 1, { @@ -221,11 +221,11 @@ def test_r3_1_1_there_can_be_only_one( [ ( [ - ["delete", 1, None], - ["delete", 2, None], ["delete", 3, None], ["delete", 4, None], ["delete", 5, None], + ["delete", 6, None], + ["delete", 7, None], ], 1, {"missing copyright or crown copyright statement": [0]}, @@ -260,7 +260,7 @@ def test_r3_2_1_check_crown_copyright( [ [ "replace", - 71, + 73, [ ' = "This is a very very very very very very very "' + " &" @@ -268,27 +268,27 @@ def test_r3_2_1_check_crown_copyright( ], [ "replace", - 115, + 117, [ "IF (lhook) CALL dr_hook(ModuleName// " + '":" // RoutineName, zhook_out, zhook_handle) ! extra comment' ], ], - ["replace", 40, ["use yomhook, ONLY: lhook, dr_hook"]], + ["replace", 42, ["use yomhook, ONLY: lhook, dr_hook"]], ], 2, - {"line too long": [71, 115]}, + {"line too long": [73, 117]}, ), ([], 0, {}), # No changes, expect no errors ], ids=["3 line too long Errors", "No line too long Errors"], ) -def test_r3_3_2line_too_long( +def test_r3_3_2_line_too_long( example_fortran_lines, changes_list, expected_result, expected_errors ): # checker = UMDP3Checker() modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) - result = r3_3_2line_too_long(modified_fortran_lines) + result = r3_3_2_line_too_long(modified_fortran_lines) failure_count = result.failure_count assert failure_count == expected_result errors = result.errors @@ -308,12 +308,12 @@ def test_r3_3_2line_too_long( [ ( [ - ["replace", 10, ["Module example_mod"]], - ["replace", 37, ["use parkind1, ONLY: jpim, jprb"]], - ["replace", 40, ["use yomhook, ONLY: lhook, dr_hook"]], + ["replace", 12, ["Module example_mod"]], + ["replace", 39, ["use parkind1, ONLY: jpim, jprb"]], + ["replace", 42, ["use yomhook, ONLY: lhook, dr_hook"]], ], 3, - {"lowercase keyword: Module": [10], "lowercase keyword: use": [37, 40]}, + {"lowercase keyword: Module": [12], "lowercase keyword: use": [39, 42]}, ), ([], 0, []), # No changes, expect no errors ], @@ -345,22 +345,22 @@ def test_r3_4_1_capitalised_keywords( [ [ "replace", - 43, + 45, ["INTEGER, INTENT(IN) :: XLEN !Length of first dim of the arrays."], ], - ["replace", 58, ["REAL :: var1, DAVE_2, HiPPo"]], + ["replace", 60, ["REAL :: var1, DAVE_2, HiPPo"]], ], 2, { - "Found UPPERCASE variable name in declaration at line 43: XLEN": [43], - "Found UPPERCASE variable name in declaration at line 58: DAVE_2": [58], + "Found UPPERCASE variable name in declaration at line 45: XLEN": [45], + "Found UPPERCASE variable name in declaration at line 60: DAVE_2": [60], }, ), ( [ [ "add", - 58, + 60, [ "REAL :: VARIaBLE_1, variable_2, &", " VARIABLE_3, Hot_Potato, Baked Potato &", @@ -370,7 +370,7 @@ def test_r3_4_1_capitalised_keywords( ], [ "replace", - 54, + 56, [ "INTEGER :: j ! Loop counter &", "INTEGER :: k ! Loop counter &", @@ -379,7 +379,7 @@ def test_r3_4_1_capitalised_keywords( ], [ "replace", - 43, + 45, [ "INTEGER, INTENT(IN) :: XLEN !Length of first dimension of the" + " arrays." @@ -388,18 +388,18 @@ def test_r3_4_1_capitalised_keywords( ], 5, { - "Found UPPERCASE variable name in declaration at line 43: XLEN": [ - 43 + "Found UPPERCASE variable name in declaration at line 45: XLEN": [ + 45 ], - "Found UPPERCASE variable name in declaration at line 56: IJ": [56], - "Found UPPERCASE variable name in declaration at line 60: CASPVAR": [ - 60 + "Found UPPERCASE variable name in declaration at line 58: IJ": [58], + "Found UPPERCASE variable name in declaration at line 62: CASPVAR": [ + 62 ], - "Found UPPERCASE variable name in declaration at line 60: VARIABLE_3": [ - 60 + "Found UPPERCASE variable name in declaration at line 62: VARIABLE_3": [ + 62 ], - "Found UPPERCASE variable name in declaration at line 60: CAPS_VAR": [ - 60 + "Found UPPERCASE variable name in declaration at line 62: CAPS_VAR": [ + 62 ], }, ), diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py index 66f76de8..39f33bcb 100644 --- a/script_umdp3_checker/umdp3_checker_rules.py +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -11,7 +11,6 @@ import re from typing import List, Dict -from fortran_keywords import fortran_keywords from search_lists import ( obsolescent_intrinsics, unseparated_keywords_list, @@ -103,67 +102,6 @@ def remove_quoted(self, line: str) -> str: The list of lines is often assumed to be the whole file. """ - def capitulated_keywords(self, lines: List[str]) -> TestResult: - """A fake test, put in for testing purposes. - Probably not needed any more, but left in case.""" - failures = 0 - line_count = 0 - error_log = {} - # print("Debug: In capitulated_keywords test") - for line in lines: - line_count += 1 - # Remove quoted strings and comments - if line.lstrip(" ").startswith("!"): - continue - clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) - - # Check for lowercase keywords - for word in self.word_splitter.findall(clean_line): - upcase = word.upper() - if upcase in fortran_keywords and word != upcase: - error_log = self.add_error_log( - error_log, f"capitulated keyword: {word}", line_count - ) - failures += 1 - - return TestResult( - checker_name="Capitulated Keywords", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {line_count} lines, found {failures} failures.", - errors=error_log, - ) - - def capitalised_keywords(self, lines: List[str]) -> TestResult: - """Check for the presence of lowercase Fortran keywords, which are - taken from an imported list 'fortran_keywords'.""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines): - # Remove quoted strings and comments - if line.lstrip(" ").startswith("!"): - continue - clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) - # Check for lowercase keywords - for word in self.word_splitter.findall(clean_line): - upcase = word.upper() - if upcase in fortran_keywords and word != upcase: - failures += 1 - error_log = self.add_error_log( - error_log, f"lowercase keyword: {word}", count + 1 - ) - - return TestResult( - checker_name="Capitalised Keywords", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {count + 1} lines, found {failures} failures.", - errors=error_log, - ) - def openmp_sentinels_in_column_one(self, lines: List[str]) -> TestResult: """Check OpenMP sentinels are in column one""" failures = 0 @@ -256,47 +194,6 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: errors=error_log, ) - def lowercase_variable_names(self, lines: List[str]) -> TestResult: - """Check for lowercase or CamelCase variable names only""" - """ - TODO: This is a very simplistic check and will not detect many - cases which break UMDP3. I suspect the Perl Predecessor concatenated - continuation lines prior to 'cleaning' and checking. Having identified - a declaration, it also then scanned the rest of the file for that - variable name in any case.""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines, 1): - clean_line = self.remove_quoted(line) - clean_line = re.sub(r"!.*$", "", clean_line) - - # Simple check for UPPERCASE variable declarations - if re.search( - r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*[A-Z_]+", - clean_line, - re.IGNORECASE, - ): - clean_line = re.sub( - r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", - "", - clean_line, - ) - if match := re.search(r"([A-Z]{2,})", clean_line): - failures += 1 - error_log = self.add_error_log( - error_log, f"UPPERCASE variable name {match[1]}", count - ) - - output = f"Checked {count + 1} lines, found {failures} failures." - return TestResult( - checker_name="Lowercase or CamelCase variable names only", - failure_count=failures, - passed=(failures == 0), - output=output, - errors=error_log, - ) - def dimension_forbidden(self, lines: List[str]) -> TestResult: """Check for use of dimension attribute""" failures = 0 @@ -393,24 +290,6 @@ def forbidden_operators(self, lines: List[str]) -> TestResult: errors=error_log, ) - def line_over_80chars(self, lines: List[str]) -> TestResult: - """Check for lines longer than 80 characters""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines): - if len(line.rstrip()) > 80: - failures += 1 - error_log = self.add_error_log(error_log, "line too long", count + 1) - - return TestResult( - checker_name="Line longer than 80 characters", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {count + 1} lines, found {failures} failures.", - errors=error_log, - ) - def tab_detection(self, lines: List[str]) -> TestResult: """Check for tab characters""" failures = 0 @@ -809,33 +688,6 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: errors=error_log, ) - def check_crown_copyright(self, lines: List[str]) -> TestResult: - """Check for crown copyright statement""" - """ - TODO: This is a very simplistic check and will not detect many - cases which break UMDP3. I suspect the Perl Predeccessor - did much more convoluted tests""" - comment_lines = [ - line.upper() for line in lines if line.lstrip(" ").startswith("!") - ] - file_content = "\n".join(comment_lines) - error_log = {} - found_copyright = False - if "CROWN COPYRIGHT" in file_content or "COPYRIGHT" in file_content: - found_copyright = True - - if not found_copyright: - error_log = self.add_error_log( - error_log, "missing copyright or crown copyright statement", 0 - ) - return TestResult( - checker_name="Crown Copyright Statement", - failure_count=0 if found_copyright else 1, - passed=found_copyright, - output="Checked for crown copyright statement.", - errors=error_log, - ) - def check_code_owner(self, lines: List[str]) -> TestResult: """Check for correct code owner comment""" """ diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index be24019d..a180c4d9 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -213,7 +213,7 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: Note that CreateBC uses a limit of 100 columns, due to the nature of the object-orientated code. """ -def r3_3_2line_too_long(lines: List[str]) -> TestResult: +def r3_3_2_line_too_long(lines: List[str]) -> TestResult: """Check for lines longer than 80 characters""" failures = 0 error_log = {} @@ -386,6 +386,7 @@ def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: list_O_tests = [ r3_1_1_there_can_be_only_one, r3_2_1_check_crown_copyright, - r3_3_2line_too_long, + r3_3_2_line_too_long, r3_4_1_capitalised_keywords, + r3_4_2_no_full_uppercase_variable_names, ] From f323196e5b970c18cd40616a0c3a0e19b07f911b Mon Sep 17 00:00:00 2001 From: r-sharp Date: Mon, 27 Apr 2026 15:07:06 +0100 Subject: [PATCH 21/27] A mild tidy, and a deliberate test for l_some_logical = .FALSE. as highlighted in #214 --- .../tests/test_umdp3_rules_S3.py | 26 ++++++++++++------- script_umdp3_checker/umdp3_rules_S3.py | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index eb6f602a..a2f98294 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -31,14 +31,11 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: - ``[, , []]`` - where : = "replace" would replace line N with the new line(s) - = delete" would remove line N - - and ="add" would insert the new line(s) before line N. + - and ="add" would insert the new line(s) after line N. Operations are applied in descending line order so that earlier line numbers are not shifted by later mutations. """ - """ TODO: Currently is a string, but should become a list of strings, - allowing multiple lines to be used in addition and as replacements. Although, - this may make keeping track of line numbers more complex.""" lines = lines_in.copy() for change in sorted(changes, key=lambda o: o[1], reverse=True): idx = int(change[1]) - 1 @@ -46,7 +43,7 @@ def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: del lines[idx] print(f"Line {idx} : deleting line.") for new_line in change[2]: - print(f'Line {idx} : adding "{new_line}"') + print(f'Line {idx} : replacing with "{new_line}"') lines.insert(idx, new_line) idx += 1 elif change[0] == "delete": @@ -230,7 +227,7 @@ def test_r3_1_1_there_can_be_only_one( 1, {"missing copyright or crown copyright statement": [0]}, ), - ([], 0, []), # No changes, expect no errors + ([], 0, {}), # No changes, expect no errors ], ids=["Missing copyright statement", "copyright statement present"], ) @@ -315,7 +312,7 @@ def test_r3_3_2_line_too_long( 3, {"lowercase keyword: Module": [12], "lowercase keyword: use": [39, 42]}, ), - ([], 0, []), # No changes, expect no errors + ([], 0, {}), # No changes, expect no errors ], ids=["3 Lowercase Errors", "No Lowercase Errors"], ) @@ -356,6 +353,17 @@ def test_r3_4_1_capitalised_keywords( "Found UPPERCASE variable name in declaration at line 60: DAVE_2": [60], }, ), + ( + [ + [ + "add", + 58, + ["LOGICAL :: l_whizz_bang = .FALSE. ! optimisation flag"], + ], + ], + 0, + {}, + ), ( [ [ @@ -403,9 +411,9 @@ def test_r3_4_1_capitalised_keywords( ], }, ), - ([], 0, []), # No changes, expect no errors + ([], 0, {}), # No changes, expect no errors ], - ids=["2 UpperCase Var Errors", "5 UpperCase Var Errors on extended lines", + ids=["2 UpperCase Var Errors", "False FALSE error", "5 UpperCase Var Errors on extended lines", "No UpperCase Var Errors"], ) def test_r3_4_2_no_full_uppercase_variable_names( diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index a180c4d9..8524d535 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -293,7 +293,7 @@ def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: clean_line = re.sub(r"\([^)]*\)", "", clean_line) variables = [var.strip() for var in clean_line.split(",")] for var in variables: - #var = var.split("(", 1)[0].strip() # Remove any array dimensions + var = var.split(r"=", 1)[0].strip() # Remove any assignment part if var.upper() == var: failures += 1 error_log = add_error_log( From 5afa82754661ff8c9cb186e1c043d744436f1d50 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Mon, 27 Apr 2026 15:16:52 +0100 Subject: [PATCH 22/27] It is well documented that I hate ruff, but when I find out who keeps changing it so that lines which previously passed now fail, they're going to get a jolly hard stare.... --- .../checker_dispatch_tables.py | 6 +- script_umdp3_checker/tests/conftest.py | 1 + .../tests/test_fortran_checks.py | 2 + script_umdp3_checker/tests/test_umdp3.py | 2 +- .../tests/test_umdp3_rules_S3.py | 14 ++-- script_umdp3_checker/umdp3_checker_rules.py | 19 +++-- script_umdp3_checker/umdp3_conformance.py | 18 +++-- script_umdp3_checker/umdp3_rules_S3.py | 78 +++++++++++++++---- 8 files changed, 100 insertions(+), 40 deletions(-) diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index 38df2f07..9c907043 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -19,6 +19,8 @@ class should eventually get emptied out and the file removed. """ + + class CheckerDispatchTables: """Class containing dispatch tables for UMDP3 tests""" @@ -51,9 +53,7 @@ def get_diff_dispatch_table_fortran(self) -> list[Callable]: self.umdp3_checker.read_unit_args, ] - def get_file_dispatch_table_fortran( - self, filename: str = "" - ) -> list[Callable]: + def get_file_dispatch_table_fortran(self, filename: str = "") -> list[Callable]: """Get dispatch table for Fortran file tests""" return [ self.umdp3_checker.retire_if_def, diff --git a/script_umdp3_checker/tests/conftest.py b/script_umdp3_checker/tests/conftest.py index 9bb0cb02..ff03af93 100644 --- a/script_umdp3_checker/tests/conftest.py +++ b/script_umdp3_checker/tests/conftest.py @@ -1,6 +1,7 @@ from pathlib import Path import pytest + @pytest.fixture(scope="session") def example_fortran_lines() -> list[str]: """Return the example Fortran source as a list of lines for tests.""" diff --git a/script_umdp3_checker/tests/test_fortran_checks.py b/script_umdp3_checker/tests/test_fortran_checks.py index c4644336..0700bdd7 100644 --- a/script_umdp3_checker/tests/test_fortran_checks.py +++ b/script_umdp3_checker/tests/test_fortran_checks.py @@ -155,6 +155,7 @@ def test_write_using_default_format(lines, expected_result): ), ] + @pytest.mark.parametrize( "lines, expected_result", [data[:2] for data in test_dimension_forbidden_parameters], @@ -238,6 +239,7 @@ def test_forbidden_operators(lines, expected_result): result = checker.forbidden_operators(lines) assert result.failure_count == expected_result + test_tab_detection_parameters = [ ([" PRINT *, 'This line has no tabs'"], 0, "No tabs"), ([" PRINT *, 'This line has a tab\tcharacter'"], 1, "Line with tab character"), diff --git a/script_umdp3_checker/tests/test_umdp3.py b/script_umdp3_checker/tests/test_umdp3.py index dfa05d82..bec81679 100644 --- a/script_umdp3_checker/tests/test_umdp3.py +++ b/script_umdp3_checker/tests/test_umdp3.py @@ -20,7 +20,7 @@ from checker_dispatch_tables import CheckerDispatchTables # Prevent pytest from trying to collect TestResult as more tests: -TestResult.__test__ = False # type: ignore +TestResult.__test__ = False # type: ignore def test_basic_functionality(): diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index a2f98294..256ff927 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -250,6 +250,7 @@ def test_r3_2_1_check_crown_copyright( # ================================================================= + @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", [ @@ -334,6 +335,7 @@ def test_r3_4_1_capitalised_keywords( # ================================================================= + # See if you can spot why I hate ruff - it highlights my awful data structures really well. @pytest.mark.parametrize( "changes_list, expected_result, expected_errors", @@ -396,9 +398,7 @@ def test_r3_4_1_capitalised_keywords( ], 5, { - "Found UPPERCASE variable name in declaration at line 45: XLEN": [ - 45 - ], + "Found UPPERCASE variable name in declaration at line 45: XLEN": [45], "Found UPPERCASE variable name in declaration at line 58: IJ": [58], "Found UPPERCASE variable name in declaration at line 62: CASPVAR": [ 62 @@ -413,8 +413,12 @@ def test_r3_4_1_capitalised_keywords( ), ([], 0, {}), # No changes, expect no errors ], - ids=["2 UpperCase Var Errors", "False FALSE error", "5 UpperCase Var Errors on extended lines", - "No UpperCase Var Errors"], + ids=[ + "2 UpperCase Var Errors", + "False FALSE error", + "5 UpperCase Var Errors on extended lines", + "No UpperCase Var Errors", + ], ) def test_r3_4_2_no_full_uppercase_variable_names( example_fortran_lines, changes_list, expected_result, expected_errors diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py index 39f33bcb..ea469565 100644 --- a/script_umdp3_checker/umdp3_checker_rules.py +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -35,9 +35,12 @@ The new names starte with r2_x_y, where x and y refer to the subsections of the UMDP3 standards document that they are checking conformance to. """ + + @dataclass class TestResult: """Result from running a single style checker test on a file.""" + checker_name: str = "Unnamed Checker" failure_count: int = 0 passed: bool = False @@ -57,13 +60,13 @@ class UMDP3Checker: def __init__(self): """ - TODO: The Perl version had a dodgy looking subroutine to calculate - this, but I can't find where it was called from within the files in - 'bin'. It used all args as a 'list' - searched them for '#include' and - then returned the count as well as adding 1 to this global var if any - were found. - This is either redundant and needs removing, or needs implementing - properly.""" + TODO: The Perl version had a dodgy looking subroutine to calculate + this, but I can't find where it was called from within the files in + 'bin'. It used all args as a 'list' - searched them for '#include' and + then returned the count as well as adding 1 to this global var if any + were found. + This is either redundant and needs removing, or needs implementing + properly.""" self._number_of_files_with_variable_declarations_in_includes = 0 def add_error_log( @@ -85,7 +88,7 @@ def get_include_number(self) -> int: def remove_quoted(self, line: str) -> str: """Remove quoted strings from a line""" -# # Simple implementation - remove single and double quoted strings + # # Simple implementation - remove single and double quoted strings result = line # Remove double quoted strings diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 5122b46a..33b55187 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -158,7 +158,7 @@ def report(self, print_volume: int = 3) -> None: """Print summary of results for this checker.""" if print_volume >= 3: print( - f"Checker \"{self.name}\" checking {len(self.files_to_check)} files " + f'Checker "{self.name}" checking {len(self.files_to_check)} files ' f"with {len(self.check_functions)} checks." ) @@ -277,6 +277,7 @@ def check(self, file_path: Path) -> CheckResult: class ConformanceChecker: """Main framework for running style checks in parallel.""" + checkers: List[StyleChecker] max_workers: int results: List[CheckResult] @@ -443,14 +444,18 @@ def line_2(length: int = 80) -> str: """Helper function to print a line for separating output sections.""" return "-" * length + def print_in_box_a(text: list[str], width: int = 80) -> None: """Helper function to print text in a box.""" print("+" + "-" * (width - 2) + "+") for line in text: print("| " + line.ljust(width - 4) + " |") - print("+" + "-" * (width - 2) + "+" ) + print("+" + "-" * (width - 2) + "+") -def print_in_box_b(text: list[str], width: int = 80, justification: str = "left") -> None: + +def print_in_box_b( + text: list[str], width: int = 80, justification: str = "left" +) -> None: """Another Helper function to print text in a box.""" print(line_1(width)) for line in text: @@ -469,6 +474,7 @@ def print_in_box_b(text: list[str], width: int = 80, justification: str = "left" print("## " + " " * left_padding + line + " " * right_padding + " ##") print(line_1(width) + "\n") + def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: """Determine which CMS is in use based on the presence of certain files.""" repo_path = Path(path) @@ -663,9 +669,9 @@ def get_files_to_check( output = [ "Summary :", f" Total files checked: {len(checker.results)}", - " " + - f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}", - ] + " " + + f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}", + ] print_in_box_a(output, width=81) exit(0 if all_passed else 1) diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py index 8524d535..b7f03dfa 100644 --- a/script_umdp3_checker/umdp3_rules_S3.py +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -4,12 +4,14 @@ # under which the code may be used. # ----------------------------------------------------------------------------- -""" This is the storage location for rules based on what's defined as the standard in the UMDP: 003 document. These rules are not necessarily the same as the rules defined in the UMDP: 003 document, but they are based on them. The rules in this file are used by the umdp3_checker script to check for compliance with the UMDP: 003 standard. +"""This is the storage location for rules based on what's defined as the standard in the UMDP: 003 document. These rules are not necessarily the same as the rules defined in the UMDP: 003 document, but they are based on them. The rules in this file are used by the umdp3_checker script to check for compliance with the UMDP: 003 standard. For now, the document has been copied into this file as a placeholder for the rules as they appear.""" + import re from typing import List, Dict from umdp3_checker_rules import TestResult from fortran_keywords import fortran_keywords + """ToDo: This lot will need putting back as and when the tests that use them get imported/re-created""" # from fortran_keywords import fortran_keywords # from search_lists import ( @@ -24,9 +26,8 @@ comment_line = re.compile(r"!.*$") word_splitter = re.compile(r"\b\w+\b") -def add_error_log( - error_log: Dict, key: str = "no key", value: int = 0 -) -> Dict: + +def add_error_log(error_log: Dict, key: str = "no key", value: int = 0) -> Dict: """Add error information to the dictionary. This adds the error statement, some are more unique than others, along with the line number the error occurs on. Line number(s) are stored in a list in case the @@ -36,6 +37,7 @@ def add_error_log( error_log[key].append(value) return error_log + def remove_quoted(line: str) -> str: """Remove quoted strings from a line""" """ @@ -53,24 +55,32 @@ def remove_quoted(line: str) -> str: return result + """This is in case I return to the idea of replacing the quoted sections from above with something unique and storing them in order to put them back at some stage...""" + + def create_unique_random_string(storage_set, length: int = 7) -> str: """Create a unique random string of a given length. Creates a random string of given length and ensures it hasn't been used before.""" import random import string + new_value = "".join(random.choices(string.ascii_letters + string.digits, k=length)) while new_value in storage_set: - new_value = "".join(random.choices(string.ascii_letters + string.digits, k=length)) + new_value = "".join( + random.choices(string.ascii_letters + string.digits, k=length) + ) storage_set.add(new_value) return new_value + def remove_comments(line: str) -> str: """Remove comments from the lines : There is a bit of an assumption here that quoted text has already been removed, so that we don't accidentally remove text after an "!" in a string.""" return comment_line.sub("", line).rstrip() + def concatenate_lines(lines: List[str], line_no: int) -> str: """Concatenate the continuation lines into a single string""" # Find first line and check for continuation character. @@ -85,17 +95,25 @@ def concatenate_lines(lines: List[str], line_no: int) -> str: next_line = lines[line_no - 1] next_line = remove_comments(next_line) next_line = remove_quoted(next_line) - line += next_line.lstrip() # Concatenate with the next line, removing leading whitespace + line += ( + next_line.lstrip() + ) # Concatenate with the next line, removing leading whitespace return line + """ 3.1 Source files should only contain a single program unit """ -def r3_1_1_there_can_be_only_one(lines: List[str]) -> TestResult: # ..one programming unit in a file, that is. + + +def r3_1_1_there_can_be_only_one( + lines: List[str], +) -> TestResult: # ..one programming unit in a file, that is. """Check for multiple program units in a file: Given that a MODULE or a PROGRAM can contain multiples of the other program units, just going to confirm the first one decalred is the last one ENDed.""" program_unit_keywords = {"PROGRAM", "MODULE", "SUBROUTINE", "FUNCTION"} + # unsure if "TYPE" should be included. def find_first(lines: List[str]) -> tuple[bool, str]: for line in lines: @@ -108,10 +126,13 @@ def find_first(lines: List[str]) -> tuple[bool, str]: unit_type = keyword unit_name = unit_name_search.group(1) return (True, f"{unit_type} {unit_name}") - return (False, - "First executable line doesn't define an accepted programming unit" - f" : {line}") + return ( + False, + "First executable line doesn't define an accepted programming unit" + f" : {line}", + ) return (False, "No program unit found.") + def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, str]: for line in reversed(lines): executable_line = remove_comments(line).strip() @@ -122,11 +143,18 @@ def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, s if unit_name_search.group(1) == unit_name: return (True, "") else: - return (False, "END statement found for a different program unit: " - f"should be {unit_name} got {unit_name_search.group(1)}.") + return ( + False, + "END statement found for a different program unit: " + f"should be {unit_name} got {unit_name_search.group(1)}.", + ) else: - return (False, "Last executable line not a matching END statement for the first program unit found.") + return ( + False, + "Last executable line not a matching END statement for the first program unit found.", + ) return (False, "No matching END statement found for the first program unit.") + found_first, first_result = find_first(lines) if not found_first: failure_count = 1 @@ -148,6 +176,8 @@ def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, s output="Checked for multiple program units.", errors=error_log, ) + + """ * TODO: Modules may be used to group related variables, subroutines and functions. - Really unsure as to how to test that's what they're doing... @@ -163,6 +193,8 @@ def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, s """3.2 Headers * All programming units require a suitable copyright header.""" + + def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: """Check for crown copyright statement""" """ @@ -173,9 +205,7 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: I suspect the Perl Predeccessor did much more convoluted tests""" - comment_lines = [ - line.upper() for line in lines if line.lstrip(" ").startswith("!") - ] + comment_lines = [line.upper() for line in lines if line.lstrip(" ").startswith("!")] file_content = "\n".join(comment_lines) error_log = {} found_copyright = False @@ -194,6 +224,7 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: errors=error_log, ) + """ * TODO: Headers are an immensely important part of any code as they document what it does, and how it does it. * TODO: The description of the MODULE and its contained SUBROUTINE may be the same and thus it need not berepeated in the latter. If a MODULE contains more than one subroutine then further descriptions are required. @@ -213,6 +244,8 @@ def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: Note that CreateBC uses a limit of 100 columns, due to the nature of the object-orientated code. """ + + def r3_3_2_line_too_long(lines: List[str]) -> TestResult: """Check for lines longer than 80 characters""" failures = 0 @@ -231,12 +264,15 @@ def r3_3_2_line_too_long(lines: List[str]) -> TestResult: errors=error_log, ) + """ * TODO: Never put more than one statement on a line. * TODO: Write your program in UK English, unless you have a very good reason for not doing so.""" """3.4 Fortran style * To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" + + def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: """Check for the presence of lowercase Fortran keywords, which are taken from an imported list 'fortran_keywords'.""" @@ -265,10 +301,14 @@ def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: output=f"Checked {count + 1} lines, found {failures} failures.", errors=error_log, ) + + """ * The rest of the code may be written in either lower-case with underscores or CamelCase. """ + + def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: """Check for lowercase or CamelCase variable names only""" """ @@ -297,7 +337,9 @@ def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: if var.upper() == var: failures += 1 error_log = add_error_log( - error_log, f"Found UPPERCASE variable name in declaration at line {count}: {var}", count + error_log, + f"Found UPPERCASE variable name in declaration at line {count}: {var}", + count, ) return TestResult( checker_name="No Full Uppercase variable names", @@ -306,6 +348,8 @@ def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: output=f"Checked {count} lines, found {failures} UPPERCASE variables.", errors=error_log, ) + + """ * TODO: To improve readability, you should always use the optional space to separate the Fortran keywords. From 6a708aaf634b125891d299ab14f52510aba8c666 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Mon, 27 Apr 2026 15:36:41 +0100 Subject: [PATCH 23/27] Linter loonacy. --- .../tests/test_umdp3_conformance.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_conformance.py b/script_umdp3_checker/tests/test_umdp3_conformance.py index e488abab..1527e0d9 100644 --- a/script_umdp3_checker/tests/test_umdp3_conformance.py +++ b/script_umdp3_checker/tests/test_umdp3_conformance.py @@ -36,7 +36,7 @@ def test_from_full_list_filters_by_extension(): Path("src/script.py"), Path("README.md"), ] - check_functions = {"dummy_check": lambda lines: None} + check_functions = [] checker = StyleChecker.from_full_list( name="Fortran Checker", @@ -56,7 +56,7 @@ def test_from_full_list_returns_all_files_when_no_extensions(): checker = StyleChecker.from_full_list( name="Any Checker", file_extensions=set(), - check_functions={}, + check_functions=[], changed_files=changed_files, ) @@ -67,7 +67,7 @@ def test_from_full_list_with_no_changed_files_returns_empty_list(): checker = StyleChecker.from_full_list( name="Empty Checker", file_extensions={".py"}, - check_functions={}, + check_functions=[], changed_files=[], ) @@ -168,13 +168,13 @@ def test_detangle_file_types_expands_all_group(): def test_detangle_file_types_mixed_group_and_explicit_type(): - result = detangle_file_types({"CI", "Generic"}) + result = detangle_file_types({"CI", "AnyFile"}) - assert result == GROUP_FILE_TYPES["CI"].union({"Generic"}) + assert result == GROUP_FILE_TYPES["CI"].union({"AnyFile"}) def test_detangle_file_types_passthrough_without_groups(): - input_types = {"Fortran", "Generic"} + input_types = {"Fortran", "AnyFile"} result = detangle_file_types(set(input_types)) @@ -204,7 +204,7 @@ def check_fail(lines): return SimpleNamespace(passed=False) checker = StyleChecker( - "StyleChecker test", {"pass test": check_pass, "fail test": check_fail}, [] + "StyleChecker test", [check_pass, check_fail], [] ) result = checker.check(file_path) @@ -231,7 +231,7 @@ def check_fail(path): return SimpleNamespace(passed=False) checker = Check_Runner( - "CheckRunner test", {"pass test": check_pass, "fail test": check_fail}, [] + "CheckRunner test", [check_pass, check_fail], [] ) result = checker.check(file_path) @@ -270,14 +270,10 @@ def fake_create_free_runner(command, external_opname): # All_files got filtered to only those with 'py' extension. assert checker.files_to_check == [Path("a.py"), Path("d.py")] # The two commands get given generated 'names'. - assert set(checker.check_functions.keys()) == { - "External_operation_ruff", - "External_operation_black", - } # Check the arguments passed to create_free_runner are as expected. assert callables == [ - (["ruff", "check"], "External_operation_ruff"), - (["black", "--check"], "External_operation_black"), + (["ruff", "check"], "ruff"), + (["black", "--check"], "black"), ] @@ -556,7 +552,7 @@ def test_stylechecker_check_with_no_check_functions_passes(tmp_path: Path): file_path = tmp_path / "sample.txt" file_path.write_text("content\n") - checker = StyleChecker("EmptyChecks", {}, [file_path]) + checker = StyleChecker("EmptyChecks", [], [file_path]) result = checker.check(file_path) assert result.file_path == str(file_path) @@ -592,7 +588,7 @@ def test_create_external_runners_with_empty_commands(): ) assert checker.files_to_check == [Path("a.py"), Path("b.py")] - assert checker.check_functions == {} + assert checker.check_functions == [] # Okay, c'mon CoPilot - where's the value in testing a tiny function to write a string @@ -626,13 +622,13 @@ def test_process_arguments_verbose_and_filetype_group_expansion( monkeypatch.setattr( sys, "argv", - ["umdp3_conformance.py", "--file-types", "CI", "Generic", "-vv"], + ["umdp3_conformance.py", "--file-types", "CI", "AnyFile", "-vv"], ) args = process_arguments() assert args.volume == 5 - assert args.file_types == {"Fortran", "Python", "Generic"} + assert args.file_types == {"Fortran", "Python", "AnyFile"} def test_process_arguments_rejects_verbose_and_quiet_together( From ed1a09d15f46327afc8c41a590a9b08de62a93cc Mon Sep 17 00:00:00 2001 From: r-sharp Date: Mon, 27 Apr 2026 15:40:57 +0100 Subject: [PATCH 24/27] to requote : "I hate ruff" --- script_umdp3_checker/tests/test_umdp3_conformance.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_conformance.py b/script_umdp3_checker/tests/test_umdp3_conformance.py index 1527e0d9..c0f5287d 100644 --- a/script_umdp3_checker/tests/test_umdp3_conformance.py +++ b/script_umdp3_checker/tests/test_umdp3_conformance.py @@ -203,9 +203,7 @@ def check_fail(lines): seen.append(lines) return SimpleNamespace(passed=False) - checker = StyleChecker( - "StyleChecker test", [check_pass, check_fail], [] - ) + checker = StyleChecker("StyleChecker test", [check_pass, check_fail], []) result = checker.check(file_path) assert seen == [["a", "b"], ["a", "b"]] @@ -230,9 +228,7 @@ def check_fail(path): seen.append(path) return SimpleNamespace(passed=False) - checker = Check_Runner( - "CheckRunner test", [check_pass, check_fail], [] - ) + checker = Check_Runner("CheckRunner test", [check_pass, check_fail], []) result = checker.check(file_path) assert seen == [file_path, file_path] From 3e69e15f1d6c18d5e0e27b4aa87bf9e7bb03c8cb Mon Sep 17 00:00:00 2001 From: R Sharp Date: Thu, 30 Apr 2026 14:58:02 +0100 Subject: [PATCH 25/27] Apply suggestions from code review Mostly typos and tidying. All changes suggested by reviewers and very sensible they were too. Co-authored-by: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- script_umdp3_checker/checker_dispatch_tables.py | 2 +- script_umdp3_checker/tests/test_umdp3_rules_S3.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index 9c907043..e325f884 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -13,7 +13,7 @@ from umdp3_checker_rules import UMDP3Checker """ -TODO : This module has lost it's way. I uses a class to define methods which just +TODO : This module has lost it's way. It uses a class to define methods which just return lists of functions. I don't think a class is required for this. As the functions themselves are shifted, renamed and hopefully improved, this class should eventually get emptied out and the file removed. diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index 256ff927..eeefe160 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -10,8 +10,7 @@ from pathlib import Path # Add the current directory to Python path -sys.path.insert(0, str(Path(__file__).parent.parent)) -from umdp3_rules_S3 import ( +from ..umdp3_rules_S3 import ( remove_comments, remove_quoted, concatenate_lines, @@ -73,7 +72,7 @@ def test_modify_fortran_lines(example_fortran_lines): ["add", 32, ["This is an added line."]], ] modified_lines = modify_fortran_lines(example_fortran_lines, changes_list) - # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added aobve in the file and -1 for every line deleted above in the file. + # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added above in the file and -1 for every line deleted above in the file. # for line_no, line in enumerate(modified_lines, 1): # print(f"line [{line_no:04}]: {line}") assert modified_lines[11] == "This is a replacement line." @@ -135,11 +134,11 @@ def test_concatenate_lines(example_fortran_lines): Setting up a test, may involve multiple changes to the demo Fortran file, hence the complex entries in the parametrization. Each error found is recorded with the line number(s) it was found on using the Error text as a dict key, and the line no(s) as a list, which means finding 'use' and 'Use' in the code would generate 2 different keys in the dict. Then a single value recording the total number of failures is also included in the 'TestResult' object. - So for the dictionary of errors returned, we have to check they match, which at present involves checking it's 'len' but also that all the keys in one are in the other, i.e. youre not accidentally getting a matching count but different errors. + So for the dictionary of errors returned, we have to check they match, which at present involves checking it's 'len' but also that all the keys in one are in the other, i.e. you're not accidentally getting a matching count but different errors. Then for each error(key) in the dict, you need to check how many lines it occured on, and that the line numbers match, i.e. the list of lines numbers is a match with the expected list of line numbers. There has to be a better way..... - As a side note, might it be better to write a function to compare 2 'TestResult' objects, which would be more robust to changes in the structure of the 'TestResult' object, and also make the test code more readable? If so, would it's place be here in the testing, or as a method in the 'TestResult' class itself? + As a side note, might it be better to write a function to compare 2 'TestResult' objects, which would be more robust to changes in the structure of the 'TestResult' object, and also make the test code more readable? If so, would its place be here in the testing, or as a method in the 'TestResult' class itself? """ # ================================================================= From 89719c1990e2d09e7ce323986bb72294e1dcc364 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 30 Apr 2026 16:04:40 +0100 Subject: [PATCH 26/27] Trying to pass the unit tests.... --- script_umdp3_checker/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 script_umdp3_checker/tests/__init__.py diff --git a/script_umdp3_checker/tests/__init__.py b/script_umdp3_checker/tests/__init__.py new file mode 100644 index 00000000..e69de29b From deff95695ccaae05d00c7cfa05c48559caf2acd5 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Thu, 30 Apr 2026 16:05:05 +0100 Subject: [PATCH 27/27] Fungle Worzle Aardvark Frakkin' Apostorphes --- script_umdp3_checker/tests/test_umdp3_rules_S3.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py index eeefe160..495f7a61 100644 --- a/script_umdp3_checker/tests/test_umdp3_rules_S3.py +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -6,8 +6,8 @@ # from pyparsing import remove_quotes import pytest -import sys -from pathlib import Path +# import sys +# from pathlib import Path # Add the current directory to Python path from ..umdp3_rules_S3 import ( @@ -20,7 +20,6 @@ r3_4_1_capitalised_keywords, r3_4_2_no_full_uppercase_variable_names, ) -# from umdp3_checker_rules import TestResult, UMDP3Checker def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: