diff --git a/.github/linters/.ruff.toml b/.github/linters/.ruff.toml index 9cb5f660..02338e42 100644 --- a/.github/linters/.ruff.toml +++ b/.github/linters/.ruff.toml @@ -1,6 +1,7 @@ cache-dir = "/tmp/.ruff_cache" line-length = 88 output-format = "grouped" +exclude = ["*.F90"] [lint] # Allow unused variables when underscore-prefixed. diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index 51aac77f..e325f884 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -9,19 +9,16 @@ Python translation of the original Perl module """ -from typing import Dict, Callable +from typing import Callable from umdp3_checker_rules import UMDP3Checker """ -TODO : This list was checked to ensure it had something for each - test in the original. -TODO : This needs to be re-checked. -TODO : And the tests need to be compared to the original Perl tests - to ensure they are equivalent. -""" +TODO : This module has lost it's way. It uses a class to define methods which just + return lists of functions. I don't think a class is required for this. + As the functions themselves are shifted, renamed and hopefully improved, this + class should eventually get emptied out and the file removed. -# Declare version -VERSION = "13.5.0" +""" class CheckerDispatchTables: @@ -30,86 +27,66 @@ class CheckerDispatchTables: def __init__(self): self.umdp3_checker = UMDP3Checker() - def get_diff_dispatch_table_fortran(self) -> Dict[str, Callable]: + def get_diff_dispatch_table_fortran(self) -> list[Callable]: """Get dispatch table for Fortran diff tests""" - return { - # 'Captain Daves doomed test of destruction': - # self.umdp3_checker.capitulated_keywords, - "Lowercase Fortran keywords not permitted": self.umdp3_checker.capitalised_keywords, - "OpenMP sentinels not in column one": self.umdp3_checker.openmp_sentinels_in_column_one, - "Omitted optional space in keywords": self.umdp3_checker.unseparated_keywords, - "GO TO other than 9999": self.umdp3_checker.go_to_other_than_9999, - "WRITE without format": self.umdp3_checker.write_using_default_format, - "Lowercase or CamelCase variable names only": self.umdp3_checker.lowercase_variable_names, - "Use of dimension attribute": self.umdp3_checker.dimension_forbidden, - "Continuation lines shouldn't start with &": self.umdp3_checker.ampersand_continuation, - "Use of EQUIVALENCE or PAUSE": self.umdp3_checker.forbidden_keywords, - "Use of older form of relational operator (.GT. etc.)": self.umdp3_checker.forbidden_operators, - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, - "Line includes tab character": self.umdp3_checker.tab_detection, - "USEd printstatus_mod instead of umPrintMgr": self.umdp3_checker.printstatus_mod, - "Used PRINT rather than umMessage and umPrint": self.umdp3_checker.printstar, - "Used WRITE(6) rather than umMessage and umPrint": self.umdp3_checker.write6, - "Used um_fort_flush rather than umPrintFlush": self.umdp3_checker.um_fort_flush, - "Used Subversion keyword substitution which is prohibited": self.umdp3_checker.svn_keyword_subst, - "Used !OMP instead of !$OMP": self.umdp3_checker.omp_missing_dollar, - "Used #ifdef/#ifndef rather than #if defined() or #if !defined()": self.umdp3_checker.cpp_ifdef, - "Presence of fortran comment in CPP directive": self.umdp3_checker.cpp_comment, - "Used an archaic fortran intrinsic function": self.umdp3_checker.obsolescent_fortran_intrinsic, - "EXIT statements should be labelled": self.umdp3_checker.exit_stmt_label, - "Intrinsic modules must be USEd with an INTRINSIC " - + "keyword specifier": self.umdp3_checker.intrinsic_modules, - "READ statements should have an explicit UNIT= as " - + "their first argument": self.umdp3_checker.read_unit_args, - } + return [ + self.umdp3_checker.openmp_sentinels_in_column_one, + self.umdp3_checker.unseparated_keywords, + self.umdp3_checker.go_to_other_than_9999, + self.umdp3_checker.write_using_default_format, + self.umdp3_checker.dimension_forbidden, + self.umdp3_checker.ampersand_continuation, + self.umdp3_checker.forbidden_keywords, + self.umdp3_checker.forbidden_operators, + self.umdp3_checker.tab_detection, + self.umdp3_checker.printstatus_mod, + self.umdp3_checker.printstar, + self.umdp3_checker.write6, + self.umdp3_checker.um_fort_flush, + self.umdp3_checker.svn_keyword_subst, + self.umdp3_checker.omp_missing_dollar, + self.umdp3_checker.cpp_ifdef, + self.umdp3_checker.cpp_comment, + self.umdp3_checker.obsolescent_fortran_intrinsic, + self.umdp3_checker.exit_stmt_label, + self.umdp3_checker.intrinsic_modules, + self.umdp3_checker.read_unit_args, + ] - def get_file_dispatch_table_fortran( - self, filename: str = "" - ) -> Dict[str, Callable]: + def get_file_dispatch_table_fortran(self, filename: str = "") -> list[Callable]: """Get dispatch table for Fortran file tests""" - return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, - "File is missing at least one IMPLICIT NONE": self.umdp3_checker.implicit_none, - "Never use STOP or CALL abort": self.umdp3_checker.forbidden_stop, - "Use of Fortran function as a variable name": self.umdp3_checker.intrinsic_as_variable, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used (/ 1,2,3 /) form of array initialisation, rather than " - + "[1,2,3] form": self.umdp3_checker.array_init_form, - } + return [ + self.umdp3_checker.retire_if_def, + self.umdp3_checker.implicit_none, + self.umdp3_checker.forbidden_stop, + self.umdp3_checker.intrinsic_as_variable, + self.umdp3_checker.check_code_owner, + self.umdp3_checker.array_init_form, + ] - def get_diff_dispatch_table_c(self) -> Dict[str, Callable]: + def get_diff_dispatch_table_c(self) -> list[Callable]: """Get dispatch table for C diff tests""" - return { - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, - "Line includes tab character": self.umdp3_checker.tab_detection, - "Fixed-width Integer format specifiers must have a space " - + 'between themselves and the string delimiter (the " character)': self.umdp3_checker.c_integral_format_specifiers, - } + return [ + self.umdp3_checker.tab_detection, + self.umdp3_checker.c_integral_format_specifiers, + ] - def get_file_dispatch_table_c(self) -> Dict[str, Callable]: + def get_file_dispatch_table_c(self) -> list[Callable]: """Get dispatch table for C file tests""" - return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, - "Used a deprecated C identifier": self.umdp3_checker.c_deprecated, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used an _OPENMP if-def without also testing against " - + "SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does " - + "not come first in the test.)": self.umdp3_checker.c_openmp_define_pair_thread_utils, - "Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def " - + "test in a logical combination with a third macro": self.umdp3_checker.c_openmp_define_no_combine, - "Used !defined(_OPENMP) rather than defined(_OPENMP) " - + "with #else branch": self.umdp3_checker.c_openmp_define_not, - "Used an omp #pragma (or #include ) without " - + "protecting it with an _OPENMP if-def": self.umdp3_checker.c_protect_omp_pragma, - "Used the #ifdef style of if-def, rather than the #if " - + "defined() style": self.umdp3_checker.c_ifdef_defines, - "C Unit does not end with a final newline character": self.umdp3_checker.c_final_newline, - } + return [ + self.umdp3_checker.retire_if_def, + self.umdp3_checker.c_deprecated, + self.umdp3_checker.check_code_owner, + self.umdp3_checker.c_openmp_define_pair_thread_utils, + self.umdp3_checker.c_openmp_define_no_combine, + self.umdp3_checker.c_openmp_define_not, + self.umdp3_checker.c_protect_omp_pragma, + self.umdp3_checker.c_ifdef_defines, + self.umdp3_checker.c_final_newline, + ] - def get_file_dispatch_table_all(self) -> Dict[str, Callable]: + def get_file_dispatch_table_all(self) -> list[Callable]: """Get dispatch table for universal file tests""" - return { - "Line includes trailing whitespace character(s)": self.umdp3_checker.line_trail_whitespace, - } + return [ + self.umdp3_checker.line_trail_whitespace, + ] diff --git a/script_umdp3_checker/fortran_keywords.py b/script_umdp3_checker/fortran_keywords.py index d9b6be82..a9d5e4b4 100644 --- a/script_umdp3_checker/fortran_keywords.py +++ b/script_umdp3_checker/fortran_keywords.py @@ -14,6 +14,8 @@ """ TODO: Current order may not be perfect, and could possibly be reviewed. However, it is probably 'good enough' for now. +TODO: Document method used to decide on order. I think it was a grep and count for the +frequency the keywords appear in the source code. """ fortran_keywords = ( diff --git a/script_umdp3_checker/tests/__init__.py b/script_umdp3_checker/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/script_umdp3_checker/tests/conftest.py b/script_umdp3_checker/tests/conftest.py new file mode 100644 index 00000000..ff03af93 --- /dev/null +++ b/script_umdp3_checker/tests/conftest.py @@ -0,0 +1,9 @@ +from pathlib import Path +import pytest + + +@pytest.fixture(scope="session") +def example_fortran_lines() -> list[str]: + """Return the example Fortran source as a list of lines for tests.""" + test_dir = Path(__file__).resolve().parent + return (test_dir / "example_fortran_code.F90").read_text().splitlines() diff --git a/script_umdp3_checker/tests/example_fortran_code.F90 b/script_umdp3_checker/tests/example_fortran_code.F90 new file mode 100644 index 00000000..9e78967c --- /dev/null +++ b/script_umdp3_checker/tests/example_fortran_code.F90 @@ -0,0 +1,121 @@ +! fortls: ignore file +! fortitude: ignore file +! *****************************COPYRIGHT******************************* +! (C) Crown copyright Met Office. All rights reserved. +! For further details please refer to the file COPYRIGHT.txt +! which you should have received as part of this distribution. +! *****************************COPYRIGHT******************************* +! +! An example routine depicting how one should construct new code +! to meet the UMDP3 coding standards. +! +MODULE example_mod +IMPLICIT NONE +! Description: +! A noddy routine that illustrates the way to apply the UMDP3 +! coding standards to new code to help code developers +! pass code reviews. +! +! Method: +! In this routine we apply many of the UMDP3 features +! to construct a simple routine. The references on the RHS take the reader +! to the appropriate section of the UMDP3 guide with further details. +! +! Code Owner: Please refer to the UM file CodeOwners.txt +! This file belongs in section: Control +! +! Code description: +! Language: Fortran 2003. +! This code is written to UMDP3 standards. +CHARACTER(LEN=*), PARAMETER, PRIVATE :: ModuleName="EXAMPLE_MOD" +CONTAINS +! Subroutine Interface: +SUBROUTINE example (xlen,ylen,l_unscale,input1,input2, & + output, l_loud_opt) +! Description: +! Nothing further to add to module description. +USE atmos_constants_mod, ONLY: r +USE ereport_mod, ONLY: ereport +USE parkind1, ONLY: jpim, jprb +USE umprintMgr, ONLY: umprint,ummessage,PrNorm +USE errormessagelength_mod, ONLY: errormessagelength +USE yomhook, ONLY: lhook, dr_hook +IMPLICIT NONE +! Subroutine arguments +INTEGER, INTENT(IN) :: xlen !Length of first dimension of the arrays. +INTEGER, INTENT(IN) :: ylen !Length of second dimension of the arrays. +LOGICAL, INTENT(IN) :: l_unscale ! switch scaling off. +REAL, INTENT(IN) :: input1(xlen, ylen) !First input array +REAL, INTENT(IN OUT) :: input2(xlen, ylen) ! INOUT Second input array +REAL, INTENT(OUT) :: output(xlen, ylen) !Contains the result +REAL, PARAMETER :: b_pollonator(4) = [ 0.0, 11.2, 6.6, 3.6] +LOGICAL, INTENT(IN), OPTIONAL :: l_loud_opt !optional debug flag +! Local variables +INTEGER(KIND=jpim), PARAMETER :: zhook_in = 0 ! DrHook tracing entry +INTEGER(KIND=jpim), PARAMETER :: zhook_out = 1 ! DrHook tracing exit +INTEGER :: i ! Loop counter +INTEGER :: j ! Loop counter +INTEGER :: icode ! error code for EReport +LOGICAL :: l_loud ! debug flag (default false unless l_loud_opt is used) +REAL, ALLOCATABLE :: field(:,:) ! Scaling array to fill. +REAL, ALLOCATABLE :: field2(:,:) ! Scaling array to fill. +REAL(KIND=jprb) :: zhook_handle ! DrHook tracing +CHARACTER(LEN=*), PARAMETER :: RoutineName="EXAMPLE" +CHARACTER(LEN=errormessagelength) :: Cmessage ! used for EReport +CHARACTER(LEN=256) :: my_char ! string for output +! End of header +IF (lhook) CALL dr_hook(ModuleName//":"//RoutineName,zhook_in,zhook_handle) +! Set debug flag if argument is present +l_loud = .FALSE. +IF (PRESENT(l_loud_opt)) THEN + l_loud = l_loud_opt +END IF +my_char & + = "This is a very very very very very very very " & + // "long character assignment" ! A pointless long character example. +icode=0 +! verbosity choice, output some numbers to aid with debugging +! protected by printstatus>=PrNorm and pe=0 +WRITE(ummessage,"(A,I0)")"xlen=",xlen +CALL umprint(ummessage,level=PrNorm,pe=0,src="example_mod") +WRITE(ummessage,"(A,I0)")"ylen=",ylen +CALL umprint(ummessage,level=PrNorm,pe=0,src="example_mod") +IF (l_loud) CALL umprint(my_char,level=PrNormal,src="example_mod") +! Allocate and initialise scaling array +! Noddy code warns user when scalling is not employed. +IF ( l_unscale ) THEN + icode = -100 ! set up WARNING message + ALLOCATE(field( 1,1 ) ) + ALLOCATE(field2( 1,1 ) ) + cmessage="Scaling is switched off in run!" + CALL ereport(RoutineName,icode,cmessage) +ELSE + ALLOCATE(field( xlen, ylen ) ) + ALLOCATE(field2( xlen, ylen ) ) + DO j=1,ylen + DO i=1,xlen + field(i, j) = (1.0*i) + (2.0*j) + input2(i, j) = input2(i, j) * field(i, j) + field2(i, j) = (1.0*i) - (2.0*j) & + + (3.0*i*j) + (4.0*i**2) + field(i, j)*2 + END DO + END DO +END IF +! The main calculation of the routine, using OpenMP. +!$OMP PARALLEL DEFAULT(NONE) & +!$OMP SHARED(xlen,ylen,input1,input2,field,output) & +!$OMP PRIVATE(i, j ) +!$OMP DO SCHEDULE(STATIC) +DO j = 1, ylen + i_loop: DO i = 1, xlen + ! Calculate the Output value: + output(i, j) = (input1(i, j) * input2(i, j)) + END DO i_loop +END DO ! j loop +!$OMP END DO +!$OMP END PARALLEL +DEALLOCATE (field) +IF (lhook) CALL dr_hook(ModuleName//":"//RoutineName,zhook_out,zhook_handle) +RETURN +END SUBROUTINE example +END MODULE example_mod \ No newline at end of file diff --git a/script_umdp3_checker/tests/test_fortran_checks.py b/script_umdp3_checker/tests/test_fortran_checks.py index be495a37..721f91cc 100644 --- a/script_umdp3_checker/tests/test_fortran_checks.py +++ b/script_umdp3_checker/tests/test_fortran_checks.py @@ -14,7 +14,7 @@ # Prevent pytest from trying to collect TestResult as more tests: -TestResult.__test__ = False +TestResult.__test__ = False # type: ignore """ ToDo : THere has been a LOT of refactoring in the umdp3 module since these @@ -25,29 +25,6 @@ date. Especially as errors is (I think) only checking for the presence of a given string in the keys of the error log dict. """ -keyword_data = [ - ("IF THEN END", 0, {}, "All UPPERCASE keywords"), - ("if then end", 3, {"lowercase keyword: if"}, "All lowercase keywords"), - ("If Then End", 3, {"lowercase keyword: If"}, "All mixed case keywords"), - ("foo bar baz", 0, {}, "No keywords"), - ("! if then end", 0, {}, "Commented keywords"), -] -keyword_test_parameters = [data[:3] for data in keyword_data] -keyword_test_ids = [data[3] for data in keyword_data] - - -@pytest.mark.parametrize( - "lines, expected_result, expected_errors", - keyword_test_parameters, - ids=keyword_test_ids, -) -def test_keywords(lines, expected_result, expected_errors): - checker = UMDP3Checker() - result = checker.capitalised_keywords([lines]) - assert result.failure_count == expected_result - for error in expected_errors: - assert error in result.errors - fake_code_block = ["PROGRAM test", "IMPLICIT NONE", "INTEGER :: i", "END PROGRAM"] implicit_none_paramters = [ @@ -107,6 +84,18 @@ def test_openmp_sentinels_in_column_one(lines, expected_result): (["i=0", "i=i+1", "PRINT*,i"], 0, "No keywords"), (["PROGRAM test", "i=0", "ENDIF"], 1, "One keyword unseparated"), (["i=0", "ENDPARALLELDO", "END DO"], 1, "One keyword unseparated in middle"), + ( + [ + "REAL, INTENT(IN OUT) :: lambda_pole !INOUT Longitude of pole", + "REAL, INTENT(OUT) :: Dave", + "DO", + "nothing", + "END DO", + ], + 0, + "unseparated keyword in comment", + ), + (["#endif", "i=i+1", "PRINT*,i"], 0, "Safely ignore cpp commands"), ] @@ -167,39 +156,6 @@ def test_write_using_default_format(lines, expected_result): assert result.failure_count == expected_result -test_lowercase_variable_names_parameters = [ - (["INTEGER :: lowercase_variable"], 0, "Lowercase variable name"), - (["REAL :: Lowercase_Variable"], 0, "Pascal case variable name"), - (["CHARACTER :: LOWERCASE_VARIABLE"], 1, "Uppercase variable name"), - ( - [" REAL :: lowercase_variable"], - 0, - "Lowercase variable name with leading whitespace", - ), - ( - [" CHARACTER :: Lowercase_Variable"], - 0, - "Pascal case variable name with leading whitespace", - ), - ( - [" INTEGER :: LOWERCASE_VARIABLE"], - 1, - "Uppercase variable name with leading whitespace", - ), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_lowercase_variable_names_parameters], - ids=[data[2] for data in test_lowercase_variable_names_parameters], -) -def test_lowercase_variable_names(lines, expected_result): - checker = UMDP3Checker() - result = checker.lowercase_variable_names(lines) - assert result.failure_count == expected_result - - test_dimension_forbidden_parameters = [ (["REAL :: array(ARR_LEN)"], 0, "Dimension specified in variable declaration"), (["REAL :: array"], 0, "No dimension specified in variable declaration"), @@ -296,29 +252,6 @@ def test_forbidden_operators(lines, expected_result): assert result.failure_count == expected_result -test_line_over_80chars_parameters = [ - ( - [ - " PRINT *, 'This line is definitely way over the eighty character limit set by the UM coding standards'" - ], - 1, - "Line over 80 characters", - ), - ([" PRINT *, 'This line is within the limit'"], 0, "Line within 80 characters"), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_line_over_80chars_parameters], - ids=[data[2] for data in test_line_over_80chars_parameters], -) -def test_line_over_80chars(lines, expected_result): - checker = UMDP3Checker() - result = checker.line_over_80chars(lines) - assert result.failure_count == expected_result - - test_tab_detection_parameters = [ ([" PRINT *, 'This line has no tabs'"], 0, "No tabs"), ([" PRINT *, 'This line has a tab\tcharacter'"], 1, "Line with tab character"), @@ -640,25 +573,6 @@ def test_intrinsic_as_variable(lines, expected_result): assert result.failure_count == expected_result -test_check_crown_copyright_parameters = [ - (["! Crown copyright 2024"], 0, "Correct crown copyright statement"), - (["! Copyright 2024"], 0, "A copyright statement"), - (["! This is a comment"], 1, "No crown copyright statement"), - (["! This is a Crown"], 1, "No crown copyright statement"), -] - - -@pytest.mark.parametrize( - "lines, expected_result", - [data[:2] for data in test_check_crown_copyright_parameters], - ids=[data[2] for data in test_check_crown_copyright_parameters], -) -def test_check_crown_copyright(lines, expected_result): - checker = UMDP3Checker() - result = checker.check_crown_copyright(lines) - assert result.failure_count == expected_result - - test_check_code_owner_parameters = [ (["! Code Owner: John Doe"], 0, "code owner statement"), (["! Code Owner : John Doe"], 0, "Another code owner statement"), diff --git a/script_umdp3_checker/tests/test_umdp3.py b/script_umdp3_checker/tests/test_umdp3.py index 2f4a5db8..bec81679 100644 --- a/script_umdp3_checker/tests/test_umdp3.py +++ b/script_umdp3_checker/tests/test_umdp3.py @@ -20,7 +20,7 @@ from checker_dispatch_tables import CheckerDispatchTables # Prevent pytest from trying to collect TestResult as more tests: -TestResult.__test__ = False +TestResult.__test__ = False # type: ignore def test_basic_functionality(): @@ -33,10 +33,10 @@ def test_basic_functionality(): # Test line length check test_lines = [ - "This is a short line", - "This is a very long line that exceeds eighty characters and should trigger a failure in the line length test", + "This is a line with no trailing space", + "This is line with a trailing space ", ] - result = umdp3_checker.line_over_80chars(test_lines) + result = umdp3_checker.line_trail_whitespace(test_lines) print( f"Line length test: {'PASS' if result.failure_count > 0 else 'FAIL'} (expected failure)" ) diff --git a/script_umdp3_checker/tests/test_umdp3_conformance.py b/script_umdp3_checker/tests/test_umdp3_conformance.py index e488abab..c0f5287d 100644 --- a/script_umdp3_checker/tests/test_umdp3_conformance.py +++ b/script_umdp3_checker/tests/test_umdp3_conformance.py @@ -36,7 +36,7 @@ def test_from_full_list_filters_by_extension(): Path("src/script.py"), Path("README.md"), ] - check_functions = {"dummy_check": lambda lines: None} + check_functions = [] checker = StyleChecker.from_full_list( name="Fortran Checker", @@ -56,7 +56,7 @@ def test_from_full_list_returns_all_files_when_no_extensions(): checker = StyleChecker.from_full_list( name="Any Checker", file_extensions=set(), - check_functions={}, + check_functions=[], changed_files=changed_files, ) @@ -67,7 +67,7 @@ def test_from_full_list_with_no_changed_files_returns_empty_list(): checker = StyleChecker.from_full_list( name="Empty Checker", file_extensions={".py"}, - check_functions={}, + check_functions=[], changed_files=[], ) @@ -168,13 +168,13 @@ def test_detangle_file_types_expands_all_group(): def test_detangle_file_types_mixed_group_and_explicit_type(): - result = detangle_file_types({"CI", "Generic"}) + result = detangle_file_types({"CI", "AnyFile"}) - assert result == GROUP_FILE_TYPES["CI"].union({"Generic"}) + assert result == GROUP_FILE_TYPES["CI"].union({"AnyFile"}) def test_detangle_file_types_passthrough_without_groups(): - input_types = {"Fortran", "Generic"} + input_types = {"Fortran", "AnyFile"} result = detangle_file_types(set(input_types)) @@ -203,9 +203,7 @@ def check_fail(lines): seen.append(lines) return SimpleNamespace(passed=False) - checker = StyleChecker( - "StyleChecker test", {"pass test": check_pass, "fail test": check_fail}, [] - ) + checker = StyleChecker("StyleChecker test", [check_pass, check_fail], []) result = checker.check(file_path) assert seen == [["a", "b"], ["a", "b"]] @@ -230,9 +228,7 @@ def check_fail(path): seen.append(path) return SimpleNamespace(passed=False) - checker = Check_Runner( - "CheckRunner test", {"pass test": check_pass, "fail test": check_fail}, [] - ) + checker = Check_Runner("CheckRunner test", [check_pass, check_fail], []) result = checker.check(file_path) assert seen == [file_path, file_path] @@ -270,14 +266,10 @@ def fake_create_free_runner(command, external_opname): # All_files got filtered to only those with 'py' extension. assert checker.files_to_check == [Path("a.py"), Path("d.py")] # The two commands get given generated 'names'. - assert set(checker.check_functions.keys()) == { - "External_operation_ruff", - "External_operation_black", - } # Check the arguments passed to create_free_runner are as expected. assert callables == [ - (["ruff", "check"], "External_operation_ruff"), - (["black", "--check"], "External_operation_black"), + (["ruff", "check"], "ruff"), + (["black", "--check"], "black"), ] @@ -556,7 +548,7 @@ def test_stylechecker_check_with_no_check_functions_passes(tmp_path: Path): file_path = tmp_path / "sample.txt" file_path.write_text("content\n") - checker = StyleChecker("EmptyChecks", {}, [file_path]) + checker = StyleChecker("EmptyChecks", [], [file_path]) result = checker.check(file_path) assert result.file_path == str(file_path) @@ -592,7 +584,7 @@ def test_create_external_runners_with_empty_commands(): ) assert checker.files_to_check == [Path("a.py"), Path("b.py")] - assert checker.check_functions == {} + assert checker.check_functions == [] # Okay, c'mon CoPilot - where's the value in testing a tiny function to write a string @@ -626,13 +618,13 @@ def test_process_arguments_verbose_and_filetype_group_expansion( monkeypatch.setattr( sys, "argv", - ["umdp3_conformance.py", "--file-types", "CI", "Generic", "-vv"], + ["umdp3_conformance.py", "--file-types", "CI", "AnyFile", "-vv"], ) args = process_arguments() assert args.volume == 5 - assert args.file_types == {"Fortran", "Python", "Generic"} + assert args.file_types == {"Fortran", "Python", "AnyFile"} def test_process_arguments_rejects_verbose_and_quiet_together( diff --git a/script_umdp3_checker/tests/test_umdp3_rules_S3.py b/script_umdp3_checker/tests/test_umdp3_rules_S3.py new file mode 100644 index 00000000..8eaa3141 --- /dev/null +++ b/script_umdp3_checker/tests/test_umdp3_rules_S3.py @@ -0,0 +1,500 @@ +# ----------------------------------------------------------------------------- +# (C) Crown copyright Met Office. All rights reserved. +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- + +# from pyparsing import remove_quotes +import pytest +# import sys +# from pathlib import Path + +# Add the current directory to Python path +from ..umdp3_rules_S3 import ( + remove_comments, + remove_quoted, + concatenate_lines, + r3_1_1_there_can_be_only_one, + r3_2_1_check_crown_copyright, + r3_3_2_line_too_long, + r3_4_1_capitalised_keywords, + r3_4_2_no_full_uppercase_variable_names, +) + + +def modify_fortran_lines(lines_in: list[str], changes: list[list]) -> list[str]: + """Return a copy of example_fortran_lines with changes applied. + + ``changes`` is a list of operation list, each with: + - ``[, , []]`` + - where : = "replace" would replace line N with the new line(s) + - = delete" would remove line N + - and ="add" would insert the new line(s) after line N. + + Operations are applied in descending line order so that earlier + line numbers are not shifted by later mutations. + """ + lines = lines_in.copy() + for change in sorted(changes, key=lambda o: o[1], reverse=True): + idx = int(change[1]) - 1 + if change[0] == "replace": + del lines[idx] + print(f"Line {idx} : deleting line.") + for new_line in change[2]: + print(f'Line {idx} : replacing with "{new_line}"') + lines.insert(idx, new_line) + idx += 1 + elif change[0] == "delete": + print(f"Line {idx} : deleting line.") + del lines[idx] + elif change[0] == "add": + for new_line in change[2]: + print(f'Line {idx} : adding "{new_line}"') + lines.insert(idx, new_line) + idx += 1 + else: + raise ValueError(f"Unknown operation: {change[0]}") + # for count, line in enumerate(lines, 1): + # print(f"line [{count}]: {line}") + return lines + + +# ================================================================= +"""First : test the helper functions in umdp3_rules_S3.py.""" + + +def test_modify_fortran_lines(example_fortran_lines): + """TODO: Does this need to be more rigorous ?""" + changes_list = [ + ["replace", 12, ["This is a replacement line."]], + ["delete", 22, None], + ["add", 32, ["This is an added line."]], + ] + modified_lines = modify_fortran_lines(example_fortran_lines, changes_list) + # Line no.s below have to be carefully calculated. Basic is line no of change -1 but then add one for every line added above in the file and -1 for every line deleted above in the file. + # for line_no, line in enumerate(modified_lines, 1): + # print(f"line [{line_no:04}]: {line}") + assert modified_lines[11] == "This is a replacement line." + assert len(modified_lines) == len( + example_fortran_lines + ) # One line deleted, One added + # Added @ 32, so 31, but one line deleted above, so 30 - seeemples + assert modified_lines[30] == "This is an added line." + + +def test_remove_quoted(example_fortran_lines): + for line in example_fortran_lines.copy(): + uncommented_line = remove_quoted(line) + assert '"' not in uncommented_line + assert "'" not in uncommented_line + + +def test_remove_comments(example_fortran_lines): + for line in example_fortran_lines.copy(): + line = remove_quoted(line) + uncommented_line = remove_comments(line) + comment_location = line.find("!") + if comment_location != -1: + assert uncommented_line == line[:comment_location].rstrip() + assert "!" not in uncommented_line + + +def test_concatenate_lines(example_fortran_lines): + for line_no, line in enumerate(example_fortran_lines.copy(), 1): + if line.find("&") >= 0: + concatenated_line = concatenate_lines(example_fortran_lines.copy(), line_no) + assert "&" not in concatenated_line + # This bit has to be a bit hard-wired and is going to break every time the example Fortran file line numbers are changed. If you can think of a better method - go ahead. + if line_no == 31: + expected_line = ( + "SUBROUTINE example (xlen,ylen,l_unscale,input1," + + "input2, output, l_loud_opt)" + ) + assert concatenated_line == expected_line + elif line_no == 70: + # This will look odd as the quoted text has been removed. + expected_line = ( + "my_char " + + " = // " + ) + assert concatenated_line == expected_line + elif line_no == 96: + expected_line = ( + " field2(i, j) = (1.0*i) - (2.0*j) " + + " + (3.0*i*j) + " + + "(4.0*i**2) + field(i, j)*2" + ) + assert concatenated_line == expected_line + + +# ================================================================= + +"""These examples hopefully demonstrate some of the complexity available... + Setting up a test, may involve multiple changes to the demo Fortran file, hence the complex entries in the parametrization. + Each error found is recorded with the line number(s) it was found on using the Error text as a dict key, and the line no(s) as a list, which means finding 'use' and 'Use' in the code would generate 2 different keys in the dict. + Then a single value recording the total number of failures is also included in the 'TestResult' object. + So for the dictionary of errors returned, we have to check they match, which at present involves checking it's 'len' but also that all the keys in one are in the other, i.e. you're not accidentally getting a matching count but different errors. + Then for each error(key) in the dict, you need to check how many lines it occured on, and that the line numbers match, i.e. the list of lines numbers is a match with the expected list of line numbers. + + There has to be a better way..... + As a side note, might it be better to write a function to compare 2 'TestResult' objects, which would be more robust to changes in the structure of the 'TestResult' object, and also make the test code more readable? If so, would its place be here in the testing, or as a method in the 'TestResult' class itself? + """ + +# ================================================================= + + +@pytest.mark.parametrize( + "changes_list, expected_passed, expected_failure_count, expected_errors", + [ + # Invalid: No program unit found (delete MODULE line) + ( + [["replace", 12, ["! No module declaration here"]]], + False, + 1, + { + "First executable line doesn't define an accepted programming unit :" + " IMPLICIT NONE": [0] + }, + ), + # Invalid: Mismatched END statement + ( + [["replace", 121, ["END MODULE wrong_mod_name"]]], + False, + 1, + { + "END statement found for a different program unit: should be example_mod got wrong_mod_name.": [ + 0 + ] + }, + ), + # Invalid: No END statement found + ( + [["replace", 121, ["! Missing END MODULE"]]], + False, + 1, + { + "Last executable line not a matching END statement for the first program unit found.": [ + 0 + ] + }, + ), + # Pass ccp directives #if !defined(MCT) + ( + [["replace", 1, ["#if !defined(MCT)"]], ["add", 121, ["#endif"]]], + True, + 0, + {}, + ), + # Valid: MODULE example_mod ... END MODULE example_mod (no changes) + ([], True, 0, {}), + ], + ids=[ + "No program unit found", + "Mismatched END statement", + "No END statement", + "Valid checking cpp directives pass", + "Valid module with matching END", + ], +) +def test_r3_1_1_there_can_be_only_one( + example_fortran_lines, + changes_list, + expected_passed, + expected_failure_count, + expected_errors, +): + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_1_1_there_can_be_only_one(modified_fortran_lines) + errors = result.errors + for error, lines_list in errors.items(): + assert error in expected_errors + for line_no in lines_list: + assert line_no in expected_errors[error] + assert len(lines_list) == len(expected_errors[error]) + assert len(errors) == len(expected_errors) + assert result.failure_count == expected_failure_count + assert result.passed == expected_passed + + +# ================================================================= + + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + ["delete", 3, None], + ["delete", 4, None], + ["delete", 5, None], + ["delete", 6, None], + ["delete", 7, None], + ], + 1, + {"missing copyright or crown copyright statement": [0]}, + ), + ([], 0, {}), # No changes, expect no errors + ], + ids=["Missing copyright statement", "copyright statement present"], +) +def test_r3_2_1_check_crown_copyright( + example_fortran_lines, changes_list, expected_result, expected_errors +): + # checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_2_1_check_crown_copyright(modified_fortran_lines) + failure_count = result.failure_count + errors = result.errors + for error, lines_list in errors.items(): + assert error in expected_errors + for line_no in lines_list: + assert line_no in expected_errors[error] + assert len(lines_list) == len(expected_errors[error]) + assert len(errors) == len(expected_errors) + assert failure_count == expected_result + + +# ================================================================= + + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + [ + "replace", + 73, + [ + ' = "This is a very very very very very very very "' + + " &" + ], + ], + [ + "replace", + 117, + [ + "IF (lhook) CALL dr_hook(ModuleName// " + + '":" // RoutineName, zhook_out, zhook_handle) ! extra comment' + ], + ], + ], + 2, + {"line too long": [73, 117]}, + ), + ([], 0, {}), # No changes, expect no errors + ], + ids=["3 line too long Errors", "No line too long Errors"], +) +def test_r3_3_2_line_too_long( + example_fortran_lines, changes_list, expected_result, expected_errors +): + # checker = UMDP3Checker() + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_3_2_line_too_long(modified_fortran_lines) + failure_count = result.failure_count + errors = result.errors + for error, lines_list in errors.items(): + assert error in expected_errors + for line_no in lines_list: + assert line_no in expected_errors[error] + assert len(lines_list) == len(expected_errors[error]) + assert len(errors) == len(expected_errors) + assert failure_count == expected_result + + +# ================================================================= + + +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + ["replace", 12, ["Module example_mod"]], + ["replace", 39, ["use parkind1, ONLY: jpim, jprb"]], + ["replace", 42, ["use yomhook, ONLY: lhook, dr_hook"]], + ], + 3, + {"lowercase keyword: Module": [12], "lowercase keyword: use": [39, 42]}, + ), + ( + [ + ["add", 44, ["#if defined(SOMETHING)"]], + [ + "add", + 47, + [ + "#else", + "INTEGER, INTENT(INOUT) :: xlen", + "INTEGER, INTENT(OUT) :: ylen", + "LOGICAL, INTENT(IN) :: l_WhoopSies = .FALSE.", + "#endif", + ], + ], + ], + 0, + {}, + ), + ([], 0, {}), # No changes, expect no errors + ], + ids=["3 Lowercase Errors", "Pass cpp directives", "No Lowercase Errors"], +) +def test_r3_4_1_capitalised_keywords( + example_fortran_lines, changes_list, expected_result, expected_errors +): + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_4_1_capitalised_keywords(modified_fortran_lines) + failure_count = result.failure_count + errors = result.errors + for error, lines_list in errors.items(): + assert error in expected_errors + for line_no in lines_list: + assert line_no in expected_errors[error] + assert len(lines_list) == len(expected_errors[error]) + assert len(errors) == len(expected_errors) + assert failure_count == expected_result + + +# ================================================================= + + +# See if you can spot why I hate ruff - it highlights my awful data structures really well. +@pytest.mark.parametrize( + "changes_list, expected_result, expected_errors", + [ + ( + [ + [ + "replace", + 45, + ["INTEGER, INTENT(IN) :: XLEN !Length of first dim of the arrays."], + ], + ["replace", 61, ["REAL :: var1, DAVE_2, HiPPo"]], + ], + 2, + { + 'Found UPPERCASE variable name in declaration at line 45: "XLEN"': [45], + 'Found UPPERCASE variable name in declaration at line 61: "DAVE_2"': [ + 61 + ], + }, + ), + ( + [ + [ + "add", + 59, + ["LOGICAL :: l_whizz_bang = .FALSE. ! optimisation flag"], + ], + ], + 0, + {}, + ), + ( + [ + [ + "add", + 61, + [ + "REAL :: VARIaBLE_1, variable_2, &", + " VARIABLE_3, Hot_Potato, Baked Potato &", + " nice_var, good_var, camelCase, & !comment test", + " CAPS_VAR, CASPVAR, the_ghost", + ], + ], + [ + "replace", + 57, + [ + "INTEGER :: j ! Loop counter &", + "INTEGER :: k ! Loop counter &", + "INTEGER :: IJ ! Loop counter", + ], + ], + [ + "replace", + 45, + [ + "INTEGER, INTENT(IN) :: XLEN !Length of first dimension of the" + + " arrays." + ], + ], + ], + 5, + { + 'Found UPPERCASE variable name in declaration at line 45: "XLEN"': [45], + 'Found UPPERCASE variable name in declaration at line 59: "IJ"': [59], + 'Found UPPERCASE variable name in declaration at line 63: "CASPVAR"': [ + 63 + ], + 'Found UPPERCASE variable name in declaration at line 63: "VARIABLE_3"': [ + 63 + ], + 'Found UPPERCASE variable name in declaration at line 63: "CAPS_VAR"': [ + 63 + ], + }, + ), + ( + [ + ["delete", 48, None], + [ + "replace", + 49, + [ + "REAL, INTENT(IN) :: input1(xlen, ylen), & !First input array", + " input2(XLEN, ylen), !Second input array", + ], + ], + # [], + ], + 0, + {}, + ), + ( + [ + ["delete", 48, None], + [ + "replace", + 49, + [ + "REAL, INTENT(IN) :: input1(xlen, ylen), & !First input array", + " INPUT2(XLEN, ylen), !Second input array", + ], + ], + # [], + ], + 1, + {'Found UPPERCASE variable name in declaration at line 48: "INPUT2"': [48]}, + ), + ([], 0, {}), # No changes, expect no errors + ], + ids=[ + "2 UpperCase Var Errors", + "False FALSE error", + "5 UpperCase Var Errors on extended lines", + "Array dimnensions, twice on an extended line, but no failures", + "Array dimnensions, twice on an extended line, with one failure", + "No UpperCase Var Errors", + ], +) +def test_r3_4_2_no_full_uppercase_variable_names( + example_fortran_lines, changes_list, expected_result, expected_errors +): + modified_fortran_lines = modify_fortran_lines(example_fortran_lines, changes_list) + result = r3_4_2_no_full_uppercase_variable_names(modified_fortran_lines) + failure_count = result.failure_count + errors = result.errors + for error, lines_list in errors.items(): + assert error in expected_errors + for line_no in lines_list: + assert line_no in expected_errors[error] + assert len(lines_list) == len(expected_errors[error]) + assert len(errors) == len(expected_errors) + assert failure_count == expected_result + + +"""TODO: When testing for 'unseparated keywords' remember to test/discard false +ves on +"#endif" or similar. Current (old) test flags these in the wild, but the example file +has no cpp directives...""" diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py index 5aea535b..eddc9140 100644 --- a/script_umdp3_checker/umdp3_checker_rules.py +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -11,7 +11,6 @@ import re from typing import List, Dict -from fortran_keywords import fortran_keywords from search_lists import ( obsolescent_intrinsics, unseparated_keywords_list, @@ -27,20 +26,21 @@ Equally, there could probably be more consistency in how things like comments are stripped from the ends of lines and/or full comment lines are skipped. +TODO : This list was checked to ensure it had something for each + test in the original. +TODO : This needs to be re-checked. +TODO : And the tests need to be compared to the original Perl tests + to ensure they are equivalent. +TODO : The functions themselves are being re-named and re-created in umdp3_rules_S3.py, + The new names starte with r2_x_y, where x and y refer to the subsections of the + UMDP3 standards document that they are checking conformance to. """ -# Declare version -VERSION = "13.5.0" - @dataclass class TestResult: """Result from running a single style checker test on a file.""" - """ - TODO : unsure if both output and errors are required. - They make a bit more sense in the 'external_checkers' where - they hold stdout and stderr.""" checker_name: str = "Unnamed Checker" failure_count: int = 0 passed: bool = False @@ -52,64 +52,27 @@ class UMDP3Checker: """UMDP3 compliance checker class""" """ - TODO : This class could possibly be abandoned, or replaced - by a similar class at a different level. Presently only one - instance is created in such a way that the original - _extra_error_info can't be used to hold extra information - at a per file level. resulting in the need to pass error_log - back, which feels like a bodge.""" + TODO : This class should probably be abandoned. Presently only one + instance is created by checker_dispatch_tables (also for the chop) to give access to the dispatch tables, a list of the checks to run grouped by filetype. The checks themselves are being 'shifted' and hopefully improved to umdp3_rules_S3.py.""" # precompiled, regularly used search patterns. comment_line = re.compile(r"!.*$") word_splitter = re.compile(r"\b\w+\b") def __init__(self): - self._extra_error_info = {} """ - TODO: The Perl version had a dodgy looking subroutine to calculate - this, but I can't find where it was called from within the files in - 'bin'. It used all args as a 'list' - searched them for '#include' and - then returned the count as well as adding 1 to this global var if any - were found. - This is either redundant and needs removing, or needs implementing - properly.""" + TODO: The Perl version had a dodgy looking subroutine to calculate + this, but I can't find where it was called from within the files in + 'bin'. It used all args as a 'list' - searched them for '#include' and + then returned the count as well as adding 1 to this global var if any + were found. + This is either redundant and needs removing, or needs implementing + properly.""" self._number_of_files_with_variable_declarations_in_includes = 0 - def reset_extra_error_information(self): - """Reset extra error information : - Appears to be used 'between' blocks of tests such as those on diffs and - those on full files. - """ - self._extra_error_info = {} - - def get_extra_error_information(self) -> Dict: - """ - Get extra error information. Dictionary with file names as the keys. - """ - """ - - TODO: I presume this is what's used when creating the report of the - actual failures and not just the count. However, this information - doesn't seem to be output as yet and will need implementing. - """ - return self._extra_error_info.copy() - - def add_extra_error(self, key: str, value: str = ""): - """Add extra error information to the dictionary""" - """ - TODO: The usefulness of the information added has not been assessed, - nor does it appear to be reported as yet.""" - self._extra_error_info[key] = value - def add_error_log( self, error_log: Dict, key: str = "no key", value: int = 0 ) -> Dict: - """Add extra error information to the dictionary""" - """ - TODO: This is a bodge to get more detailed info about - the errors back to the calling program. The info is - useful, but is currently presented on a per-test basis - rather than a per-file which would be easier to read - and make use of.""" + """Add error information to the error log dictionary""" if key not in error_log: error_log[key] = [] error_log[key].append(value) @@ -125,11 +88,7 @@ def get_include_number(self) -> int: def remove_quoted(self, line: str) -> str: """Remove quoted strings from a line""" - """ - TODO: The original version replaced the quoted sections with a - "blessed reference", presumably becuase they were 're-inserted' at some - stage. No idea if that capability is still required.""" - # Simple implementation - remove single and double quoted strings + # # Simple implementation - remove single and double quoted strings result = line # Remove double quoted strings @@ -142,79 +101,9 @@ def remove_quoted(self, line: str) -> str: """Test functions : Each accepts a list of 'lines' to search and returns a - TestResult object containing all the information.""" + TestResult object containing all the information. + The list of lines is often assumed to be the whole file. """ - TODO: One thought here is each test should also be told whether it's - being passed the contents of a full file, or just a selection of lines - involved in a change as some of the tests appear to really only be useful - if run on a full file (e.g. the Implicit none checker). Thus if only passed - a selection of lines, these tests could be skipped/return 'pass' - regardless. - Although, a brief look seems to imply that there are two 'dispatch tables' - one for full files and one for changed lines.""" - - def capitulated_keywords(self, lines: List[str]) -> TestResult: - """A fake test, put in for testing purposes. - Probably not needed any more, but left in case.""" - failures = 0 - line_count = 0 - error_log = {} - # print("Debug: In capitulated_keywords test") - for line in lines: - line_count += 1 - # Remove quoted strings and comments - if line.lstrip(" ").startswith("!"): - continue - clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) - - # Check for lowercase keywords - for word in self.word_splitter.findall(clean_line): - upcase = word.upper() - if upcase in fortran_keywords and word != upcase: - self.add_extra_error(f"lowercase keyword: {word}") - error_log = self.add_error_log( - error_log, f"capitulated keyword: {word}", line_count - ) - failures += 1 - - return TestResult( - checker_name="Capitulated Keywords", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {line_count} lines, found {failures} failures.", - errors=error_log, - ) - - def capitalised_keywords(self, lines: List[str]) -> TestResult: - """Check for the presence of lowercase Fortran keywords, which are - taken from an imported list 'fortran_keywords'.""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines): - # Remove quoted strings and comments - if line.lstrip(" ").startswith("!"): - continue - clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) - # Check for lowercase keywords - for word in self.word_splitter.findall(clean_line): - upcase = word.upper() - if upcase in fortran_keywords and word != upcase: - self.add_extra_error(f"lowercase keyword: {word}") - failures += 1 - error_log = self.add_error_log( - error_log, f"lowercase keyword: {word}", count + 1 - ) - - return TestResult( - checker_name="Capitalised Keywords", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {count + 1} lines, found {failures} failures.", - errors=error_log, - ) def openmp_sentinels_in_column_one(self, lines: List[str]) -> TestResult: """Check OpenMP sentinels are in column one""" @@ -223,7 +112,6 @@ def openmp_sentinels_in_column_one(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s+!\$OMP", line): - self.add_extra_error("OpenMP sentinel not in column 1") failures += 1 error_log = self.add_error_log( error_log, "OpenMP sentinel not in column 1:", count + 1 @@ -246,9 +134,10 @@ def unseparated_keywords(self, lines: List[str]) -> TestResult: if line.lstrip(" ").startswith("!"): continue clean_line = self.remove_quoted(line) - for pattern in [f"\\b{kw}\\b" for kw in unseparated_keywords_list]: + clean_line = self.comment_line.sub("", clean_line).strip() + # The [^#] is to avoid false positives on #endif and similar. + for pattern in [f"([^#]|^)\\b{kw}\\b" for kw in unseparated_keywords_list]: if re.search(pattern, clean_line, re.IGNORECASE): - self.add_extra_error(f"unseparated keyword in line: {line.strip()}") failures += 1 error_log = self.add_error_log( error_log, @@ -271,12 +160,11 @@ def go_to_other_than_9999(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): clean_line = self.remove_quoted(line) - clean_line = re.sub(r"!.*$", "", clean_line) + clean_line = self.comment_line.sub("", clean_line).strip() if match := re.search(r"\bGO\s*TO\s+(\d+)", clean_line, re.IGNORECASE): label = match.group(1) if label != "9999": - self.add_extra_error(f"GO TO {label}") failures += 1 error_log = self.add_error_log( error_log, f"GO TO {label}", count + 1 @@ -300,7 +188,6 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bWRITE\s*\(\s*\*\s*,\s*\*\s*\)", clean_line, re.IGNORECASE): - self.add_extra_error("WRITE(*,*) found") failures += 1 error_log = self.add_error_log(error_log, "WRITE(*,*) found", count + 1) output = f"Checked {count + 1} lines, found {failures} failures." @@ -312,48 +199,6 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: errors=error_log, ) - def lowercase_variable_names(self, lines: List[str]) -> TestResult: - """Check for lowercase or CamelCase variable names only""" - """ - TODO: This is a very simplistic check and will not detect many - cases which break UMDP3. I suspect the Perl Predecessor concatenated - continuation lines prior to 'cleaning' and checking. Having identified - a declaration, it also then scanned the rest of the file for that - variable name in any case.""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines, 1): - clean_line = self.remove_quoted(line) - clean_line = re.sub(r"!.*$", "", clean_line) - - # Simple check for UPPERCASE variable declarations - if re.search( - r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*[A-Z_]+", - clean_line, - re.IGNORECASE, - ): - clean_line = re.sub( - r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", - "", - clean_line, - ) - if match := re.search(r"([A-Z]{2,})", clean_line): - self.add_extra_error(f"UPPERCASE variable name : {match[1]}") - failures += 1 - error_log = self.add_error_log( - error_log, f"UPPERCASE variable name {match[1]}", count - ) - - output = f"Checked {count + 1} lines, found {failures} failures." - return TestResult( - checker_name="Lowercase or CamelCase variable names only", - failure_count=failures, - passed=(failures == 0), - output=output, - errors=error_log, - ) - def dimension_forbidden(self, lines: List[str]) -> TestResult: """Check for use of dimension attribute""" failures = 0 @@ -364,7 +209,6 @@ def dimension_forbidden(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bDIMENSION\b", clean_line, re.IGNORECASE): - self.add_extra_error("DIMENSION attribute used") failures += 1 error_log = self.add_error_log( error_log, "DIMENSION attribute used", count + 1 @@ -386,7 +230,6 @@ def ampersand_continuation(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s*&", line): - self.add_extra_error("continuation line starts with &") failures += 1 error_log = self.add_error_log( error_log, "continuation line starts with &", count + 1 @@ -413,7 +256,6 @@ def forbidden_keywords(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\b(EQUIVALENCE|PAUSE)\b", clean_line, re.IGNORECASE): - self.add_extra_error("forbidden keyword") failures += 1 error_log = self.add_error_log( error_log, "forbidden keyword", count + 1 @@ -440,7 +282,6 @@ def forbidden_operators(self, lines: List[str]) -> TestResult: for op in old_operators: if op in clean_line.upper(): - self.add_extra_error(f"old operator {op}") failures += 1 error_log = self.add_error_log( error_log, f"old operator {op}", count + 1 @@ -454,25 +295,6 @@ def forbidden_operators(self, lines: List[str]) -> TestResult: errors=error_log, ) - def line_over_80chars(self, lines: List[str]) -> TestResult: - """Check for lines longer than 80 characters""" - failures = 0 - error_log = {} - count = -1 - for count, line in enumerate(lines): - if len(line.rstrip()) > 80: - self.add_extra_error("line too long") - failures += 1 - error_log = self.add_error_log(error_log, "line too long", count + 1) - - return TestResult( - checker_name="Line longer than 80 characters", - failure_count=failures, - passed=(failures == 0), - output=f"Checked {count + 1} lines, found {failures} failures.", - errors=error_log, - ) - def tab_detection(self, lines: List[str]) -> TestResult: """Check for tab characters""" failures = 0 @@ -480,7 +302,6 @@ def tab_detection(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if "\t" in line: - self.add_extra_error("tab character found") failures += 1 error_log = self.add_error_log( error_log, "tab character found", count + 1 @@ -501,7 +322,6 @@ def printstatus_mod(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\bUSE\s+printstatus_mod\b", line, re.IGNORECASE): - self.add_extra_error("printstatus_mod used") failures += 1 error_log = self.add_error_log( error_log, "printstatus_mod used", count + 1 @@ -525,7 +345,6 @@ def printstar(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bPRINT\s*\*", clean_line, re.IGNORECASE): - self.add_extra_error("PRINT * used") failures += 1 error_log = self.add_error_log(error_log, "PRINT * used", count + 1) @@ -547,7 +366,6 @@ def write6(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bWRITE\s*\(\s*6\s*,", clean_line, re.IGNORECASE): - self.add_extra_error("WRITE(6) used") failures += 1 error_log = self.add_error_log(error_log, "WRITE(6) used", count + 1) @@ -566,7 +384,6 @@ def um_fort_flush(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\bum_fort_flush\b", line): - self.add_extra_error("um_fort_flush used") failures += 1 error_log = self.add_error_log( error_log, "um_fort_flush used", count + 1 @@ -586,7 +403,6 @@ def svn_keyword_subst(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"\$\w+\$", line): - self.add_extra_error("SVN keyword substitution") failures += 1 error_log = self.add_error_log( error_log, "SVN keyword substitution", count + 1 @@ -606,7 +422,6 @@ def omp_missing_dollar(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"!\s*OMP\b", line) and not re.search(r"!\$OMP", line): - self.add_extra_error("!OMP without $") failures += 1 error_log = self.add_error_log(error_log, "!OMP without $", count + 1) @@ -625,7 +440,6 @@ def cpp_ifdef(self, lines: List[str]) -> TestResult: count = -1 for count, line in enumerate(lines): if re.search(r"^\s*#\s*if(n)?def\b", line): - self.add_extra_error("#ifdef/#ifndef used") failures += 1 error_log = self.add_error_log( error_log, "#ifdef/#ifndef used", count + 1 @@ -653,7 +467,6 @@ def cpp_comment(self, lines: List[str]) -> TestResult: ) or re.search(r"^\s*#(else) *(.*)", line) if match: if re.search(r".*!", match.group(2)): - self.add_extra_error("Fortran comment in CPP directive") failures += 1 error_log = self.add_error_log( error_log, "Fortran comment in CPP directive", count + 1 @@ -678,7 +491,6 @@ def obsolescent_fortran_intrinsic(self, lines: List[str]) -> TestResult: for intrinsic in obsolescent_intrinsics: if re.search(rf"\b{intrinsic}\b", clean_line, re.IGNORECASE): - self.add_extra_error(f"obsolescent intrinsic: {intrinsic}") failures += 1 error_log = self.add_error_log( error_log, f"obsolescent intrinsic: {intrinsic}", count + 1 @@ -702,7 +514,6 @@ def exit_stmt_label(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\bEXIT\s*$", clean_line, re.IGNORECASE): - self.add_extra_error("unlabelled EXIT statement") failures += 1 error_log = self.add_error_log( error_log, "unlabelled EXIT statement", count + 1 @@ -730,7 +541,6 @@ def intrinsic_modules(self, lines: List[str]) -> TestResult: if re.search( rf"\bUSE\s+(::)*\s*{module}\b", clean_line, re.IGNORECASE ) and not re.search(r"\bINTRINSIC\b", clean_line, re.IGNORECASE): - self.add_extra_error(f"intrinsic module {module} without INTRINSIC") failures += 1 error_log = self.add_error_log( error_log, @@ -758,7 +568,6 @@ def read_unit_args(self, lines: List[str]) -> TestResult: if match := re.search(r"\bREAD\s*\(\s*([^,)]+)", clean_line, re.IGNORECASE): first_arg = match.group(1).strip() if not first_arg.upper().startswith("UNIT="): - self.add_extra_error("READ without explicit UNIT=") failures += 1 error_log = self.add_error_log( error_log, "READ without explicit UNIT=", count + 1 @@ -794,7 +603,6 @@ def retire_if_def(self, lines: List[str]) -> TestResult: # SYMBOL = [x for x in match.groups() if x] # reduce to a # list of 1 element if match.group(1) in retired_ifdefs: - self.add_extra_error(f"retired if-def: {match.group(1)}") failures += 1 error_log = self.add_error_log( error_log, f"retired if-def: {match.group(1)}", count + 1 @@ -817,7 +625,6 @@ def implicit_none(self, lines: List[str]) -> TestResult: break if no_implicit_none: - self.add_extra_error("missing IMPLICIT NONE") error_log = self.add_error_log( error_log, "No IMPLICIT NONE found in file", 0 ) @@ -840,7 +647,6 @@ def forbidden_stop(self, lines: List[str]) -> TestResult: clean_line = re.sub(r"!.*$", "", clean_line) if re.search(r"\b(STOP|CALL\s+abort)\b", clean_line, re.IGNORECASE): - self.add_extra_error("STOP or CALL abort used") failures += 1 error_log = self.add_error_log( error_log, "STOP or CALL abort used", count + 1 @@ -874,7 +680,6 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: clean_line, re.IGNORECASE, ): - self.add_extra_error("intrinsic function used as variable") failures += 1 error_log = self.add_error_log( error_log, "intrinsic function used as variable", count + 1 @@ -888,34 +693,6 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: errors=error_log, ) - def check_crown_copyright(self, lines: List[str]) -> TestResult: - """Check for crown copyright statement""" - """ - TODO: This is a very simplistic check and will not detect many - cases which break UMDP3. I suspect the Perl Predeccessor - did much more convoluted tests""" - comment_lines = [ - line.upper() for line in lines if line.lstrip(" ").startswith("!") - ] - file_content = "\n".join(comment_lines) - error_log = {} - found_copyright = False - if "CROWN COPYRIGHT" in file_content or "COPYRIGHT" in file_content: - found_copyright = True - - if not found_copyright: - self.add_extra_error("missing copyright or crown copyright statement") - error_log = self.add_error_log( - error_log, "missing copyright or crown copyright statement", 0 - ) - return TestResult( - checker_name="Crown Copyright Statement", - failure_count=0 if found_copyright else 1, - passed=found_copyright, - output="Checked for crown copyright statement.", - errors=error_log, - ) - def check_code_owner(self, lines: List[str]) -> TestResult: """Check for correct code owner comment""" """ @@ -935,7 +712,6 @@ def check_code_owner(self, lines: List[str]) -> TestResult: # This is often a warning rather than an error if not found_code_owner: - self.add_extra_error("missing code owner comment") error_log = self.add_error_log(error_log, "missing code owner comment", 0) return TestResult( checker_name="Code Owner Comment", @@ -957,7 +733,6 @@ def array_init_form(self, lines: List[str]) -> TestResult: for count, line in enumerate(lines): clean_line = self.remove_quoted(line) if re.search(r"\(/.*?\/\)", clean_line): - self.add_extra_error("old array initialization form (/ /)") failures += 1 error_log = self.add_error_log( error_log, "old array initialization form (/ /)", count + 1 @@ -977,7 +752,6 @@ def line_trail_whitespace(self, lines: List[str]) -> TestResult: error_log = {} for count, line in enumerate(lines): if re.search(r"\s+$", line): - self.add_extra_error("trailing whitespace") failures += 1 error_log = self.add_error_log( error_log, "trailing whitespace", count + 1 @@ -997,7 +771,6 @@ def c_integral_format_specifiers(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r'%\d+[dioxX]"', line): - self.add_extra_error("missing space in format specifier") failures += 1 return failures @@ -1008,7 +781,6 @@ def c_deprecated(self, lines: List[str]) -> int: for line in lines: for identifier in deprecated_c_identifiers: if re.search(rf"\b{identifier}\b", line): - self.add_extra_error(f"deprecated C identifier: {identifier}") failures += 1 return failures @@ -1019,9 +791,6 @@ def c_openmp_define_pair_thread_utils(self, lines: List[str]) -> int: for line in lines: if re.search(r"#\s*if.*_OPENMP", line): if not re.search(r"SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line): - self.add_extra_error( - "_OPENMP without SHUM_USE_C_OPENMP_VIA_THREAD_UTILS" - ) failures += 1 return failures @@ -1035,7 +804,6 @@ def c_openmp_define_no_combine(self, lines: List[str]) -> int: ) or re.search( r"&&.*_OPENMP.*&&.*SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line ): - self.add_extra_error("OpenMP defines combined with third macro") failures += 1 return failures @@ -1045,7 +813,6 @@ def c_openmp_define_not(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r"!\s*defined\s*\(\s*_OPENMP\s*\)", line): - self.add_extra_error("!defined(_OPENMP) used") failures += 1 return failures @@ -1064,7 +831,6 @@ def c_protect_omp_pragma(self, lines: List[str]) -> int: r"#\s*include\s*", line ): if not in_openmp_block: - self.add_extra_error("unprotected OMP pragma/include") failures += 1 return failures @@ -1074,7 +840,6 @@ def c_ifdef_defines(self, lines: List[str]) -> int: failures = 0 for line in lines: if re.search(r"^\s*#\s*ifdef\b", line): - self.add_extra_error("#ifdef used instead of #if defined()") failures += 1 return failures @@ -1082,7 +847,6 @@ def c_ifdef_defines(self, lines: List[str]) -> int: def c_final_newline(self, lines: List[str]) -> int: """Check C unit ends with final newline""" if lines and not lines[-1].endswith("\n"): - self.add_extra_error("missing final newline") return 1 return 0 diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index a08727bd..33b55187 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -7,11 +7,12 @@ import subprocess from abc import ABC, abstractmethod from pathlib import Path -from typing import Callable, List, Dict, Set, Optional +from typing import Callable, List, Set, Optional from dataclasses import dataclass, field import argparse from checker_dispatch_tables import CheckerDispatchTables from umdp3_checker_rules import TestResult +from umdp3_rules_S3 import list_O_tests import concurrent.futures # Add custom modules to Python path if needed @@ -25,13 +26,11 @@ conformance, and to run relevant style checkers on those files. """ - -ALLOWABLE_FILE_TYPES = ["Fortran", "Python", "Generic"] +ALLOWABLE_FILE_TYPES = ["Fortran", "Python", "AnyFile"] GROUP_FILE_TYPES = { "CI": {"Fortran", "Python"}, "ALL": set(ALLOWABLE_FILE_TYPES), } -# TODO: Generic /probably/ needs renaming. @dataclass @@ -123,30 +122,18 @@ class StyleChecker: The checks are a dictionary of named callable routines. The files are a list of file paths to check.""" - """ - TODO: This is where it might be good to set up a threadsafe - class instance to hold the 'expanded' check outputs. - One for each file being checked in parallel. - Curently the UMDP3 class holds "_extra_error_info" which - was used to provide more detailed error logging. - However, this is not threadsafe, so in a multithreaded - environment, the extra error info could get mixed up between - different files being checked in parallel. - For now, I've modified the UMDP3 class methods to return - a TestResult object directly, which includes the extra error - info, so that each thread can work independently.""" name: str - check_functions: Dict[str, Callable] + check_functions: list[Callable] files_to_check: List[Path] def __init__( self, name: str, - check_functions: Dict[str, Callable], + check_functions: list[Callable], changed_files: List[Path], ): self.name = name - self.check_functions = check_functions or {} + self.check_functions = check_functions or [] self.files_to_check = changed_files or [] def get_name(self) -> str: @@ -156,7 +143,7 @@ def check(self, file_path: Path) -> CheckResult: """Run UMDP3 check function on file.""" lines = file_path.read_text().splitlines() file_results = [] # list of TestResult objects - for check_name, check_function in self.check_functions.items(): + for check_function in self.check_functions: file_results.append(check_function(lines)) tests_failed = sum([0 if result.passed else 1 for result in file_results]) @@ -167,12 +154,20 @@ def check(self, file_path: Path) -> CheckResult: test_results=file_results, ) + def report(self, print_volume: int = 3) -> None: + """Print summary of results for this checker.""" + if print_volume >= 3: + print( + f'Checker "{self.name}" checking {len(self.files_to_check)} files ' + f"with {len(self.check_functions)} checks." + ) + @classmethod def from_full_list( cls, name: str, file_extensions: Set[str], - check_functions: Dict[str, Callable], + check_functions: list[Callable], changed_files: List[Path], print_volume: int = 3, ): @@ -208,11 +203,11 @@ def create_external_runners( ) -> "StyleChecker": """Create a StyleChecker instance filtering files from a full list.""" filtered_files = cls.filter_files(all_files, file_extensions) - check_functions = {} + check_functions = [] for command in commands: - external_opname = f"External_operation_{command[0]}" + external_opname = f"{command[0]}" free_runner = cls.create_free_runner(command, external_opname) - check_functions[external_opname] = free_runner + check_functions.append(free_runner) return cls(name, check_functions, filtered_files) @staticmethod @@ -269,7 +264,7 @@ class Check_Runner(StyleChecker): def check(self, file_path: Path) -> CheckResult: """Run external command on file.""" file_results = [] # list of TestResult objects - for check_name, check_function in self.check_functions.items(): + for check_function in self.check_functions: file_results.append(check_function(file_path)) tests_failed = sum([0 if result.passed else 1 for result in file_results]) return CheckResult( @@ -283,6 +278,10 @@ def check(self, file_path: Path) -> CheckResult: class ConformanceChecker: """Main framework for running style checks in parallel.""" + checkers: List[StyleChecker] + max_workers: int + results: List[CheckResult] + def __init__( self, checkers: List[StyleChecker], @@ -290,6 +289,7 @@ def __init__( ): self.checkers = checkers self.max_workers = max_workers + self.results = [] def check_files(self) -> None: """Run all checkers on given files in parallel. @@ -304,10 +304,6 @@ def check_files(self) -> None: for now. """ """ - TODO : Might be good to have a threadsafe object for each file and - allow multiple checks to be run at once on that file. - """ - """ TODO : Poor terminology makes discerning what is actually happening here hard work. A 'checker' is an instance of the StyleChecker class. Each of which has a list of checks to perform and a list of @@ -318,13 +314,6 @@ def check_files(self) -> None: filename will appear multiple times in the output. Not Good! """ results = [] - - """TODO : The current implementation creates a thread for each - (checker, file), however a chat with Ollie suggests a better approach - would be to switch to multiprocessing and create a pool of workers, and - then have each worker run all the checks for a given file. This would - reduce the overhead of creating threads and allow for better use of - resources.""" with concurrent.futures.ProcessPoolExecutor( max_workers=self.max_workers ) as executor: @@ -333,8 +322,9 @@ def check_files(self) -> None: for checker in self.checkers for file_path in checker.files_to_check } - # TODO : This next loop could be used to process the individual results as - # they come in, rather than waiting for all to complete. + """TODO : This next loop could be used to process the individual results as + they come in, rather than waiting for all to complete. For example, sorting + the error logs into a dictionary with the file_name as the key.""" for future in concurrent.futures.as_completed(future_to_task): result = future.result() results.append(result) @@ -344,11 +334,6 @@ def check_files(self) -> None: def print_results(self, print_volume: int = 3, quiet_pass: bool = True) -> bool: """Print results and return True if all checks passed. ========================================================""" - """ - TODO: If an object encapsulating the data for each file is created - it should contain the "in depth" printing method for file data. - With this method presenting the summary and then looping over - each file object to print its details at the desired verbosity.""" all_passed = True for result in self.results: file_status = "✓ PASS" if result.all_passed else "✗ FAIL" @@ -460,6 +445,36 @@ def line_2(length: int = 80) -> str: return "-" * length +def print_in_box_a(text: list[str], width: int = 80) -> None: + """Helper function to print text in a box.""" + print("+" + "-" * (width - 2) + "+") + for line in text: + print("| " + line.ljust(width - 4) + " |") + print("+" + "-" * (width - 2) + "+") + + +def print_in_box_b( + text: list[str], width: int = 80, justification: str = "left" +) -> None: + """Another Helper function to print text in a box.""" + print(line_1(width)) + for line in text: + total_padding = width - 6 - len(line) + if justification == "left": + left_padding = 0 + right_padding = total_padding + elif justification == "right": + left_padding = total_padding + right_padding = 0 + elif justification == "center": + left_padding = total_padding // 2 + right_padding = total_padding - left_padding + else: + raise ValueError("Invalid justification: " + justification) + print("## " + " " * left_padding + line + " " * right_padding + " ##") + print(line_1(width) + "\n") + + def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: """Determine which CMS is in use based on the presence of certain files.""" repo_path = Path(path) @@ -474,8 +489,6 @@ def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: raise RuntimeError("Unknown CMS type at path: " + str(path)) branch_name = cms.get_branch_name() if not cms.is_branch(): - # TODO : This /might/ be better as a raise ValueError to allow - # printing the help message, but for now just print and exit. print( f"The path {path} is not a branch." f"\nReported branch name is : {branch_name}" @@ -484,7 +497,7 @@ def which_cms_is_it(path: str, print_volume: int = 3) -> CMSSystem: f"Please try switching on the full check option" ) # Soft exit mainly so nightly testing on main doesn't flag failure. - exit(0) + sys.exit(0) else: if print_volume >= 2: print(f"Found branch, {branch_name}, at path {path}.") @@ -531,13 +544,18 @@ def create_style_checkers( file_extensions = {".f", ".for", ".f90", ".f95", ".f03", ".f08", ".F90"} """ TODO : I /think/ the old version also checked '.h' files as Fortran. - Not sure if that is still needed.""" + Not sure if that is still needed. - Probably, but some of them are C source + files""" fortran_diff_table = dispatch_tables.get_diff_dispatch_table_fortran() fortran_file_table = dispatch_tables.get_file_dispatch_table_fortran() generic_file_table = dispatch_tables.get_file_dispatch_table_all() if print_volume >= 3: print("Configuring Fortran checkers:") - combined_checkers = fortran_diff_table | fortran_file_table | generic_file_table + combined_checkers = [] + combined_checkers.extend(fortran_diff_table) + combined_checkers.extend(fortran_file_table) + combined_checkers.extend(generic_file_table) + combined_checkers.extend(list_O_tests) fortran_file_checker = StyleChecker.from_full_list( "Fortran Checker", file_extensions, combined_checkers, changed_files ) @@ -560,12 +578,12 @@ def create_style_checkers( file_extensions, ) checkers.append(python_file_checker) - if "Generic" in file_types or file_types == []: + if "AnyFile" in file_types or file_types == []: if print_volume >= 3: - print("Configuring Generic File Checkers:") + print("Configuring AnyFile File Checkers:") all_file_dispatch_table = dispatch_tables.get_file_dispatch_table_all() generic_checker = StyleChecker( - "Generic File Checker", all_file_dispatch_table, changed_files + "AnyFile File Checker", all_file_dispatch_table, changed_files ) checkers.append(generic_checker) @@ -595,18 +613,26 @@ def get_files_to_check( all_files = [f.relative_to(path) for f in repo_path.rglob("*") if f.is_file()] if print_volume >= 1: print("Full check override enabled.") - if print_volume >= 3: + if print_volume >= 4: print( - f" Found {len(all_files)} files to " - f"check in repository at path: {path}" + f" Found a total of {len(all_files)} files " + f"in directory at path: {path}\n" + " These will be filtered by the checkers based on their file" + " extension" ) - return all_files else: # Configure CMS, and check we've been passed a branch if print_volume >= 1: print("Using a CMS to determine changed files.") cms = which_cms_is_it(path, print_volume) - changed_files = cms.get_changed_files() - return changed_files + all_files = cms.get_changed_files() + if print_volume >= 4: + print( + f" CMS indicates {len(all_files)} files " + f"changed in repository at path: {path}\n" + " These will be filtered by the checkers based on their file" + " extension" + ) + return all_files # Usage when run from command line. @@ -620,18 +646,14 @@ def get_files_to_check( full_file_paths = [Path(args.path) / f for f in file_paths] # Configure checkers - """ - TODO : Uncertain as to how flexible this needs to be. - For now, just configure checkers based on file type requested. - Later, could add configuration files to specify which - checkers to use for each file type.""" - active_checkers = create_style_checkers(args.file_types, full_file_paths) + for checker in active_checkers: + checker.report(log_volume) - # TODO : Could create a conformance checker for each - # file type. - # Currently, just create a single conformance checker - # with all active checkers. + """TODO : Could create a conformance checker for each + file type. + Currently, just create a single conformance checker + with all active checkers.""" checker = ConformanceChecker( active_checkers, max_workers=args.max_workers, @@ -640,17 +662,16 @@ def get_files_to_check( checker.check_files() if log_volume >= 3: - print(line_1(81)) - print("## Results :" + " " * 67 + "##") - print(line_1(81) + "\n") + print_in_box_b(["Results :"], 81) else: print("Results :") all_passed = checker.print_results(print_volume=log_volume, quiet_pass=quiet_pass) - if log_volume >= 4: - print("\n" + line_1(81)) - print("## Summary :" + " " * 67 + "##") - print(line_1(81)) - print(f"Total files checked: {len(checker.results)}") - print(f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}") + output = [ + "Summary :", + f" Total files checked: {len(checker.results)}", + " " + + f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}", + ] + print_in_box_a(output, width=81) exit(0 if all_passed else 1) diff --git a/script_umdp3_checker/umdp3_rules_S3.py b/script_umdp3_checker/umdp3_rules_S3.py new file mode 100644 index 00000000..f94396d7 --- /dev/null +++ b/script_umdp3_checker/umdp3_rules_S3.py @@ -0,0 +1,459 @@ +# ----------------------------------------------------------------------------- +# (C) Crown copyright Met Office. All rights reserved. +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- + +"""This is the storage location for rules based on what's defined as the standard in the UMDP: 003 document. These rules are not necessarily the same as the rules defined in the UMDP: 003 document, but they are based on them. The rules in this file are used by the umdp3_checker script to check for compliance with the UMDP: 003 standard. +For now, the document has been copied into this file as a placeholder for the rules as they appear.""" + +import re +from typing import List, Dict +from umdp3_checker_rules import TestResult +from fortran_keywords import fortran_keywords + +"""ToDo: This lot will need putting back as and when the tests that use them get imported/re-created""" +# from fortran_keywords import fortran_keywords +# from search_lists import ( +# obsolescent_intrinsics, +# unseparated_keywords_list, +# retired_ifdefs, +# deprecated_c_identifiers, +# ) +# from dataclasses import dataclass, field +# from script_umdp3_checker import fortran_keywords + +comment_line = re.compile(r"!.*$") +cpp_command_line = re.compile(r"^#.*$") +word_splitter = re.compile(r"\b\w+\b") + + +def add_error_log(error_log: Dict, key: str = "no key", value: int = 0) -> Dict: + """Add error information to the dictionary. + This adds the error statement, some are more unique than others, along with the + line number the error occurs on. Line number(s) are stored in a list in case the + same error occurs multiple times in a file.""" + if key not in error_log: + error_log[key] = [] + error_log[key].append(value) + return error_log + + +def remove_quoted(line: str) -> str: + """Remove quoted strings from a line""" + """ + TODO: The original version replaced the quoted sections with a + "blessed reference", presumably becuase they were 're-inserted' at some + stage. No idea if that capability is still required.""" + # Simple implementation - remove single and double quoted strings + result = line + + # Remove double quoted strings + result = re.sub(r'"[^"]*"', "", result) + + # Remove single quoted strings + result = re.sub(r"'[^']*'", "", result) + + return result + + +"""This is in case I return to the idea of replacing the quoted sections from above +with something unique and storing them in order to put them back at some stage...""" + + +def create_unique_random_string(storage_set, length: int = 7) -> str: + """Create a unique random string of a given length. + Creates a random string of given length and ensures it hasn't been used before.""" + import random + import string + + new_value = "".join(random.choices(string.ascii_letters + string.digits, k=length)) + while new_value in storage_set: + new_value = "".join( + random.choices(string.ascii_letters + string.digits, k=length) + ) + storage_set.add(new_value) + return new_value + + +def remove_comments(line: str) -> str: + """Remove comments from the lines : + There is a bit of an assumption here that quoted text has already been removed, so + that we don't accidentally remove text after an "!" in a string.""" + return comment_line.sub("", line).rstrip() + + +def remove_cpp_commands(line: str) -> str: + """Remove cpp commands from the lines : + There is a bit of an assumption here that quoted text has already been removed, so that we don't accidentally remove text after an "#" in a string. + Also that cpp commands have the '#' in col 1 of the line.""" + return cpp_command_line.sub("", line).rstrip() + + +def concatenate_lines(lines: List[str], line_no: int) -> str: + """Concatenate the continuation lines into a single string""" + # Find first line and check for continuation character. + line = lines[line_no - 1] + line = remove_comments(line) + line = remove_quoted(line) + while line.rstrip().endswith("&"): + line = line.rstrip()[:-1] # Remove the continuation character + line_no += 1 + if line_no > len(lines): + break # Avoid going out of bounds + next_line = lines[line_no - 1] + next_line = remove_comments(next_line) + next_line = remove_quoted(next_line) + line += ( + next_line.lstrip() + ) # Concatenate with the next line, removing leading whitespace + return line + + +""" +3.1 Source files should only contain a single program unit +""" + + +def r3_1_1_there_can_be_only_one( + lines: List[str], +) -> TestResult: # ..one programming unit in a file, that is. + """Check for multiple program units in a file: + Given that a MODULE or a PROGRAM can contain multiples of the other program units, + just going to confirm the first one decalred is the last one ENDed.""" + program_unit_keywords = {"PROGRAM", "MODULE", "SUBROUTINE", "FUNCTION"} + + # unsure if "TYPE" should be included. + def find_first(lines: List[str]) -> tuple[bool, str]: + for line in lines: + executable_line = remove_comments(line).strip() + executable_line = remove_cpp_commands(executable_line).strip() + if not executable_line: + continue # Skip empty lines + for keyword in program_unit_keywords: + unit_name_search = re.search(rf"{keyword}\s+(\w+)", executable_line) + if unit_name_search: + unit_type = keyword + unit_name = unit_name_search.group(1) + return (True, f"{unit_type} {unit_name}") + return ( + False, + "First executable line doesn't define an accepted programming unit" + f" : {line}", + ) + return (False, "No program unit found.") + + def find_last(lines: List[str], unit_type: str, unit_name: str) -> tuple[bool, str]: + for line in reversed(lines): + executable_line = remove_comments(line).strip() + executable_line = remove_cpp_commands(executable_line).strip() + if not executable_line: + continue # Skip empty lines + unit_name_search = re.search(rf"END\s+{unit_type}\s+(\w+)", executable_line) + if unit_name_search: + if unit_name_search.group(1) == unit_name: + return (True, "") + else: + return ( + False, + "END statement found for a different program unit: " + f"should be {unit_name} got {unit_name_search.group(1)}.", + ) + else: + return ( + False, + "Last executable line not a matching END statement for the first program unit found.", + ) + return (False, "No matching END statement found for the first program unit.") + + found_first, first_result = find_first(lines) + if not found_first: + failure_count = 1 + found_last = False + error_log = add_error_log({}, first_result, 0) + else: + unit_type, unit_name = first_result.split() + found_last, last_result = find_last(lines, unit_type, unit_name) + if found_last: + failure_count = 0 + error_log = {} + else: + failure_count = 1 + error_log = add_error_log({}, last_result, 0) + return TestResult( + checker_name="Test 3.1 Only One Program Unit", + failure_count=failure_count, + passed=found_first and found_last, + output="Checked for multiple program units.", + errors=error_log, + ) + + +""" +* TODO: Modules may be used to group related variables, subroutines and functions. + - Really unsure as to how to test that's what they're doing... +* TODO: Each separate file within the source tree should be uniquely named. + - Not sure that's possible/easy within the current framework to process one file at a time... +* TODO: The name of the file should reflect the name of the programming unit. +* TODO: Multiple versions of the same file should be named filename-#ver where #ver is the section/version number (e.g. 1a,2a,2b. . . ). +* TODO: You should avoid naming your program units and variables with names that match an intrinsic FUNCTION, SUBROUTINE or MODULE. We recommend the use of unique names within a program unit. +* TODO: You should also avoid naming your program units and variables with names that match a keyword in a Fortran statement. +* TODO: Avoid giving program units names that are likely to be used as variable names elsewhere in the code, e.g. field or string. This makes searching the code difficult and can cause the code browser to make erroneous connections between unrelated routines. +* TODO: Subroutines should be kept reasonably short, where appropriate, say up to about 100 lines of executable code, but don’t forget there are start up overheads involved in calling an external subroutine so they should do a reasonable amount of work +""" + +"""3.2 Headers +* All programming units require a suitable copyright header.""" + + +def r3_2_1_check_crown_copyright(lines: List[str]) -> TestResult: + """Check for crown copyright statement""" + """ + TODO: This is a very simplistic check and will not detect many cases which break UMDP3. + It will pass if the word copyright appears on a commented out line. This could + include passing : + ! I should put a copyright statement here... + + I suspect the Perl Predeccessor + did much more convoluted tests""" + comment_lines = [line.upper() for line in lines if line.lstrip(" ").startswith("!")] + file_content = "\n".join(comment_lines) + error_log = {} + found_copyright = False + if "CROWN COPYRIGHT" in file_content or "COPYRIGHT" in file_content: + found_copyright = True + + if not found_copyright: + error_log = add_error_log( + error_log, "missing copyright or crown copyright statement", 0 + ) + return TestResult( + checker_name="Test 2.1 Check Copyright", + failure_count=0 if found_copyright else 1, + passed=found_copyright, + output="Checked for crown copyright statement.", + errors=error_log, + ) + + +""" +* TODO: Headers are an immensely important part of any code as they document what it does, and how it does it. +* TODO: The description of the MODULE and its contained SUBROUTINE may be the same and thus it need not berepeated in the latter. If a MODULE contains more than one subroutine then further descriptions are required. +* TODO: History comments should not be included in the header or routine code. FCM TRAC provides the history of our codes. +* TODO: Code author names should NOT be included explicitly within the code as they quickly become out of date and are sometimes misleading. Instead we reference a single maintainable text file which is included within the UM code repository. + +! Code Owner: Please refer to the UM file CodeOwners.txt +! This file belongs in section: + +* TODO: Example UM templates are provided with the source of this document; subroutine, function and module +templates""" + +"""3.3 Free source form +* TODO: All code should be written using the free source form. + - Could be fun, look for 7 leading spaces regularly. Or only something in col6 and/or only nos in cols 1 to 5 +* Please restrict code to 80 columns + Note that CreateBC uses a limit of 100 columns, due to the nature of + the object-orientated code. +""" + + +def r3_3_2_line_too_long(lines: List[str]) -> TestResult: + """Check for lines longer than 80 characters""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines, 1): + if len(line.rstrip()) > 80: + failures += 1 + error_log = add_error_log(error_log, "line too long", count) + + return TestResult( + checker_name="Test 3.2 Line Length", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count + 1} lines, found {failures} failures.", + errors=error_log, + ) + + +""" +* TODO: Never put more than one statement on a line. +* TODO: Write your program in UK English, unless you have a very good reason for not doing so.""" + +"""3.4 Fortran style +* To improve readability, write your code using the ALL CAPS Fortran keywords approach.""" + + +def r3_4_1_capitalised_keywords(lines: List[str]) -> TestResult: + """Check for the presence of lowercase Fortran keywords, which are + taken from an imported list 'fortran_keywords'.""" + failures = 0 + error_log = {} + count = -1 + for count, line in enumerate(lines): + # Remove quoted strings and comments + if line.lstrip(" ").startswith("!"): + continue + clean_line = remove_quoted(line) + clean_line = comment_line.sub("", clean_line) + clean_line = remove_cpp_commands(clean_line) + # Check for lowercase keywords + for word in word_splitter.findall(clean_line): + upcase = word.upper() + if upcase in fortran_keywords and word != upcase: + failures += 1 + error_log = add_error_log( + error_log, f"lowercase keyword: {word}", count + 1 + ) + + return TestResult( + checker_name="Test 4.1 Capitalised Keywords", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count + 1} lines, found {failures} failures.", + errors=error_log, + ) + + +""" +* The rest of the code may be written in either lower-case with underscores or + CamelCase. +""" + + +def r3_4_2_no_full_uppercase_variable_names(lines: List[str]) -> TestResult: + """Check for lowercase or CamelCase variable names only""" + """ +TODO: This is a very simplistic check and will not detect many + cases which break UMDP3. I suspect the Perl Predecessor concatenated + continuation lines prior to 'cleaning' and checking. Having identified + a declaration, it also then scanned the rest of the file for that + variable name in any case.""" + failures = 0 + error_log = {} + count = -1 + declaration_search = re.compile( + r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", re.IGNORECASE + ) + digit_search = re.compile(r"([+-]?\d+\.\d+|[+-]?\d+)") + array_dimensions_search = re.compile( + r"\([^)]*\)" + ) # finds array dimensions in declaration + array_assignment_search = re.compile( + r"=\s*\[[^\]]*\]\s*" + ) # finds array assignments + for count, line in enumerate(lines, 1): + clean_line = remove_quoted(line) + clean_line = remove_comments(clean_line) + # Simple check for UPPERCASE variable declarations + if declaration_search.search(clean_line): + full_line = concatenate_lines(lines, count) + clean_line = full_line.split("::", 1)[1].strip() + clean_line = array_dimensions_search.sub("", clean_line).strip() + clean_line = array_assignment_search.sub("", clean_line) + variables = [var.strip() for var in clean_line.split(",")] + for var in variables: + if digit_search.fullmatch(var): + continue # Skip if it's just a number + var = var.split(r"=", 1)[0].strip() # Remove any assignment part + if var and var.upper() == var: + failures += 1 + error_log = add_error_log( + error_log, + f"Found UPPERCASE variable name in declaration at line {count}:" + + f' "{var}"', + count, + ) + return TestResult( + checker_name="No Full Uppercase variable names", + failure_count=failures, + passed=(failures == 0), + output=f"Checked {count} lines, found {failures} UPPERCASE variables.", + errors=error_log, + ) + + +""" +* TODO: To improve readability, you should always use the optional space to separate + the Fortran keywords. + This rule also applies to OpenMP keywords. (See: 3.15) +* TODO: The full version of END should be used at all times, + eg END SUBROUTINE and END FUNCTION +* TODO: New code should be written using Fortran 95/2003 features. Avoid non-portable + vendor/compiler extensions. +* TODO: When writing a REAL literal with an integer value, put a 0 after the decimal + point (i.e. 1.0 as opposed to 1.) to improve readability. +* TODO: Avoid using obsolescent features of the Fortran language, instead make use of + F95/2003 alternatives. For example, statement functions are among the list of + deprecated features in the F95 standard and these can be replaced by FUNCTIONs + within appropriate MODULEs. +* TODO: Do not use archaic forms of intrinsic functions. For example: + LOG() should be used in place of ALOG(), + MAX() instead of AMAX1(), REAL() instead of FLOAT() etc. +* TODO: Never use the PAUSE statement. +* TODO: Never use the STOP statement, see 3.19 +* TODO: The standard delimiter for namelists is /. In particular, note that &END is + non-standard and should be avoided. For further information on namelists please + refer to 4.1 +* TODO: Only use the generic names of intrinsic functions, avoid the use of ’hardware’ + specific intrinsic functions. + Use the latter if an only if there is an optimisation benefit and then it must + be protected by a platform specific CPP flag 3.17 +3.5 Comments and white spacing : +TODO : Copy in the rules from section 3.5 and add the appropriate tests. + +3.6 The use of modules : +TODO : Copy in the rules from section 3.6 and add the appropriate tests. + +3.7 Argument and variable declaration : +TODO : Copy in the rules from section 3.7 and add the appropriate tests. + +3.8 Allocatables : +TODO : Copy in the rules from section 3.8 and add the appropriate tests. + +3.9 Code IF blocks, DO LOOPs, and other constructs : +TODO : Copy in the rules from section 3.9 and add the appropriate tests. + +3.10 Line continuation : +TODO : Copy in the rules from section 3.10 and add the appropriate tests. + +3.11 Fortran I/O : +TODO : Copy in the rules from section 3.11 and add the appropriate tests. + +3.12 Formatting and output of text : +TODO : Copy in the rules from section 3.12 and add the appropriate tests. + +3.13 PrintStatus : +TODO : Copy in the rules from section 3.13 and add the appropriate tests. + +3.14 DrHook : +TODO : Copy in the rules from section 3.14 and add the appropriate tests. + I /think/ DrHook is being (possibly has been) retired, so this section of the docs needs updating and tests for the new system need to be added. + +3.15 OpenMP : +TODO : Copy in the rules from section 3.15 and add the appropriate tests. + +3.16 MPI : +TODO : Copy in the rules from section 3.16 and add the appropriate tests. + +3.17 Preprocessing : +TODO : Copy in the rules from section 3.17 and add the appropriate tests. + +3.18 Code duplication : +TODO : Copy in the rules from section 3.18 and add the appropriate tests. + +3.19 Error reporting : +TODO : Copy in the rules from section 3.19 and add the appropriate tests. + +TODO : Section 4 has guidelines on "specific standards". + Unsure as yet if they can be tested for, but it needs looking into. +""" + +# A list, of all the tests defined above, so it can be imported easily. +list_O_tests = [ + r3_1_1_there_can_be_only_one, + r3_2_1_check_crown_copyright, + r3_3_2_line_too_long, + r3_4_1_capitalised_keywords, + r3_4_2_no_full_uppercase_variable_names, +]