From ef4bf795a64e133979e3a46f2deac99ada482deb Mon Sep 17 00:00:00 2001 From: Alexander Nesterov Date: Fri, 5 Jun 2026 19:36:26 +0200 Subject: [PATCH] Use Google Benchmark for performance tables --- .clang-format | 7 + .github/workflows/perf.yml | 4 +- .gitmodules | 3 + 3rdparty/benchmark | 1 + CMakeLists.txt | 1 + Doxyfile | 1 - cmake/benchmark.cmake | 51 +++ docs/user_guide/api.rst | 6 - modules/CMakeLists.txt | 1 + modules/performance/include/performance.hpp | 133 ------ modules/performance/tests/perf_tests.cpp | 402 ------------------ modules/task/tests/task_tests.cpp | 42 +- modules/util/include/perf_test_util.hpp | 257 ++++++++--- modules/util/include/util.hpp | 20 + modules/util/src/func_test_util.cpp | 2 +- scoreboard/main.py | 315 +++++--------- scoreboard/tests/conftest.py | 43 -- .../test_calculate_performance_metrics.py | 94 +++- .../test_load_benchmark_performance_data.py | 120 ++++++ .../tests/test_load_performance_data.py | 145 ------- scripts/create_perf_table.py | 289 ------------- scripts/generate_perf_results.bat | 4 - scripts/generate_perf_results.sh | 6 - scripts/run_tests.py | 108 +++-- tasks/CMakeLists.txt | 3 + tasks/common/runners/performance.cpp | 229 +++++++++- .../processes/t1/tests/performance/main.cpp | 2 +- .../processes/t2/tests/performance/main.cpp | 2 +- .../processes/t3/tests/performance/main.cpp | 2 +- .../threads/tests/performance/main.cpp | 2 +- 30 files changed, 923 insertions(+), 1372 deletions(-) create mode 160000 3rdparty/benchmark create mode 100644 cmake/benchmark.cmake delete mode 100644 modules/performance/include/performance.hpp delete mode 100644 modules/performance/tests/perf_tests.cpp create mode 100644 scoreboard/tests/test_load_benchmark_performance_data.py delete mode 100644 scoreboard/tests/test_load_performance_data.py delete mode 100644 scripts/create_perf_table.py delete mode 100644 scripts/generate_perf_results.bat delete mode 100755 scripts/generate_perf_results.sh diff --git a/.clang-format b/.clang-format index 98e77c747..67c2f4cda 100644 --- a/.clang-format +++ b/.clang-format @@ -6,6 +6,13 @@ UseTab: Never AllowShortFunctionsOnASingleLine: Empty IndentPPDirectives: AfterHash SortIncludes: true +IncludeCategories: + - Regex: '^ --parallel + INSTALL_COMMAND + "${CMAKE_COMMAND}" --install + "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/build" --config $ + --prefix "${CMAKE_CURRENT_BINARY_DIR}/ppc_benchmark/install" + ${PPC_EXTERNAL_PROJECT_LOG_ARGS}) + +function(ppc_include_benchmark target_name) + target_include_directories( + ${target_name} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/benchmark/include) + target_compile_definitions(${target_name} PUBLIC BENCHMARK_STATIC_DEFINE) +endfunction() + +function(ppc_link_benchmark target_name) + ppc_include_benchmark(${target_name}) + add_dependencies(${target_name} ppc_benchmark) + target_link_directories(${target_name} PUBLIC + "${CMAKE_BINARY_DIR}/ppc_benchmark/install/lib") + target_link_libraries(${target_name} PUBLIC benchmark Threads::Threads) + if(WIN32) + target_link_libraries(${target_name} PUBLIC shlwapi) + endif() +endfunction() diff --git a/docs/user_guide/api.rst b/docs/user_guide/api.rst index 178c3f401..81167abb4 100644 --- a/docs/user_guide/api.rst +++ b/docs/user_guide/api.rst @@ -21,9 +21,3 @@ Utility Module .. doxygennamespace:: ppc::util :project: ParallelProgrammingCourse - -Performance Module ------------------- - -.. doxygennamespace:: ppc::performance - :project: ParallelProgrammingCourse diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index d9b5057e3..d7e6fd76a 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -26,6 +26,7 @@ set_target_properties(${exec_func_lib} PROPERTIES LINKER_LANGUAGE CXX) target_include_directories( ${exec_func_lib} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty ${CMAKE_SOURCE_DIR}/modules ${CMAKE_SOURCE_DIR}/tasks) +ppc_include_benchmark(${exec_func_lib}) foreach( link diff --git a/modules/performance/include/performance.hpp b/modules/performance/include/performance.hpp deleted file mode 100644 index 2b5d1e9fb..000000000 --- a/modules/performance/include/performance.hpp +++ /dev/null @@ -1,133 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "task/include/task.hpp" -#include "util/include/util.hpp" - -namespace ppc::performance { - -inline double DefaultTimer() { - return -1.0; -} - -struct PerfAttr { - /// @brief Number of times the task is run for performance evaluation. - uint64_t num_running = 5; - /// @brief Timer function returning current time in seconds. - /// @cond - std::function current_timer = DefaultTimer; - /// @endcond -}; - -struct PerfResults { - /// @brief Measured execution time in seconds. - double time_sec = 0.0; - enum class TypeOfRunning : uint8_t { - kPipeline, - kTaskRun, - kNone, - }; - TypeOfRunning type_of_running = TypeOfRunning::kNone; - constexpr static double kMaxTime = 10.0; -}; - -template -class Perf { - public: - // Init performance analysis with an initialized task and initialized data - explicit Perf(const ppc::task::TaskPtr &task_ptr) : task_(task_ptr) { - task_ptr->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf; - } - // Check performance of full task's pipeline: PreProcessing() -> - // Validation() -> Run() -> PostProcessing() - void PipelineRun(const PerfAttr &perf_attr) { - perf_results_.type_of_running = PerfResults::TypeOfRunning::kPipeline; - - CommonRun(perf_attr, [&] { - task_->Validation(); - task_->PreProcessing(); - task_->Run(); - task_->PostProcessing(); - }, perf_results_); - } - // Check performance of task's Run() function - void TaskRun(const PerfAttr &perf_attr) { - perf_results_.type_of_running = PerfResults::TypeOfRunning::kTaskRun; - - task_->Validation(); - task_->PreProcessing(); - CommonRun(perf_attr, [&] { task_->Run(); }, perf_results_); - task_->PostProcessing(); - - task_->Validation(); - task_->PreProcessing(); - task_->Run(); - task_->PostProcessing(); - } - // Print results for automation checkers - void PrintPerfStatistic(const std::string &test_id) const { - std::string type_test_name; - if (perf_results_.type_of_running == PerfResults::TypeOfRunning::kTaskRun) { - type_test_name = "task_run"; - } else if (perf_results_.type_of_running == PerfResults::TypeOfRunning::kPipeline) { - type_test_name = "pipeline"; - } else { - std::stringstream err_msg; - err_msg << '\n' << "The type of performance check for the task was not selected.\n"; - throw std::runtime_error(err_msg.str().c_str()); - } - - auto time_secs = perf_results_.time_sec; - const auto max_time = ppc::util::GetPerfMaxTime(); - std::stringstream perf_res_str; - if (time_secs < max_time) { - perf_res_str << std::fixed << std::setprecision(10) << time_secs; - std::cout << test_id << ":" << type_test_name << ":" << perf_res_str.str() << '\n'; - } else { - std::stringstream err_msg; - err_msg << '\n' << "Task execute time need to be: "; - err_msg << "time < " << max_time << " secs." << '\n'; - err_msg << "Original time in secs: " << time_secs << '\n'; - perf_res_str << std::fixed << std::setprecision(10) << -1.0; - std::cout << test_id << ":" << type_test_name << ":" << perf_res_str.str() << '\n'; - throw std::runtime_error(err_msg.str().c_str()); - } - } - /// @brief Retrieves the performance test results. - /// @return The latest PerfResults structure. - [[nodiscard]] PerfResults GetPerfResults() const { - return perf_results_; - } - - private: - PerfResults perf_results_; - std::shared_ptr> task_; - static void CommonRun(const PerfAttr &perf_attr, const std::function &pipeline, PerfResults &perf_results) { - auto begin = perf_attr.current_timer(); - for (uint64_t i = 0; i < perf_attr.num_running; i++) { - pipeline(); - } - auto end = perf_attr.current_timer(); - perf_results.time_sec = (end - begin) / static_cast(perf_attr.num_running); - } -}; - -inline std::string GetStringParamName(PerfResults::TypeOfRunning type_of_running) { - if (type_of_running == PerfResults::TypeOfRunning::kTaskRun) { - return "task_run"; - } - if (type_of_running == PerfResults::TypeOfRunning::kPipeline) { - return "pipeline"; - } - return "none"; -} - -} // namespace ppc::performance diff --git a/modules/performance/tests/perf_tests.cpp b/modules/performance/tests/perf_tests.cpp deleted file mode 100644 index 18f3c6b89..000000000 --- a/modules/performance/tests/perf_tests.cpp +++ /dev/null @@ -1,402 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "performance/include/performance.hpp" -#include "task/include/task.hpp" -#include "util/include/util.hpp" - -using ppc::task::StatusOfTask; -using ppc::task::Task; -using ppc::task::TypeOfTask; - -namespace ppc::test { - -template -class TestPerfTask : public ppc::task::Task { - public: - explicit TestPerfTask(const InType &in) { - this->GetInput() = in; - } - - protected: - bool ValidationImpl() override { - return !this->GetInput().empty(); - } - - bool PreProcessingImpl() override { - this->GetOutput() = 0; - return true; - } - - bool RunImpl() override { - for (const auto &value : this->GetInput()) { - this->GetOutput() += value; - } - return true; - } - - bool PostProcessingImpl() override { - return true; - } -}; - -template -class FakePerfTask : public TestPerfTask { - public: - explicit FakePerfTask(const InType &in) : TestPerfTask(in) {} - - protected: - bool RunImpl() override { - std::this_thread::sleep_for(std::chrono::seconds(11)); - return TestPerfTask::RunImpl(); - } -}; - -} // namespace ppc::test - -namespace ppc::performance { - -TEST(PerfTests, CheckPerfPipeline) { - std::vector in(2000, 1); - - auto test_task = std::make_shared, uint32_t>>(in); - - Perf, uint32_t> perf_analyzer(test_task); - - PerfAttr perf_attr; - perf_analyzer.PipelineRun(perf_attr); - - perf_analyzer.PrintPerfStatistic("check_perf_pipeline"); - ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime); - EXPECT_EQ(test_task->GetOutput(), in.size()); -} - -TEST(PerfTests, CheckPerfPipelineFloat) { - std::vector in(2000, 1); - - auto test_task = std::make_shared, float>>(in); - - Perf, float> perf_analyzer(test_task); - - PerfAttr perf_attr; - perf_analyzer.PipelineRun(perf_attr); - - perf_analyzer.PrintPerfStatistic("check_perf_pipeline_float"); - ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime); - EXPECT_EQ(test_task->GetOutput(), in.size()); -} - -TEST(PerfTests, CheckPerfPipelineUint8tSlowTest) { - std::vector in(128, 1); - - auto test_task = std::make_shared, uint8_t>>(in); - - Perf, uint8_t> perf_analyzer(test_task); - - PerfAttr perf_attr; - perf_attr.num_running = 1; - - const auto t0 = std::chrono::high_resolution_clock::now(); - perf_attr.current_timer = [&] { - auto current_time_point = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(current_time_point - t0).count(); - return static_cast(duration) * 1e-9; - }; - perf_analyzer.PipelineRun(perf_attr); - - ASSERT_ANY_THROW(perf_analyzer.PrintPerfStatistic("check_perf_pipeline_uint8_t_slow_test")); -} - -TEST(PerfTests, SlowPerfRespectsEnvOverride) { - env::detail::set_scoped_environment_variable scoped("PPC_PERF_MAX_TIME", "12"); - std::vector in(128, 1); - auto test_task = std::make_shared, uint8_t>>(in); - Perf, uint8_t> perf_analyzer(test_task); - PerfAttr perf_attr; - perf_attr.num_running = 1; - const auto t0 = std::chrono::high_resolution_clock::now(); - perf_attr.current_timer = [&] { - auto current_time_point = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(current_time_point - t0).count(); - return static_cast(duration) * 1e-9; - }; - perf_analyzer.PipelineRun(perf_attr); - EXPECT_NO_THROW(perf_analyzer.PrintPerfStatistic("slow_perf_respects_env_override")); -} - -TEST(PerfTests, CheckPerfTaskException) { - std::vector in(2000, 1); - - auto test_task = std::make_shared, uint32_t>>(in); - - Perf, uint32_t> perf_analyzer(test_task); - - ASSERT_ANY_THROW(perf_analyzer.PrintPerfStatistic("check_perf_task_exception")); - - PerfAttr perf_attr; - perf_analyzer.TaskRun(perf_attr); -} - -TEST(PerfTests, CheckPerfTaskFloat) { - std::vector in(2000, 1); - - auto test_task = std::make_shared, float>>(in); - - Perf, float> perf_analyzer(test_task); - - PerfAttr perf_attr; - perf_analyzer.TaskRun(perf_attr); - - perf_analyzer.PrintPerfStatistic("check_perf_task_float"); - ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime); - EXPECT_EQ(test_task->GetOutput(), in.size()); -} - -struct ParamTestCase { - PerfResults::TypeOfRunning input; - std::string_view expected_output; -}; - -namespace { - -constexpr std::array kParamTestCases = { - {{.input = PerfResults::TypeOfRunning::kTaskRun, .expected_output = "task_run"}, - {.input = PerfResults::TypeOfRunning::kPipeline, .expected_output = "pipeline"}, - {.input = PerfResults::TypeOfRunning::kNone, .expected_output = "none"}}}; - -} // namespace - -TEST(GetStringParamNameParamTest, ReturnsExpectedString) { - for (const auto ¶m : kParamTestCases) { - EXPECT_EQ(GetStringParamName(param.input), std::string(param.expected_output)); - } -} - -struct TaskTypeTestCase { - TypeOfTask type; - std::string_view expected; - std::string_view label; -}; - -class GetStringTaskTypeTest : public ::testing::Test { - protected: - std::string temp_path; - - void SetUp() override { - temp_path = (std::filesystem::temp_directory_path() / "test_settings.json").string(); - auto j = ppc::util::InitJSONPtr(); - *j = {{"tasks", {{"all", "ALL"}, {"stl", "STL"}, {"omp", "OMP"}, {"mpi", "MPI"}, {"tbb", "TBB"}, {"seq", "SEQ"}}}}; - - std::ofstream(temp_path) << j->dump(); - } - - void TearDown() override { - std::filesystem::remove(temp_path); - } -}; - -namespace { - -constexpr std::array kTaskTypeTestCases = { - {{.type = TypeOfTask::kALL, .expected = "all_ALL", .label = "kALL"}, - {.type = TypeOfTask::kSTL, .expected = "stl_STL", .label = "kSTL"}, - {.type = TypeOfTask::kOMP, .expected = "omp_OMP", .label = "kOMP"}, - {.type = TypeOfTask::kMPI, .expected = "mpi_MPI", .label = "kMPI"}, - {.type = TypeOfTask::kTBB, .expected = "tbb_TBB", .label = "kTBB"}, - {.type = TypeOfTask::kSEQ, .expected = "seq_SEQ", .label = "kSEQ"}}}; - -} // namespace - -TEST_F(GetStringTaskTypeTest, ReturnsExpectedString) { - for (const auto ¶m : kTaskTypeTestCases) { - EXPECT_EQ(GetStringTaskType(param.type, temp_path), std::string(param.expected)) << "Failed on: " << param.label; - } -} - -TEST(GetStringTaskTypeStandaloneTest, ThrowsIfFileMissing) { - std::string missing_path = "non_existent_settings.json"; - EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, missing_path), std::runtime_error); -} - -TEST(GetStringTaskTypeStandaloneTest, ExceptionMessageContainsPath) { - const std::string missing_path = "non_existent_settings.json"; - EXPECT_THROW(try { GetStringTaskType(TypeOfTask::kSEQ, missing_path); } catch (const std::runtime_error &e) { - EXPECT_NE(std::string(e.what()).find(missing_path), std::string::npos); - throw; - }, - std::runtime_error); -} - -TEST(GetStringTaskTypeStandaloneTest, ReturnsUnknownForInvalidEnum) { - std::string path = (std::filesystem::temp_directory_path() / "tmp_settings.json").string(); - std::ofstream(path) << R"({"tasks":{"seq":"SEQ"}})"; - - auto result = GetStringTaskType(TypeOfTask::kUnknown, path); - EXPECT_EQ(result, "unknown"); - - std::filesystem::remove(path); -} - -TEST(GetStringTaskTypeEdgeCases, ThrowsIfFileCannotBeOpened) { - EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, "definitely_missing_file.json"), std::runtime_error); -} - -TEST(GetStringTaskTypeEdgeCases, ThrowsIfJsonIsMalformed) { - std::string path = (std::filesystem::temp_directory_path() / "bad_json.json").string(); - std::ofstream(path) << "{ this is not valid json "; - EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonParseError); - std::filesystem::remove(path); -} - -TEST(GetStringTaskTypeEdgeCases, ThrowsIfJsonValueIsNull) { - std::string path = (std::filesystem::temp_directory_path() / "null_value.json").string(); - std::ofstream(path) << R"({"tasks": { "seq": null }})"; - - EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonTypeError); - - std::filesystem::remove(path); -} - -TEST(GetStringTaskTypeEdgeCases, ReturnsUnknownIfEnumOutOfRange) { - std::string path = (std::filesystem::temp_directory_path() / "ok.json").string(); - std::ofstream(path) << R"({"tasks":{"seq":"SEQ"}})"; - auto result = GetStringTaskType(TypeOfTask::kUnknown, path); - EXPECT_EQ(result, "unknown"); - std::filesystem::remove(path); -} - -TEST(GetStringTaskStatusTest, HandlesEnabledAndDisabled) { - EXPECT_EQ(GetStringTaskStatus(StatusOfTask::kEnabled), "enabled"); - EXPECT_EQ(GetStringTaskStatus(StatusOfTask::kDisabled), "disabled"); -} - -class DummyTask : public Task { - public: - using Task::Task; - - protected: - bool ValidationImpl() override { - return true; - } - bool PreProcessingImpl() override { - return true; - } - bool RunImpl() override { - return true; - } - bool PostProcessingImpl() override { - return true; - } -}; - -TEST(TaskTest, GetDynamicTypeReturnsCorrectEnum) { - DummyTask task; - task.SetTypeOfTask(TypeOfTask::kOMP); - task.Validation(); - task.PreProcessing(); - task.Run(); - task.PostProcessing(); - EXPECT_EQ(task.GetDynamicTypeOfTask(), TypeOfTask::kOMP); -} - -TEST(TaskTest, DestructorTerminatesIfWrongOrder) { - DummyTask task; - EXPECT_THROW(task.Run(), std::runtime_error); -} - -namespace my { -namespace nested { -struct Type {}; -} // namespace nested - -class Another {}; -} // namespace my - -TEST(GetNamespaceTest, ExtractsNestedNamespaceCorrectly) { - EXPECT_EQ(ppc::util::GetNamespace(), "ppc::performance::my::nested"); -} - -TEST(GetNamespaceTest, ExtractsParentNamespaceCorrectly) { - EXPECT_EQ(ppc::util::GetNamespace(), "ppc::performance::my"); -} - -TEST(GetNamespaceTest, ReturnsEmptyStringForGlobalNamespaceType) { - EXPECT_EQ(ppc::util::GetNamespace(), ""); -} - -TEST(PerfTest, PipelineRunAndTaskRun) { - auto task_ptr = std::make_shared(); - Perf perf(task_ptr); - - PerfAttr attr; - double time = 0.0; - attr.num_running = 2; - attr.current_timer = [&time]() { - double t = time; - time += 1.0; - return t; - }; - - EXPECT_NO_THROW(perf.PipelineRun(attr)); - auto res_pipeline = perf.GetPerfResults(); - EXPECT_EQ(res_pipeline.type_of_running, PerfResults::TypeOfRunning::kPipeline); - EXPECT_GT(res_pipeline.time_sec, 0.0); - - EXPECT_NO_THROW(perf.TaskRun(attr)); - auto res_taskrun = perf.GetPerfResults(); - EXPECT_EQ(res_taskrun.type_of_running, PerfResults::TypeOfRunning::kTaskRun); - EXPECT_GT(res_taskrun.time_sec, 0.0); -} - -TEST(PerfTest, PrintPerfStatisticThrowsOnNone) { - { - auto task_ptr = std::make_shared(); - Perf perf(task_ptr); - EXPECT_THROW(perf.PrintPerfStatistic("test"), std::runtime_error); - } - EXPECT_TRUE(ppc::util::DestructorFailureFlag::Get()); - ppc::util::DestructorFailureFlag::Unset(); -} - -TEST(PerfTest, GetStringParamNameTest) { - EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kTaskRun), "task_run"); - EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kPipeline), "pipeline"); - EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kNone), "none"); -} - -TEST(TaskTest, DestructorInvalidPipelineOrderTerminatesPartialPipeline) { - { - struct BadTask : Task { - protected: - bool ValidationImpl() override { - return true; - } - bool PreProcessingImpl() override { - return true; - } - bool RunImpl() override { - return true; - } - bool PostProcessingImpl() override { - return true; - } - } task; - task.Validation(); - } - EXPECT_TRUE(ppc::util::DestructorFailureFlag::Get()); - ppc::util::DestructorFailureFlag::Unset(); -} - -} // namespace ppc::performance diff --git a/modules/task/tests/task_tests.cpp b/modules/task/tests/task_tests.cpp index 70d7c67a2..4589a1520 100644 --- a/modules/task/tests/task_tests.cpp +++ b/modules/task/tests/task_tests.cpp @@ -202,12 +202,22 @@ TEST(TaskTest, GetStringTaskTypeEachTypeWithValidFile) { << R"({"tasks": {"all": "enabled", "stl": "enabled", "omp": "enabled", "mpi": "enabled", "tbb": "enabled", "seq": "enabled"}})"; file.close(); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kALL, path)); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kSTL, path)); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kOMP, path)); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kMPI, path)); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kTBB, path)); - EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kSEQ, path)); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kALL, path), "all_enabled"); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kSTL, path), "stl_enabled"); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kOMP, path), "omp_enabled"); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kMPI, path), "mpi_enabled"); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kTBB, path), "tbb_enabled"); + EXPECT_EQ(GetStringTaskType(TypeOfTask::kSEQ, path), "seq_enabled"); +} + +TEST(TaskTest, GetStringTaskTypeExceptionMessageContainsPath) { + const std::string missing_path = "non_existent_settings.json"; + try { + GetStringTaskType(TypeOfTask::kSEQ, missing_path); + FAIL() << "Expected std::runtime_error"; + } catch (const std::runtime_error &e) { + EXPECT_NE(std::string(e.what()).find(missing_path), std::string::npos); + } } TEST(TaskTest, GetStringTaskTypeReadsNestedTaskPath) { @@ -257,6 +267,16 @@ TEST(TaskTest, GetStringTaskTypeThrowsIfKeyMissing) { EXPECT_ANY_THROW(GetStringTaskType(TypeOfTask::kSTL, path)); } +TEST(TaskTest, GetStringTaskTypeThrowsIfJsonValueIsNull) { + std::string path = "settings_null_value.json"; + ScopedFile cleaner(path); + std::ofstream file(path); + file << R"({"tasks": {"seq": null}})"; + file.close(); + + EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonTypeError); +} + TEST(TaskTest, TaskDestructorThrowsIfStageIncomplete) { { std::vector in(20, 1); @@ -368,6 +388,16 @@ class DummyTask : public Task { } }; +TEST(TaskTest, GetDynamicTypeReturnsCorrectEnum) { + DummyTask task; + task.SetTypeOfTask(TypeOfTask::kOMP); + task.Validation(); + task.PreProcessing(); + task.Run(); + task.PostProcessing(); + EXPECT_EQ(task.GetDynamicTypeOfTask(), TypeOfTask::kOMP); +} + TEST(TaskTest, ValidationThrowsIfCalledTwice) { auto task = std::make_shared(); task->Validation(); diff --git a/modules/util/include/perf_test_util.hpp b/modules/util/include/perf_test_util.hpp index 13c4e3f81..e1ec442d4 100644 --- a/modules/util/include/perf_test_util.hpp +++ b/modules/util/include/perf_test_util.hpp @@ -1,32 +1,193 @@ #pragma once #include -#include -#include +#include #include #include +#include +#include #include -#include +#include +#include +#include #include #include #include +#include #include #include #include -#include "performance/include/performance.hpp" #include "task/include/task.hpp" #include "util/include/util.hpp" namespace ppc::util { -double GetTimeMPI(); -int GetMPIRank(); +inline double DefaultTimer() { + return -1.0; +} + +struct PerfAttr { + /// @brief Number of times the task is run for performance evaluation. + uint64_t num_running = 5; + /// @brief Timer function returning current time in seconds. + /// @cond + std::function current_timer = DefaultTimer; + /// @endcond +}; + +namespace detail { + +inline bool ContainsFilterToken(std::string_view value, std::string_view filter) { + if (filter.empty()) { + return true; + } + return value.contains(filter); +} + +inline bool MatchesCategoryFilter(std::string_view task_category, std::string_view category_filter) { + if (category_filter.empty() || task_category.empty()) { + return true; + } + return category_filter.contains(task_category); +} + +inline bool ShouldRunBenchmark(std::string_view test_name, std::string_view task_category) { + const auto impl_filter = env::get("PPC_PERF_IMPL_FILTER"); + const auto category_filter = env::get("PPC_PERF_CATEGORY_FILTER"); + const auto impl_filter_value = impl_filter.has_value() ? std::string_view(impl_filter.value()) : std::string_view{}; + const auto category_filter_value = + category_filter.has_value() ? std::string_view(category_filter.value()) : std::string_view{}; + return ContainsFilterToken(test_name, impl_filter_value) && + MatchesCategoryFilter(task_category, category_filter_value); +} + +inline std::string GetPerfTaskCategory(std::string_view settings_task_path) { + if (settings_task_path.starts_with("threads")) { + return "threads"; + } + if (settings_task_path.starts_with("processes")) { + return "processes"; + } + return {}; +} + +template +void RunTaskForValidation(const ppc::task::TaskPtr &task) { + task->Validation(); + task->PreProcessing(); + task->Run(); + task->PostProcessing(); +} + +inline std::function MakeTechnologyTimer(ppc::task::TypeOfTask task_type) { + if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL) { + return [] { return GetTimeMPI(); }; + } + if (task_type == ppc::task::TypeOfTask::kOMP) { + return [] { return omp_get_wtime(); }; + } + if (task_type == ppc::task::TypeOfTask::kTBB) { + const auto t0 = tbb::tick_count::now(); + return [t0] { return (tbb::tick_count::now() - t0).seconds(); }; + } + if (task_type == ppc::task::TypeOfTask::kSEQ || task_type == ppc::task::TypeOfTask::kSTL) { + const auto t0 = std::chrono::high_resolution_clock::now(); + return [t0] { + const auto now = std::chrono::high_resolution_clock::now(); + const auto ns = std::chrono::duration_cast(now - t0).count(); + return static_cast(ns) * 1e-9; + }; + } + throw std::runtime_error("The task type is not supported for performance testing."); +} + +inline double MaxElapsedTimeAcrossMpiRanks(double elapsed, ppc::task::TypeOfTask task_type) { + if (task_type != ppc::task::TypeOfTask::kMPI && task_type != ppc::task::TypeOfTask::kALL) { + return elapsed; + } + double max_elapsed = elapsed; + MPI_Allreduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + return max_elapsed; +} + +inline void SkipBenchmarkWithError(benchmark::State &state, const char *message) noexcept { + try { + state.SkipWithError(message); + } catch (const std::exception &e) { + std::cerr << "Failed to report benchmark error: " << e.what() << '\n'; + } catch (...) { + std::cerr << "Failed to report unknown benchmark error" << '\n'; + } +} + +inline void CheckPerfTimeLimit(double elapsed) { + if (elapsed >= GetPerfMaxTime()) { + throw std::runtime_error("Task execution time exceeded the performance limit."); + } +} template -using PerfTestParam = std::tuple(InType)>, std::string, - ppc::performance::PerfResults::TypeOfRunning>; +double RunTaskForBenchmark(const ppc::task::TaskPtr &task) { + const auto task_type = task->GetDynamicTypeOfTask(); + const auto timer = MakeTechnologyTimer(task_type); + task->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf; + + task->Validation(); + task->PreProcessing(); + SynchronizeMpiRanks(); + const double begin = timer(); + task->Run(); + const double elapsed = timer() - begin; + task->PostProcessing(); + const double max_elapsed = MaxElapsedTimeAcrossMpiRanks(elapsed, task_type); + CheckPerfTimeLimit(max_elapsed); + return max_elapsed; +} + +template +void RunBenchmarkBody(const TaskGetter &task_getter, const InType &input_data, const std::string &test_env_token, + benchmark::State &state) noexcept { + try { + const auto benchmark_env_scope = ppc::util::test::ScopedPerTestEnv(test_env_token); + for (auto _ : state) { + auto task = task_getter(input_data); + const double elapsed = RunTaskForBenchmark(task); + state.SetIterationTime(elapsed); + benchmark::DoNotOptimize(task->GetOutput()); + } + } catch (const std::exception &e) { + PerformanceFailureFlag::Set(); + SkipBenchmarkWithError(state, e.what()); + } catch (...) { + PerformanceFailureFlag::Set(); + SkipBenchmarkWithError(state, "Unknown exception in performance benchmark"); + } +} + +template +class BenchmarkTaskBody final { + public: + BenchmarkTaskBody(TaskGetter task_getter, InType input_data, std::string test_env_token) + : task_getter_(std::move(task_getter)), + input_data_(std::move(input_data)), + test_env_token_(std::move(test_env_token)) {} + + void operator()(benchmark::State &state) const noexcept { + RunBenchmarkBody(task_getter_, input_data_, test_env_token_, state); + } + + private: + TaskGetter task_getter_; + InType input_data_; + std::string test_env_token_; +}; + +} // namespace detail + +template +using PerfTestParam = std::tuple(InType)>, std::string, std::string>; template /// @brief Base class for performance testing of parallel tasks. @@ -36,9 +197,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam> &info) { - return ppc::performance::GetStringParamName( - std::get(GTestParamIndex::kTestParams)>(info.param)) + - "_" + std::get(GTestParamIndex::kNameTest)>(info.param); + return std::get(GTestParamIndex::kNameTest)>(info.param); } protected: @@ -46,69 +205,51 @@ class BaseRunPerfTests : public ::testing::TestWithParamGetDynamicTypeOfTask() == ppc::task::TypeOfTask::kMPI || - task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kALL) { - const double t0 = GetTimeMPI(); - perf_attrs.current_timer = [t0] { return GetTimeMPI() - t0; }; - } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kOMP) { - const double t0 = omp_get_wtime(); - perf_attrs.current_timer = [t0] { return omp_get_wtime() - t0; }; - } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSEQ || - task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSTL) { - const auto t0 = std::chrono::high_resolution_clock::now(); - perf_attrs.current_timer = [t0] { - auto now = std::chrono::high_resolution_clock::now(); - auto ns = std::chrono::duration_cast(now - t0).count(); - return static_cast(ns) * 1e-9; - }; - } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kTBB) { - const auto t0 = tbb::tick_count::now(); - perf_attrs.current_timer = [t0] { return (tbb::tick_count::now() - t0).seconds(); }; - } else { - throw std::runtime_error("The task type is not supported for performance testing."); - } + virtual void SetPerfAttributes(PerfAttr &perf_attrs) { + perf_attrs.current_timer = detail::MakeTechnologyTimer(task_->GetDynamicTypeOfTask()); } void ExecuteTest(const PerfTestParam &perf_test_param) { auto task_getter = std::get(GTestParamIndex::kTaskGetter)>(perf_test_param); auto test_name = std::get(GTestParamIndex::kNameTest)>(perf_test_param); - auto mode = std::get(GTestParamIndex::kTestParams)>(perf_test_param); + auto task_category = std::get(GTestParamIndex::kTestParams)>(perf_test_param); ASSERT_FALSE(test_name.find("unknown") != std::string::npos); if (test_name.find("disabled") != std::string::npos) { - // A single perf test body may execute several implementations; do not abort the enabled ones. + return; + } + if (!detail::ShouldRunBenchmark(test_name, task_category)) { return; } - const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest(test_name); + const auto test_env_token = ppc::util::test::MakeCurrentGTestToken(test_name); + const auto test_env_scope = ppc::util::test::ScopedPerTestEnv(test_env_token); - task_ = task_getter(GetTestInputData()); - ppc::performance::Perf perf(task_); - ppc::performance::PerfAttr perf_attr; + const auto input_data = GetTestInputData(); + task_ = task_getter(input_data); + task_->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf; SynchronizeMpiRanks(); - SetPerfAttributes(perf_attr); - - if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) { - perf.PipelineRun(perf_attr); - } else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) { - perf.TaskRun(perf_attr); - } else { - std::stringstream err_msg; - err_msg << '\n' << "The type of performance check for the task was not selected.\n"; - throw std::runtime_error(err_msg.str().c_str()); - } - - if (GetMPIRank() == 0) { - perf.PrintPerfStatistic(test_name); - } + detail::RunTaskForValidation(task_); OutType output_data = task_->GetOutput(); ASSERT_TRUE(CheckTestOutputData(output_data)); + + PerfAttr perf_attr; + SetPerfAttributes(perf_attr); + const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running; + + using BenchmarkInputType = std::decay_t; + auto benchmark_body = + detail::BenchmarkTaskBody(task_getter, input_data, test_env_token); + + benchmark::RegisterBenchmark(test_name, std::move(benchmark_body)) + ->UseManualTime() + ->Unit(benchmark::kSecond) + ->Iterations(static_cast(num_iterations)); } private: - ppc::task::TaskPtr task_; + ppc::task::TaskPtr task_{}; }; template @@ -117,9 +258,7 @@ auto MakePerfTaskTuples(const std::string &settings_path, std::string_view setti ppc::task::GetStringTaskType(TaskType::GetStaticTypeOfTask(), settings_path, settings_task_path); return std::make_tuple(std::make_tuple(ppc::task::TaskGetter, name, - ppc::performance::PerfResults::TypeOfRunning::kPipeline), - std::make_tuple(ppc::task::TaskGetter, name, - ppc::performance::PerfResults::TypeOfRunning::kTaskRun)); + detail::GetPerfTaskCategory(settings_task_path))); } template @@ -129,7 +268,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence /*unused template auto TupleToGTestValues(Tuple &&tup) { - constexpr size_t kSize = std::tuple_size_v>; + constexpr std::size_t kSize{std::tuple_size_v>}; return TupleToGTestValuesImpl(std::forward(tup), std::make_index_sequence{}); } diff --git a/modules/util/include/util.hpp b/modules/util/include/util.hpp index 70a64e847..912344551 100644 --- a/modules/util/include/util.hpp +++ b/modules/util/include/util.hpp @@ -64,6 +64,24 @@ class DestructorFailureFlag { inline static std::atomic failure_flag{false}; }; +class PerformanceFailureFlag { + public: + static void Set() { + failure_flag.store(true); + } + + static void Unset() { + failure_flag.store(false); + } + + static bool Get() { + return failure_flag.load(); + } + + private: + inline static std::atomic failure_flag{false}; +}; + enum class GTestParamIndex : uint8_t { kTaskGetter, kNameTest, @@ -75,6 +93,8 @@ int GetNumThreads(); int GetNumProc(); double GetTaskMaxTime(); double GetPerfMaxTime(); +double GetTimeMPI(); +int GetMPIRank(); void SynchronizeMpiRanks(); template diff --git a/modules/util/src/func_test_util.cpp b/modules/util/src/func_test_util.cpp index a5dfe0811..c901919f5 100644 --- a/modules/util/src/func_test_util.cpp +++ b/modules/util/src/func_test_util.cpp @@ -1,6 +1,6 @@ #include -#include "util/include/perf_test_util.hpp" +#include "util/include/util.hpp" double ppc::util::GetTimeMPI() { return MPI_Wtime(); diff --git a/scoreboard/main.py b/scoreboard/main.py index 0b479a0da..997799876 100644 --- a/scoreboard/main.py +++ b/scoreboard/main.py @@ -1,7 +1,7 @@ import argparse -import csv import json import logging +import re import shutil import subprocess import sys @@ -20,6 +20,8 @@ # Threads table order: seq first, then omp, tbb, stl, all task_types_threads = ["seq", "omp", "tbb", "stl", "all"] task_types_processes = ["mpi", "seq"] +PERF_STAT_PRIORITY = {"median": 0, "mean": 1, "": 2} +PERF_TIME_UNIT_TO_SECONDS = {"s": 1.0, "ms": 1e-3, "us": 1e-6, "ns": 1e-9} script_dir = Path(__file__).parent tasks_dir = script_dir.parent / "tasks" @@ -208,143 +210,108 @@ def discover_tasks(tasks_dir, task_types): directories, task_category_map = discover_tasks(tasks_dir, task_types) -def load_performance_data_threads(perf_stat_file_path: Path) -> dict: - """Load threads performance ratios (T_x/T_seq) from CSV. - Expected header: Task, SEQ, OMP, TBB, STL, ALL - """ - perf_stats: dict[str, dict] = {} - if perf_stat_file_path.exists(): - with open(perf_stat_file_path, "r", newline="") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - task_name = row.get("Task") - if not task_name: - continue - perf_stats[task_name] = { - "seq": row.get("SEQ", "?"), - "omp": row.get("OMP", "?"), - "tbb": row.get("TBB", "?"), - "stl": row.get("STL", "?"), - "all": row.get("ALL", "?"), - } - else: - logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path) - return perf_stats +def parse_benchmark_name(name: str) -> tuple[str, str, str] | None: + """Parse __enabled Google Benchmark names.""" + base_name = name.split("/", maxsplit=1)[0] + match = re.match( + r"(.+?)_(all|mpi|omp|seq|stl|tbb)_enabled(?:_(mean|median))?$", base_name + ) + if match is None: + return None + return match.group(1), match.group(2), match.group(3) or "" -def load_performance_data(perf_stat_file_path: Path) -> dict: - """Compatibility helper for legacy tests: load perf data with optional MPI column. +def _benchmark_time_to_seconds(value: float, unit: str) -> float: + return float(value) * PERF_TIME_UNIT_TO_SECONDS.get(unit, 1e-9) - Always returns a mapping: task -> {seq, omp, stl, tbb, all, mpi} - Missing columns are filled with ``"N/A"``; empty cells stay empty strings. - """ - perf_stats: dict[str, dict] = {} - if not perf_stat_file_path.exists(): - return perf_stats - - with open(perf_stat_file_path, "r", newline="") as csvfile: - reader = csv.DictReader(csvfile) - # Normalize column names we care about - for row in reader: - task_name = row.get("Task") - if not task_name: - continue - def _get(col: str) -> str: - if col in row: - return row.get(col, "N/A") - return "N/A" - - perf_stats[task_name] = { - "seq": _get("SEQ"), - "omp": _get("OMP"), - "stl": _get("STL"), - "tbb": _get("TBB"), - "all": _get("ALL"), - "mpi": _get("MPI"), - } - return perf_stats +def _perf_record_priority(record: dict) -> int: + return PERF_STAT_PRIORITY.get(str(record.get("statistic", "")), 3) -def load_performance_data_processes(perf_stat_file_path: Path) -> dict: - """Load processes performance data (raw times, seconds) and merge *_seq/_mpi rows. +def load_benchmark_performance_data(benchmarks_dir: Path) -> dict[str, dict]: + """Load Google Benchmark JSON files written by ppc_perf_tests. - Expected header: Task, SEQ, MPI with absolute times. If the CSV contains - split rows like _seq and _mpi, they are combined into one entry. + Returns raw benchmark times in seconds: + benchmark task name -> implementation -> seconds """ - perf_stats: dict[str, dict] = {} - if not perf_stat_file_path.exists(): - logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path) - return perf_stats - - with open(perf_stat_file_path, "r", newline="") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - task_name = row.get("Task") - if not task_name: + if not benchmarks_dir.exists(): + logger.warning("Benchmark JSON directory not found at %s", benchmarks_dir) + return {} + + selected: dict[tuple[str, str], dict] = {} + for json_path in sorted(benchmarks_dir.glob("*.json")): + try: + with open(json_path, "r", encoding="utf-8") as file: + payload = json.load(file) + except (OSError, json.JSONDecodeError) as e: + logger.warning("Failed to parse benchmark JSON %s: %s", json_path, e) + continue + + for entry in payload.get("benchmarks", []): + parsed_name = parse_benchmark_name(str(entry.get("name", ""))) + if parsed_name is None: continue - seq_val = row.get("SEQ", "?") - mpi_val = row.get("MPI", "?") - - base_name = task_name - mode = None - for suff, lbl in (("_seq", "seq"), ("_mpi", "mpi")): - if task_name.endswith(suff): - base_name = task_name[: -len(suff)] - mode = lbl - break - - entry = perf_stats.setdefault(base_name, {"seq": "?", "mpi": "?"}) - if mode == "seq": - if seq_val and seq_val != "?": - entry["seq"] = seq_val - elif mode == "mpi": - if mpi_val and mpi_val != "?": - entry["mpi"] = mpi_val - else: - if seq_val and seq_val != "?": - entry["seq"] = seq_val - if mpi_val and mpi_val != "?": - entry["mpi"] = mpi_val + task_name, implementation, statistic = parsed_name + try: + seconds = _benchmark_time_to_seconds( + float(entry["real_time"]), str(entry.get("time_unit", "ns")) + ) + except (KeyError, TypeError, ValueError): + continue + record = { + "task": task_name, + "implementation": implementation, + "seconds": seconds, + "statistic": statistic or str(entry.get("aggregate_name", "")), + } + key = (task_name, implementation) + previous = selected.get(key) + if previous is None or _perf_record_priority( + record + ) < _perf_record_priority(previous): + selected[key] = record + perf_stats: dict[str, dict] = {} + for record in selected.values(): + perf_stats.setdefault(record["task"], {})[record["implementation"]] = ( + f"{record['seconds']:.10g}" + ) return perf_stats def calculate_performance_metrics(perf_val, eff_num_proc, task_type, seq_val=None): - """Calculate acceleration and efficiency. - - For processes table we pass raw times; for threads legacy ratios we keep old behavior. - """ + """Calculate acceleration and efficiency from raw times in seconds.""" acceleration = "?" efficiency = "?" try: if seq_val is None: perf_float = float(perf_val) - if perf_float > 0 and not ( - perf_float == float("inf") or perf_float != perf_float - ): - speedup = 1.0 / perf_float - if task_type == "seq": - acceleration = "1.00" - efficiency = "N/A" - else: - acceleration = f"{speedup:.2f}" - efficiency = f"{speedup / eff_num_proc * 100:.2f}%" + if task_type == "seq" and perf_float > 0: + return "1.00", "N/A" + return acceleration, efficiency + + seq_t = float(seq_val) + par_t = float(perf_val) + if ( + seq_t <= 0 + or par_t <= 0 + or seq_t == float("inf") + or par_t == float("inf") + or seq_t != seq_t + or par_t != par_t + ): + return acceleration, efficiency + if min(seq_t, par_t) < 0.001: + tiny_mark = "t <
1e-3" + return tiny_mark, tiny_mark + speedup = seq_t / par_t + if task_type == "seq": + acceleration = "1.00" + efficiency = "N/A" else: - seq_t = float(seq_val) - par_t = float(perf_val) - # If times are too small, metrics are unstable -> mark N/A - if min(seq_t, par_t) < 0.001: - tiny_mark = "t <
1e-3" - return tiny_mark, tiny_mark - if seq_t > 0 and par_t > 0: - speedup = seq_t / par_t - if task_type == "seq": - acceleration = "1.00" - efficiency = "N/A" - else: - acceleration = f"{speedup:.2f}" - efficiency = f"{speedup / eff_num_proc * 100:.2f}%" + acceleration = f"{speedup:.2f}" + efficiency = f"{speedup / eff_num_proc * 100:.2f}%" except (ValueError, TypeError): pass return acceleration, efficiency @@ -646,9 +613,11 @@ def _load_student_fields(dir_name: str): perf_val = perf_stats.get(dir, {}).get(task_type, "?") - # Calculate acceleration and efficiency if performance data is available + seq_val = None + if isinstance(perf_stats.get(dir, {}), dict): + seq_val = perf_stats.get(dir, {}).get("seq") acceleration, efficiency = calculate_performance_metrics( - perf_val, eff_num_proc, task_type + perf_val, eff_num_proc, task_type, seq_val=seq_val ) # Calculate deadline penalty points @@ -776,80 +745,12 @@ def _process_deadline_labels(task_numbers: list[int]) -> list[str]: labels.append(label) return labels if any(labels) else [] - # Locate perf CSVs from CI or local runs (threads and processes) - candidates_threads = [ - script_dir.parent - / "build" - / "perf_stat_dir" - / "threads_task_run_perf_table.csv", - script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv", - # Fallback to old single-file name - script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv", - script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv", + benchmark_dirs = [ + script_dir.parent / "build" / "perf_stat_dir" / "benchmarks", + script_dir.parent / "perf_stat_dir" / "benchmarks", ] - threads_csv = next( - (p for p in candidates_threads if p.exists()), candidates_threads[0] - ) - - candidates_processes = [ - script_dir.parent - / "build" - / "perf_stat_dir" - / "processes_task_run_perf_table.csv", - script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv", - ] - processes_csv = next( - (p for p in candidates_processes if p.exists()), candidates_processes[0] - ) - - # Read and merge performance statistics CSVs (keys = CSV Task column) - perf_stats_threads = load_performance_data_threads(threads_csv) - perf_stats_processes = load_performance_data_processes(processes_csv) - - def _aggregate_process_csv( - perf_stat_file_path: Path, base: dict[str, dict] - ) -> dict: - """Parse CSV again to ensure merged seq/mpi entries.""" - import csv - - perf_stats_local = dict(base) - if not perf_stat_file_path.exists(): - return perf_stats_local - with open(perf_stat_file_path, "r", newline="") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - task_name = row.get("Task") - if not task_name: - continue - seq_val = row.get("SEQ", "?") - mpi_val = row.get("MPI", "?") - base_name = task_name - mode = None - for suff, lbl in (("_seq", "seq"), ("_mpi", "mpi")): - if task_name.endswith(suff): - base_name = task_name[: -len(suff)] - mode = lbl - break - entry = perf_stats_local.setdefault(base_name, {"seq": "?", "mpi": "?"}) - if mode == "seq": - if seq_val and seq_val != "?": - entry["seq"] = seq_val - elif mode == "mpi": - if mpi_val and mpi_val != "?": - entry["mpi"] = mpi_val - else: - if seq_val and seq_val != "?": - entry["seq"] = seq_val - if mpi_val and mpi_val != "?": - entry["mpi"] = mpi_val - return perf_stats_local - - perf_stats_processes = _aggregate_process_csv(processes_csv, perf_stats_processes) - - perf_stats_raw: dict[str, dict] = {} - perf_stats_raw.update(perf_stats_threads) - for k, v in perf_stats_processes.items(): - perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v} + benchmarks_dir = next((p for p in benchmark_dirs if p.exists()), benchmark_dirs[0]) + perf_stats_raw = load_benchmark_performance_data(benchmarks_dir) # Partition tasks by category derived from the filesystem layout. threads_task_dirs = [ @@ -867,11 +768,8 @@ def _aggregate_process_csv( elif "processes" in name: processes_task_dirs.append(name) - # Resolve performance stats keys (from CSV Task names) to actual task directories. - # Old logic grouped by "family", which made all tasks share the same numbers. - # New logic: map each CSV key to the best-matching directory name by substring. + # Resolve benchmark task names to actual task directories. perf_stats: dict[str, dict] = {} - import re as _re dir_names_sorted = sorted(directories.keys(), key=len, reverse=True) @@ -886,35 +784,18 @@ def _merge_perf_maps(existing: dict, updates: dict) -> dict: merged[k] = v return merged - # Precompute mapping: process task number -> list of directories. Meta-layout - # tasks derive this from processes/t1, processes/t2, etc. - process_tasknum_map: dict[int, list[str]] = {} - for d, num in process_task_indices.items(): - process_tasknum_map.setdefault(num, []).append(d) - - def _match_dir(csv_key: str) -> str | None: - # Strip common suffixes like "_mpi_enabled" etc. to improve matching - base = _re.sub(r"_(mpi|omp|tbb|stl|all|seq)_enabled.*", "", csv_key) + def _match_dir(benchmark_key: str) -> str | None: + base = re.sub(r"_(mpi|omp|tbb|stl|all|seq)_enabled.*", "", benchmark_key) for d in dir_names_sorted: - if base.startswith(d) or d in base or csv_key.startswith(d): + if base.startswith(d) or d in base or benchmark_key.startswith(d): return d return None for key, vals in perf_stats_raw.items(): targets: set[str] = set() - # 1) Direct / substring match target = _match_dir(key) if target: targets.add(target) - # 2) If a legacy key encodes processes_N, spread to dirs with that task number - m_num = _re.search(r"processes_(\d+)", key) - if m_num: - try: - num = int(m_num.group(1)) - targets.update(process_tasknum_map.get(num, [])) - except Exception: - pass - # Apply merged values to all targets for t in targets: perf_stats[t] = _merge_perf_maps(perf_stats.get(t, {}), vals) diff --git a/scoreboard/tests/conftest.py b/scoreboard/tests/conftest.py index 08be73a62..960bbe8cf 100644 --- a/scoreboard/tests/conftest.py +++ b/scoreboard/tests/conftest.py @@ -2,7 +2,6 @@ Pytest configuration and shared fixtures for scoreboard tests. """ -import csv import shutil import tempfile from pathlib import Path @@ -84,48 +83,6 @@ def sample_task_structure(temp_dir): return tasks_dir -@pytest.fixture -def sample_performance_csv(temp_dir): - """Create a sample performance CSV file.""" - csv_file = temp_dir / "performance.csv" - - data = [ - { - "Task": "example_task", - "SEQ": "1.0", - "OMP": "0.5", - "STL": "0.3", - "TBB": "0.4", - "ALL": "0.2", - }, - { - "Task": "disabled_task", - "SEQ": "2.0", - "OMP": "1.0", - "STL": "0.8", - "TBB": "0.9", - "ALL": "0.7", - }, - { - "Task": "partial_task", - "SEQ": "1.5", - "OMP": "N/A", - "STL": "N/A", - "TBB": "N/A", - "ALL": "N/A", - }, - ] - - with open(csv_file, "w", newline="") as f: - writer = csv.DictWriter( - f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"] - ) - writer.writeheader() - writer.writerows(data) - - return csv_file - - @pytest.fixture def sample_config_files(temp_dir, sample_config, sample_plagiarism_config): """Create sample configuration files.""" diff --git a/scoreboard/tests/test_calculate_performance_metrics.py b/scoreboard/tests/test_calculate_performance_metrics.py index 4ed144b4e..e14ff64f2 100644 --- a/scoreboard/tests/test_calculate_performance_metrics.py +++ b/scoreboard/tests/test_calculate_performance_metrics.py @@ -3,93 +3,145 @@ class TestCalculatePerformanceMetrics: def test_calculate_performance_metrics_valid_values(self): - acceleration, efficiency = calculate_performance_metrics("0.5", 4) + acceleration, efficiency = calculate_performance_metrics( + "0.5", 4, "omp", seq_val="1.0" + ) assert acceleration == "2.00" assert efficiency == "50.00%" - acceleration, efficiency = calculate_performance_metrics("0.25", 4) + acceleration, efficiency = calculate_performance_metrics( + "0.25", 4, "tbb", seq_val="1.0" + ) assert acceleration == "4.00" assert efficiency == "100.00%" - acceleration, efficiency = calculate_performance_metrics("0.5", 2) + acceleration, efficiency = calculate_performance_metrics( + "0.5", 2, "stl", seq_val="1.0" + ) assert acceleration == "2.00" assert efficiency == "100.00%" def test_calculate_performance_metrics_edge_cases(self): - acceleration, efficiency = calculate_performance_metrics("0.1", 4) + acceleration, efficiency = calculate_performance_metrics( + "0.1", 4, "omp", seq_val="1.0" + ) assert acceleration == "10.00" assert efficiency == "250.00%" - acceleration, efficiency = calculate_performance_metrics("1.0", 4) + acceleration, efficiency = calculate_performance_metrics( + "1.0", 4, "omp", seq_val="1.0" + ) assert acceleration == "1.00" assert efficiency == "25.00%" - acceleration, efficiency = calculate_performance_metrics("2.0", 4) + acceleration, efficiency = calculate_performance_metrics( + "2.0", 4, "omp", seq_val="1.0" + ) assert acceleration == "0.50" assert efficiency == "12.50%" def test_calculate_performance_metrics_invalid_values(self): - acceleration, efficiency = calculate_performance_metrics("0.0", 4) + acceleration, efficiency = calculate_performance_metrics( + "0.0", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("-1.0", 4) + acceleration, efficiency = calculate_performance_metrics( + "-1.0", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("invalid", 4) + acceleration, efficiency = calculate_performance_metrics( + "invalid", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("", 4) + acceleration, efficiency = calculate_performance_metrics( + "", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("inf", 4) + acceleration, efficiency = calculate_performance_metrics( + "inf", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("nan", 4) + acceleration, efficiency = calculate_performance_metrics( + "nan", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" def test_calculate_performance_metrics_special_strings(self): - acceleration, efficiency = calculate_performance_metrics("?", 4) + acceleration, efficiency = calculate_performance_metrics( + "?", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics("N/A", 4) + acceleration, efficiency = calculate_performance_metrics( + "N/A", 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" - acceleration, efficiency = calculate_performance_metrics(None, 4) + acceleration, efficiency = calculate_performance_metrics( + None, 4, "omp", seq_val="1.0" + ) assert acceleration == "?" assert efficiency == "?" def test_calculate_performance_metrics_different_proc_counts(self): perf_val = "0.25" - acceleration, efficiency = calculate_performance_metrics(perf_val, 1) + acceleration, efficiency = calculate_performance_metrics( + perf_val, 1, "omp", seq_val="1.0" + ) assert acceleration == "4.00" assert efficiency == "400.00%" - acceleration, efficiency = calculate_performance_metrics(perf_val, 2) + acceleration, efficiency = calculate_performance_metrics( + perf_val, 2, "omp", seq_val="1.0" + ) assert acceleration == "4.00" assert efficiency == "200.00%" - acceleration, efficiency = calculate_performance_metrics(perf_val, 8) + acceleration, efficiency = calculate_performance_metrics( + perf_val, 8, "omp", seq_val="1.0" + ) assert acceleration == "4.00" assert efficiency == "50.00%" - acceleration, efficiency = calculate_performance_metrics(perf_val, 16) + acceleration, efficiency = calculate_performance_metrics( + perf_val, 16, "omp", seq_val="1.0" + ) assert acceleration == "4.00" assert efficiency == "25.00%" def test_calculate_performance_metrics_precision(self): - acceleration, efficiency = calculate_performance_metrics("0.3", 3) + acceleration, efficiency = calculate_performance_metrics( + "0.3", 3, "omp", seq_val="1.0" + ) assert acceleration == "3.33" assert efficiency == "111.11%" - acceleration, efficiency = calculate_performance_metrics("0.7", 6) + acceleration, efficiency = calculate_performance_metrics( + "0.7", 6, "omp", seq_val="1.0" + ) assert acceleration == "1.43" assert efficiency == "23.81%" + + def test_calculate_performance_metrics_requires_seq_baseline_for_parallel(self): + acceleration, efficiency = calculate_performance_metrics("0.5", 4, "omp") + assert acceleration == "?" + assert efficiency == "?" + + def test_calculate_performance_metrics_seq_without_baseline(self): + acceleration, efficiency = calculate_performance_metrics("1.0", 4, "seq") + assert acceleration == "1.00" + assert efficiency == "N/A" diff --git a/scoreboard/tests/test_load_benchmark_performance_data.py b/scoreboard/tests/test_load_benchmark_performance_data.py new file mode 100644 index 000000000..4bc66ca38 --- /dev/null +++ b/scoreboard/tests/test_load_benchmark_performance_data.py @@ -0,0 +1,120 @@ +""" +Tests for loading Google Benchmark performance JSON files. +""" + +import json + +from main import load_benchmark_performance_data, parse_benchmark_name + + +class TestLoadBenchmarkPerformanceData: + """Test cases for Google Benchmark performance data loading.""" + + def test_parse_threads_benchmark_name(self): + assert parse_benchmark_name( + "example_threads_omp_enabled/iterations:5/manual_time" + ) == ( + "example_threads", + "omp", + "", + ) + + def test_parse_processes_benchmark_name(self): + assert parse_benchmark_name("example_processes_t2_mpi_enabled") == ( + "example_processes_t2", + "mpi", + "", + ) + + def test_load_benchmark_json_in_seconds(self, temp_dir): + benchmarks_dir = temp_dir / "benchmarks" + benchmarks_dir.mkdir() + (benchmarks_dir / "threads.json").write_text( + json.dumps( + { + "benchmarks": [ + { + "name": "example_threads_seq_enabled", + "real_time": 1.5, + "time_unit": "s", + }, + { + "name": "example_threads_omp_enabled", + "real_time": 0.75, + "time_unit": "s", + }, + { + "name": "example_threads_omp_disabled", + "real_time": 0.8, + "time_unit": "s", + }, + ] + } + ), + encoding="utf-8", + ) + + result = load_benchmark_performance_data(benchmarks_dir) + + assert result["example_threads"]["seq"] == "1.5" + assert result["example_threads"]["omp"] == "0.75" + + def test_load_benchmark_json_converts_units_to_seconds(self, temp_dir): + benchmarks_dir = temp_dir / "benchmarks" + benchmarks_dir.mkdir() + (benchmarks_dir / "processes.json").write_text( + json.dumps( + { + "benchmarks": [ + { + "name": "example_processes_t1_seq_enabled", + "real_time": 250, + "time_unit": "ms", + }, + { + "name": "example_processes_t1_mpi_enabled", + "real_time": 100000, + "time_unit": "us", + }, + ] + } + ), + encoding="utf-8", + ) + + result = load_benchmark_performance_data(benchmarks_dir) + + assert result["example_processes_t1"]["seq"] == "0.25" + assert result["example_processes_t1"]["mpi"] == "0.1" + + def test_load_benchmark_json_prefers_median_statistic(self, temp_dir): + benchmarks_dir = temp_dir / "benchmarks" + benchmarks_dir.mkdir() + (benchmarks_dir / "threads.json").write_text( + json.dumps( + { + "benchmarks": [ + { + "name": "example_threads_tbb_enabled", + "real_time": 0.4, + "time_unit": "s", + }, + { + "name": "example_threads_tbb_enabled_mean", + "real_time": 0.3, + "time_unit": "s", + }, + { + "name": "example_threads_tbb_enabled_median", + "real_time": 0.2, + "time_unit": "s", + }, + ] + } + ), + encoding="utf-8", + ) + + result = load_benchmark_performance_data(benchmarks_dir) + + assert result["example_threads"]["tbb"] == "0.2" diff --git a/scoreboard/tests/test_load_performance_data.py b/scoreboard/tests/test_load_performance_data.py deleted file mode 100644 index b32ba4abd..000000000 --- a/scoreboard/tests/test_load_performance_data.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Tests for the load_performance_data function. -""" - -import csv - -from main import load_performance_data - - -class TestLoadPerformanceData: - """Test cases for load_performance_data function.""" - - def test_load_performance_data_valid_csv(self, sample_performance_csv): - """Test loading performance data from a valid CSV file.""" - result = load_performance_data(sample_performance_csv) - - # Check structure - assert isinstance(result, dict) - assert len(result) == 3 - - # Check example_task data - assert "example_task" in result - example_data = result["example_task"] - assert example_data["seq"] == "1.0" - assert example_data["omp"] == "0.5" - assert example_data["stl"] == "0.3" - assert example_data["tbb"] == "0.4" - assert example_data["all"] == "0.2" - assert example_data["mpi"] == "N/A" - - # Check disabled_task data - assert "disabled_task" in result - disabled_data = result["disabled_task"] - assert disabled_data["seq"] == "2.0" - assert disabled_data["omp"] == "1.0" - - # Check partial_task data - assert "partial_task" in result - partial_data = result["partial_task"] - assert partial_data["seq"] == "1.5" - assert partial_data["omp"] == "N/A" - assert partial_data["mpi"] == "N/A" - - def test_load_performance_data_nonexistent_file(self, temp_dir): - """Test loading performance data when file doesn't exist.""" - nonexistent_file = temp_dir / "nonexistent.csv" - - result = load_performance_data(nonexistent_file) - - assert result == {} - - def test_load_performance_data_empty_csv(self, temp_dir): - """Test loading performance data from an empty CSV file.""" - empty_csv = temp_dir / "empty.csv" - empty_csv.touch() - - result = load_performance_data(empty_csv) - - assert result == {} - - def test_load_performance_data_header_only_csv(self, temp_dir): - """Test loading performance data from CSV with only headers.""" - header_only_csv = temp_dir / "header_only.csv" - - with open(header_only_csv, "w", newline="") as f: - writer = csv.DictWriter( - f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"] - ) - writer.writeheader() - - result = load_performance_data(header_only_csv) - - assert result == {} - - def test_load_performance_data_malformed_csv(self, temp_dir): - """Test loading performance data from malformed CSV.""" - malformed_csv = temp_dir / "malformed.csv" - - with open(malformed_csv, "w") as f: - f.write("Task,SEQ,OMP\n") - f.write("test_task,1.0\n") # Missing OMP value - f.write("another_task,invalid,0.5\n") # Invalid SEQ value - - # Should not crash, but may have incomplete data - result = load_performance_data(malformed_csv) - - # Function should handle this gracefully - assert isinstance(result, dict) - - def test_load_performance_data_missing_columns(self, temp_dir): - """Test loading performance data when some columns are missing.""" - partial_csv = temp_dir / "partial.csv" - - data = [ - {"Task": "test_task", "SEQ": "1.0", "OMP": "0.5"} - # Missing STL, TBB, ALL columns - ] - - with open(partial_csv, "w", newline="") as f: - writer = csv.DictWriter(f, fieldnames=["Task", "SEQ", "OMP"]) - writer.writeheader() - writer.writerows(data) - - # Should handle missing columns gracefully - result = load_performance_data(partial_csv) - - assert "test_task" in result - # Missing columns should be handled (likely as empty strings or errors) - task_data = result["test_task"] - assert task_data["seq"] == "1.0" - assert task_data["omp"] == "0.5" - assert task_data["mpi"] == "N/A" # This should always be set - - def test_load_performance_data_special_values(self, temp_dir): - """Test loading performance data with special values.""" - special_csv = temp_dir / "special.csv" - - data = [ - { - "Task": "special_task", - "SEQ": "0.0", - "OMP": "inf", - "STL": "-1", - "TBB": "", - "ALL": "N/A", - } - ] - - with open(special_csv, "w", newline="") as f: - writer = csv.DictWriter( - f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"] - ) - writer.writeheader() - writer.writerows(data) - - result = load_performance_data(special_csv) - - assert "special_task" in result - task_data = result["special_task"] - assert task_data["seq"] == "0.0" - assert task_data["omp"] == "inf" - assert task_data["stl"] == "-1" - assert task_data["tbb"] == "" - assert task_data["all"] == "N/A" - assert task_data["mpi"] == "N/A" diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py deleted file mode 100644 index 14b6d3105..000000000 --- a/scripts/create_perf_table.py +++ /dev/null @@ -1,289 +0,0 @@ -import argparse -import csv -import os -import re - -import xlsxwriter - -# ------------------------------- -# Helpers and configuration -# ------------------------------- - -# Known task types (used to pre-initialize tables) -list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"] - -# Compile patterns once -OLD_PATTERN = re.compile(r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)") -NEW_PATTERN = re.compile( - r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)" -) -# Example formats: -# _threads_omp_enabled:task_run:0.4749 -# _processes_t2_mpi_enabled:pipeline:0.0507 -# Accept optional suffix after `_enabled` (e.g., `_enabled_size1000000`) before the colon -SIMPLE_PATTERN = re.compile( - r"(.+?)_(omp|seq|tbb|stl|all|mpi)_enabled[^:]*:(task_run|pipeline):(-*\d*\.\d*)" -) - - -def _ensure_task_tables(result_tables: dict, perf_type: str, task_name: str) -> None: - if perf_type not in result_tables: - result_tables[perf_type] = {} - if task_name not in result_tables[perf_type]: - result_tables[perf_type][task_name] = {t: -1.0 for t in list_of_type_of_tasks} - - -def _infer_category(task_name: str) -> str: - return "threads" if "threads" in task_name else "processes" - - -def _columns_for_category(category: str) -> list[str]: - return ( - ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"] - ) - - -def _write_excel_sheet( - workbook, - worksheet, - cpu_num: int, - tasks_list: list[str], - cols: list[str], - table: dict, -): - worksheet.set_column("A:Z", 23) - right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2}) - bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2}) - right_border = workbook.add_format({"right": 2}) - - worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border) - - # Header (T_x, S, Eff) per column - col = 1 - for ttype in cols: - worksheet.write(0, col, f"T_{ttype}({cpu_num})", bottom_bold_border) - col += 1 - worksheet.write( - 0, - col, - f"S({cpu_num}) = T_seq({cpu_num}) / T_{ttype}({cpu_num})", - bottom_bold_border, - ) - col += 1 - worksheet.write( - 0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border - ) - col += 1 - - # Task rows - row = 1 - for task_name in tasks_list: - worksheet.write( - row, 0, task_name, workbook.add_format({"bold": True, "right": 2}) - ) - row += 1 - - # Values - row = 1 - for task_name in tasks_list: - col = 1 - for ttype in cols: - if task_name not in table: - # no data for task at all - worksheet.write(row, col, "—") - col += 1 - worksheet.write(row, col, "—") - col += 1 - worksheet.write(row, col, "—", right_border) - col += 1 - continue - par_time = table[task_name].get(ttype, -1.0) - seq_time = table[task_name].get("seq", -1.0) - if par_time in (0.0, -1.0) or seq_time in (0.0, -1.0): - speed_up = "—" - efficiency = "—" - else: - speed_up = seq_time / par_time - efficiency = speed_up / cpu_num - worksheet.write(row, col, par_time if par_time != -1.0 else "?") - col += 1 - worksheet.write(row, col, speed_up) - col += 1 - worksheet.write(row, col, efficiency, right_border) - col += 1 - row += 1 - - -def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict): - """Write raw times (seconds) to CSV so downstream can derive speedups correctly.""" - with open(path, "w", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow(header) - for task_name in tasks_list: - task_row = table.get(task_name, {}) - seq_time = task_row.get("seq", -1.0) - row = [task_name, (seq_time if seq_time not in (0.0, -1.0) else "?")] - for col_name in header[2:]: - val = task_row.get(col_name.lower(), -1.0) - row.append(val if val != -1.0 else "?") - writer.writerow(row) - - -parser = argparse.ArgumentParser() -parser.add_argument( - "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True -) -parser.add_argument( - "-o", "--output", help="Output file path (path to .xlsx table)", required=True -) -args = parser.parse_args() -logs_path = os.path.abspath(args.input) -xlsx_path = os.path.abspath(args.output) - -# For each perf_type (pipeline/task_run) store times per task -result_tables = {"pipeline": {}, "task_run": {}} -# Map task name -> category (threads|processes) -task_categories = {} -# Track tasks per category to split output -tasks_by_category = {"threads": set(), "processes": set()} - -with open(logs_path, "r") as logs_file: - logs_lines = logs_file.readlines() -for line in logs_lines: - # Handle both old format: tasks/task_type/task_name:perf_type:time - # and new format: namespace_task_type_enabled:perf_type:time - old_result = OLD_PATTERN.findall(line) - new_result = NEW_PATTERN.findall(line) - simple_result = SIMPLE_PATTERN.findall(line) - - if len(old_result): - task_name = old_result[0][1] - perf_type = old_result[0][2] - # legacy: track task in threads category by default - _ensure_task_tables(result_tables, perf_type, task_name) - # Unknown category in legacy format; default to threads - task_categories[task_name] = "threads" - tasks_by_category["threads"].add(task_name) - elif len(new_result): - # Extract task name from namespace format and keep it specific. - base = new_result[0][0] # e.g., task_namespace_processes - task_category = new_result[0][1] # "threads" or "processes" - task_type_token = new_result[0][2] # e.g., "all", "omp", or "2_mpi" - task_name = f"{base}_{task_type_token}" - if "_" in task_type_token: - suffix, impl = task_type_token.rsplit("_", 1) - if impl in list_of_type_of_tasks: - task_name = f"{base}_{suffix}" - perf_type = new_result[0][3] - - _ensure_task_tables(result_tables, perf_type, task_name) - task_categories[task_name] = task_category - tasks_by_category[task_category].add(task_name) - elif len(simple_result): - # Extract task name in the current format (prefix already includes category suffix) - task_name = simple_result[0][0] - # Infer category by substring - task_category = "threads" if "threads" in task_name else "processes" - perf_type = simple_result[0][2] - - # no set tracking needed; category mapping below - - _ensure_task_tables(result_tables, perf_type, task_name) - task_categories[task_name] = task_category - tasks_by_category[task_category].add(task_name) - -for line in logs_lines: - # Handle both old format: tasks/task_type/task_name:perf_type:time - # and new format: namespace_task_type_enabled:perf_type:time - old_result = OLD_PATTERN.findall(line) - new_result = NEW_PATTERN.findall(line) - simple_result = SIMPLE_PATTERN.findall(line) - - if len(old_result): - task_type = old_result[0][0] - task_name = old_result[0][1] - perf_type = old_result[0][2] - perf_time = float(old_result[0][3]) - result_tables[perf_type][task_name][task_type] = perf_time - elif len(new_result): - # Extract task details from namespace format (keep specific task name) - base = new_result[0][0] - task_category = new_result[0][1] # "threads" or "processes" - token = new_result[0][2] # "all", "omp", "seq", or tokens like "2_mpi" - perf_type = new_result[0][3] - perf_time = float(new_result[0][4]) - # Split token like "2_mpi" into task suffix and impl to aggregate seq/mpi together - if "_" in token: - suffix, impl = token.rsplit("_", 1) - if impl in list_of_type_of_tasks: - task_name = f"{base}_{suffix}" - task_type = impl - else: - task_name = f"{base}_{token}" - task_type = token - else: - task_name = f"{base}_{token}" - task_type = token - - _ensure_task_tables(result_tables, perf_type, task_name) - result_tables[perf_type][task_name][task_type] = perf_time - task_categories[task_name] = task_category - tasks_by_category[task_category].add(task_name) - elif len(simple_result): - # Extract details from the simplified pattern (current logs) - task_name = simple_result[0][0] - # Infer category by substring present in task_name - task_category = "threads" if "threads" in task_name else "processes" - task_type = simple_result[0][1] - perf_type = simple_result[0][2] - perf_time = float(simple_result[0][3]) - - if perf_type not in result_tables: - result_tables[perf_type] = {} - if task_name not in result_tables[perf_type]: - result_tables[perf_type][task_name] = {} - for ttype in list_of_type_of_tasks: - result_tables[perf_type][task_name][ttype] = -1.0 - result_tables[perf_type][task_name][task_type] = perf_time - task_categories[task_name] = task_category - tasks_by_category[task_category].add(task_name) - - -for table_name, table_data in result_tables.items(): - # Prepare two workbooks/CSVs: threads and processes - for category in ["threads", "processes"]: - tasks_list = sorted(tasks_by_category[category]) - if not tasks_list: - continue - - # Use appropriate env var per category - if category == "threads": - cpu_num_env = os.environ.get("PPC_NUM_THREADS") - if cpu_num_env is None: - raise EnvironmentError( - "Required environment variable 'PPC_NUM_THREADS' is not set." - ) - else: - cpu_num_env = os.environ.get("PPC_NUM_PROC") - if cpu_num_env is None: - raise EnvironmentError( - "Required environment variable 'PPC_NUM_PROC' is not set." - ) - cpu_num = int(cpu_num_env) - cols = _columns_for_category(category) - - # Excel - wb_path = os.path.join( - xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx" - ) - workbook = xlsxwriter.Workbook(wb_path) - worksheet = workbook.add_worksheet() - _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data) - workbook.close() - - # CSV - header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]] - csv_path = os.path.join( - xlsx_path, f"{category}_" + table_name + "_perf_table.csv" - ) - _write_csv(csv_path, header, tasks_list, table_data) diff --git a/scripts/generate_perf_results.bat b/scripts/generate_perf_results.bat deleted file mode 100644 index a7d72690a..000000000 --- a/scripts/generate_perf_results.bat +++ /dev/null @@ -1,4 +0,0 @@ -@echo off -mkdir build\perf_stat_dir -scripts/run_tests.py --running-type="performance" > build\perf_stat_dir\perf_log.txt -python scripts\create_perf_table.py --input build\perf_stat_dir\perf_log.txt --output build\perf_stat_dir diff --git a/scripts/generate_perf_results.sh b/scripts/generate_perf_results.sh deleted file mode 100755 index da317f427..000000000 --- a/scripts/generate_perf_results.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -mkdir -p build/perf_stat_dir -scripts/run_tests.py --running-type="performance" | tee build/perf_stat_dir/perf_log.txt -python3 scripts/create_perf_table.py --input build/perf_stat_dir/perf_log.txt --output build/perf_stat_dir diff --git a/scripts/run_tests.py b/scripts/run_tests.py index 7c713f8bd..bdbfc2280 100755 --- a/scripts/run_tests.py +++ b/scripts/run_tests.py @@ -3,6 +3,7 @@ import os import platform import shlex +import shutil import subprocess from pathlib import Path @@ -51,6 +52,7 @@ def __init__(self, build_dir="build", verbose=False): self.__ppc_num_threads = None self.__ppc_num_proc = None self.__ppc_env = None + self.__build_dir_path = None self.work_dir = None self.build_dir = build_dir self.verbose = verbose @@ -98,14 +100,16 @@ def setup_env(self, ppc_env): ) project_path = Path(self.__get_project_path()) + build_dir = Path(self.build_dir) + if not build_dir.is_absolute(): + build_dir = project_path / build_dir + self.__build_dir_path = build_dir + install_bin_dir = project_path / "install" / "bin" if install_bin_dir.exists(): self.work_dir = install_bin_dir return - build_dir = Path(self.build_dir) - if not build_dir.is_absolute(): - build_dir = project_path / build_dir bin_dir = build_dir if build_dir.name == "bin" else build_dir / "bin" if not bin_dir.exists(): raise FileNotFoundError( @@ -114,10 +118,13 @@ def setup_env(self, ppc_env): ) self.work_dir = bin_dir - def __run_exec(self, command): + def __run_exec(self, command, extra_env=None): if self.verbose: print("Executing:", " ".join(shlex.quote(part) for part in command)) - result = subprocess.run(command, shell=False, env=self.__ppc_env) + run_env = self.__ppc_env.copy() + if extra_env: + run_env.update(extra_env) + result = subprocess.run(command, shell=False, env=run_env) if result.returncode != 0: raise Exception(f"Subprocess return {result.returncode}.") @@ -153,41 +160,42 @@ def __detect_mpi_impl(self): return "mpich", "-n" return "unknown", "-np" - def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args): + def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args, extra_env=None): + mpi_env = self.__ppc_env.copy() + if extra_env: + mpi_env.update(extra_env) base = [self.mpi_exec] + shlex.split(additional_mpi_args) + forwarded_env = [ + "PPC_NUM_THREADS", + "OMP_NUM_THREADS", + "PPC_BENCHMARK_OUT", + "PPC_BENCHMARK_FILTER", + "PPC_PERF_IMPL_FILTER", + "PPC_PERF_CATEGORY_FILTER", + ] if self.platform == "Windows": # MS-MPI style - env_args = [ - "-env", - "PPC_NUM_THREADS", - self.__ppc_env["PPC_NUM_THREADS"], - "-env", - "OMP_NUM_THREADS", - self.__ppc_env["OMP_NUM_THREADS"], - ] + env_args = [] + for env_name in forwarded_env: + if env_name in mpi_env: + env_args += ["-env", env_name, mpi_env[env_name]] np_args = ["-n", ppc_num_proc] return base + env_args + np_args # Non-Windows if self.mpi_env_mode == "openmpi": - env_args = [ - "-x", - "PPC_NUM_THREADS", - "-x", - "OMP_NUM_THREADS", - ] + env_args = [] + for env_name in forwarded_env: + if env_name in mpi_env: + env_args += ["-x", env_name] np_flag = "-np" elif self.mpi_env_mode == "mpich": # Explicitly set env variables for all ranks - env_args = [ - "-env", - "PPC_NUM_THREADS", - self.__ppc_env["PPC_NUM_THREADS"], - "-env", - "OMP_NUM_THREADS", - self.__ppc_env["OMP_NUM_THREADS"], - ] + env_args = [] + for env_name in forwarded_env: + if env_name in mpi_env: + env_args += ["-env", env_name, mpi_env[env_name]] np_flag = "-n" else: # Unknown MPI flavor: rely on environment inheritance and default to -np @@ -196,6 +204,30 @@ def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args): return base + env_args + [np_flag, ppc_num_proc] + def __benchmark_output_dir(self): + if self.__build_dir_path is None: + raise RuntimeError("Build directory is not initialized.") + return self.__build_dir_path / "perf_stat_dir" / "benchmarks" + + def __get_performance_gtest_settings(self): + return [ + "--gtest_repeat=1", + "--gtest_recreate_environments_when_repeating", + "--gtest_color=0", + "--gtest_filter=*RunPerf*", + ] + + def __get_benchmark_env(self, category, task_type): + output_dir = self.__benchmark_output_dir() + output_dir.mkdir(parents=True, exist_ok=True) + return { + "PPC_PERF_CATEGORY_FILTER": f"_{category}_", + "PPC_PERF_IMPL_FILTER": f"_{task_type}_", + "PPC_BENCHMARK_OUT": str( + output_dir / f"benchmark_{category}_{task_type}.json" + ), + } + @staticmethod def __get_gtest_settings(repeats_count, type_task): type_task_patterns = { @@ -260,19 +292,31 @@ def run_processes(self, additional_mpi_args): ) def run_performance(self): + output_dir = self.__benchmark_output_dir() + if output_dir.exists(): + shutil.rmtree(output_dir) + if not self.__ppc_env.get("PPC_ASAN_RUN"): - mpi_running = self.__build_mpi_cmd(self.__ppc_num_proc, "") - for task_type in ["all", "mpi", "seq"]: + for category, task_type in [ + ("threads", "all"), + ("processes", "mpi"), + ("processes", "seq"), + ]: + extra_env = self.__get_benchmark_env(category, task_type) + mpi_running = self.__build_mpi_cmd(self.__ppc_num_proc, "", extra_env) self.__run_exec( mpi_running + [str(self.work_dir / "ppc_perf_tests")] - + self.__get_gtest_settings(1, "_" + task_type + "_") + + self.__get_performance_gtest_settings(), + extra_env, ) for task_type in ["omp", "seq", "stl", "tbb"]: + extra_env = self.__get_benchmark_env("threads", task_type) self.__run_exec( [str(self.work_dir / "ppc_perf_tests")] - + self.__get_gtest_settings(1, "_" + task_type + "_") + + self.__get_performance_gtest_settings(), + extra_env, ) diff --git a/tasks/CMakeLists.txt b/tasks/CMakeLists.txt index 685457716..a20f74abf 100644 --- a/tasks/CMakeLists.txt +++ b/tasks/CMakeLists.txt @@ -12,6 +12,9 @@ include(${CMAKE_SOURCE_DIR}/cmake/functions.cmake) # ——— Initialize test executables ————————————————————————————————————— ppc_add_test(${FUNC_TEST_EXEC} common/runners/functional.cpp USE_FUNC_TESTS) ppc_add_test(${PERF_TEST_EXEC} common/runners/performance.cpp USE_PERF_TESTS) +if(USE_PERF_TESTS) + ppc_link_benchmark(${PERF_TEST_EXEC}) +endif() # ——— List of implementations ———————————————————————————————————————— set(PPC_IMPLEMENTATIONS "all;mpi;omp;seq;stl;tbb" CACHE STRING "Implementations to build (semicolon-separated)") diff --git a/tasks/common/runners/performance.cpp b/tasks/common/runners/performance.cpp index a4b6c0e2f..1f8101bcd 100644 --- a/tasks/common/runners/performance.cpp +++ b/tasks/common/runners/performance.cpp @@ -1,5 +1,232 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oneapi/tbb/global_control.h" #include "runners/include/runners.hpp" +#include "util/include/util.hpp" + +namespace { + +class NullBenchmarkReporter final : public benchmark::BenchmarkReporter { + public: + bool ReportContext(const Context & /*context*/) override { + return true; + } + + void ReportRuns(const std::vector & /*report*/) override {} +}; + +int RunAllTests() { + const int status = RUN_ALL_TESTS(); + if (ppc::util::DestructorFailureFlag::Get()) { + throw std::runtime_error( + std::format("[ ERROR ] Destructor failed with code {}", ppc::util::DestructorFailureFlag::Get())); + } + return status; +} + +void SyncGTestSeed() { + int rank = -1; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + int seed = ::testing::GTEST_FLAG(random_seed); + if (rank == 0 && seed == 0) { + try { + seed = static_cast((std::random_device{}() % 99999U) + 1U); + } catch (...) { + seed = 0; + } + if (seed == 0) { + const auto now = static_cast(std::chrono::steady_clock::now().time_since_epoch().count()); + seed = static_cast((now % 99999ULL) + 1ULL); + } + } + MPI_Bcast(&seed, 1, MPI_INT, 0, MPI_COMM_WORLD); + ::testing::GTEST_FLAG(random_seed) = seed; +} + +void SyncGTestFilter() { + int rank = -1; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + std::string filter = (rank == 0) ? ::testing::GTEST_FLAG(filter) : std::string{}; + int len = static_cast(filter.size()); + MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rank != 0) { + filter.resize(static_cast(len)); + } + if (len > 0) { + MPI_Bcast(filter.data(), len, MPI_CHAR, 0, MPI_COMM_WORLD); + } + ::testing::GTEST_FLAG(filter) = filter; +} + +bool HasFlag(int argc, char **argv, std::string_view flag) { + for (int i = 1; i < argc; ++i) { + if (argv[i] != nullptr && std::string_view(argv[i]) == flag) { + return true; + } + } + return false; +} + +std::vector MakeBenchmarkArgs(const char *program_name, int rank) { + std::vector args{program_name != nullptr ? program_name : "ppc_perf_tests"}; + args.emplace_back("--benchmark_format=console"); + args.emplace_back("--benchmark_time_unit=s"); + + const auto benchmark_filter = env::get("PPC_BENCHMARK_FILTER"); + if (benchmark_filter.has_value()) { + args.emplace_back(std::string("--benchmark_filter=") + benchmark_filter.value()); + } + + if (rank == 0) { + const auto benchmark_out = env::get("PPC_BENCHMARK_OUT"); + if (benchmark_out.has_value()) { + const std::filesystem::path out_path(benchmark_out.value()); + if (out_path.has_parent_path()) { + std::filesystem::create_directories(out_path.parent_path()); + } + args.emplace_back(std::string("--benchmark_out=") + benchmark_out.value()); + args.emplace_back("--benchmark_out_format=json"); + } + } + + return args; +} + +void InitializeBenchmark(int argc, char **argv, int rank) { + static std::vector benchmark_args; + static std::vector benchmark_argv; + + benchmark_args = MakeBenchmarkArgs((argc > 0) ? argv[0] : nullptr, rank); + benchmark_argv.clear(); + benchmark_argv.reserve(benchmark_args.size()); + for (auto &arg : benchmark_args) { + benchmark_argv.push_back(arg.data()); + } + int benchmark_argc = static_cast(benchmark_argv.size()); + benchmark::Initialize(&benchmark_argc, benchmark_argv.data()); +} + +int RunRegisteredBenchmarks(int rank) { + ppc::util::PerformanceFailureFlag::Unset(); + if (rank == 0) { + benchmark::RunSpecifiedBenchmarks(); + } else { + NullBenchmarkReporter reporter; + std::ofstream null_stream; +#ifdef _WIN32 + null_stream.open("NUL"); +#else + null_stream.open("/dev/null"); +#endif + if (null_stream.is_open()) { + reporter.SetOutputStream(&null_stream); + reporter.SetErrorStream(&null_stream); + } + benchmark::RunSpecifiedBenchmarks(&reporter, nullptr); + } + const int status = ppc::util::PerformanceFailureFlag::Get() ? EXIT_FAILURE : EXIT_SUCCESS; + benchmark::Shutdown(); + benchmark::ClearRegisteredBenchmarks(); + return status; +} + +int RunAllTestsSafely() { + try { + return RunAllTests(); + } catch (const std::exception &e) { + std::cerr << std::format("[ ERROR ] Exception after performance tests: {}", e.what()) << '\n'; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + return EXIT_FAILURE; + } catch (...) { + std::cerr << "[ ERROR ] Unknown exception after performance tests" << '\n'; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + return EXIT_FAILURE; + } +} + +int SynchronizeStatus(int local_status, std::string_view stage) { + const int local_failed = (local_status == EXIT_SUCCESS) ? 0 : 1; + int any_failed = local_failed; + const int reduce_res = MPI_Allreduce(&local_failed, &any_failed, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + if (reduce_res != MPI_SUCCESS) { + std::cerr << "[ ERROR ] MPI_Allreduce failed while synchronizing " << stage << " status with code " << reduce_res + << '\n'; + MPI_Abort(MPI_COMM_WORLD, reduce_res); + return EXIT_FAILURE; + } + return (any_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} + +int RunPerformanceMain(int argc, char **argv) { + const int init_res = MPI_Init(&argc, &argv); + if (init_res != MPI_SUCCESS) { + std::cerr << "[ ERROR ] MPI_Init failed with code " << init_res << '\n'; + MPI_Abort(MPI_COMM_WORLD, init_res); + return init_res; + } + + tbb::global_control control(tbb::global_control::max_allowed_parallelism, ppc::util::GetNumThreads()); + + ::testing::InitGoogleTest(&argc, argv); + + SyncGTestSeed(); + SyncGTestFilter(); + + int rank = -1; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); + const bool print_workers = HasFlag(argc, argv, "--print-workers"); + if (rank != 0 && !print_workers) { + auto *listener = listeners.Release(listeners.default_result_printer()); + listeners.Append( + new ppc::runners::WorkerTestFailurePrinter(std::shared_ptr<::testing::TestEventListener>(listener))); + } + listeners.Append(new ppc::runners::UnreadMessagesDetector()); + + int status = SynchronizeStatus(RunAllTestsSafely(), "GTest"); + if (status == EXIT_SUCCESS) { + InitializeBenchmark(argc, argv, rank); + status = SynchronizeStatus(RunRegisteredBenchmarks(rank), "Google Benchmark"); + } + + const int finalize_res = MPI_Finalize(); + if (finalize_res != MPI_SUCCESS) { + std::cerr << "[ ERROR ] MPI_Finalize failed with code " << finalize_res << '\n'; + MPI_Abort(MPI_COMM_WORLD, finalize_res); + return finalize_res; + } + return status; +} + +} // namespace int main(int argc, char **argv) { - return ppc::runners::Init(argc, argv); + try { + return RunPerformanceMain(argc, argv); + } catch (const std::exception &e) { + std::cerr << "[ ERROR ] Unhandled exception in performance tests: " << e.what() << '\n'; + } catch (...) { + std::cerr << "[ ERROR ] Unknown unhandled exception in performance tests" << '\n'; + } + return EXIT_FAILURE; } diff --git a/tasks/example/processes/t1/tests/performance/main.cpp b/tasks/example/processes/t1/tests/performance/main.cpp index b4fc6e9db..f628496c8 100644 --- a/tasks/example/processes/t1/tests/performance/main.cpp +++ b/tasks/example/processes/t1/tests/performance/main.cpp @@ -35,7 +35,7 @@ const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks( PPC_SETTINGS_example, "processes.t3"); -TEST_F(ExampleRunPerfTestProcesses3, RunPerfModes) { +TEST_F(ExampleRunPerfTestProcesses3, RunPerf) { std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks); } diff --git a/tasks/example/threads/tests/performance/main.cpp b/tasks/example/threads/tests/performance/main.cpp index 82ea9356d..939305f75 100644 --- a/tasks/example/threads/tests/performance/main.cpp +++ b/tasks/example/threads/tests/performance/main.cpp @@ -39,7 +39,7 @@ const auto kAllPerfTasks = } // namespace -TEST_F(ExampleRunPerfTestThreads, RunPerfModes) { +TEST_F(ExampleRunPerfTestThreads, RunPerf) { std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks); }