From ef4bf795a64e133979e3a46f2deac99ada482deb Mon Sep 17 00:00:00 2001
From: Alexander Nesterov <nesterov.alexander@outlook.com>
Date: Fri, 5 Jun 2026 19:36:26 +0200
Subject: [PATCH] Use Google Benchmark for performance tables

---
 .clang-format                                 |   7 +
 .github/workflows/perf.yml                    |   4 +-
 .gitmodules                                   |   3 +
 3rdparty/benchmark                            |   1 +
 CMakeLists.txt                                |   1 +
 Doxyfile                                      |   1 -
 cmake/benchmark.cmake                         |  51 +++
 docs/user_guide/api.rst                       |   6 -
 modules/CMakeLists.txt                        |   1 +
 modules/performance/include/performance.hpp   | 133 ------
 modules/performance/tests/perf_tests.cpp      | 402 ------------------
 modules/task/tests/task_tests.cpp             |  42 +-
 modules/util/include/perf_test_util.hpp       | 257 ++++++++---
 modules/util/include/util.hpp                 |  20 +
 modules/util/src/func_test_util.cpp           |   2 +-
 scoreboard/main.py                            | 315 +++++---------
 scoreboard/tests/conftest.py                  |  43 --
 .../test_calculate_performance_metrics.py     |  94 +++-
 .../test_load_benchmark_performance_data.py   | 120 ++++++
 .../tests/test_load_performance_data.py       | 145 -------
 scripts/create_perf_table.py                  | 289 -------------
 scripts/generate_perf_results.bat             |   4 -
 scripts/generate_perf_results.sh              |   6 -
 scripts/run_tests.py                          | 108 +++--
 tasks/CMakeLists.txt                          |   3 +
 tasks/common/runners/performance.cpp          | 229 +++++++++-
 .../processes/t1/tests/performance/main.cpp   |   2 +-
 .../processes/t2/tests/performance/main.cpp   |   2 +-
 .../processes/t3/tests/performance/main.cpp   |   2 +-
 .../threads/tests/performance/main.cpp        |   2 +-
 30 files changed, 923 insertions(+), 1372 deletions(-)
 create mode 160000 3rdparty/benchmark
 create mode 100644 cmake/benchmark.cmake
 delete mode 100644 modules/performance/include/performance.hpp
 delete mode 100644 modules/performance/tests/perf_tests.cpp
 create mode 100644 scoreboard/tests/test_load_benchmark_performance_data.py
 delete mode 100644 scoreboard/tests/test_load_performance_data.py
 delete mode 100644 scripts/create_perf_table.py
 delete mode 100644 scripts/generate_perf_results.bat
 delete mode 100755 scripts/generate_perf_results.sh

diff --git a/.clang-format b/.clang-format
index 98e77c747..67c2f4cda 100644
--- a/.clang-format
+++ b/.clang-format
@@ -6,6 +6,13 @@ UseTab: Never
 AllowShortFunctionsOnASingleLine: Empty
 IndentPPDirectives: AfterHash
 SortIncludes: true
+IncludeCategories:
+  - Regex: '^<gtest/.*'
+    Priority: 1
+  - Regex: '^<.*'
+    Priority: 2
+  - Regex: '.*'
+    Priority: 3
 FixNamespaceComments: true
 InsertBraces: true
 QualifierAlignment: Left
diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml
index 339fb641d..b6c6d769e 100644
--- a/.github/workflows/perf.yml
+++ b/.github/workflows/perf.yml
@@ -30,7 +30,7 @@ jobs:
           tar -xzvf ubuntu-gcc-install-ubuntu-24.04.tar.gz -C install
       - name: Run perf tests
         run: |
-          bash -e scripts/generate_perf_results.sh
+          scripts/run_tests.py --running-type=performance
         env:
           PPC_NUM_PROC: 2
           PPC_NUM_THREADS: 2
@@ -68,7 +68,7 @@ jobs:
           tar -xzvf macos-clang-install.tar.gz -C install
       - name: Run perf tests
         run: |
-          bash -e scripts/generate_perf_results.sh
+          scripts/run_tests.py --running-type=performance
         env:
           PPC_NUM_PROC: 1
           PPC_NUM_THREADS: 2
diff --git a/.gitmodules b/.gitmodules
index 6ef00628f..601a9e307 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,6 @@
 [submodule "3rdparty/libenvpp"]
 	path = 3rdparty/libenvpp
 	url = https://github.com/ph3at/libenvpp
+[submodule "3rdparty/benchmark"]
+	path = 3rdparty/benchmark
+	url = https://github.com/google/benchmark
diff --git a/3rdparty/benchmark b/3rdparty/benchmark
new file mode 160000
index 000000000..a8460680f
--- /dev/null
+++ b/3rdparty/benchmark
@@ -0,0 +1 @@
+Subproject commit a8460680f0df91fd26205e0931708a26c3b4094d
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37584c9c5..b374c7004 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,7 @@ endforeach()
 
 message( STATUS "PPC step: Setup external projects" )
 include(cmake/gtest.cmake)
+include(cmake/benchmark.cmake)
 
 ############################## Modules ##############################
 
diff --git a/Doxyfile b/Doxyfile
index b5a477dd7..d759d09b0 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -6,7 +6,6 @@ PROJECT_BRIEF          = "Parallel Programming Course"
 INPUT                  = modules/task/include \
                          modules/util/include \
                          modules/util/src \
-                         modules/performance/include \
                          modules/runners/include \
                          modules/runners/src
 FILE_PATTERNS          = *.h *.c *.hpp *.cpp
diff --git a/cmake/benchmark.cmake b/cmake/benchmark.cmake
new file mode 100644
index 000000000..f2adaea5f
--- /dev/null
+++ b/cmake/benchmark.cmake
@@ -0,0 +1,65 @@
+# Google Benchmark integration: builds the vendored 3rdparty/benchmark
+# submodule as an external project and exposes helper functions that attach
+# its headers and library to PPC targets.
+include_guard()
+
+include(ExternalProject)
+
+# Every path below is rooted at CMAKE_BINARY_DIR so that it matches the link
+# directory used by ppc_link_benchmark(), regardless of which directory
+# includes this file first. CMAKE_INSTALL_LIBDIR is pinned to "lib" because
+# GNUInstallDirs may otherwise select "lib64" (or a multiarch directory),
+# which ppc_link_benchmark() would not search.
+ExternalProject_Add(
+  ppc_benchmark
+  SOURCE_DIR "${CMAKE_SOURCE_DIR}/3rdparty/benchmark"
+  PREFIX "${CMAKE_BINARY_DIR}/ppc_benchmark"
+  BINARY_DIR "${CMAKE_BINARY_DIR}/ppc_benchmark/build"
+  INSTALL_DIR "${CMAKE_BINARY_DIR}/ppc_benchmark/install"
+  EXCLUDE_FROM_ALL TRUE
+  CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+             -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+             -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
+             -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
+             -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
+             -DCMAKE_CXX_STANDARD_REQUIRED=${CMAKE_CXX_STANDARD_REQUIRED}
+             -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+             ${PPC_EXTERNAL_PROJECT_CMAKE_ARGS}
+             -DCMAKE_C_FLAGS=-w
+             -DCMAKE_CXX_FLAGS=-w
+             -DCMAKE_INSTALL_LIBDIR=lib
+             -DBENCHMARK_ENABLE_TESTING=OFF
+             -DBENCHMARK_ENABLE_GTEST_TESTS=OFF
+             -DBENCHMARK_ENABLE_WERROR=OFF
+             -DBENCHMARK_ENABLE_INSTALL=ON
+             -DBENCHMARK_ENABLE_LIBPFM=OFF
+  BUILD_COMMAND
+    "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}/ppc_benchmark/build"
+    --config $<CONFIG> --parallel
+  INSTALL_COMMAND
+    "${CMAKE_COMMAND}" --install "${CMAKE_BINARY_DIR}/ppc_benchmark/build"
+    --config $<CONFIG> --prefix "${CMAKE_BINARY_DIR}/ppc_benchmark/install"
+    ${PPC_EXTERNAL_PROJECT_LOG_ARGS})
+
+# Adds the Google Benchmark headers to `target_name` without linking the
+# library. BENCHMARK_STATIC_DEFINE selects the static-library form of the
+# benchmark export macros.
+function(ppc_include_benchmark target_name)
+  target_include_directories(
+    ${target_name} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/benchmark/include)
+  target_compile_definitions(${target_name} PUBLIC BENCHMARK_STATIC_DEFINE)
+endfunction()
+
+# Links `target_name` against the externally built benchmark library: adds the
+# headers, orders the build after ppc_benchmark, and links `benchmark` plus
+# Threads::Threads (and shlwapi on Windows).
+function(ppc_link_benchmark target_name)
+  ppc_include_benchmark(${target_name})
+  add_dependencies(${target_name} ppc_benchmark)
+  target_link_directories(${target_name} PUBLIC
+                          "${CMAKE_BINARY_DIR}/ppc_benchmark/install/lib")
+  target_link_libraries(${target_name} PUBLIC benchmark Threads::Threads)
+  if(WIN32)
+    target_link_libraries(${target_name} PUBLIC shlwapi)
+  endif()
+endfunction()
diff --git a/docs/user_guide/api.rst b/docs/user_guide/api.rst
index 178c3f401..81167abb4 100644
--- a/docs/user_guide/api.rst
+++ b/docs/user_guide/api.rst
@@ -21,9 +21,3 @@ Utility Module
 
 .. doxygennamespace:: ppc::util
    :project: ParallelProgrammingCourse
-
-Performance Module
-------------------
-
-.. doxygennamespace:: ppc::performance
-   :project: ParallelProgrammingCourse
diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
index d9b5057e3..d7e6fd76a 100644
--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@@ -26,6 +26,7 @@ set_target_properties(${exec_func_lib} PROPERTIES LINKER_LANGUAGE CXX)
 target_include_directories(
   ${exec_func_lib} PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty
                           ${CMAKE_SOURCE_DIR}/modules ${CMAKE_SOURCE_DIR}/tasks)
+ppc_include_benchmark(${exec_func_lib})
 
 foreach(
   link
diff --git a/modules/performance/include/performance.hpp b/modules/performance/include/performance.hpp
deleted file mode 100644
index 2b5d1e9fb..000000000
--- a/modules/performance/include/performance.hpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <functional>
-#include <iomanip>
-#include <iostream>
-#include <memory>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-
-#include "task/include/task.hpp"
-#include "util/include/util.hpp"
-
-namespace ppc::performance {
-
-inline double DefaultTimer() {
-  return -1.0;
-}
-
-struct PerfAttr {
-  /// @brief Number of times the task is run for performance evaluation.
-  uint64_t num_running = 5;
-  /// @brief Timer function returning current time in seconds.
-  /// @cond
-  std::function<double()> current_timer = DefaultTimer;
-  /// @endcond
-};
-
-struct PerfResults {
-  /// @brief Measured execution time in seconds.
-  double time_sec = 0.0;
-  enum class TypeOfRunning : uint8_t {
-    kPipeline,
-    kTaskRun,
-    kNone,
-  };
-  TypeOfRunning type_of_running = TypeOfRunning::kNone;
-  constexpr static double kMaxTime = 10.0;
-};
-
-template <typename InType, typename OutType>
-class Perf {
- public:
-  // Init performance analysis with an initialized task and initialized data
-  explicit Perf(const ppc::task::TaskPtr<InType, OutType> &task_ptr) : task_(task_ptr) {
-    task_ptr->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
-  }
-  // Check performance of full task's pipeline:  PreProcessing() ->
-  // Validation() -> Run() -> PostProcessing()
-  void PipelineRun(const PerfAttr &perf_attr) {
-    perf_results_.type_of_running = PerfResults::TypeOfRunning::kPipeline;
-
-    CommonRun(perf_attr, [&] {
-      task_->Validation();
-      task_->PreProcessing();
-      task_->Run();
-      task_->PostProcessing();
-    }, perf_results_);
-  }
-  // Check performance of task's Run() function
-  void TaskRun(const PerfAttr &perf_attr) {
-    perf_results_.type_of_running = PerfResults::TypeOfRunning::kTaskRun;
-
-    task_->Validation();
-    task_->PreProcessing();
-    CommonRun(perf_attr, [&] { task_->Run(); }, perf_results_);
-    task_->PostProcessing();
-
-    task_->Validation();
-    task_->PreProcessing();
-    task_->Run();
-    task_->PostProcessing();
-  }
-  // Print results for automation checkers
-  void PrintPerfStatistic(const std::string &test_id) const {
-    std::string type_test_name;
-    if (perf_results_.type_of_running == PerfResults::TypeOfRunning::kTaskRun) {
-      type_test_name = "task_run";
-    } else if (perf_results_.type_of_running == PerfResults::TypeOfRunning::kPipeline) {
-      type_test_name = "pipeline";
-    } else {
-      std::stringstream err_msg;
-      err_msg << '\n' << "The type of performance check for the task was not selected.\n";
-      throw std::runtime_error(err_msg.str().c_str());
-    }
-
-    auto time_secs = perf_results_.time_sec;
-    const auto max_time = ppc::util::GetPerfMaxTime();
-    std::stringstream perf_res_str;
-    if (time_secs < max_time) {
-      perf_res_str << std::fixed << std::setprecision(10) << time_secs;
-      std::cout << test_id << ":" << type_test_name << ":" << perf_res_str.str() << '\n';
-    } else {
-      std::stringstream err_msg;
-      err_msg << '\n' << "Task execute time need to be: ";
-      err_msg << "time < " << max_time << " secs." << '\n';
-      err_msg << "Original time in secs: " << time_secs << '\n';
-      perf_res_str << std::fixed << std::setprecision(10) << -1.0;
-      std::cout << test_id << ":" << type_test_name << ":" << perf_res_str.str() << '\n';
-      throw std::runtime_error(err_msg.str().c_str());
-    }
-  }
-  /// @brief Retrieves the performance test results.
-  /// @return The latest PerfResults structure.
-  [[nodiscard]] PerfResults GetPerfResults() const {
-    return perf_results_;
-  }
-
- private:
-  PerfResults perf_results_;
-  std::shared_ptr<ppc::task::Task<InType, OutType>> task_;
-  static void CommonRun(const PerfAttr &perf_attr, const std::function<void()> &pipeline, PerfResults &perf_results) {
-    auto begin = perf_attr.current_timer();
-    for (uint64_t i = 0; i < perf_attr.num_running; i++) {
-      pipeline();
-    }
-    auto end = perf_attr.current_timer();
-    perf_results.time_sec = (end - begin) / static_cast<double>(perf_attr.num_running);
-  }
-};
-
-inline std::string GetStringParamName(PerfResults::TypeOfRunning type_of_running) {
-  if (type_of_running == PerfResults::TypeOfRunning::kTaskRun) {
-    return "task_run";
-  }
-  if (type_of_running == PerfResults::TypeOfRunning::kPipeline) {
-    return "pipeline";
-  }
-  return "none";
-}
-
-}  // namespace ppc::performance
diff --git a/modules/performance/tests/perf_tests.cpp b/modules/performance/tests/perf_tests.cpp
deleted file mode 100644
index 18f3c6b89..000000000
--- a/modules/performance/tests/perf_tests.cpp
+++ /dev/null
@@ -1,402 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <array>
-#include <chrono>
-#include <cstdint>
-#include <filesystem>
-#include <fstream>
-#include <libenvpp/detail/environment.hpp>
-#include <memory>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <thread>
-#include <vector>
-
-#include "performance/include/performance.hpp"
-#include "task/include/task.hpp"
-#include "util/include/util.hpp"
-
-using ppc::task::StatusOfTask;
-using ppc::task::Task;
-using ppc::task::TypeOfTask;
-
-namespace ppc::test {
-
-template <typename InType, typename OutType>
-class TestPerfTask : public ppc::task::Task<InType, OutType> {
- public:
-  explicit TestPerfTask(const InType &in) {
-    this->GetInput() = in;
-  }
-
- protected:
-  bool ValidationImpl() override {
-    return !this->GetInput().empty();
-  }
-
-  bool PreProcessingImpl() override {
-    this->GetOutput() = 0;
-    return true;
-  }
-
-  bool RunImpl() override {
-    for (const auto &value : this->GetInput()) {
-      this->GetOutput() += value;
-    }
-    return true;
-  }
-
-  bool PostProcessingImpl() override {
-    return true;
-  }
-};
-
-template <typename InType, typename OutType>
-class FakePerfTask : public TestPerfTask<InType, OutType> {
- public:
-  explicit FakePerfTask(const InType &in) : TestPerfTask<InType, OutType>(in) {}
-
- protected:
-  bool RunImpl() override {
-    std::this_thread::sleep_for(std::chrono::seconds(11));
-    return TestPerfTask<InType, OutType>::RunImpl();
-  }
-};
-
-}  // namespace ppc::test
-
-namespace ppc::performance {
-
-TEST(PerfTests, CheckPerfPipeline) {
-  std::vector<uint32_t> in(2000, 1);
-
-  auto test_task = std::make_shared<ppc::test::TestPerfTask<std::vector<uint32_t>, uint32_t>>(in);
-
-  Perf<std::vector<uint32_t>, uint32_t> perf_analyzer(test_task);
-
-  PerfAttr perf_attr;
-  perf_analyzer.PipelineRun(perf_attr);
-
-  perf_analyzer.PrintPerfStatistic("check_perf_pipeline");
-  ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime);
-  EXPECT_EQ(test_task->GetOutput(), in.size());
-}
-
-TEST(PerfTests, CheckPerfPipelineFloat) {
-  std::vector<float> in(2000, 1);
-
-  auto test_task = std::make_shared<ppc::test::TestPerfTask<std::vector<float>, float>>(in);
-
-  Perf<std::vector<float>, float> perf_analyzer(test_task);
-
-  PerfAttr perf_attr;
-  perf_analyzer.PipelineRun(perf_attr);
-
-  perf_analyzer.PrintPerfStatistic("check_perf_pipeline_float");
-  ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime);
-  EXPECT_EQ(test_task->GetOutput(), in.size());
-}
-
-TEST(PerfTests, CheckPerfPipelineUint8tSlowTest) {
-  std::vector<uint8_t> in(128, 1);
-
-  auto test_task = std::make_shared<ppc::test::FakePerfTask<std::vector<uint8_t>, uint8_t>>(in);
-
-  Perf<std::vector<uint8_t>, uint8_t> perf_analyzer(test_task);
-
-  PerfAttr perf_attr;
-  perf_attr.num_running = 1;
-
-  const auto t0 = std::chrono::high_resolution_clock::now();
-  perf_attr.current_timer = [&] {
-    auto current_time_point = std::chrono::high_resolution_clock::now();
-    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
-    return static_cast<double>(duration) * 1e-9;
-  };
-  perf_analyzer.PipelineRun(perf_attr);
-
-  ASSERT_ANY_THROW(perf_analyzer.PrintPerfStatistic("check_perf_pipeline_uint8_t_slow_test"));
-}
-
-TEST(PerfTests, SlowPerfRespectsEnvOverride) {
-  env::detail::set_scoped_environment_variable scoped("PPC_PERF_MAX_TIME", "12");
-  std::vector<uint8_t> in(128, 1);
-  auto test_task = std::make_shared<ppc::test::FakePerfTask<std::vector<uint8_t>, uint8_t>>(in);
-  Perf<std::vector<uint8_t>, uint8_t> perf_analyzer(test_task);
-  PerfAttr perf_attr;
-  perf_attr.num_running = 1;
-  const auto t0 = std::chrono::high_resolution_clock::now();
-  perf_attr.current_timer = [&] {
-    auto current_time_point = std::chrono::high_resolution_clock::now();
-    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
-    return static_cast<double>(duration) * 1e-9;
-  };
-  perf_analyzer.PipelineRun(perf_attr);
-  EXPECT_NO_THROW(perf_analyzer.PrintPerfStatistic("slow_perf_respects_env_override"));
-}
-
-TEST(PerfTests, CheckPerfTaskException) {
-  std::vector<uint32_t> in(2000, 1);
-
-  auto test_task = std::make_shared<ppc::test::TestPerfTask<std::vector<uint32_t>, uint32_t>>(in);
-
-  Perf<std::vector<uint32_t>, uint32_t> perf_analyzer(test_task);
-
-  ASSERT_ANY_THROW(perf_analyzer.PrintPerfStatistic("check_perf_task_exception"));
-
-  PerfAttr perf_attr;
-  perf_analyzer.TaskRun(perf_attr);
-}
-
-TEST(PerfTests, CheckPerfTaskFloat) {
-  std::vector<float> in(2000, 1);
-
-  auto test_task = std::make_shared<ppc::test::TestPerfTask<std::vector<float>, float>>(in);
-
-  Perf<std::vector<float>, float> perf_analyzer(test_task);
-
-  PerfAttr perf_attr;
-  perf_analyzer.TaskRun(perf_attr);
-
-  perf_analyzer.PrintPerfStatistic("check_perf_task_float");
-  ASSERT_LE(perf_analyzer.GetPerfResults().time_sec, PerfResults::kMaxTime);
-  EXPECT_EQ(test_task->GetOutput(), in.size());
-}
-
-struct ParamTestCase {
-  PerfResults::TypeOfRunning input;
-  std::string_view expected_output;
-};
-
-namespace {
-
-constexpr std::array<ParamTestCase, 3> kParamTestCases = {
-    {{.input = PerfResults::TypeOfRunning::kTaskRun, .expected_output = "task_run"},
-     {.input = PerfResults::TypeOfRunning::kPipeline, .expected_output = "pipeline"},
-     {.input = PerfResults::TypeOfRunning::kNone, .expected_output = "none"}}};
-
-}  // namespace
-
-TEST(GetStringParamNameParamTest, ReturnsExpectedString) {
-  for (const auto &param : kParamTestCases) {
-    EXPECT_EQ(GetStringParamName(param.input), std::string(param.expected_output));
-  }
-}
-
-struct TaskTypeTestCase {
-  TypeOfTask type;
-  std::string_view expected;
-  std::string_view label;
-};
-
-class GetStringTaskTypeTest : public ::testing::Test {
- protected:
-  std::string temp_path;
-
-  void SetUp() override {
-    temp_path = (std::filesystem::temp_directory_path() / "test_settings.json").string();
-    auto j = ppc::util::InitJSONPtr();
-    *j = {{"tasks", {{"all", "ALL"}, {"stl", "STL"}, {"omp", "OMP"}, {"mpi", "MPI"}, {"tbb", "TBB"}, {"seq", "SEQ"}}}};
-
-    std::ofstream(temp_path) << j->dump();
-  }
-
-  void TearDown() override {
-    std::filesystem::remove(temp_path);
-  }
-};
-
-namespace {
-
-constexpr std::array<TaskTypeTestCase, 6> kTaskTypeTestCases = {
-    {{.type = TypeOfTask::kALL, .expected = "all_ALL", .label = "kALL"},
-     {.type = TypeOfTask::kSTL, .expected = "stl_STL", .label = "kSTL"},
-     {.type = TypeOfTask::kOMP, .expected = "omp_OMP", .label = "kOMP"},
-     {.type = TypeOfTask::kMPI, .expected = "mpi_MPI", .label = "kMPI"},
-     {.type = TypeOfTask::kTBB, .expected = "tbb_TBB", .label = "kTBB"},
-     {.type = TypeOfTask::kSEQ, .expected = "seq_SEQ", .label = "kSEQ"}}};
-
-}  // namespace
-
-TEST_F(GetStringTaskTypeTest, ReturnsExpectedString) {
-  for (const auto &param : kTaskTypeTestCases) {
-    EXPECT_EQ(GetStringTaskType(param.type, temp_path), std::string(param.expected)) << "Failed on: " << param.label;
-  }
-}
-
-TEST(GetStringTaskTypeStandaloneTest, ThrowsIfFileMissing) {
-  std::string missing_path = "non_existent_settings.json";
-  EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, missing_path), std::runtime_error);
-}
-
-TEST(GetStringTaskTypeStandaloneTest, ExceptionMessageContainsPath) {
-  const std::string missing_path = "non_existent_settings.json";
-  EXPECT_THROW(try { GetStringTaskType(TypeOfTask::kSEQ, missing_path); } catch (const std::runtime_error &e) {
-    EXPECT_NE(std::string(e.what()).find(missing_path), std::string::npos);
-    throw;
-  },
-               std::runtime_error);
-}
-
-TEST(GetStringTaskTypeStandaloneTest, ReturnsUnknownForInvalidEnum) {
-  std::string path = (std::filesystem::temp_directory_path() / "tmp_settings.json").string();
-  std::ofstream(path) << R"({"tasks":{"seq":"SEQ"}})";
-
-  auto result = GetStringTaskType(TypeOfTask::kUnknown, path);
-  EXPECT_EQ(result, "unknown");
-
-  std::filesystem::remove(path);
-}
-
-TEST(GetStringTaskTypeEdgeCases, ThrowsIfFileCannotBeOpened) {
-  EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, "definitely_missing_file.json"), std::runtime_error);
-}
-
-TEST(GetStringTaskTypeEdgeCases, ThrowsIfJsonIsMalformed) {
-  std::string path = (std::filesystem::temp_directory_path() / "bad_json.json").string();
-  std::ofstream(path) << "{ this is not valid json ";
-  EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonParseError);
-  std::filesystem::remove(path);
-}
-
-TEST(GetStringTaskTypeEdgeCases, ThrowsIfJsonValueIsNull) {
-  std::string path = (std::filesystem::temp_directory_path() / "null_value.json").string();
-  std::ofstream(path) << R"({"tasks": { "seq": null }})";
-
-  EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonTypeError);
-
-  std::filesystem::remove(path);
-}
-
-TEST(GetStringTaskTypeEdgeCases, ReturnsUnknownIfEnumOutOfRange) {
-  std::string path = (std::filesystem::temp_directory_path() / "ok.json").string();
-  std::ofstream(path) << R"({"tasks":{"seq":"SEQ"}})";
-  auto result = GetStringTaskType(TypeOfTask::kUnknown, path);
-  EXPECT_EQ(result, "unknown");
-  std::filesystem::remove(path);
-}
-
-TEST(GetStringTaskStatusTest, HandlesEnabledAndDisabled) {
-  EXPECT_EQ(GetStringTaskStatus(StatusOfTask::kEnabled), "enabled");
-  EXPECT_EQ(GetStringTaskStatus(StatusOfTask::kDisabled), "disabled");
-}
-
-class DummyTask : public Task<int, int> {
- public:
-  using Task::Task;
-
- protected:
-  bool ValidationImpl() override {
-    return true;
-  }
-  bool PreProcessingImpl() override {
-    return true;
-  }
-  bool RunImpl() override {
-    return true;
-  }
-  bool PostProcessingImpl() override {
-    return true;
-  }
-};
-
-TEST(TaskTest, GetDynamicTypeReturnsCorrectEnum) {
-  DummyTask task;
-  task.SetTypeOfTask(TypeOfTask::kOMP);
-  task.Validation();
-  task.PreProcessing();
-  task.Run();
-  task.PostProcessing();
-  EXPECT_EQ(task.GetDynamicTypeOfTask(), TypeOfTask::kOMP);
-}
-
-TEST(TaskTest, DestructorTerminatesIfWrongOrder) {
-  DummyTask task;
-  EXPECT_THROW(task.Run(), std::runtime_error);
-}
-
-namespace my {
-namespace nested {
-struct Type {};
-}  // namespace nested
-
-class Another {};
-}  // namespace my
-
-TEST(GetNamespaceTest, ExtractsNestedNamespaceCorrectly) {
-  EXPECT_EQ(ppc::util::GetNamespace<my::nested::Type>(), "ppc::performance::my::nested");
-}
-
-TEST(GetNamespaceTest, ExtractsParentNamespaceCorrectly) {
-  EXPECT_EQ(ppc::util::GetNamespace<my::Another>(), "ppc::performance::my");
-}
-
-TEST(GetNamespaceTest, ReturnsEmptyStringForGlobalNamespaceType) {
-  EXPECT_EQ(ppc::util::GetNamespace<int>(), "");
-}
-
-TEST(PerfTest, PipelineRunAndTaskRun) {
-  auto task_ptr = std::make_shared<DummyTask>();
-  Perf<int, int> perf(task_ptr);
-
-  PerfAttr attr;
-  double time = 0.0;
-  attr.num_running = 2;
-  attr.current_timer = [&time]() {
-    double t = time;
-    time += 1.0;
-    return t;
-  };
-
-  EXPECT_NO_THROW(perf.PipelineRun(attr));
-  auto res_pipeline = perf.GetPerfResults();
-  EXPECT_EQ(res_pipeline.type_of_running, PerfResults::TypeOfRunning::kPipeline);
-  EXPECT_GT(res_pipeline.time_sec, 0.0);
-
-  EXPECT_NO_THROW(perf.TaskRun(attr));
-  auto res_taskrun = perf.GetPerfResults();
-  EXPECT_EQ(res_taskrun.type_of_running, PerfResults::TypeOfRunning::kTaskRun);
-  EXPECT_GT(res_taskrun.time_sec, 0.0);
-}
-
-TEST(PerfTest, PrintPerfStatisticThrowsOnNone) {
-  {
-    auto task_ptr = std::make_shared<DummyTask>();
-    Perf<int, int> perf(task_ptr);
-    EXPECT_THROW(perf.PrintPerfStatistic("test"), std::runtime_error);
-  }
-  EXPECT_TRUE(ppc::util::DestructorFailureFlag::Get());
-  ppc::util::DestructorFailureFlag::Unset();
-}
-
-TEST(PerfTest, GetStringParamNameTest) {
-  EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kTaskRun), "task_run");
-  EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kPipeline), "pipeline");
-  EXPECT_EQ(GetStringParamName(PerfResults::TypeOfRunning::kNone), "none");
-}
-
-TEST(TaskTest, DestructorInvalidPipelineOrderTerminatesPartialPipeline) {
-  {
-    struct BadTask : Task<int, int> {
-     protected:
-      bool ValidationImpl() override {
-        return true;
-      }
-      bool PreProcessingImpl() override {
-        return true;
-      }
-      bool RunImpl() override {
-        return true;
-      }
-      bool PostProcessingImpl() override {
-        return true;
-      }
-    } task;
-    task.Validation();
-  }
-  EXPECT_TRUE(ppc::util::DestructorFailureFlag::Get());
-  ppc::util::DestructorFailureFlag::Unset();
-}
-
-}  // namespace ppc::performance
diff --git a/modules/task/tests/task_tests.cpp b/modules/task/tests/task_tests.cpp
index 70d7c67a2..4589a1520 100644
--- a/modules/task/tests/task_tests.cpp
+++ b/modules/task/tests/task_tests.cpp
@@ -202,12 +202,22 @@ TEST(TaskTest, GetStringTaskTypeEachTypeWithValidFile) {
       << R"({"tasks": {"all": "enabled", "stl": "enabled", "omp": "enabled", "mpi": "enabled", "tbb": "enabled", "seq": "enabled"}})";
   file.close();
 
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kALL, path));
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kSTL, path));
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kOMP, path));
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kMPI, path));
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kTBB, path));
-  EXPECT_NO_THROW(GetStringTaskType(TypeOfTask::kSEQ, path));
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kALL, path), "all_enabled");
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kSTL, path), "stl_enabled");
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kOMP, path), "omp_enabled");
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kMPI, path), "mpi_enabled");
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kTBB, path), "tbb_enabled");
+  EXPECT_EQ(GetStringTaskType(TypeOfTask::kSEQ, path), "seq_enabled");
+}
+
+TEST(TaskTest, GetStringTaskTypeExceptionMessageContainsPath) {
+  const std::string missing_path = "non_existent_settings.json";
+  try {
+    GetStringTaskType(TypeOfTask::kSEQ, missing_path);
+    FAIL() << "Expected std::runtime_error";
+  } catch (const std::runtime_error &e) {
+    EXPECT_NE(std::string(e.what()).find(missing_path), std::string::npos);
+  }
 }
 
 TEST(TaskTest, GetStringTaskTypeReadsNestedTaskPath) {
@@ -257,6 +267,16 @@ TEST(TaskTest, GetStringTaskTypeThrowsIfKeyMissing) {
   EXPECT_ANY_THROW(GetStringTaskType(TypeOfTask::kSTL, path));
 }
 
+TEST(TaskTest, GetStringTaskTypeThrowsIfJsonValueIsNull) {
+  std::string path = "settings_null_value.json";
+  ScopedFile cleaner(path);
+  {  // Scope the stream so the settings file is flushed and closed before it is parsed.
+    std::ofstream settings(path);
+    settings << R"({"tasks": {"seq": null}})";
+  }
+  EXPECT_THROW(GetStringTaskType(TypeOfTask::kSEQ, path), NlohmannJsonTypeError);
+}
+
 TEST(TaskTest, TaskDestructorThrowsIfStageIncomplete) {
   {
     std::vector<int32_t> in(20, 1);
@@ -368,6 +388,16 @@ class DummyTask : public Task<int, int> {
   }
 };
 
+TEST(TaskTest, GetDynamicTypeReturnsCorrectEnum) {
+  DummyTask task;
+  task.SetTypeOfTask(TypeOfTask::kOMP);
+  task.Validation();
+  task.PreProcessing();
+  task.Run();
+  task.PostProcessing();
+  EXPECT_EQ(task.GetDynamicTypeOfTask(), TypeOfTask::kOMP);
+}
+
 TEST(TaskTest, ValidationThrowsIfCalledTwice) {
   auto task = std::make_shared<DummyTask>();
   task->Validation();
diff --git a/modules/util/include/perf_test_util.hpp b/modules/util/include/perf_test_util.hpp
index 13c4e3f81..e1ec442d4 100644
--- a/modules/util/include/perf_test_util.hpp
+++ b/modules/util/include/perf_test_util.hpp
@@ -1,32 +1,193 @@
 #pragma once
 
 #include <gtest/gtest.h>
-#include <omp.h>
-#include <tbb/tick_count.h>
 
+#include <benchmark/benchmark.h>
 #include <chrono>
 #include <cstddef>
+#include <cstdint>
+#include <exception>
 #include <functional>
-#include <sstream>
+#include <iostream>
+#include <mpi.h>
+#include <omp.h>
 #include <stdexcept>
 #include <string>
 #include <string_view>
+#include <tbb/tick_count.h>
 #include <tuple>
 #include <type_traits>
 #include <utility>
 
-#include "performance/include/performance.hpp"
 #include "task/include/task.hpp"
 #include "util/include/util.hpp"
 
 namespace ppc::util {
 
-double GetTimeMPI();
-int GetMPIRank();
+inline double DefaultTimer() {
+  return -1.0;
+}
+
+struct PerfAttr {
+  /// @brief Number of times the task is run for performance evaluation.
+  uint64_t num_running = 5;
+  /// @brief Timer function returning current time in seconds.
+  /// @cond
+  std::function<double()> current_timer = DefaultTimer;
+  /// @endcond
+};
+
+namespace detail {
+
+/// @brief Checks whether @p value passes a substring filter.
+/// @return true when @p filter is empty or occurs somewhere inside @p value.
+inline bool ContainsFilterToken(std::string_view value, std::string_view filter) {
+  const bool filter_disabled = filter.empty();
+  return filter_disabled || value.contains(filter);
+}
+
+/// @brief Checks whether a task category is selected by the category filter.
+/// @return true when either argument is empty or @p category_filter contains @p task_category as a substring.
+inline bool MatchesCategoryFilter(std::string_view task_category, std::string_view category_filter) {
+  const bool nothing_to_check = category_filter.empty() || task_category.empty();
+  return nothing_to_check || category_filter.contains(task_category);
+}
+
+/// @brief Applies the PPC_PERF_IMPL_FILTER and PPC_PERF_CATEGORY_FILTER environment filters to one benchmark.
+inline bool ShouldRunBenchmark(std::string_view test_name, std::string_view task_category) {
+  const auto impl_env = env::get<std::string>("PPC_PERF_IMPL_FILTER");
+  const auto category_env = env::get<std::string>("PPC_PERF_CATEGORY_FILTER");
+  const std::string_view impl_view = impl_env.has_value() ? std::string_view(impl_env.value()) : std::string_view{};
+  const std::string_view category_view =
+      category_env.has_value() ? std::string_view(category_env.value()) : std::string_view{};
+  return ContainsFilterToken(test_name, impl_view) && MatchesCategoryFilter(task_category, category_view);
+}
+
+/// @brief Maps a settings task path to "threads" or "processes" by its prefix; other paths give an empty string.
+inline std::string GetPerfTaskCategory(std::string_view settings_task_path) {
+  const bool is_threads = settings_task_path.starts_with("threads");
+  if (is_threads) {
+    return "threads";
+  }
+  const bool is_processes = settings_task_path.starts_with("processes");
+  return is_processes ? std::string{"processes"} : std::string{};
+}
+
+template <typename InType, typename OutType>
+void RunTaskForValidation(const ppc::task::TaskPtr<InType, OutType> &task) {
+  task->Validation();
+  task->PreProcessing();
+  task->Run();
+  task->PostProcessing();
+}
+
+/// @brief Creates a timer callable (seconds as double) chosen by the task's technology; throws for other types.
+inline std::function<double()> MakeTechnologyTimer(ppc::task::TypeOfTask task_type) {
+  using ppc::task::TypeOfTask;
+  switch (task_type) {
+    case TypeOfTask::kMPI:
+    case TypeOfTask::kALL:
+      return [] { return GetTimeMPI(); };
+    case TypeOfTask::kOMP:
+      return [] { return omp_get_wtime(); };
+    case TypeOfTask::kTBB:
+      return [origin = tbb::tick_count::now()] { return (tbb::tick_count::now() - origin).seconds(); };
+    case TypeOfTask::kSEQ:
+    case TypeOfTask::kSTL:
+      return [origin = std::chrono::high_resolution_clock::now()] {
+        const auto span = std::chrono::high_resolution_clock::now() - origin;
+        return static_cast<double>(std::chrono::duration_cast<std::chrono::nanoseconds>(span).count()) * 1e-9;
+      };
+    default:
+      throw std::runtime_error("The task type is not supported for performance testing.");
+  }
+}
+
+/// @brief Returns @p elapsed, replaced by its maximum over MPI_COMM_WORLD for kMPI and kALL tasks.
+inline double MaxElapsedTimeAcrossMpiRanks(double elapsed, ppc::task::TypeOfTask task_type) {
+  double slowest = elapsed;
+  if (task_type == ppc::task::TypeOfTask::kMPI || task_type == ppc::task::TypeOfTask::kALL) {
+    MPI_Allreduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  }
+  return slowest;
+}
+
+inline void SkipBenchmarkWithError(benchmark::State &state, const char *message) noexcept {
+  try {
+    state.SkipWithError(message);
+  } catch (const std::exception &e) {
+    std::cerr << "Failed to report benchmark error: " << e.what() << '\n';
+  } catch (...) {
+    std::cerr << "Failed to report unknown benchmark error" << '\n';
+  }
+}
+
+inline void CheckPerfTimeLimit(double elapsed) {
+  if (elapsed >= GetPerfMaxTime()) {
+    throw std::runtime_error("Task execution time exceeded the performance limit.");
+  }
+}
 
 template <typename InType, typename OutType>
-using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string,
-                                 ppc::performance::PerfResults::TypeOfRunning>;
+double RunTaskForBenchmark(const ppc::task::TaskPtr<InType, OutType> &task) {
+  const auto task_type = task->GetDynamicTypeOfTask();
+  const auto timer = MakeTechnologyTimer(task_type);
+  task->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
+
+  task->Validation();
+  task->PreProcessing();
+  SynchronizeMpiRanks();
+  const double begin = timer();
+  task->Run();
+  const double elapsed = timer() - begin;
+  task->PostProcessing();
+  const double max_elapsed = MaxElapsedTimeAcrossMpiRanks(elapsed, task_type);
+  CheckPerfTimeLimit(max_elapsed);
+  return max_elapsed;
+}
+
+template <typename TaskGetter, typename InType>
+void RunBenchmarkBody(const TaskGetter &task_getter, const InType &input_data, const std::string &test_env_token,
+                      benchmark::State &state) noexcept {
+  try {
+    const auto benchmark_env_scope = ppc::util::test::ScopedPerTestEnv(test_env_token);
+    for (auto _ : state) {
+      auto task = task_getter(input_data);
+      const double elapsed = RunTaskForBenchmark(task);
+      state.SetIterationTime(elapsed);
+      benchmark::DoNotOptimize(task->GetOutput());
+    }
+  } catch (const std::exception &e) {
+    PerformanceFailureFlag::Set();
+    SkipBenchmarkWithError(state, e.what());
+  } catch (...) {
+    PerformanceFailureFlag::Set();
+    SkipBenchmarkWithError(state, "Unknown exception in performance benchmark");
+  }
+}
+
+template <typename TaskGetter, typename InType>
+class BenchmarkTaskBody final {
+ public:
+  BenchmarkTaskBody(TaskGetter task_getter, InType input_data, std::string test_env_token)
+      : task_getter_(std::move(task_getter)),
+        input_data_(std::move(input_data)),
+        test_env_token_(std::move(test_env_token)) {}
+
+  void operator()(benchmark::State &state) const noexcept {
+    RunBenchmarkBody(task_getter_, input_data_, test_env_token_, state);
+  }
+
+ private:
+  TaskGetter task_getter_;
+  InType input_data_;
+  std::string test_env_token_;
+};
+
+}  // namespace detail
+
+template <typename InType, typename OutType>
+using PerfTestParam = std::tuple<std::function<ppc::task::TaskPtr<InType, OutType>(InType)>, std::string, std::string>;
 
 template <typename InType, typename OutType>
 /// @brief Base class for performance testing of parallel tasks.
@@ -36,9 +197,7 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
  public:
   /// @brief Generates a readable name for the performance test case.
   static std::string CustomPerfTestName(const ::testing::TestParamInfo<PerfTestParam<InType, OutType>> &info) {
-    return ppc::performance::GetStringParamName(
-               std::get<static_cast<std::size_t>(GTestParamIndex::kTestParams)>(info.param)) +
-           "_" + std::get<static_cast<std::size_t>(GTestParamIndex::kNameTest)>(info.param);
+    return std::get<static_cast<std::size_t>(GTestParamIndex::kNameTest)>(info.param);
   }
 
  protected:
@@ -46,69 +205,51 @@ class BaseRunPerfTests : public ::testing::TestWithParam<PerfTestParam<InType, O
   /// @brief Supplies input data for performance testing.
   virtual InType GetTestInputData() = 0;
 
-  virtual void SetPerfAttributes(ppc::performance::PerfAttr &perf_attrs) {
-    if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kMPI ||
-        task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kALL) {
-      const double t0 = GetTimeMPI();
-      perf_attrs.current_timer = [t0] { return GetTimeMPI() - t0; };
-    } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kOMP) {
-      const double t0 = omp_get_wtime();
-      perf_attrs.current_timer = [t0] { return omp_get_wtime() - t0; };
-    } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSEQ ||
-               task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kSTL) {
-      const auto t0 = std::chrono::high_resolution_clock::now();
-      perf_attrs.current_timer = [t0] {
-        auto now = std::chrono::high_resolution_clock::now();
-        auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
-        return static_cast<double>(ns) * 1e-9;
-      };
-    } else if (task_->GetDynamicTypeOfTask() == ppc::task::TypeOfTask::kTBB) {
-      const auto t0 = tbb::tick_count::now();
-      perf_attrs.current_timer = [t0] { return (tbb::tick_count::now() - t0).seconds(); };
-    } else {
-      throw std::runtime_error("The task type is not supported for performance testing.");
-    }
+  virtual void SetPerfAttributes(PerfAttr &perf_attrs) {
+    perf_attrs.current_timer = detail::MakeTechnologyTimer(task_->GetDynamicTypeOfTask());
   }
 
   void ExecuteTest(const PerfTestParam<InType, OutType> &perf_test_param) {
     auto task_getter = std::get<static_cast<std::size_t>(GTestParamIndex::kTaskGetter)>(perf_test_param);
     auto test_name = std::get<static_cast<std::size_t>(GTestParamIndex::kNameTest)>(perf_test_param);
-    auto mode = std::get<static_cast<std::size_t>(GTestParamIndex::kTestParams)>(perf_test_param);
+    auto task_category = std::get<static_cast<std::size_t>(GTestParamIndex::kTestParams)>(perf_test_param);
 
     ASSERT_FALSE(test_name.find("unknown") != std::string::npos);
     if (test_name.find("disabled") != std::string::npos) {
-      // A single perf test body may execute several implementations; do not abort the enabled ones.
+      return;
+    }
+    if (!detail::ShouldRunBenchmark(test_name, task_category)) {
       return;
     }
 
-    const auto test_env_scope = ppc::util::test::MakePerTestEnvForCurrentGTest(test_name);
+    const auto test_env_token = ppc::util::test::MakeCurrentGTestToken(test_name);
+    const auto test_env_scope = ppc::util::test::ScopedPerTestEnv(test_env_token);
 
-    task_ = task_getter(GetTestInputData());
-    ppc::performance::Perf perf(task_);
-    ppc::performance::PerfAttr perf_attr;
+    const auto input_data = GetTestInputData();
+    task_ = task_getter(input_data);
+    task_->GetStateOfTesting() = ppc::task::StateOfTesting::kPerf;
     SynchronizeMpiRanks();
-    SetPerfAttributes(perf_attr);
-
-    if (mode == ppc::performance::PerfResults::TypeOfRunning::kPipeline) {
-      perf.PipelineRun(perf_attr);
-    } else if (mode == ppc::performance::PerfResults::TypeOfRunning::kTaskRun) {
-      perf.TaskRun(perf_attr);
-    } else {
-      std::stringstream err_msg;
-      err_msg << '\n' << "The type of performance check for the task was not selected.\n";
-      throw std::runtime_error(err_msg.str().c_str());
-    }
-
-    if (GetMPIRank() == 0) {
-      perf.PrintPerfStatistic(test_name);
-    }
+    detail::RunTaskForValidation(task_);
 
     OutType output_data = task_->GetOutput();
     ASSERT_TRUE(CheckTestOutputData(output_data));
+
+    PerfAttr perf_attr;
+    SetPerfAttributes(perf_attr);
+    const auto num_iterations = perf_attr.num_running == 0 ? 1 : perf_attr.num_running;
+
+    using BenchmarkInputType = std::decay_t<decltype(input_data)>;
+    auto benchmark_body =
+        detail::BenchmarkTaskBody<decltype(task_getter), BenchmarkInputType>(task_getter, input_data, test_env_token);
+
+    benchmark::RegisterBenchmark(test_name, std::move(benchmark_body))
+        ->UseManualTime()
+        ->Unit(benchmark::kSecond)
+        ->Iterations(static_cast<std::int64_t>(num_iterations));
   }
 
  private:
-  ppc::task::TaskPtr<InType, OutType> task_;
+  ppc::task::TaskPtr<InType, OutType> task_{};
 };
 
 template <typename TaskType, typename InputType>
@@ -117,9 +258,7 @@ auto MakePerfTaskTuples(const std::string &settings_path, std::string_view setti
                     ppc::task::GetStringTaskType(TaskType::GetStaticTypeOfTask(), settings_path, settings_task_path);
 
   return std::make_tuple(std::make_tuple(ppc::task::TaskGetter<TaskType, InputType>, name,
-                                         ppc::performance::PerfResults::TypeOfRunning::kPipeline),
-                         std::make_tuple(ppc::task::TaskGetter<TaskType, InputType>, name,
-                                         ppc::performance::PerfResults::TypeOfRunning::kTaskRun));
+                                         detail::GetPerfTaskCategory(settings_task_path)));
 }
 
 template <typename Tuple, std::size_t... I>
@@ -129,7 +268,7 @@ auto TupleToGTestValuesImpl(const Tuple &tup, std::index_sequence<I...> /*unused
 
 template <typename Tuple>
 auto TupleToGTestValues(Tuple &&tup) {
-  constexpr size_t kSize = std::tuple_size_v<std::decay_t<Tuple>>;
+  constexpr std::size_t kSize{std::tuple_size_v<std::decay_t<Tuple>>};
   return TupleToGTestValuesImpl(std::forward<Tuple>(tup), std::make_index_sequence<kSize>{});
 }
 
diff --git a/modules/util/include/util.hpp b/modules/util/include/util.hpp
index 70a64e847..912344551 100644
--- a/modules/util/include/util.hpp
+++ b/modules/util/include/util.hpp
@@ -64,6 +64,24 @@ class DestructorFailureFlag {
   inline static std::atomic<bool> failure_flag{false};
 };
 
+class PerformanceFailureFlag {
+ public:
+  static void Set() {
+    failure_flag.store(true);
+  }
+
+  static void Unset() {
+    failure_flag.store(false);
+  }
+
+  static bool Get() {
+    return failure_flag.load();
+  }
+
+ private:
+  inline static std::atomic<bool> failure_flag{false};
+};
+
 enum class GTestParamIndex : uint8_t {
   kTaskGetter,
   kNameTest,
@@ -75,6 +93,8 @@ int GetNumThreads();
 int GetNumProc();
 double GetTaskMaxTime();
 double GetPerfMaxTime();
+double GetTimeMPI();
+int GetMPIRank();
 void SynchronizeMpiRanks();
 
 template <typename T>
diff --git a/modules/util/src/func_test_util.cpp b/modules/util/src/func_test_util.cpp
index a5dfe0811..c901919f5 100644
--- a/modules/util/src/func_test_util.cpp
+++ b/modules/util/src/func_test_util.cpp
@@ -1,6 +1,6 @@
 #include <mpi.h>
 
-#include "util/include/perf_test_util.hpp"
+#include "util/include/util.hpp"
 
 double ppc::util::GetTimeMPI() {
   return MPI_Wtime();
diff --git a/scoreboard/main.py b/scoreboard/main.py
index 0b479a0da..997799876 100644
--- a/scoreboard/main.py
+++ b/scoreboard/main.py
@@ -1,7 +1,7 @@
 import argparse
-import csv
 import json
 import logging
+import re
 import shutil
 import subprocess
 import sys
@@ -20,6 +20,8 @@
 # Threads table order: seq first, then omp, tbb, stl, all
 task_types_threads = ["seq", "omp", "tbb", "stl", "all"]
 task_types_processes = ["mpi", "seq"]
+PERF_STAT_PRIORITY = {"median": 0, "mean": 1, "": 2}
+PERF_TIME_UNIT_TO_SECONDS = {"s": 1.0, "ms": 1e-3, "us": 1e-6, "ns": 1e-9}
 
 script_dir = Path(__file__).parent
 tasks_dir = script_dir.parent / "tasks"
@@ -208,143 +210,108 @@ def discover_tasks(tasks_dir, task_types):
 directories, task_category_map = discover_tasks(tasks_dir, task_types)
 
 
-def load_performance_data_threads(perf_stat_file_path: Path) -> dict:
-    """Load threads performance ratios (T_x/T_seq) from CSV.
-    Expected header: Task, SEQ, OMP, TBB, STL, ALL
-    """
-    perf_stats: dict[str, dict] = {}
-    if perf_stat_file_path.exists():
-        with open(perf_stat_file_path, "r", newline="") as csvfile:
-            reader = csv.DictReader(csvfile)
-            for row in reader:
-                task_name = row.get("Task")
-                if not task_name:
-                    continue
-                perf_stats[task_name] = {
-                    "seq": row.get("SEQ", "?"),
-                    "omp": row.get("OMP", "?"),
-                    "tbb": row.get("TBB", "?"),
-                    "stl": row.get("STL", "?"),
-                    "all": row.get("ALL", "?"),
-                }
-    else:
-        logger.warning("Threads perf stats CSV not found at %s", perf_stat_file_path)
-    return perf_stats
+def parse_benchmark_name(name: str) -> tuple[str, str, str] | None:
+    """Split ``<task>_<impl>_enabled[_mean|_median]`` into (task, impl, statistic).
+
+    Anything after the first ``/`` is ignored; names that do not fit give ``None``.
+    """
+    head, _, _ = name.partition("/")
+    pattern = r"(.+?)_(all|mpi|omp|seq|stl|tbb)_enabled(?:_(mean|median))?$"
+    found = re.match(pattern, head)
+    return (found[1], found[2], found[3] or "") if found else None
 
 
-def load_performance_data(perf_stat_file_path: Path) -> dict:
-    """Compatibility helper for legacy tests: load perf data with optional MPI column.
+def _benchmark_time_to_seconds(value: float, unit: str) -> float:
+    return float(value) * PERF_TIME_UNIT_TO_SECONDS.get(unit, 1e-9)
 
-    Always returns a mapping: task -> {seq, omp, stl, tbb, all, mpi}
-    Missing columns are filled with ``"N/A"``; empty cells stay empty strings.
-    """
-    perf_stats: dict[str, dict] = {}
-    if not perf_stat_file_path.exists():
-        return perf_stats
-
-    with open(perf_stat_file_path, "r", newline="") as csvfile:
-        reader = csv.DictReader(csvfile)
-        # Normalize column names we care about
-        for row in reader:
-            task_name = row.get("Task")
-            if not task_name:
-                continue
 
-            def _get(col: str) -> str:
-                if col in row:
-                    return row.get(col, "N/A")
-                return "N/A"
-
-            perf_stats[task_name] = {
-                "seq": _get("SEQ"),
-                "omp": _get("OMP"),
-                "stl": _get("STL"),
-                "tbb": _get("TBB"),
-                "all": _get("ALL"),
-                "mpi": _get("MPI"),
-            }
-    return perf_stats
+def _perf_record_priority(record: dict) -> int:
+    return PERF_STAT_PRIORITY.get(str(record.get("statistic", "")), 3)
 
 
-def load_performance_data_processes(perf_stat_file_path: Path) -> dict:
-    """Load processes performance data (raw times, seconds) and merge *_seq/_mpi rows.
+def load_benchmark_performance_data(benchmarks_dir: Path) -> dict[str, dict]:
+    """Load Google Benchmark JSON files written by ppc_perf_tests.
 
-    Expected header: Task, SEQ, MPI with absolute times. If the CSV contains
-    split rows like <task>_seq and <task>_mpi, they are combined into one entry.
+    Returns raw benchmark times in seconds:
+      benchmark task name -> implementation -> seconds
     """
-    perf_stats: dict[str, dict] = {}
-    if not perf_stat_file_path.exists():
-        logger.warning("Processes perf stats CSV not found at %s", perf_stat_file_path)
-        return perf_stats
-
-    with open(perf_stat_file_path, "r", newline="") as csvfile:
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            task_name = row.get("Task")
-            if not task_name:
+    if not benchmarks_dir.exists():
+        logger.warning("Benchmark JSON directory not found at %s", benchmarks_dir)
+        return {}
+
+    selected: dict[tuple[str, str], dict] = {}
+    for json_path in sorted(benchmarks_dir.glob("*.json")):
+        try:
+            with open(json_path, "r", encoding="utf-8") as file:
+                payload = json.load(file)
+        except (OSError, json.JSONDecodeError) as e:
+            logger.warning("Failed to parse benchmark JSON %s: %s", json_path, e)
+            continue
+
+        for entry in payload.get("benchmarks", []):
+            parsed_name = parse_benchmark_name(str(entry.get("name", "")))
+            if parsed_name is None:
                 continue
-            seq_val = row.get("SEQ", "?")
-            mpi_val = row.get("MPI", "?")
-
-            base_name = task_name
-            mode = None
-            for suff, lbl in (("_seq", "seq"), ("_mpi", "mpi")):
-                if task_name.endswith(suff):
-                    base_name = task_name[: -len(suff)]
-                    mode = lbl
-                    break
-
-            entry = perf_stats.setdefault(base_name, {"seq": "?", "mpi": "?"})
-            if mode == "seq":
-                if seq_val and seq_val != "?":
-                    entry["seq"] = seq_val
-            elif mode == "mpi":
-                if mpi_val and mpi_val != "?":
-                    entry["mpi"] = mpi_val
-            else:
-                if seq_val and seq_val != "?":
-                    entry["seq"] = seq_val
-                if mpi_val and mpi_val != "?":
-                    entry["mpi"] = mpi_val
+            task_name, implementation, statistic = parsed_name
+            try:
+                seconds = _benchmark_time_to_seconds(
+                    float(entry["real_time"]), str(entry.get("time_unit", "ns"))
+                )
+            except (KeyError, TypeError, ValueError):
+                continue
+            record = {
+                "task": task_name,
+                "implementation": implementation,
+                "seconds": seconds,
+                "statistic": statistic or str(entry.get("aggregate_name", "")),
+            }
+            key = (task_name, implementation)
+            previous = selected.get(key)
+            if previous is None or _perf_record_priority(
+                record
+            ) < _perf_record_priority(previous):
+                selected[key] = record
 
+    perf_stats: dict[str, dict] = {}
+    for record in selected.values():
+        perf_stats.setdefault(record["task"], {})[record["implementation"]] = (
+            f"{record['seconds']:.10g}"
+        )
     return perf_stats
 
 
 def calculate_performance_metrics(perf_val, eff_num_proc, task_type, seq_val=None):
-    """Calculate acceleration and efficiency.
-
-    For processes table we pass raw times; for threads legacy ratios we keep old behavior.
-    """
+    """Calculate acceleration and efficiency from raw times in seconds."""
     acceleration = "?"
     efficiency = "?"
     try:
         if seq_val is None:
             perf_float = float(perf_val)
-            if perf_float > 0 and not (
-                perf_float == float("inf") or perf_float != perf_float
-            ):
-                speedup = 1.0 / perf_float
-                if task_type == "seq":
-                    acceleration = "1.00"
-                    efficiency = "N/A"
-                else:
-                    acceleration = f"{speedup:.2f}"
-                    efficiency = f"{speedup / eff_num_proc * 100:.2f}%"
+            if task_type == "seq" and 0 < perf_float < float("inf"):  # NaN/inf stay "?"
+                return "1.00", "N/A"
+            return acceleration, efficiency
+
+        seq_t = float(seq_val)
+        par_t = float(perf_val)
+        if (
+            seq_t <= 0
+            or par_t <= 0
+            or seq_t == float("inf")
+            or par_t == float("inf")
+            or seq_t != seq_t
+            or par_t != par_t
+        ):
+            return acceleration, efficiency
+        if min(seq_t, par_t) < 0.001:
+            tiny_mark = "t &lt;<br/>1e-3"
+            return tiny_mark, tiny_mark
+        speedup = seq_t / par_t
+        if task_type == "seq":
+            acceleration = "1.00"
+            efficiency = "N/A"
         else:
-            seq_t = float(seq_val)
-            par_t = float(perf_val)
-            # If times are too small, metrics are unstable -> mark N/A
-            if min(seq_t, par_t) < 0.001:
-                tiny_mark = "t &lt;<br/>1e-3"
-                return tiny_mark, tiny_mark
-            if seq_t > 0 and par_t > 0:
-                speedup = seq_t / par_t
-                if task_type == "seq":
-                    acceleration = "1.00"
-                    efficiency = "N/A"
-                else:
-                    acceleration = f"{speedup:.2f}"
-                    efficiency = f"{speedup / eff_num_proc * 100:.2f}%"
+            acceleration = f"{speedup:.2f}"
+            efficiency = f"{speedup / eff_num_proc * 100:.2f}%"
     except (ValueError, TypeError):
         pass
     return acceleration, efficiency
@@ -646,9 +613,11 @@ def _load_student_fields(dir_name: str):
 
             perf_val = perf_stats.get(dir, {}).get(task_type, "?")
 
-            # Calculate acceleration and efficiency if performance data is available
+            seq_val = None
+            if isinstance(perf_stats.get(dir, {}), dict):
+                seq_val = perf_stats.get(dir, {}).get("seq")
             acceleration, efficiency = calculate_performance_metrics(
-                perf_val, eff_num_proc, task_type
+                perf_val, eff_num_proc, task_type, seq_val=seq_val
             )
 
             # Calculate deadline penalty points
@@ -776,80 +745,12 @@ def _process_deadline_labels(task_numbers: list[int]) -> list[str]:
             labels.append(label)
         return labels if any(labels) else []
 
-    # Locate perf CSVs from CI or local runs (threads and processes)
-    candidates_threads = [
-        script_dir.parent
-        / "build"
-        / "perf_stat_dir"
-        / "threads_task_run_perf_table.csv",
-        script_dir.parent / "perf_stat_dir" / "threads_task_run_perf_table.csv",
-        # Fallback to old single-file name
-        script_dir.parent / "build" / "perf_stat_dir" / "task_run_perf_table.csv",
-        script_dir.parent / "perf_stat_dir" / "task_run_perf_table.csv",
+    benchmark_dirs = [
+        script_dir.parent / "build" / "perf_stat_dir" / "benchmarks",
+        script_dir.parent / "perf_stat_dir" / "benchmarks",
     ]
-    threads_csv = next(
-        (p for p in candidates_threads if p.exists()), candidates_threads[0]
-    )
-
-    candidates_processes = [
-        script_dir.parent
-        / "build"
-        / "perf_stat_dir"
-        / "processes_task_run_perf_table.csv",
-        script_dir.parent / "perf_stat_dir" / "processes_task_run_perf_table.csv",
-    ]
-    processes_csv = next(
-        (p for p in candidates_processes if p.exists()), candidates_processes[0]
-    )
-
-    # Read and merge performance statistics CSVs (keys = CSV Task column)
-    perf_stats_threads = load_performance_data_threads(threads_csv)
-    perf_stats_processes = load_performance_data_processes(processes_csv)
-
-    def _aggregate_process_csv(
-        perf_stat_file_path: Path, base: dict[str, dict]
-    ) -> dict:
-        """Parse CSV again to ensure merged seq/mpi entries."""
-        import csv
-
-        perf_stats_local = dict(base)
-        if not perf_stat_file_path.exists():
-            return perf_stats_local
-        with open(perf_stat_file_path, "r", newline="") as csvfile:
-            reader = csv.DictReader(csvfile)
-            for row in reader:
-                task_name = row.get("Task")
-                if not task_name:
-                    continue
-                seq_val = row.get("SEQ", "?")
-                mpi_val = row.get("MPI", "?")
-                base_name = task_name
-                mode = None
-                for suff, lbl in (("_seq", "seq"), ("_mpi", "mpi")):
-                    if task_name.endswith(suff):
-                        base_name = task_name[: -len(suff)]
-                        mode = lbl
-                        break
-                entry = perf_stats_local.setdefault(base_name, {"seq": "?", "mpi": "?"})
-                if mode == "seq":
-                    if seq_val and seq_val != "?":
-                        entry["seq"] = seq_val
-                elif mode == "mpi":
-                    if mpi_val and mpi_val != "?":
-                        entry["mpi"] = mpi_val
-                else:
-                    if seq_val and seq_val != "?":
-                        entry["seq"] = seq_val
-                    if mpi_val and mpi_val != "?":
-                        entry["mpi"] = mpi_val
-        return perf_stats_local
-
-    perf_stats_processes = _aggregate_process_csv(processes_csv, perf_stats_processes)
-
-    perf_stats_raw: dict[str, dict] = {}
-    perf_stats_raw.update(perf_stats_threads)
-    for k, v in perf_stats_processes.items():
-        perf_stats_raw[k] = {**perf_stats_raw.get(k, {}), **v}
+    benchmarks_dir = next((p for p in benchmark_dirs if p.exists()), benchmark_dirs[0])
+    perf_stats_raw = load_benchmark_performance_data(benchmarks_dir)
 
     # Partition tasks by category derived from the filesystem layout.
     threads_task_dirs = [
@@ -867,11 +768,8 @@ def _aggregate_process_csv(
             elif "processes" in name:
                 processes_task_dirs.append(name)
 
-    # Resolve performance stats keys (from CSV Task names) to actual task directories.
-    # Old logic grouped by "family", which made all tasks share the same numbers.
-    # New logic: map each CSV key to the best-matching directory name by substring.
+    # Resolve benchmark task names to actual task directories.
     perf_stats: dict[str, dict] = {}
-    import re as _re
 
     dir_names_sorted = sorted(directories.keys(), key=len, reverse=True)
 
@@ -886,35 +784,18 @@ def _merge_perf_maps(existing: dict, updates: dict) -> dict:
             merged[k] = v
         return merged
 
-    # Precompute mapping: process task number -> list of directories. Meta-layout
-    # tasks derive this from processes/t1, processes/t2, etc.
-    process_tasknum_map: dict[int, list[str]] = {}
-    for d, num in process_task_indices.items():
-        process_tasknum_map.setdefault(num, []).append(d)
-
-    def _match_dir(csv_key: str) -> str | None:
-        # Strip common suffixes like "_mpi_enabled" etc. to improve matching
-        base = _re.sub(r"_(mpi|omp|tbb|stl|all|seq)_enabled.*", "", csv_key)
+    def _match_dir(benchmark_key: str) -> str | None:
+        base = re.sub(r"_(mpi|omp|tbb|stl|all|seq)_enabled.*", "", benchmark_key)
         for d in dir_names_sorted:
-            if base.startswith(d) or d in base or csv_key.startswith(d):
+            if base.startswith(d) or d in base or benchmark_key.startswith(d):
                 return d
         return None
 
     for key, vals in perf_stats_raw.items():
         targets: set[str] = set()
-        # 1) Direct / substring match
         target = _match_dir(key)
         if target:
             targets.add(target)
-        # 2) If a legacy key encodes processes_N, spread to dirs with that task number
-        m_num = _re.search(r"processes_(\d+)", key)
-        if m_num:
-            try:
-                num = int(m_num.group(1))
-                targets.update(process_tasknum_map.get(num, []))
-            except Exception:
-                pass
-        # Apply merged values to all targets
         for t in targets:
             perf_stats[t] = _merge_perf_maps(perf_stats.get(t, {}), vals)
 
diff --git a/scoreboard/tests/conftest.py b/scoreboard/tests/conftest.py
index 08be73a62..960bbe8cf 100644
--- a/scoreboard/tests/conftest.py
+++ b/scoreboard/tests/conftest.py
@@ -2,7 +2,6 @@
 Pytest configuration and shared fixtures for scoreboard tests.
 """
 
-import csv
 import shutil
 import tempfile
 from pathlib import Path
@@ -84,48 +83,6 @@ def sample_task_structure(temp_dir):
     return tasks_dir
 
 
-@pytest.fixture
-def sample_performance_csv(temp_dir):
-    """Create a sample performance CSV file."""
-    csv_file = temp_dir / "performance.csv"
-
-    data = [
-        {
-            "Task": "example_task",
-            "SEQ": "1.0",
-            "OMP": "0.5",
-            "STL": "0.3",
-            "TBB": "0.4",
-            "ALL": "0.2",
-        },
-        {
-            "Task": "disabled_task",
-            "SEQ": "2.0",
-            "OMP": "1.0",
-            "STL": "0.8",
-            "TBB": "0.9",
-            "ALL": "0.7",
-        },
-        {
-            "Task": "partial_task",
-            "SEQ": "1.5",
-            "OMP": "N/A",
-            "STL": "N/A",
-            "TBB": "N/A",
-            "ALL": "N/A",
-        },
-    ]
-
-    with open(csv_file, "w", newline="") as f:
-        writer = csv.DictWriter(
-            f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"]
-        )
-        writer.writeheader()
-        writer.writerows(data)
-
-    return csv_file
-
-
 @pytest.fixture
 def sample_config_files(temp_dir, sample_config, sample_plagiarism_config):
     """Create sample configuration files."""
diff --git a/scoreboard/tests/test_calculate_performance_metrics.py b/scoreboard/tests/test_calculate_performance_metrics.py
index 4ed144b4e..e14ff64f2 100644
--- a/scoreboard/tests/test_calculate_performance_metrics.py
+++ b/scoreboard/tests/test_calculate_performance_metrics.py
@@ -3,93 +3,145 @@
 
 class TestCalculatePerformanceMetrics:
     def test_calculate_performance_metrics_valid_values(self):
-        acceleration, efficiency = calculate_performance_metrics("0.5", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.5", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "2.00"
         assert efficiency == "50.00%"
 
-        acceleration, efficiency = calculate_performance_metrics("0.25", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.25", 4, "tbb", seq_val="1.0"
+        )
         assert acceleration == "4.00"
         assert efficiency == "100.00%"
 
-        acceleration, efficiency = calculate_performance_metrics("0.5", 2)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.5", 2, "stl", seq_val="1.0"
+        )
         assert acceleration == "2.00"
         assert efficiency == "100.00%"
 
     def test_calculate_performance_metrics_edge_cases(self):
-        acceleration, efficiency = calculate_performance_metrics("0.1", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.1", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "10.00"
         assert efficiency == "250.00%"
 
-        acceleration, efficiency = calculate_performance_metrics("1.0", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "1.0", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "1.00"
         assert efficiency == "25.00%"
 
-        acceleration, efficiency = calculate_performance_metrics("2.0", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "2.0", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "0.50"
         assert efficiency == "12.50%"
 
     def test_calculate_performance_metrics_invalid_values(self):
-        acceleration, efficiency = calculate_performance_metrics("0.0", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.0", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("-1.0", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "-1.0", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("invalid", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "invalid", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("inf", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "inf", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("nan", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "nan", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
     def test_calculate_performance_metrics_special_strings(self):
-        acceleration, efficiency = calculate_performance_metrics("?", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "?", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics("N/A", 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            "N/A", 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
-        acceleration, efficiency = calculate_performance_metrics(None, 4)
+        acceleration, efficiency = calculate_performance_metrics(
+            None, 4, "omp", seq_val="1.0"
+        )
         assert acceleration == "?"
         assert efficiency == "?"
 
     def test_calculate_performance_metrics_different_proc_counts(self):
         perf_val = "0.25"
 
-        acceleration, efficiency = calculate_performance_metrics(perf_val, 1)
+        acceleration, efficiency = calculate_performance_metrics(
+            perf_val, 1, "omp", seq_val="1.0"
+        )
         assert acceleration == "4.00"
         assert efficiency == "400.00%"
 
-        acceleration, efficiency = calculate_performance_metrics(perf_val, 2)
+        acceleration, efficiency = calculate_performance_metrics(
+            perf_val, 2, "omp", seq_val="1.0"
+        )
         assert acceleration == "4.00"
         assert efficiency == "200.00%"
 
-        acceleration, efficiency = calculate_performance_metrics(perf_val, 8)
+        acceleration, efficiency = calculate_performance_metrics(
+            perf_val, 8, "omp", seq_val="1.0"
+        )
         assert acceleration == "4.00"
         assert efficiency == "50.00%"
 
-        acceleration, efficiency = calculate_performance_metrics(perf_val, 16)
+        acceleration, efficiency = calculate_performance_metrics(
+            perf_val, 16, "omp", seq_val="1.0"
+        )
         assert acceleration == "4.00"
         assert efficiency == "25.00%"
 
     def test_calculate_performance_metrics_precision(self):
-        acceleration, efficiency = calculate_performance_metrics("0.3", 3)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.3", 3, "omp", seq_val="1.0"
+        )
         assert acceleration == "3.33"
         assert efficiency == "111.11%"
 
-        acceleration, efficiency = calculate_performance_metrics("0.7", 6)
+        acceleration, efficiency = calculate_performance_metrics(
+            "0.7", 6, "omp", seq_val="1.0"
+        )
         assert acceleration == "1.43"
         assert efficiency == "23.81%"
+
+    def test_calculate_performance_metrics_requires_seq_baseline_for_parallel(self):
+        # No seq_val is passed here, so both metrics must come back as "?".
+        metrics = calculate_performance_metrics("0.5", 4, "omp")
+        assert tuple(metrics) == ("?", "?")
+
+    def test_calculate_performance_metrics_seq_without_baseline(self):
+        # Expected for a "seq" value without seq_val: "1.00" and "N/A".
+        metrics = calculate_performance_metrics("1.0", 4, "seq")
+        assert tuple(metrics) == ("1.00", "N/A")
diff --git a/scoreboard/tests/test_load_benchmark_performance_data.py b/scoreboard/tests/test_load_benchmark_performance_data.py
new file mode 100644
index 000000000..4bc66ca38
--- /dev/null
+++ b/scoreboard/tests/test_load_benchmark_performance_data.py
@@ -0,0 +1,120 @@
+"""
+Tests for loading Google Benchmark performance JSON files.
+"""
+
+import json
+
+from main import load_benchmark_performance_data, parse_benchmark_name
+
+
+class TestLoadBenchmarkPerformanceData:
+    """Test cases for Google Benchmark performance data loading."""
+
+    def test_parse_threads_benchmark_name(self):
+        # The "/iterations:5/manual_time" tail must not leak into the parsed tuple.
+        benchmark_name = "example_threads_omp_enabled/iterations:5/manual_time"
+        expected = ("example_threads", "omp", "")
+
+        parsed = parse_benchmark_name(benchmark_name)
+
+        assert parsed == expected
+
+    def test_parse_processes_benchmark_name(self):
+        # The "t2" token belongs to the first tuple element, "mpi" to the second.
+        expected = ("example_processes_t2", "mpi", "")
+
+        parsed = parse_benchmark_name("example_processes_t2_mpi_enabled")
+        assert parsed == expected
+
+    def test_load_benchmark_json_in_seconds(self, temp_dir):
+        """Timings given in seconds keep their value and come back as strings."""
+        payload = {
+            "benchmarks": [
+                {
+                    "name": "example_threads_seq_enabled",
+                    "real_time": 1.5,
+                    "time_unit": "s",
+                },
+                {
+                    "name": "example_threads_omp_enabled",
+                    "real_time": 0.75,
+                    "time_unit": "s",
+                },
+                {
+                    "name": "example_threads_omp_disabled",
+                    "real_time": 0.8,
+                    "time_unit": "s",
+                },
+            ]
+        }
+        benchmarks_dir = temp_dir / "benchmarks"
+        benchmarks_dir.mkdir()
+        report_path = benchmarks_dir / "threads.json"
+        report_path.write_text(json.dumps(payload), encoding="utf-8")
+
+        result = load_benchmark_performance_data(benchmarks_dir)
+
+        threads = result["example_threads"]
+        # A "_disabled" entry is written too, yet "omp" must be the enabled time.
+        assert threads["seq"] == "1.5"
+        assert threads["omp"] == "0.75"
+
+    def test_load_benchmark_json_converts_units_to_seconds(self, temp_dir):
+        """Timings given in ``ms`` and ``us`` come back converted to seconds."""
+        payload = {
+            "benchmarks": [
+                {
+                    "name": "example_processes_t1_seq_enabled",
+                    "real_time": 250,
+                    "time_unit": "ms",
+                },
+                {
+                    "name": "example_processes_t1_mpi_enabled",
+                    "real_time": 100000,
+                    "time_unit": "us",
+                },
+            ]
+        }
+        benchmarks_dir = temp_dir / "benchmarks"
+        benchmarks_dir.mkdir()
+        report_path = benchmarks_dir / "processes.json"
+        report_path.write_text(json.dumps(payload), encoding="utf-8")
+
+        result = load_benchmark_performance_data(benchmarks_dir)
+
+        processes = result["example_processes_t1"]
+        # 250 ms is expected as "0.25" and 100000 us as "0.1".
+        assert processes["seq"] == "0.25"
+        assert processes["mpi"] == "0.1"
+
+    def test_load_benchmark_json_prefers_median_statistic(self, temp_dir):
+        """The ``_median`` aggregate wins over the plain and ``_mean`` entries."""
+        payload = {
+            "benchmarks": [
+                {
+                    "name": "example_threads_tbb_enabled",
+                    "real_time": 0.4,
+                    "time_unit": "s",
+                },
+                {
+                    "name": "example_threads_tbb_enabled_mean",
+                    "real_time": 0.3,
+                    "time_unit": "s",
+                },
+                {
+                    "name": "example_threads_tbb_enabled_median",
+                    "real_time": 0.2,
+                    "time_unit": "s",
+                },
+            ]
+        }
+        benchmarks_dir = temp_dir / "benchmarks"
+        benchmarks_dir.mkdir()
+        report_path = benchmarks_dir / "threads.json"
+        report_path.write_text(json.dumps(payload), encoding="utf-8")
+
+        result = load_benchmark_performance_data(benchmarks_dir)
+
+        # 0.2 is the "_median" row; 0.4 (plain) and 0.3 ("_mean") must lose.
+        reported = result["example_threads"]["tbb"]
+        assert reported == "0.2"
diff --git a/scoreboard/tests/test_load_performance_data.py b/scoreboard/tests/test_load_performance_data.py
deleted file mode 100644
index b32ba4abd..000000000
--- a/scoreboard/tests/test_load_performance_data.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""
-Tests for the load_performance_data function.
-"""
-
-import csv
-
-from main import load_performance_data
-
-
-class TestLoadPerformanceData:
-    """Test cases for load_performance_data function."""
-
-    def test_load_performance_data_valid_csv(self, sample_performance_csv):
-        """Test loading performance data from a valid CSV file."""
-        result = load_performance_data(sample_performance_csv)
-
-        # Check structure
-        assert isinstance(result, dict)
-        assert len(result) == 3
-
-        # Check example_task data
-        assert "example_task" in result
-        example_data = result["example_task"]
-        assert example_data["seq"] == "1.0"
-        assert example_data["omp"] == "0.5"
-        assert example_data["stl"] == "0.3"
-        assert example_data["tbb"] == "0.4"
-        assert example_data["all"] == "0.2"
-        assert example_data["mpi"] == "N/A"
-
-        # Check disabled_task data
-        assert "disabled_task" in result
-        disabled_data = result["disabled_task"]
-        assert disabled_data["seq"] == "2.0"
-        assert disabled_data["omp"] == "1.0"
-
-        # Check partial_task data
-        assert "partial_task" in result
-        partial_data = result["partial_task"]
-        assert partial_data["seq"] == "1.5"
-        assert partial_data["omp"] == "N/A"
-        assert partial_data["mpi"] == "N/A"
-
-    def test_load_performance_data_nonexistent_file(self, temp_dir):
-        """Test loading performance data when file doesn't exist."""
-        nonexistent_file = temp_dir / "nonexistent.csv"
-
-        result = load_performance_data(nonexistent_file)
-
-        assert result == {}
-
-    def test_load_performance_data_empty_csv(self, temp_dir):
-        """Test loading performance data from an empty CSV file."""
-        empty_csv = temp_dir / "empty.csv"
-        empty_csv.touch()
-
-        result = load_performance_data(empty_csv)
-
-        assert result == {}
-
-    def test_load_performance_data_header_only_csv(self, temp_dir):
-        """Test loading performance data from CSV with only headers."""
-        header_only_csv = temp_dir / "header_only.csv"
-
-        with open(header_only_csv, "w", newline="") as f:
-            writer = csv.DictWriter(
-                f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"]
-            )
-            writer.writeheader()
-
-        result = load_performance_data(header_only_csv)
-
-        assert result == {}
-
-    def test_load_performance_data_malformed_csv(self, temp_dir):
-        """Test loading performance data from malformed CSV."""
-        malformed_csv = temp_dir / "malformed.csv"
-
-        with open(malformed_csv, "w") as f:
-            f.write("Task,SEQ,OMP\n")
-            f.write("test_task,1.0\n")  # Missing OMP value
-            f.write("another_task,invalid,0.5\n")  # Invalid SEQ value
-
-        # Should not crash, but may have incomplete data
-        result = load_performance_data(malformed_csv)
-
-        # Function should handle this gracefully
-        assert isinstance(result, dict)
-
-    def test_load_performance_data_missing_columns(self, temp_dir):
-        """Test loading performance data when some columns are missing."""
-        partial_csv = temp_dir / "partial.csv"
-
-        data = [
-            {"Task": "test_task", "SEQ": "1.0", "OMP": "0.5"}
-            # Missing STL, TBB, ALL columns
-        ]
-
-        with open(partial_csv, "w", newline="") as f:
-            writer = csv.DictWriter(f, fieldnames=["Task", "SEQ", "OMP"])
-            writer.writeheader()
-            writer.writerows(data)
-
-        # Should handle missing columns gracefully
-        result = load_performance_data(partial_csv)
-
-        assert "test_task" in result
-        # Missing columns should be handled (likely as empty strings or errors)
-        task_data = result["test_task"]
-        assert task_data["seq"] == "1.0"
-        assert task_data["omp"] == "0.5"
-        assert task_data["mpi"] == "N/A"  # This should always be set
-
-    def test_load_performance_data_special_values(self, temp_dir):
-        """Test loading performance data with special values."""
-        special_csv = temp_dir / "special.csv"
-
-        data = [
-            {
-                "Task": "special_task",
-                "SEQ": "0.0",
-                "OMP": "inf",
-                "STL": "-1",
-                "TBB": "",
-                "ALL": "N/A",
-            }
-        ]
-
-        with open(special_csv, "w", newline="") as f:
-            writer = csv.DictWriter(
-                f, fieldnames=["Task", "SEQ", "OMP", "STL", "TBB", "ALL"]
-            )
-            writer.writeheader()
-            writer.writerows(data)
-
-        result = load_performance_data(special_csv)
-
-        assert "special_task" in result
-        task_data = result["special_task"]
-        assert task_data["seq"] == "0.0"
-        assert task_data["omp"] == "inf"
-        assert task_data["stl"] == "-1"
-        assert task_data["tbb"] == ""
-        assert task_data["all"] == "N/A"
-        assert task_data["mpi"] == "N/A"
diff --git a/scripts/create_perf_table.py b/scripts/create_perf_table.py
deleted file mode 100644
index 14b6d3105..000000000
--- a/scripts/create_perf_table.py
+++ /dev/null
@@ -1,289 +0,0 @@
-import argparse
-import csv
-import os
-import re
-
-import xlsxwriter
-
-# -------------------------------
-# Helpers and configuration
-# -------------------------------
-
-# Known task types (used to pre-initialize tables)
-list_of_type_of_tasks = ["all", "mpi", "omp", "seq", "stl", "tbb"]
-
-# Compile patterns once
-OLD_PATTERN = re.compile(r"tasks[\/|\\](\w*)[\/|\\](\w*):(\w*):(-*\d*\.\d*)")
-NEW_PATTERN = re.compile(
-    r"(\w+_test_task_(threads|processes))_(\w+)_enabled:(\w*):(-*\d*\.\d*)"
-)
-# Example formats:
-#   <task>_threads_omp_enabled:task_run:0.4749
-#   <task>_processes_t2_mpi_enabled:pipeline:0.0507
-# Accept optional suffix after `_enabled` (e.g., `_enabled_size1000000`) before the colon
-SIMPLE_PATTERN = re.compile(
-    r"(.+?)_(omp|seq|tbb|stl|all|mpi)_enabled[^:]*:(task_run|pipeline):(-*\d*\.\d*)"
-)
-
-
-def _ensure_task_tables(result_tables: dict, perf_type: str, task_name: str) -> None:
-    if perf_type not in result_tables:
-        result_tables[perf_type] = {}
-    if task_name not in result_tables[perf_type]:
-        result_tables[perf_type][task_name] = {t: -1.0 for t in list_of_type_of_tasks}
-
-
-def _infer_category(task_name: str) -> str:
-    return "threads" if "threads" in task_name else "processes"
-
-
-def _columns_for_category(category: str) -> list[str]:
-    return (
-        ["seq", "omp", "tbb", "stl", "all"] if category == "threads" else ["seq", "mpi"]
-    )
-
-
-def _write_excel_sheet(
-    workbook,
-    worksheet,
-    cpu_num: int,
-    tasks_list: list[str],
-    cols: list[str],
-    table: dict,
-):
-    worksheet.set_column("A:Z", 23)
-    right_bold_border = workbook.add_format({"bold": True, "right": 2, "bottom": 2})
-    bottom_bold_border = workbook.add_format({"bold": True, "bottom": 2})
-    right_border = workbook.add_format({"right": 2})
-
-    worksheet.write(0, 0, "cpu_num = " + str(cpu_num), right_bold_border)
-
-    # Header (T_x, S, Eff) per column
-    col = 1
-    for ttype in cols:
-        worksheet.write(0, col, f"T_{ttype}({cpu_num})", bottom_bold_border)
-        col += 1
-        worksheet.write(
-            0,
-            col,
-            f"S({cpu_num}) = T_seq({cpu_num}) / T_{ttype}({cpu_num})",
-            bottom_bold_border,
-        )
-        col += 1
-        worksheet.write(
-            0, col, f"Eff({cpu_num}) = S({cpu_num}) / {cpu_num}", right_bold_border
-        )
-        col += 1
-
-    # Task rows
-    row = 1
-    for task_name in tasks_list:
-        worksheet.write(
-            row, 0, task_name, workbook.add_format({"bold": True, "right": 2})
-        )
-        row += 1
-
-    # Values
-    row = 1
-    for task_name in tasks_list:
-        col = 1
-        for ttype in cols:
-            if task_name not in table:
-                # no data for task at all
-                worksheet.write(row, col, "—")
-                col += 1
-                worksheet.write(row, col, "—")
-                col += 1
-                worksheet.write(row, col, "—", right_border)
-                col += 1
-                continue
-            par_time = table[task_name].get(ttype, -1.0)
-            seq_time = table[task_name].get("seq", -1.0)
-            if par_time in (0.0, -1.0) or seq_time in (0.0, -1.0):
-                speed_up = "—"
-                efficiency = "—"
-            else:
-                speed_up = seq_time / par_time
-                efficiency = speed_up / cpu_num
-            worksheet.write(row, col, par_time if par_time != -1.0 else "?")
-            col += 1
-            worksheet.write(row, col, speed_up)
-            col += 1
-            worksheet.write(row, col, efficiency, right_border)
-            col += 1
-        row += 1
-
-
-def _write_csv(path: str, header: list[str], tasks_list: list[str], table: dict):
-    """Write raw times (seconds) to CSV so downstream can derive speedups correctly."""
-    with open(path, "w", newline="") as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(header)
-        for task_name in tasks_list:
-            task_row = table.get(task_name, {})
-            seq_time = task_row.get("seq", -1.0)
-            row = [task_name, (seq_time if seq_time not in (0.0, -1.0) else "?")]
-            for col_name in header[2:]:
-                val = task_row.get(col_name.lower(), -1.0)
-                row.append(val if val != -1.0 else "?")
-            writer.writerow(row)
-
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "-i", "--input", help="Input file path (logs of perf tests, .txt)", required=True
-)
-parser.add_argument(
-    "-o", "--output", help="Output file path (path to .xlsx table)", required=True
-)
-args = parser.parse_args()
-logs_path = os.path.abspath(args.input)
-xlsx_path = os.path.abspath(args.output)
-
-# For each perf_type (pipeline/task_run) store times per task
-result_tables = {"pipeline": {}, "task_run": {}}
-# Map task name -> category (threads|processes)
-task_categories = {}
-# Track tasks per category to split output
-tasks_by_category = {"threads": set(), "processes": set()}
-
-with open(logs_path, "r") as logs_file:
-    logs_lines = logs_file.readlines()
-for line in logs_lines:
-    # Handle both old format: tasks/task_type/task_name:perf_type:time
-    # and new format: namespace_task_type_enabled:perf_type:time
-    old_result = OLD_PATTERN.findall(line)
-    new_result = NEW_PATTERN.findall(line)
-    simple_result = SIMPLE_PATTERN.findall(line)
-
-    if len(old_result):
-        task_name = old_result[0][1]
-        perf_type = old_result[0][2]
-        # legacy: track task in threads category by default
-        _ensure_task_tables(result_tables, perf_type, task_name)
-        # Unknown category in legacy format; default to threads
-        task_categories[task_name] = "threads"
-        tasks_by_category["threads"].add(task_name)
-    elif len(new_result):
-        # Extract task name from namespace format and keep it specific.
-        base = new_result[0][0]  # e.g., task_namespace_processes
-        task_category = new_result[0][1]  # "threads" or "processes"
-        task_type_token = new_result[0][2]  # e.g., "all", "omp", or "2_mpi"
-        task_name = f"{base}_{task_type_token}"
-        if "_" in task_type_token:
-            suffix, impl = task_type_token.rsplit("_", 1)
-            if impl in list_of_type_of_tasks:
-                task_name = f"{base}_{suffix}"
-        perf_type = new_result[0][3]
-
-        _ensure_task_tables(result_tables, perf_type, task_name)
-        task_categories[task_name] = task_category
-        tasks_by_category[task_category].add(task_name)
-    elif len(simple_result):
-        # Extract task name in the current format (prefix already includes category suffix)
-        task_name = simple_result[0][0]
-        # Infer category by substring
-        task_category = "threads" if "threads" in task_name else "processes"
-        perf_type = simple_result[0][2]
-
-        # no set tracking needed; category mapping below
-
-        _ensure_task_tables(result_tables, perf_type, task_name)
-        task_categories[task_name] = task_category
-        tasks_by_category[task_category].add(task_name)
-
-for line in logs_lines:
-    # Handle both old format: tasks/task_type/task_name:perf_type:time
-    # and new format: namespace_task_type_enabled:perf_type:time
-    old_result = OLD_PATTERN.findall(line)
-    new_result = NEW_PATTERN.findall(line)
-    simple_result = SIMPLE_PATTERN.findall(line)
-
-    if len(old_result):
-        task_type = old_result[0][0]
-        task_name = old_result[0][1]
-        perf_type = old_result[0][2]
-        perf_time = float(old_result[0][3])
-        result_tables[perf_type][task_name][task_type] = perf_time
-    elif len(new_result):
-        # Extract task details from namespace format (keep specific task name)
-        base = new_result[0][0]
-        task_category = new_result[0][1]  # "threads" or "processes"
-        token = new_result[0][2]  # "all", "omp", "seq", or tokens like "2_mpi"
-        perf_type = new_result[0][3]
-        perf_time = float(new_result[0][4])
-        # Split token like "2_mpi" into task suffix and impl to aggregate seq/mpi together
-        if "_" in token:
-            suffix, impl = token.rsplit("_", 1)
-            if impl in list_of_type_of_tasks:
-                task_name = f"{base}_{suffix}"
-                task_type = impl
-            else:
-                task_name = f"{base}_{token}"
-                task_type = token
-        else:
-            task_name = f"{base}_{token}"
-            task_type = token
-
-        _ensure_task_tables(result_tables, perf_type, task_name)
-        result_tables[perf_type][task_name][task_type] = perf_time
-        task_categories[task_name] = task_category
-        tasks_by_category[task_category].add(task_name)
-    elif len(simple_result):
-        # Extract details from the simplified pattern (current logs)
-        task_name = simple_result[0][0]
-        # Infer category by substring present in task_name
-        task_category = "threads" if "threads" in task_name else "processes"
-        task_type = simple_result[0][1]
-        perf_type = simple_result[0][2]
-        perf_time = float(simple_result[0][3])
-
-        if perf_type not in result_tables:
-            result_tables[perf_type] = {}
-        if task_name not in result_tables[perf_type]:
-            result_tables[perf_type][task_name] = {}
-            for ttype in list_of_type_of_tasks:
-                result_tables[perf_type][task_name][ttype] = -1.0
-        result_tables[perf_type][task_name][task_type] = perf_time
-        task_categories[task_name] = task_category
-        tasks_by_category[task_category].add(task_name)
-
-
-for table_name, table_data in result_tables.items():
-    # Prepare two workbooks/CSVs: threads and processes
-    for category in ["threads", "processes"]:
-        tasks_list = sorted(tasks_by_category[category])
-        if not tasks_list:
-            continue
-
-        # Use appropriate env var per category
-        if category == "threads":
-            cpu_num_env = os.environ.get("PPC_NUM_THREADS")
-            if cpu_num_env is None:
-                raise EnvironmentError(
-                    "Required environment variable 'PPC_NUM_THREADS' is not set."
-                )
-        else:
-            cpu_num_env = os.environ.get("PPC_NUM_PROC")
-            if cpu_num_env is None:
-                raise EnvironmentError(
-                    "Required environment variable 'PPC_NUM_PROC' is not set."
-                )
-        cpu_num = int(cpu_num_env)
-        cols = _columns_for_category(category)
-
-        # Excel
-        wb_path = os.path.join(
-            xlsx_path, f"{category}_" + table_name + "_perf_table.xlsx"
-        )
-        workbook = xlsxwriter.Workbook(wb_path)
-        worksheet = workbook.add_worksheet()
-        _write_excel_sheet(workbook, worksheet, cpu_num, tasks_list, cols, table_data)
-        workbook.close()
-
-        # CSV
-        header = ["Task", "SEQ"] + [c.upper() for c in cols[1:]]
-        csv_path = os.path.join(
-            xlsx_path, f"{category}_" + table_name + "_perf_table.csv"
-        )
-        _write_csv(csv_path, header, tasks_list, table_data)
diff --git a/scripts/generate_perf_results.bat b/scripts/generate_perf_results.bat
deleted file mode 100644
index a7d72690a..000000000
--- a/scripts/generate_perf_results.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-@echo off
-mkdir build\perf_stat_dir
-scripts/run_tests.py --running-type="performance" > build\perf_stat_dir\perf_log.txt
-python scripts\create_perf_table.py --input build\perf_stat_dir\perf_log.txt --output build\perf_stat_dir
diff --git a/scripts/generate_perf_results.sh b/scripts/generate_perf_results.sh
deleted file mode 100755
index da317f427..000000000
--- a/scripts/generate_perf_results.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-mkdir -p build/perf_stat_dir
-scripts/run_tests.py --running-type="performance" | tee build/perf_stat_dir/perf_log.txt
-python3 scripts/create_perf_table.py --input build/perf_stat_dir/perf_log.txt --output build/perf_stat_dir
diff --git a/scripts/run_tests.py b/scripts/run_tests.py
index 7c713f8bd..bdbfc2280 100755
--- a/scripts/run_tests.py
+++ b/scripts/run_tests.py
@@ -3,6 +3,7 @@
 import os
 import platform
 import shlex
+import shutil
 import subprocess
 from pathlib import Path
 
@@ -51,6 +52,7 @@ def __init__(self, build_dir="build", verbose=False):
         self.__ppc_num_threads = None
         self.__ppc_num_proc = None
         self.__ppc_env = None
+        self.__build_dir_path = None
         self.work_dir = None
         self.build_dir = build_dir
         self.verbose = verbose
@@ -98,14 +100,16 @@ def setup_env(self, ppc_env):
             )
 
         project_path = Path(self.__get_project_path())
+        build_dir = Path(self.build_dir)
+        if not build_dir.is_absolute():
+            build_dir = project_path / build_dir
+        self.__build_dir_path = build_dir
+
         install_bin_dir = project_path / "install" / "bin"
         if install_bin_dir.exists():
             self.work_dir = install_bin_dir
             return
 
-        build_dir = Path(self.build_dir)
-        if not build_dir.is_absolute():
-            build_dir = project_path / build_dir
         bin_dir = build_dir if build_dir.name == "bin" else build_dir / "bin"
         if not bin_dir.exists():
             raise FileNotFoundError(
@@ -114,10 +118,13 @@ def setup_env(self, ppc_env):
             )
         self.work_dir = bin_dir
 
-    def __run_exec(self, command):
+    def __run_exec(self, command, extra_env=None):
         if self.verbose:
             print("Executing:", " ".join(shlex.quote(part) for part in command))
-        result = subprocess.run(command, shell=False, env=self.__ppc_env)
+        # Inherit the parent environment (as env=None did) if none was prepared.
+        base_env = os.environ if self.__ppc_env is None else self.__ppc_env
+        run_env = {**base_env, **(extra_env or {})}
+        result = subprocess.run(command, shell=False, env=run_env)
         if result.returncode != 0:
             raise Exception(f"Subprocess return {result.returncode}.")
 
@@ -153,41 +160,42 @@ def __detect_mpi_impl(self):
             return "mpich", "-n"
         return "unknown", "-np"
 
-    def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args):
+    def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args, extra_env=None):
+        mpi_env = self.__ppc_env.copy()
+        if extra_env:
+            mpi_env.update(extra_env)
         base = [self.mpi_exec] + shlex.split(additional_mpi_args)
+        forwarded_env = [
+            "PPC_NUM_THREADS",
+            "OMP_NUM_THREADS",
+            "PPC_BENCHMARK_OUT",
+            "PPC_BENCHMARK_FILTER",
+            "PPC_PERF_IMPL_FILTER",
+            "PPC_PERF_CATEGORY_FILTER",
+        ]
 
         if self.platform == "Windows":
             # MS-MPI style
-            env_args = [
-                "-env",
-                "PPC_NUM_THREADS",
-                self.__ppc_env["PPC_NUM_THREADS"],
-                "-env",
-                "OMP_NUM_THREADS",
-                self.__ppc_env["OMP_NUM_THREADS"],
-            ]
+            env_args = []
+            for env_name in forwarded_env:
+                if env_name in mpi_env:
+                    env_args += ["-env", env_name, mpi_env[env_name]]
             np_args = ["-n", ppc_num_proc]
             return base + env_args + np_args
 
         # Non-Windows
         if self.mpi_env_mode == "openmpi":
-            env_args = [
-                "-x",
-                "PPC_NUM_THREADS",
-                "-x",
-                "OMP_NUM_THREADS",
-            ]
+            env_args = []
+            for env_name in forwarded_env:
+                if env_name in mpi_env:
+                    env_args += ["-x", env_name]
             np_flag = "-np"
         elif self.mpi_env_mode == "mpich":
             # Explicitly set env variables for all ranks
-            env_args = [
-                "-env",
-                "PPC_NUM_THREADS",
-                self.__ppc_env["PPC_NUM_THREADS"],
-                "-env",
-                "OMP_NUM_THREADS",
-                self.__ppc_env["OMP_NUM_THREADS"],
-            ]
+            env_args = []
+            for env_name in forwarded_env:
+                if env_name in mpi_env:
+                    env_args += ["-env", env_name, mpi_env[env_name]]
             np_flag = "-n"
         else:
             # Unknown MPI flavor: rely on environment inheritance and default to -np
@@ -196,6 +204,30 @@ def __build_mpi_cmd(self, ppc_num_proc, additional_mpi_args):
 
         return base + env_args + [np_flag, ppc_num_proc]
 
+    def __benchmark_output_dir(self):  # <build dir>/perf_stat_dir/benchmarks
+        if self.__build_dir_path is not None:
+            return self.__build_dir_path.joinpath("perf_stat_dir", "benchmarks")
+        raise RuntimeError("Build directory is not initialized.")
+
+    def __get_performance_gtest_settings(self):
+        # One uncoloured pass over every test whose full name contains "RunPerf".
+        settings = ["--gtest_repeat=1"]
+        settings.append("--gtest_recreate_environments_when_repeating")
+        settings.append("--gtest_color=0")
+        settings.append("--gtest_filter=*RunPerf*")
+        return settings
+
+    def __get_benchmark_env(self, category, task_type):
+        # Create the report directory up front and name one JSON file per run.
+        report_dir = self.__benchmark_output_dir()
+        report_dir.mkdir(parents=True, exist_ok=True)
+        report_path = report_dir / f"benchmark_{category}_{task_type}.json"
+        return {
+            "PPC_PERF_CATEGORY_FILTER": f"_{category}_",
+            "PPC_PERF_IMPL_FILTER": f"_{task_type}_",
+            "PPC_BENCHMARK_OUT": str(report_path),
+        }
+
     @staticmethod
     def __get_gtest_settings(repeats_count, type_task):
         type_task_patterns = {
@@ -260,19 +292,31 @@ def run_processes(self, additional_mpi_args):
                 )
 
     def run_performance(self):
+        output_dir = self.__benchmark_output_dir()
+        if output_dir.exists():
+            shutil.rmtree(output_dir)  # drop reports left by earlier runs
+
         if not self.__ppc_env.get("PPC_ASAN_RUN"):
-            mpi_running = self.__build_mpi_cmd(self.__ppc_num_proc, "")
-            for task_type in ["all", "mpi", "seq"]:
+            for category, task_type in [  # (perf category, implementation) per MPI run
+                ("threads", "all"),
+                ("processes", "mpi"),
+                ("processes", "seq"),
+            ]:
+                extra_env = self.__get_benchmark_env(category, task_type)
+                mpi_running = self.__build_mpi_cmd(self.__ppc_num_proc, "", extra_env)
                 self.__run_exec(
                     mpi_running
                     + [str(self.work_dir / "ppc_perf_tests")]
-                    + self.__get_gtest_settings(1, "_" + task_type + "_")
+                    + self.__get_performance_gtest_settings(),
+                    extra_env,  # also applied to the launcher's own environment
                 )
 
         for task_type in ["omp", "seq", "stl", "tbb"]:
+            extra_env = self.__get_benchmark_env("threads", task_type)
             self.__run_exec(
                 [str(self.work_dir / "ppc_perf_tests")]
-                + self.__get_gtest_settings(1, "_" + task_type + "_")
+                + self.__get_performance_gtest_settings(),
+                extra_env,
             )
 
 
diff --git a/tasks/CMakeLists.txt b/tasks/CMakeLists.txt
index 685457716..a20f74abf 100644
--- a/tasks/CMakeLists.txt
+++ b/tasks/CMakeLists.txt
@@ -12,6 +12,9 @@ include(${CMAKE_SOURCE_DIR}/cmake/functions.cmake)
 # ——— Initialize test executables —————————————————————————————————————
 ppc_add_test(${FUNC_TEST_EXEC} common/runners/functional.cpp USE_FUNC_TESTS)
 ppc_add_test(${PERF_TEST_EXEC} common/runners/performance.cpp USE_PERF_TESTS)
+if(USE_PERF_TESTS)
+  ppc_link_benchmark(${PERF_TEST_EXEC})
+endif()
 
 # ——— List of implementations ————————————————————————————————————————
 set(PPC_IMPLEMENTATIONS "all;mpi;omp;seq;stl;tbb" CACHE STRING "Implementations to build (semicolon-separated)")
diff --git a/tasks/common/runners/performance.cpp b/tasks/common/runners/performance.cpp
index a4b6c0e2f..1f8101bcd 100644
--- a/tasks/common/runners/performance.cpp
+++ b/tasks/common/runners/performance.cpp
@@ -1,5 +1,232 @@
+#include <gtest/gtest.h>
+
+#include <benchmark/benchmark.h>
+#include <benchmark/reporter.h>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <exception>
+#include <filesystem>
+#include <format>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <mpi.h>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "oneapi/tbb/global_control.h"
 #include "runners/include/runners.hpp"
+#include "util/include/util.hpp"
+
+namespace {
+
+class NullBenchmarkReporter final : public benchmark::BenchmarkReporter {  // reporter that emits nothing
+ public:
+  bool ReportContext(const Context & /*context*/) override {
+    return true;  // always reports success; the context itself is ignored
+  }
+
+  void ReportRuns(const std::vector<Run> & /*report*/) override {}  // run results are dropped
+};
+
+int RunAllTests() {
+  const int gtest_result = RUN_ALL_TESTS();
+  if (!ppc::util::DestructorFailureFlag::Get()) {  // nothing recorded: pass the gtest status through
+    return gtest_result;
+  }
+  throw std::runtime_error(
+      std::format("[  ERROR  ] Destructor failed with code {}", ppc::util::DestructorFailureFlag::Get()));
+}
+
+void SyncGTestSeed() {  // gives every MPI rank the gtest random seed chosen on rank 0
+  int rank = -1;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  int seed = ::testing::GTEST_FLAG(random_seed);
+  if (rank == 0 && seed == 0) {  // rank 0 generates a seed only when the flag is still 0
+    try {
+      seed = static_cast<int>((std::random_device{}() % 99999U) + 1U);  // value in [1, 99999]
+    } catch (...) {
+      seed = 0;  // fall through to the clock-based seed below
+    }
+    if (seed == 0) {
+      const auto now = static_cast<std::uint64_t>(std::chrono::steady_clock::now().time_since_epoch().count());
+      seed = static_cast<int>((now % 99999ULL) + 1ULL);
+    }
+  }
+  MPI_Bcast(&seed, 1, MPI_INT, 0, MPI_COMM_WORLD);  // rank 0's value replaces every other rank's
+  ::testing::GTEST_FLAG(random_seed) = seed;
+}
+
+void SyncGTestFilter() {  // gives every MPI rank the gtest filter string held by rank 0
+  int rank = -1;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  std::string filter = (rank == 0) ? ::testing::GTEST_FLAG(filter) : std::string{};
+  int len = static_cast<int>(filter.size());
+  MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);  // length first, so receivers can size their buffer
+  if (rank != 0) {
+    filter.resize(static_cast<std::size_t>(len));
+  }
+  if (len > 0) {  // an empty filter needs no payload broadcast
+    MPI_Bcast(filter.data(), len, MPI_CHAR, 0, MPI_COMM_WORLD);
+  }
+  ::testing::GTEST_FLAG(filter) = filter;
+}
+
+bool HasFlag(int argc, char **argv, std::string_view flag) {
+  bool found = false;  // argv[0] is never inspected; null entries never match
+  for (int idx = 1; !found && idx < argc; ++idx) {
+    const char *arg = argv[idx];
+    found = (arg != nullptr) && (flag == arg);
+  }
+  return found;
+}
+
+std::vector<std::string> MakeBenchmarkArgs(const char *program_name, int rank) {  // builds --benchmark_* args
+  std::vector<std::string> args{program_name != nullptr ? program_name : "ppc_perf_tests"};
+  args.emplace_back("--benchmark_format=console");
+  args.emplace_back("--benchmark_time_unit=s");
+
+  const auto benchmark_filter = env::get<std::string>("PPC_BENCHMARK_FILTER");
+  if (benchmark_filter.has_value()) {  // the filter is forwarded on every rank
+    args.emplace_back(std::string("--benchmark_filter=") + benchmark_filter.value());
+  }
+
+  if (rank == 0) {  // only rank 0 is given an output file
+    const auto benchmark_out = env::get<std::string>("PPC_BENCHMARK_OUT");
+    if (benchmark_out.has_value()) {
+      const std::filesystem::path out_path(benchmark_out.value());
+      if (out_path.has_parent_path()) {
+        std::filesystem::create_directories(out_path.parent_path());  // this overload throws on failure
+      }
+      args.emplace_back(std::string("--benchmark_out=") + benchmark_out.value());
+      args.emplace_back("--benchmark_out_format=json");
+    }
+  }
+
+  return args;
+}
+
+void InitializeBenchmark(int argc, char **argv, int rank) {
+  static std::vector<std::string> benchmark_args;  // static, so the strings outlive this call
+  static std::vector<char *> benchmark_argv;
+
+  benchmark_args = MakeBenchmarkArgs((argc > 0) ? argv[0] : nullptr, rank);
+  benchmark_argv.clear();
+  benchmark_argv.reserve(benchmark_args.size());
+  for (auto &arg : benchmark_args) {
+    benchmark_argv.push_back(arg.data());  // pointers into benchmark_args; valid while it is unchanged
+  }
+  int benchmark_argc = static_cast<int>(benchmark_argv.size());
+  benchmark::Initialize(&benchmark_argc, benchmark_argv.data());  // only the synthetic args are passed, not argv
+}
+
+int RunRegisteredBenchmarks(int rank) {
+  ppc::util::PerformanceFailureFlag::Unset();  // start from a clean failure flag
+  if (rank == 0) {
+    benchmark::RunSpecifiedBenchmarks();  // rank 0 uses the library's default reporters
+  } else {
+    NullBenchmarkReporter reporter;
+    std::ofstream null_stream;
+#ifdef _WIN32
+    null_stream.open("NUL");
+#else
+    null_stream.open("/dev/null");
+#endif
+    if (null_stream.is_open()) {  // otherwise the reporter keeps whatever streams it had
+      reporter.SetOutputStream(&null_stream);
+      reporter.SetErrorStream(&null_stream);
+    }
+    benchmark::RunSpecifiedBenchmarks(&reporter, nullptr);  // other ranks: silent reporter, null second reporter
+  }
+  const int status = ppc::util::PerformanceFailureFlag::Get() ? EXIT_FAILURE : EXIT_SUCCESS;
+  benchmark::Shutdown();  // the flag above is read before the library is torn down
+  benchmark::ClearRegisteredBenchmarks();
+  return status;
+}
+
+int RunAllTestsSafely() {  // RunAllTests(), with any escaping exception turned into MPI_Abort
+  try {
+    return RunAllTests();
+  } catch (const std::exception &e) {
+    std::cerr << std::format("[  ERROR  ] Exception after performance tests: {}", e.what()) << '\n';
+    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+    return EXIT_FAILURE;  // presumably unreachable once MPI_Abort terminates the job
+  } catch (...) {
+    std::cerr << "[  ERROR  ] Unknown exception after performance tests" << '\n';
+    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+    return EXIT_FAILURE;
+  }
+}
+
+int SynchronizeStatus(int local_status, std::string_view stage) {
+  // Each rank contributes 0 (ok) or 1 (failed); MPI_MAX turns that into "did any rank fail".
+  const int my_failure = static_cast<int>(local_status != EXIT_SUCCESS);
+  int worst_failure = my_failure;
+  const int err = MPI_Allreduce(&my_failure, &worst_failure, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  if (err == MPI_SUCCESS) {
+    return (worst_failure != 0) ? EXIT_FAILURE : EXIT_SUCCESS;
+  }
+  std::cerr << "[  ERROR  ] MPI_Allreduce failed while synchronizing " << stage << " status with code " << err << '\n';
+  MPI_Abort(MPI_COMM_WORLD, err);
+  return EXIT_FAILURE;
+}
+
+int RunPerformanceMain(int argc, char **argv) {  // MPI init -> gtest stage -> benchmark stage -> MPI finalize
+  const int init_res = MPI_Init(&argc, &argv);
+  if (init_res != MPI_SUCCESS) {
+    std::cerr << "[  ERROR  ] MPI_Init failed with code " << init_res << '\n';
+    MPI_Abort(MPI_COMM_WORLD, init_res);  // NOTE(review): MPI_Init failed here - confirm MPI_Abort is valid
+    return init_res;
+  }
+
+  tbb::global_control control(tbb::global_control::max_allowed_parallelism, ppc::util::GetNumThreads());
+
+  ::testing::InitGoogleTest(&argc, argv);
+
+  SyncGTestSeed();
+  SyncGTestFilter();
+
+  int rank = -1;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  auto &listeners = ::testing::UnitTest::GetInstance()->listeners();
+  const bool print_workers = HasFlag(argc, argv, "--print-workers");
+  if (rank != 0 && !print_workers) {  // non-zero ranks: wrap the default printer in WorkerTestFailurePrinter
+    auto *listener = listeners.Release(listeners.default_result_printer());
+    listeners.Append(
+        new ppc::runners::WorkerTestFailurePrinter(std::shared_ptr<::testing::TestEventListener>(listener)));
+  }
+  listeners.Append(new ppc::runners::UnreadMessagesDetector());
+
+  int status = SynchronizeStatus(RunAllTestsSafely(), "GTest");
+  if (status == EXIT_SUCCESS) {  // benchmarks run only when the gtest stage succeeded on every rank
+    InitializeBenchmark(argc, argv, rank);
+    status = SynchronizeStatus(RunRegisteredBenchmarks(rank), "Google Benchmark");
+  }
+
+  const int finalize_res = MPI_Finalize();
+  if (finalize_res != MPI_SUCCESS) {
+    std::cerr << "[  ERROR  ] MPI_Finalize failed with code " << finalize_res << '\n';
+    MPI_Abort(MPI_COMM_WORLD, finalize_res);  // NOTE(review): runs after MPI_Finalize - confirm this is valid
+    return finalize_res;
+  }
+  return status;
+}
+
+}  // namespace
 
 int main(int argc, char **argv) {
-  return ppc::runners::Init(argc, argv);
+  try {
+    return RunPerformanceMain(argc, argv);
+  } catch (const std::exception &e) {
+    std::cerr << "[  ERROR  ] Unhandled exception in performance tests: " << e.what() << '\n';
+  } catch (...) {
+    std::cerr << "[  ERROR  ] Unknown unhandled exception in performance tests" << '\n';
+  }
+  return EXIT_FAILURE;  // reached only after one of the handlers above has run
 }
diff --git a/tasks/example/processes/t1/tests/performance/main.cpp b/tasks/example/processes/t1/tests/performance/main.cpp
index b4fc6e9db..f628496c8 100644
--- a/tasks/example/processes/t1/tests/performance/main.cpp
+++ b/tasks/example/processes/t1/tests/performance/main.cpp
@@ -35,7 +35,7 @@ const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, NesterovATestTask
 
 }  // namespace
 
-TEST_F(ExampleRunPerfTestProcesses, RunPerfModes) {
+TEST_F(ExampleRunPerfTestProcesses, RunPerf) {  // name keeps "RunPerf" so --gtest_filter=*RunPerf* selects it
   std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks);
 }
 
diff --git a/tasks/example/processes/t2/tests/performance/main.cpp b/tasks/example/processes/t2/tests/performance/main.cpp
index 130159aeb..0d991ca40 100644
--- a/tasks/example/processes/t2/tests/performance/main.cpp
+++ b/tasks/example/processes/t2/tests/performance/main.cpp
@@ -35,7 +35,7 @@ const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, NesterovATestTask
 
 }  // namespace
 
-TEST_F(ExampleRunPerfTestProcesses2, RunPerfModes) {
+TEST_F(ExampleRunPerfTestProcesses2, RunPerf) {  // name keeps "RunPerf" so --gtest_filter=*RunPerf* selects it
   std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks);
 }
 
diff --git a/tasks/example/processes/t3/tests/performance/main.cpp b/tasks/example/processes/t3/tests/performance/main.cpp
index 42e3c6e95..cc4d2a63c 100644
--- a/tasks/example/processes/t3/tests/performance/main.cpp
+++ b/tasks/example/processes/t3/tests/performance/main.cpp
@@ -31,7 +31,7 @@ class ExampleRunPerfTestProcesses3 : public ppc::util::BaseRunPerfTests<InType,
 const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, NesterovATestTaskMPI, NesterovATestTaskSEQ>(
     PPC_SETTINGS_example, "processes.t3");
 
-TEST_F(ExampleRunPerfTestProcesses3, RunPerfModes) {
+TEST_F(ExampleRunPerfTestProcesses3, RunPerf) {  // name keeps "RunPerf" so --gtest_filter=*RunPerf* selects it
   std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks);
 }
 
diff --git a/tasks/example/threads/tests/performance/main.cpp b/tasks/example/threads/tests/performance/main.cpp
index 82ea9356d..939305f75 100644
--- a/tasks/example/threads/tests/performance/main.cpp
+++ b/tasks/example/threads/tests/performance/main.cpp
@@ -39,7 +39,7 @@ const auto kAllPerfTasks =
 
 }  // namespace
 
-TEST_F(ExampleRunPerfTestThreads, RunPerfModes) {
+TEST_F(ExampleRunPerfTestThreads, RunPerf) {  // name keeps "RunPerf" so --gtest_filter=*RunPerf* selects it
   std::apply([this](const auto &...test_params) { (ExecuteTest(test_params), ...); }, kAllPerfTasks);
 }