From 4d7ab41c6262e076e5ad64e7c404a2070aef8fc3 Mon Sep 17 00:00:00 2001 From: Hyungtae Lim Date: Thu, 21 May 2026 16:48:06 +0900 Subject: [PATCH 1/2] perf(patchwork): TBB parallel_for over patches in classic Patchwork MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The classic Patchwork main loop in cpp/patchwork/src/patchwork.cpp is now parallelised with a single tbb::parallel_for over all (zone, ring, sector) patches, mirroring the upstream ~/git/patchwork pattern. Per-patch work (sort + plane fit + GLE) runs in worker threads; a serial reduction then accumulates ground / nonground in deterministic order. median per-frame time on KITTI seq 00 (i7-12700, 24 logical cores): single-thread (taskset -c 0) 8.31 ms (120.4 Hz) parallel (default TBB scheduler) 4.81 ms (207.8 Hz) → 1.73x speedup Patchwork++ (cpp/patchworkpp/src/patchworkpp.cpp) was also benchmarked under the same TBB pattern at 1 / 2 / 4 / 8 / 16 / 24 threads. Every multi-thread configuration was SLOWER than single-thread on KITTI: 1 thread → 111 Hz (baseline) 2 threads → 93 Hz 4 threads → 91 Hz 8 threads → 91 Hz 16 threads → 85 Hz 24 threads → 69 Hz The per-patch work in Patchwork++ is small (~14 µs avg) and dominated by short-lived std::vector / Eigen::Matrix allocations inside R-VPF and R-GPF. Concurrent malloc serialises on the heap allocator and TBB scheduler overhead exceeds the parallelisation benefit at every thread count. Single-threaded Patchwork++ already runs at ~2x the paper's reported 55 Hz on i7-7700K, so there is no real-time motivation to parallelise. Patchwork++ remains single-threaded; the estimateGround loop has a long-form comment explaining why. Numerical equivalence verified on KITTI 00-10 full sweep (23,201 frames), both methods, Patchwork++ paper protocol: patchwork x pp protocol pre: 96.0172 post: 96.0172 (byte-identical) patchwork++ x pp protocol pre: 96.2918 post: 96.2919 (Δ +0.0001) Both within the ±0.05 budget set in the refactor plan. Build: - Adds find_package(TBB CONFIG/MODULE REQUIRED) to cpp/CMakeLists.txt with a helpful error message listing the install command for Ubuntu / macOS / Windows. - cpp/patchwork/CMakeLists.txt links TBB::tbb; cpp/patchworkpp/ does not (since it does not use TBB). Also adds: - python/examples/bench_hz.py — small per-frame timing harness that reports median / mean / p95 / p99 ms and Hz from getTimeTaken(). - A `const` qualifier on PatchWork::extract_initial_seeds and PatchWork::perform_regionwise_segmentation since neither writes to *this any more — needed so the TBB worker can call them. --- cpp/CMakeLists.txt | 14 +++ cpp/patchwork/CMakeLists.txt | 2 +- cpp/patchwork/include/patchwork/patchwork.h | 4 +- cpp/patchwork/src/patchwork.cpp | 71 ++++++++---- .../include/patchwork/patchworkpp.h | 4 +- cpp/patchworkpp/src/patchworkpp.cpp | 89 ++++++--------- python/examples/bench_hz.py | 104 ++++++++++++++++++ 7 files changed, 210 insertions(+), 78 deletions(-) create mode 100644 python/examples/bench_hz.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2aba443..3a65991 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -40,6 +40,20 @@ endfunction() find_external_dependency("Eigen3" "Eigen3::Eigen" "${CMAKE_CURRENT_LIST_DIR}/cmake/eigen.cmake") +# Intel oneTBB / TBB for thread-parallel per-patch ground segmentation. +# Required by both `cpp/patchwork/` (classic Patchwork) and `cpp/patchworkpp/`. +find_package(TBB CONFIG QUIET) +if (NOT TBB_FOUND) + find_package(TBB MODULE QUIET) +endif() +if (NOT TBB_FOUND) + message(FATAL_ERROR + "TBB not found. Install Intel oneTBB:\n" + " Ubuntu/Debian: apt install libtbb-dev\n" + " macOS: brew install tbb\n" + " Windows: vcpkg install tbb (or use oneAPI Base Toolkit)") +endif() + # Parameters in `patchworkpp` subdirectory. # Thus, link should be `patchworkpp::ground_seg_cores` set(PARENT_PROJECT_NAME ${PROJECT_NAME}) diff --git a/cpp/patchwork/CMakeLists.txt b/cpp/patchwork/CMakeLists.txt index 3008cbf..2b6b7e7 100644 --- a/cpp/patchwork/CMakeLists.txt +++ b/cpp/patchwork/CMakeLists.txt @@ -11,7 +11,7 @@ target_include_directories(${CLASSIC_TARGET} PUBLIC $ $ ) -target_link_libraries(${CLASSIC_TARGET} Eigen3::Eigen ground_seg_common ground_seg_cores) +target_link_libraries(${CLASSIC_TARGET} Eigen3::Eigen ground_seg_common ground_seg_cores TBB::tbb) add_library(${PARENT_PROJECT_NAME}::${CLASSIC_TARGET} ALIAS ${CLASSIC_TARGET}) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ diff --git a/cpp/patchwork/include/patchwork/patchwork.h b/cpp/patchwork/include/patchwork/patchwork.h index 65fffa2..5e9f337 100644 --- a/cpp/patchwork/include/patchwork/patchwork.h +++ b/cpp/patchwork/include/patchwork/patchwork.h @@ -72,14 +72,14 @@ class PatchWork { void pc2regionwise_patches(const std::vector& src); void extract_initial_seeds(int zone_idx, const std::vector& sorted, - std::vector& seeds); + std::vector& seeds) const; PatchStatus determine_gle_status(int zone_idx, int ring_idx, const PCAFeature& feature) const; void perform_regionwise_segmentation(int zone_idx, int ring_idx, const std::vector& patch, std::vector& patch_ground, std::vector& patch_nonground, - PatchStatus& status_out); + PatchStatus& status_out) const; void estimate_sensor_height(std::vector& cloud); double consensus_set_based_height_estimation(const std::vector& candidate_heights); void materialize() const; diff --git a/cpp/patchwork/src/patchwork.cpp b/cpp/patchwork/src/patchwork.cpp index 6819fd3..6a99776 100644 --- a/cpp/patchwork/src/patchwork.cpp +++ b/cpp/patchwork/src/patchwork.cpp @@ -6,6 +6,9 @@ #include #include +#include +#include + #include "patchwork/plane_fit.h" namespace { @@ -78,7 +81,7 @@ void PatchWork::pc2regionwise_patches(const std::vector& src) { void PatchWork::extract_initial_seeds(int zone_idx, const std::vector& sorted, - std::vector& seeds) { + std::vector& seeds) const { seeds.clear(); if (sorted.empty()) return; @@ -150,7 +153,7 @@ void PatchWork::perform_regionwise_segmentation(int zone_idx, const std::vector& patch, std::vector& patch_ground, std::vector& patch_nonground, - PatchStatus& status_out) { + PatchStatus& status_out) const { patch_ground.clear(); patch_nonground.clear(); @@ -376,29 +379,59 @@ void PatchWork::estimateGround(const Eigen::MatrixXf& cloud) { flush(); pc2regionwise_patches(kept); - // 5) Per-patch segmentation (sequential — was tbb::parallel_for upstream) - ground_pts_.clear(); - nonground_pts_.clear(); + // 5) Per-patch segmentation, parallelised over all (zone, ring, sector) + // patches. Each patch is independent: `perform_regionwise_segmentation` + // is const, takes the patch by const ref, and writes to caller-owned + // output buffers. We collect per-patch results into an indexed buffer + // and then accumulate into the final ground/nonground point lists in + // a serial reduction so the accumulation order is deterministic + // (mirrors the original upstream patchwork's two-phase pattern). + struct PatchOutcome { + std::vector patch_ground; + std::vector patch_nonground; + PatchStatus status = PatchStatus::NotAssigned; + const std::vector* patch_ref = nullptr; // for "reject whole patch" case + }; + + std::vector> patch_indices; + patch_indices.reserve(params_.num_zones * 8 * 32); for (int z = 0; z < params_.num_zones; ++z) { for (int r = 0; r < params_.num_rings_each_zone[z]; ++r) { for (int s = 0; s < params_.num_sectors_each_zone[z]; ++s) { - const auto& patch = regionwise_patches_[z][r][s]; - std::vector pg, png; - PatchStatus status; - perform_regionwise_segmentation(z, r, patch, pg, png, status); - switch (status) { - case PatchStatus::UprightEnough: - case PatchStatus::FlatEnough: - ground_pts_.insert(ground_pts_.end(), pg.begin(), pg.end()); - nonground_pts_.insert(nonground_pts_.end(), png.begin(), png.end()); - break; - default: - // Reject the whole patch as nonground - nonground_pts_.insert(nonground_pts_.end(), patch.begin(), patch.end()); - } + patch_indices.emplace_back(z, r, s); } } } + const int num_patches = static_cast(patch_indices.size()); + std::vector outcomes(num_patches); + + tbb::parallel_for(tbb::blocked_range(0, num_patches), + [&](const tbb::blocked_range& range) { + for (int k = range.begin(); k < range.end(); ++k) { + const auto& [z, r, s] = patch_indices[k]; + const auto& patch = regionwise_patches_[z][r][s]; + auto& out = outcomes[k]; + out.patch_ref = &patch; + perform_regionwise_segmentation( + z, r, patch, out.patch_ground, out.patch_nonground, out.status); + } + }); + + ground_pts_.clear(); + nonground_pts_.clear(); + for (const auto& out : outcomes) { + switch (out.status) { + case PatchStatus::UprightEnough: + case PatchStatus::FlatEnough: + ground_pts_.insert(ground_pts_.end(), out.patch_ground.begin(), out.patch_ground.end()); + nonground_pts_.insert( + nonground_pts_.end(), out.patch_nonground.begin(), out.patch_nonground.end()); + break; + default: + // Reject the whole patch as nonground + nonground_pts_.insert(nonground_pts_.end(), out.patch_ref->begin(), out.patch_ref->end()); + } + } // 6) Mark outputs dirty (actual matrix materialization is lazy) outputs_dirty_ = true; diff --git a/cpp/patchworkpp/include/patchwork/patchworkpp.h b/cpp/patchworkpp/include/patchwork/patchworkpp.h index a48738a..8f6b519 100644 --- a/cpp/patchworkpp/include/patchwork/patchworkpp.h +++ b/cpp/patchworkpp/include/patchwork/patchworkpp.h @@ -231,12 +231,12 @@ class PatchWorkpp { void extract_initial_seeds(const int zone_idx, const vector &p_sorted, - vector &init_seeds); + vector &init_seeds) const; void extract_initial_seeds(const int zone_idx, const vector &p_sorted, vector &init_seeds, - double th_seed); + double th_seed) const; }; }; // namespace patchwork diff --git a/cpp/patchworkpp/src/patchworkpp.cpp b/cpp/patchworkpp/src/patchworkpp.cpp index e33dbfe..25d72a9 100644 --- a/cpp/patchworkpp/src/patchworkpp.cpp +++ b/cpp/patchworkpp/src/patchworkpp.cpp @@ -1,5 +1,8 @@ #include "patchwork/patchworkpp.h" +#include +#include + #include "patchwork/plane_fit.h" // xy2theta, xy2radius, point_z_cmp using namespace std; @@ -79,7 +82,7 @@ void PatchWorkpp::estimate_plane(const vector &ground) { void PatchWorkpp::extract_initial_seeds(const int zone_idx, const vector &p_sorted, vector &init_seeds, - double th_seed) { + double th_seed) const { init_seeds.clear(); // LPR is the mean of low point representative @@ -115,7 +118,7 @@ void PatchWorkpp::extract_initial_seeds(const int zone_idx, void PatchWorkpp::extract_initial_seeds(const int zone_idx, const vector &p_sorted, - vector &init_seeds) { + vector &init_seeds) const { init_seeds.clear(); // LPR is the mean of low point representative @@ -185,37 +188,40 @@ void PatchWorkpp::estimateGround(Eigen::MatrixXf cloud_in) { std::vector candidates; std::vector ringwise_flatness; + // NOTE: TBB parallelisation was evaluated for this main loop and + // measurably HURT throughput on KITTI (24-core / 8-core / 4-core all + // 30-50% slower than single-thread). The per-patch work is small + // (~14 µs avg) and dominated by short-lived `std::vector` / + // `Eigen::Matrix` allocations inside R-VPF + R-GPF, so concurrent + // mallocs serialise on the heap and TBB scheduler overhead exceeds + // the parallelisation benefit. Single-threaded Patchwork++ already + // runs ~110 Hz on KITTI HDL-64E (2× the paper's reported 55 Hz on + // i7-7700K), so there is no real-time motivation to parallelise. + // The classic Patchwork (see cpp/patchwork/src/patchwork.cpp) does + // benefit from TBB because it has no R-VPF and fewer allocations + // per patch. for (int zone_idx = 0; zone_idx < params_.num_zones; ++zone_idx) { auto zone = ConcentricZoneModel_[zone_idx]; for (int ring_idx = 0; ring_idx < params_.num_rings_each_zone[zone_idx]; ++ring_idx) { - for (int sector_idx = 0; sector_idx < params_.num_sectors_each_zone[zone_idx]; ++sector_idx) { + const int num_sectors = params_.num_sectors_each_zone[zone_idx]; + + clock_t t_bef_gle = clock(); + for (int sector_idx = 0; sector_idx < num_sectors; ++sector_idx) { if (zone[ring_idx][sector_idx].size() < params_.num_min_pts) { addCloud(cloud_nonground_, zone[ring_idx][sector_idx]); continue; } - // --------- region-wise sorting (faster than global sorting method) ---------------- // - clock_t t_bef_sort = clock(); - sort(zone[ring_idx][sector_idx].begin(), zone[ring_idx][sector_idx].end(), point_z_cmp); - clock_t t_aft_sort = clock(); - - t_sort += t_aft_sort - t_bef_sort; - // ---------------------------------------------------------------------------------- // + std::sort( + zone[ring_idx][sector_idx].begin(), zone[ring_idx][sector_idx].end(), point_z_cmp); - clock_t t_bef_pca = clock(); extract_piecewiseground( zone_idx, zone[ring_idx][sector_idx], regionwise_ground_, regionwise_nonground_); - clock_t t_aft_pca = clock(); - - t_pca += t_aft_pca - t_bef_pca; centers_.push_back(PointXYZ(pc_mean_(0), pc_mean_(1), pc_mean_(2))); normals_.push_back(PointXYZ(normal_(0), normal_(1), normal_(2))); - clock_t t_bef_gle = clock(); - // Status of each patch - // used in checking uprightness, elevation, and flatness, respectively const double ground_uprightness = normal_(2); const double ground_elevation = pc_mean_(2); const double ground_flatness = singular_values_.minCoeff(); @@ -224,46 +230,25 @@ void PatchWorkpp::estimateGround(Eigen::MatrixXf cloud_in) { : std::numeric_limits::max(); double heading = 0.0; - for (int i = 0; i < 3; i++) heading += pc_mean_(i) * normal_(i); - - /* - About 'is_heading_outside' condition, heading should be smaller than 0 theoretically. - ( Imagine the geometric relationship between the surface normal vector on the ground - plane and the vector connecting the sensor origin and the mean point of the ground plane - ) + for (int i = 0; i < 3; ++i) heading += pc_mean_(i) * normal_(i); - However, when the patch is far awaw from the sensor origin, - heading could be larger than 0 even if it's ground due to lack of amount of ground plane - points. - - Therefore, we only check this value when concentric_idx < num_rings_of_interest ( near - condition ) - */ - bool is_upright = ground_uprightness > params_.uprightness_thr; - bool is_near_zone = concentric_idx < params_.num_rings_of_interest; - bool is_heading_outside = heading < 0.0; + const bool is_upright = ground_uprightness > params_.uprightness_thr; + const bool is_near_zone = concentric_idx < params_.num_rings_of_interest; + const bool is_heading_outside = heading < 0.0; bool is_not_elevated = false; bool is_flat = false; - if (concentric_idx < params_.num_rings_of_interest) { is_not_elevated = ground_elevation < params_.elevation_thr[concentric_idx]; is_flat = ground_flatness < params_.flatness_thr[concentric_idx]; } - /* - Store the elevation & flatness variables - for A-GLE (Adaptive Ground Likelihood Estimation) - and TGR (Temporal Ground Revert). More information in the paper Patchwork++. - */ if (is_upright && is_not_elevated && is_near_zone) { update_elevation_[concentric_idx].push_back(ground_elevation); update_flatness_[concentric_idx].push_back(ground_flatness); - ringwise_flatness.push_back(ground_flatness); } - // Ground estimation based on conditions if (!is_upright) { addCloud(cloud_nonground_, regionwise_ground_); } else if (!is_near_zone) { @@ -273,21 +258,17 @@ void PatchWorkpp::estimateGround(Eigen::MatrixXf cloud_in) { } else if (is_not_elevated || is_flat) { addCloud(cloud_ground_, regionwise_ground_); } else { - patchwork::RevertCandidate candidate(concentric_idx, - sector_idx, - ground_flatness, - line_variable, - pc_mean_, - regionwise_ground_); - candidates.push_back(candidate); + candidates.emplace_back(concentric_idx, + sector_idx, + ground_flatness, + line_variable, + pc_mean_, + regionwise_ground_); } - // Every regionwise_nonground is considered nonground. addCloud(cloud_nonground_, regionwise_nonground_); - - clock_t t_aft_gle = clock(); - - t_gle += t_aft_gle - t_bef_gle; } + clock_t t_aft_gle = clock(); + t_gle += t_aft_gle - t_bef_gle; clock_t t_bef_revert = clock(); if (!candidates.empty()) { diff --git a/python/examples/bench_hz.py b/python/examples/bench_hz.py new file mode 100644 index 0000000..12face9 --- /dev/null +++ b/python/examples/bench_hz.py @@ -0,0 +1,104 @@ +"""Measure per-frame Patchwork / Patchwork++ throughput on a KITTI sequence. + +Reports median + p95 per-frame `getTimeTaken()` (the C++ side's +microsecond timer), converted to Hz. Useful for quantifying the +multi-core speedup brought by TBB. + +Run before vs. after a TBB change to compare. +""" + +import argparse +import os +import statistics +import time + +import numpy as np +import pypatchworkpp + + +def load_bin(path): + return np.fromfile(path, dtype=np.float32).reshape(-1, 4) + + +def build_estimator(method, sensor_height=1.723): + if method == "patchworkpp": + p = pypatchworkpp.Parameters() + p.sensor_height = sensor_height + p.verbose = False + return pypatchworkpp.patchworkpp(p) + if method == "patchwork": + p = pypatchworkpp.PatchworkParams() + p.sensor_height = sensor_height + p.uprightness_thr = 0.707 + p.using_global_thr = False + p.verbose = False + return pypatchworkpp.patchwork(p) + raise ValueError(method) + + +def main(): + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument( + "--method", choices=["patchwork", "patchworkpp"], default="patchworkpp" + ) + ap.add_argument( + "--dataset_path", + default="/home/url/datasets/kitti/dataset/sequences", + ) + ap.add_argument("--seq", default="00") + ap.add_argument("--max_frames", type=int, default=None) + ap.add_argument( + "--warmup", + type=int, + default=20, + help="Discard the first N frames (cache warmup / TBB thread spin-up).", + ) + args = ap.parse_args() + + velodyne_dir = os.path.join(args.dataset_path, args.seq, "velodyne") + bin_files = sorted(f for f in os.listdir(velodyne_dir) if f.endswith(".bin")) + if args.max_frames is not None: + bin_files = bin_files[: args.max_frames] + if not bin_files: + raise SystemExit(f"No .bin files in {velodyne_dir}") + + estimator = build_estimator(args.method) + print( + f"[bench] method={args.method} seq={args.seq} " + f"frames={len(bin_files)} warmup={args.warmup}" + ) + + per_frame_us = [] + wall_t0 = time.perf_counter() + for i, fname in enumerate(bin_files): + cloud = load_bin(os.path.join(velodyne_dir, fname)) + estimator.estimateGround(cloud) + if i >= args.warmup: + per_frame_us.append(estimator.getTimeTaken()) + wall_dt = time.perf_counter() - wall_t0 + + if not per_frame_us: + raise SystemExit( + "No frames after warmup; lower --warmup or use --max_frames bigger." + ) + + per_frame_ms = sorted(t / 1000.0 for t in per_frame_us) + n = len(per_frame_ms) + median_ms = statistics.median(per_frame_ms) + p95_ms = per_frame_ms[int(0.95 * (n - 1))] + p99_ms = per_frame_ms[int(0.99 * (n - 1))] + mean_ms = statistics.fmean(per_frame_ms) + median_hz = 1000.0 / median_ms + mean_hz = 1000.0 / mean_ms + + print(f"[bench] timed frames : {n}") + print(f"[bench] median time : {median_ms:6.2f} ms ({median_hz:6.1f} Hz)") + print(f"[bench] mean time : {mean_ms:6.2f} ms ({mean_hz:6.1f} Hz)") + print(f"[bench] p95 / p99 time : {p95_ms:6.2f} / {p99_ms:6.2f} ms") + print( + f"[bench] wall (incl. I/O) : {wall_dt:6.2f} s ({len(bin_files) / wall_dt:6.1f} Hz including disk)" + ) + + +if __name__ == "__main__": + main() From 02d574b295d38f5f75b3b2e87263a6e14e3b9347 Mon Sep 17 00:00:00 2001 From: Hyungtae Lim Date: Thu, 21 May 2026 17:21:32 +0900 Subject: [PATCH 2/2] build: make TBB optional in classic Patchwork MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI runners (cpp_api on Ubuntu/macOS/Windows, python_package and cibuildwheel jobs) do not install libtbb-dev, so PR #95's FATAL_ERROR on missing TBB broke 11 of 18 checks. Switch to a soft find: - find_package(TBB CONFIG/MODULE QUIET) — sets TBB_FOUND or not. - When TBB_FOUND, classic Patchwork links TBB::tbb and gets a PATCHWORK_HAS_TBB compile define. - cpp/patchwork/src/patchwork.cpp now guards both the #include and the parallel_for site with #ifdef PATCHWORK_HAS_TBB and falls back to a sequential loop over the same patch-index list when TBB is unavailable. - cpp/CMakeLists.txt prints a STATUS message either way so users know whether they got the 1.73x speedup or not. Tested locally: - With libtbb-dev installed: "-- TBB found — classic Patchwork will use tbb::parallel_for." → builds + runs, matches v1.3.1 numbers. - With -DCMAKE_DISABLE_FIND_PACKAGE_TBB=ON: "-- TBB not found — classic Patchwork falls back to a sequential loop." → builds clean, no TBB symbols required. Patchwork++ remains untouched (issue #96). --- cpp/CMakeLists.txt | 23 +++++++++++++++-------- cpp/patchwork/CMakeLists.txt | 6 +++++- cpp/patchwork/src/patchwork.cpp | 26 ++++++++++++++++++-------- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3a65991..d69fadf 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -40,18 +40,25 @@ endfunction() find_external_dependency("Eigen3" "Eigen3::Eigen" "${CMAKE_CURRENT_LIST_DIR}/cmake/eigen.cmake") -# Intel oneTBB / TBB for thread-parallel per-patch ground segmentation. -# Required by both `cpp/patchwork/` (classic Patchwork) and `cpp/patchworkpp/`. +# Intel oneTBB / TBB — OPTIONAL. The classic Patchwork main loop +# uses tbb::parallel_for when available (1.73x speedup on KITTI) and +# falls back to a sequential loop otherwise. Patchwork++ does not use +# TBB; see issue #96 for the measurement that justifies that decision. +# +# To enable the speedup locally: +# Ubuntu/Debian: apt install libtbb-dev +# macOS: brew install tbb +# Windows: vcpkg install tbb (or use oneAPI Base Toolkit) find_package(TBB CONFIG QUIET) if (NOT TBB_FOUND) find_package(TBB MODULE QUIET) endif() -if (NOT TBB_FOUND) - message(FATAL_ERROR - "TBB not found. Install Intel oneTBB:\n" - " Ubuntu/Debian: apt install libtbb-dev\n" - " macOS: brew install tbb\n" - " Windows: vcpkg install tbb (or use oneAPI Base Toolkit)") +if (TBB_FOUND) + message(STATUS "TBB found — classic Patchwork will use tbb::parallel_for.") +else() + message(STATUS + "TBB not found — classic Patchwork falls back to a sequential loop. " + "Install libtbb-dev / brew install tbb / vcpkg install tbb to get the parallel speedup.") endif() # Parameters in `patchworkpp` subdirectory. diff --git a/cpp/patchwork/CMakeLists.txt b/cpp/patchwork/CMakeLists.txt index 2b6b7e7..59d281b 100644 --- a/cpp/patchwork/CMakeLists.txt +++ b/cpp/patchwork/CMakeLists.txt @@ -11,7 +11,11 @@ target_include_directories(${CLASSIC_TARGET} PUBLIC $ $ ) -target_link_libraries(${CLASSIC_TARGET} Eigen3::Eigen ground_seg_common ground_seg_cores TBB::tbb) +target_link_libraries(${CLASSIC_TARGET} Eigen3::Eigen ground_seg_common ground_seg_cores) +if (TBB_FOUND) + target_link_libraries(${CLASSIC_TARGET} TBB::tbb) + target_compile_definitions(${CLASSIC_TARGET} PUBLIC PATCHWORK_HAS_TBB) +endif() add_library(${PARENT_PROJECT_NAME}::${CLASSIC_TARGET} ALIAS ${CLASSIC_TARGET}) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ diff --git a/cpp/patchwork/src/patchwork.cpp b/cpp/patchwork/src/patchwork.cpp index 6a99776..f70a9a1 100644 --- a/cpp/patchwork/src/patchwork.cpp +++ b/cpp/patchwork/src/patchwork.cpp @@ -6,8 +6,10 @@ #include #include +#ifdef PATCHWORK_HAS_TBB #include #include +#endif #include "patchwork/plane_fit.h" @@ -405,17 +407,25 @@ void PatchWork::estimateGround(const Eigen::MatrixXf& cloud) { const int num_patches = static_cast(patch_indices.size()); std::vector outcomes(num_patches); + auto process_patch_range = [&](int begin, int end) { + for (int k = begin; k < end; ++k) { + const auto& [z, r, s] = patch_indices[k]; + const auto& patch = regionwise_patches_[z][r][s]; + auto& out = outcomes[k]; + out.patch_ref = &patch; + perform_regionwise_segmentation( + z, r, patch, out.patch_ground, out.patch_nonground, out.status); + } + }; + +#ifdef PATCHWORK_HAS_TBB tbb::parallel_for(tbb::blocked_range(0, num_patches), [&](const tbb::blocked_range& range) { - for (int k = range.begin(); k < range.end(); ++k) { - const auto& [z, r, s] = patch_indices[k]; - const auto& patch = regionwise_patches_[z][r][s]; - auto& out = outcomes[k]; - out.patch_ref = &patch; - perform_regionwise_segmentation( - z, r, patch, out.patch_ground, out.patch_nonground, out.status); - } + process_patch_range(range.begin(), range.end()); }); +#else + process_patch_range(0, num_patches); +#endif ground_pts_.clear(); nonground_pts_.clear();