From f922c55378df7b99fd8a108fe6ba01f6e315247e Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sat, 27 Jun 2026 23:21:55 +1000 Subject: [PATCH 1/8] Perf framework: align with .NET Azure.Test.Perf (Option A parity) Brings the C++ perf framework (sdk/core/perf) and Storage Blob perf tests to format / contract parity with the .NET Azure.Test.Perf reference, which the cross-language perf-automation pipeline keys off. Framework (sdk/core/perf) ------------------------- * New per-op latency collector (latency_stats.{hpp,cpp}) emitting the .NET 8-percentile distribution: 50 / 75 / 90 / 99 / 99.9 / 99.99 / 99.999 / 100 with the exact `=== Latency Distribution ===` header and `{pct,7:N3}% {ms,8:N2}ms` row format. * New CPU + memory sampler (process_stats.{hpp,cpp}); the throughput `Completed N operations ... (Y ops/s, Z s/op, P% CPU)` line now includes inline CPU like .NET while preserving the existing `(... ops/s` substring that downstream Cpp.cs regex relies on. * New result_output.{hpp,cpp}: - `--results-file` writes `[{ "Time": <ms>, "Size": <bytes> }, ...]` matching .NET OperationResult schema (PascalCase, Size = -1 when test has no SizeOptions). - `--statistics` / `--job-statistics` wraps a `BenchmarkOutput` envelope between `#StartJobStatistics` and `#EndJobStatistics` with Metadata before Measurements (key order matches .NET). - Timestamp emitted at 100-nanosecond (7-digit) resolution like .NET DateTime.ToString("O"). * New versions.{hpp,cpp} printing a `=== Versions ===` block as the last thing emitted by the run (matches .NET ordering). * New options: `--status-interval`, `--results-file`, `--sync` (all present in .NET PerfOptions). * New non-breaking CLI aliases matching .NET names: - `--job-statistics` (bare switch) alongside existing `--statistics <0|1>` - `--no-cleanup` (bare switch) alongside existing `--noclean <0|1>` * GTest coverage for latency, process_stats, circular_stream, and result_output (9 tests, all passing). 
Storage Blob perf tests (sdk/storage/azure-storage-blobs/test/perf) ------------------------------------------------------------------- * New blob-test flags aligning the C++ UploadBlob / DownloadBlob / ListBlob scenarios with the .NET / Go test surface: - `--upload-method` (buffer | stream | single) - `--download-method` (buffer | stream) - `--block-size`, `--concurrency`, `--num-blobs`, `--page-size` * Memory-budget guard (memory_budget.hpp) prevents OOM in buffer-mode tests at multi-GiB payloads. Verification ------------ Built MinSizeRel on Windows / VS 2026 with vcpkg x64-windows-static (curl, openssl, gtest). All 9 unit tests pass. A live perf run against the `euap` storage account using AzureCliCredential produced output that diffs byte-clean against an equivalent .NET Azure.Storage.Blobs.Perf run for every contract emitted by this change (latency distribution header / rows, throughput line shape including `% CPU`, BenchmarkOutput JSON shape and key order, timestamp precision, Versions-block ordering, results-file schema). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/core/perf/CMakeLists.txt | 8 + sdk/core/perf/inc/azure/perf.hpp | 5 + .../perf/inc/azure/perf/latency_stats.hpp | 105 +++++++ sdk/core/perf/inc/azure/perf/options.hpp | 22 ++ .../perf/inc/azure/perf/process_stats.hpp | 107 +++++++ .../perf/inc/azure/perf/result_output.hpp | 87 ++++++ sdk/core/perf/inc/azure/perf/versions.hpp | 30 ++ sdk/core/perf/src/arg_parser.cpp | 22 ++ sdk/core/perf/src/latency_stats.cpp | 126 +++++++++ sdk/core/perf/src/options.cpp | 24 +- sdk/core/perf/src/process_stats.cpp | 264 ++++++++++++++++++ sdk/core/perf/src/program.cpp | 264 +++++++++++++++--- sdk/core/perf/src/result_output.cpp | 105 +++++++ sdk/core/perf/src/versions.cpp | 57 ++++ sdk/core/perf/test/CMakeLists.txt | 3 + sdk/core/perf/test/src/latency_stats_test.cpp | 82 ++++++ sdk/core/perf/test/src/process_stats_test.cpp | 45 +++ sdk/core/perf/test/src/result_output_test.cpp | 93 ++++++ sdk/storage/azure-storage-blobs/CHANGELOG.md | 1 + .../azure-storage-blobs/perf-tests.yml | 15 + .../test/perf/CMakeLists.txt | 1 + .../storage/blobs/test/download_blob_test.hpp | 92 +++++- .../storage/blobs/test/list_blob_test.hpp | 30 +- .../storage/blobs/test/memory_budget.hpp | 103 +++++++ .../storage/blobs/test/upload_blob_test.hpp | 77 ++++- 25 files changed, 1705 insertions(+), 63 deletions(-) create mode 100644 sdk/core/perf/inc/azure/perf/latency_stats.hpp create mode 100644 sdk/core/perf/inc/azure/perf/process_stats.hpp create mode 100644 sdk/core/perf/inc/azure/perf/result_output.hpp create mode 100644 sdk/core/perf/inc/azure/perf/versions.hpp create mode 100644 sdk/core/perf/src/latency_stats.cpp create mode 100644 sdk/core/perf/src/process_stats.cpp create mode 100644 sdk/core/perf/src/result_output.cpp create mode 100644 sdk/core/perf/src/versions.cpp create mode 100644 sdk/core/perf/test/src/latency_stats_test.cpp create mode 100644 sdk/core/perf/test/src/process_stats_test.cpp create mode 100644 
sdk/core/perf/test/src/result_output_test.cpp create mode 100644 sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp diff --git a/sdk/core/perf/CMakeLists.txt b/sdk/core/perf/CMakeLists.txt index 881774cb98..81ae94ea1b 100644 --- a/sdk/core/perf/CMakeLists.txt +++ b/sdk/core/perf/CMakeLists.txt @@ -18,21 +18,29 @@ set( inc/azure/perf/argagg.hpp inc/azure/perf/base_test.hpp inc/azure/perf/dynamic_test_options.hpp + inc/azure/perf/latency_stats.hpp inc/azure/perf/options.hpp + inc/azure/perf/process_stats.hpp inc/azure/perf/program.hpp inc/azure/perf/random_stream.hpp + inc/azure/perf/result_output.hpp inc/azure/perf/test.hpp inc/azure/perf/test_metadata.hpp inc/azure/perf/test_options.hpp + inc/azure/perf/versions.hpp ) set( AZURE_PERFORMANCE_SOURCE src/arg_parser.cpp src/base_test.cpp + src/latency_stats.cpp src/options.cpp + src/process_stats.cpp src/program.cpp src/random_stream.cpp + src/result_output.cpp + src/versions.cpp ) add_library(azure-perf ${AZURE_PERFORMANCE_HEADER} ${AZURE_PERFORMANCE_SOURCE}) diff --git a/sdk/core/perf/inc/azure/perf.hpp b/sdk/core/perf/inc/azure/perf.hpp index 0c9d154292..bf815b4e78 100644 --- a/sdk/core/perf/inc/azure/perf.hpp +++ b/sdk/core/perf/inc/azure/perf.hpp @@ -12,8 +12,13 @@ #include "azure/perf/argagg.hpp" #include "azure/perf/base_test.hpp" #include "azure/perf/dynamic_test_options.hpp" +#include "azure/perf/latency_stats.hpp" #include "azure/perf/options.hpp" +#include "azure/perf/process_stats.hpp" #include "azure/perf/program.hpp" +#include "azure/perf/random_stream.hpp" +#include "azure/perf/result_output.hpp" #include "azure/perf/test.hpp" #include "azure/perf/test_metadata.hpp" #include "azure/perf/test_options.hpp" +#include "azure/perf/versions.hpp" diff --git a/sdk/core/perf/inc/azure/perf/latency_stats.hpp b/sdk/core/perf/inc/azure/perf/latency_stats.hpp new file mode 100644 index 0000000000..d3ce093c4f --- /dev/null +++ b/sdk/core/perf/inc/azure/perf/latency_stats.hpp @@ 
-0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * @file + * @brief Per-operation latency collector and percentile summary. + * + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace Azure { namespace Perf { + + /** + * @brief Thread-safe collector of per-operation latency samples. + * + * @remark Records nanosecond-resolution durations from many worker threads and computes + * percentile summaries (p50/p75/p90/p99/p99.9/p99.99/p99.999/max) on demand. Designed to match the latency + * reporting added in the Go perf framework so cross-language results are comparable. + * + */ + class LatencyCollector { + public: + /** + * @brief A single latency sample, optionally tagged by call type. + * + */ + struct Sample + { + std::chrono::nanoseconds Duration{0}; + std::string CallType; + }; + + /** + * @brief Latency summary expressed in milliseconds, matching the .NET + * `Azure.Test.Perf` percentile distribution: 50, 75, 90, 99, 99.9, 99.99, 99.999, 100. + * + */ + struct Summary + { + uint64_t Count = 0; + double P50Ms = 0; + double P75Ms = 0; + double P90Ms = 0; + double P99Ms = 0; + double P999Ms = 0; + double P9999Ms = 0; + double P99999Ms = 0; + double P100Ms = 0; + double MeanMs = 0; + }; + + /** + * @brief Record a single latency sample with no call-type tag. + * + * @param duration The latency to record. + */ + void Record(std::chrono::nanoseconds duration); + + /** + * @brief Record a single latency sample tagged with a call type. + * + * @param callType A short label for the operation (e.g. "Upload"). + * @param duration The latency to record. + */ + void Record(std::string const& callType, std::chrono::nanoseconds duration); + + /** + * @brief Clear all recorded samples. + * + */ + void Reset(); + + /** + * @brief Compute the summary over all recorded samples. + * + * @return The percentile summary. + */ + Summary Summarize() const; + + /** + * @brief Compute summaries grouped by call type. 
+ * + * @return A vector of (callType, summary) pairs, sorted by callType. + */ + std::vector> SummarizeByCallType() const; + + /** + * @brief Snapshot all recorded samples (copy). + * + */ + std::vector Samples() const; + + private: + mutable std::mutex m_mutex; + std::vector m_samples; + }; + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/inc/azure/perf/options.hpp b/sdk/core/perf/inc/azure/perf/options.hpp index ab9d87016b..2629afd15c 100644 --- a/sdk/core/perf/inc/azure/perf/options.hpp +++ b/sdk/core/perf/inc/azure/perf/options.hpp @@ -108,6 +108,28 @@ namespace Azure { namespace Perf { */ std::vector TestProxies; + /** + * @brief Interval in seconds between live status lines printed during a run. + * + */ + int StatusInterval = 1; + + /** + * @brief When set, write a per-operation results file (JSON) containing the per-op + * latency (ms) and the per-op size (bytes) for every measured operation, matching the + * .NET `OperationResult { Time, Size }` schema. + * + * @remark Only populated when #Latency is also enabled. + * + */ + std::string ResultsFile; + + /** + * @brief Runs the sync version of the test. Not currently implemented for C++. + * + */ + bool Sync = false; + /** * @brief Create an array of the performance framework options. * diff --git a/sdk/core/perf/inc/azure/perf/process_stats.hpp b/sdk/core/perf/inc/azure/perf/process_stats.hpp new file mode 100644 index 0000000000..440f0e9392 --- /dev/null +++ b/sdk/core/perf/inc/azure/perf/process_stats.hpp @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * @file + * @brief Cross-platform CPU and resident-memory sampler for the perf framework. + * + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace Azure { namespace Perf { + + /** + * @brief Periodically samples process-wide CPU% and resident memory in a background + * thread. Snapshots the running average and the last instantaneous values. 
+ * + * @remark Designed to match the always-on sampler added in the Go perf framework, so + * the live status line and the run summary expose `CPU` (percent) and `Memory(MiB)` + * columns across all language SDKs. + * + */ + class ProcessStatsSampler { + public: + /** + * @brief A point-in-time snapshot of CPU usage and resident memory. + * + */ + struct Snapshot + { + /// CPU percent of all cores combined, e.g. 250.0 means 2.5 cores busy. Never negative. + double CpuPercent = 0.0; + /// Resident memory in bytes (Working Set / RSS). + uint64_t MemoryBytes = 0; + }; + + /** + * @brief Construct a sampler with a fixed sample interval. + * + * @param interval Time between samples. Defaults to 1 second. + */ + explicit ProcessStatsSampler(std::chrono::milliseconds interval = std::chrono::seconds(1)); + + ~ProcessStatsSampler(); + + ProcessStatsSampler(ProcessStatsSampler const&) = delete; + ProcessStatsSampler& operator=(ProcessStatsSampler const&) = delete; + + /** + * @brief Start sampling in a background thread. Safe to call multiple times; later + * calls are no-ops while a sampler thread is running. + */ + void Start(); + + /** + * @brief Stop sampling. Joins the background thread. Safe to call multiple times. + */ + void Stop(); + + /** + * @brief Get the most recent sample (CPU percent, memory bytes). + * + */ + Snapshot Latest() const; + + /** + * @brief Get the average CPU% and average memory bytes across all samples taken so + * far. CPU% is computed from cumulative CPU-seconds against wall-clock seconds; memory + * is the arithmetic mean of all samples. + * + */ + Snapshot Average() const; + + /** + * @brief Reset all accumulated samples. Useful between iterations. 
+ * + */ + void Reset(); + + private: + void Run(); + static double SampleCpuSeconds(); + static uint64_t SampleResidentMemoryBytes(); + + std::chrono::milliseconds m_interval; + std::atomic m_stop{false}; + std::thread m_thread; + + mutable std::mutex m_mutex; + // Sampling state + bool m_haveBaseline = false; + double m_baselineCpuSeconds = 0.0; + std::chrono::steady_clock::time_point m_startTime; + Snapshot m_latest; + // Running averages + uint64_t m_sampleCount = 0; + double m_memoryBytesSum = 0.0; + double m_lastCpuSeconds = 0.0; + }; + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/inc/azure/perf/result_output.hpp b/sdk/core/perf/inc/azure/perf/result_output.hpp new file mode 100644 index 0000000000..c9e6978efb --- /dev/null +++ b/sdk/core/perf/inc/azure/perf/result_output.hpp @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * @file + * @brief Run-summary helpers: `--results-file` writer and `#StartJobStatistics` printer. + * + */ + +#pragma once + +#include "azure/perf/latency_stats.hpp" +#include "azure/perf/process_stats.hpp" + +#include +#include +#include + +namespace Azure { namespace Perf { + + /** + * @brief A consolidated run summary used by the framework. Fields mirror the data + * already printed in the .NET reference framework's results block. + * + */ + struct RunSummary + { + std::string TestName; + int Parallel = 1; + int DurationSeconds = 0; + int Warmup = 0; + int Iterations = 1; + uint64_t TotalOperations = 0; + double WeightedAverageSeconds = 0; + double OperationsPerSecond = 0; + double SecondsPerOperation = 0; + double AverageCpuPercent = 0; + uint64_t AverageMemoryBytes = 0; + LatencyCollector::Summary Latency; + std::vector> LatencyByCallType; + }; + + /** + * @brief A single per-operation result, matching the .NET + * `Azure.Test.Perf.OperationResult { Time, Size }` schema. 
+ * + * `Time` is the operation latency in milliseconds; `Size` is the operation size in + * bytes (or -1 if the test does not have a meaningful size). + * + */ + struct OperationResult + { + double Time = 0; + int64_t Size = -1; + }; + + /** + * @brief Write per-operation results to `path` as a JSON array of + * `OperationResult { Time, Size }` objects, matching the .NET `--results-file` output + * shape. + * + * @param path Destination file. + * @param results The per-operation samples to write. + */ + void WriteResultsFile(std::string const& path, std::vector const& results); + + /** + * @brief Print the `#StartJobStatistics`/`#EndJobStatistics` JSON block consumed by the + * perf-automation tool. + * + * @details The payload matches the .NET reference framework's `BenchmarkOutput` + * envelope: + * ``` + * { "Metadata": [ + * {"Source","Name","ShortDescription","LongDescription","Format"} + * ], + * "Measurements": [ + * {"Timestamp","Name","Value"} + * ] + * } + * ``` + * + * @param summary The run summary to serialize. + */ + void PrintJobStatistics(RunSummary const& summary); + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/inc/azure/perf/versions.hpp b/sdk/core/perf/inc/azure/perf/versions.hpp new file mode 100644 index 0000000000..ab82890e91 --- /dev/null +++ b/sdk/core/perf/inc/azure/perf/versions.hpp @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * @file + * @brief Print the `=== Versions ===` block at end of a run. + * + */ + +#pragma once + +#include +#include +#include + +namespace Azure { namespace Perf { + + /** + * @brief Print the `=== Versions ===` block to stdout, mirroring the Go perf framework + * output. Lists the compiler/toolchain, optional CMake-injected vcpkg port versions, + * and any caller-supplied package versions. + * + * @param injectedVersions Additional `(name, version)` pairs to include in the block. 
+ * Storage perf executables already print their own `VCPKG_..._VERSION` lines for the + * perf-automation tool; pass anything else worth recording here. + */ + void PrintVersionsBlock( + std::vector> const& injectedVersions = {}); + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/arg_parser.cpp b/sdk/core/perf/src/arg_parser.cpp index ecb4cc461e..a674e70f98 100644 --- a/sdk/core/perf/src/arg_parser.cpp +++ b/sdk/core/perf/src/arg_parser.cpp @@ -70,6 +70,11 @@ Azure::Perf::GlobalTestOptions Azure::Perf::Program::ArgParser::Parse( { options.JobStatistics = parsedArgs["JobStatistics"].as(); } + // .NET-compatible bare-switch alias --job-statistics; presence implies true. + if (parsedArgs["JobStatisticsSwitch"]) + { + options.JobStatistics = true; + } if (parsedArgs["Latency"]) { options.Latency = parsedArgs["Latency"].as(); @@ -78,6 +83,11 @@ Azure::Perf::GlobalTestOptions Azure::Perf::Program::ArgParser::Parse( { options.NoCleanup = parsedArgs["NoCleanup"].as(); } + // .NET-compatible bare-switch alias --no-cleanup; presence implies true. + if (parsedArgs["NoCleanupSwitch"]) + { + options.NoCleanup = true; + } if (parsedArgs["Parallel"]) { options.Parallel = parsedArgs["Parallel"]; @@ -103,6 +113,18 @@ Azure::Perf::GlobalTestOptions Azure::Perf::Program::ArgParser::Parse( options.TestProxies.push_back(proxy); } } + if (parsedArgs["StatusInterval"]) + { + options.StatusInterval = parsedArgs["StatusInterval"]; + } + if (parsedArgs["ResultsFile"]) + { + options.ResultsFile = parsedArgs["ResultsFile"].as(); + } + if (parsedArgs["Sync"]) + { + options.Sync = true; + } return options; } diff --git a/sdk/core/perf/src/latency_stats.cpp b/sdk/core/perf/src/latency_stats.cpp new file mode 100644 index 0000000000..015ce192a5 --- /dev/null +++ b/sdk/core/perf/src/latency_stats.cpp @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include "azure/perf/latency_stats.hpp" + +#include +#include + +namespace { + +double NanosToMs(std::chrono::nanoseconds ns) +{ + return std::chrono::duration(ns).count(); +} + +// Compute the value at percentile `p` (0..100) using linear interpolation between closest ranks. +// `sortedMs` must be sorted ascending. Returns 0 for an empty input. +double Percentile(std::vector const& sortedMs, double p) +{ + if (sortedMs.empty()) + { + return 0.0; + } + if (sortedMs.size() == 1) + { + return sortedMs.front(); + } + double rank = (p / 100.0) * static_cast(sortedMs.size() - 1); + size_t lo = static_cast(rank); + size_t hi = (lo + 1 < sortedMs.size()) ? lo + 1 : lo; + double frac = rank - static_cast(lo); + return sortedMs[lo] * (1.0 - frac) + sortedMs[hi] * frac; +} + +Azure::Perf::LatencyCollector::Summary SummaryFromMs(std::vector& msValues) +{ + Azure::Perf::LatencyCollector::Summary s; + s.Count = msValues.size(); + if (msValues.empty()) + { + return s; + } + std::sort(msValues.begin(), msValues.end()); + double sum = 0; + for (auto v : msValues) + { + sum += v; + } + s.MeanMs = sum / static_cast(msValues.size()); + s.P50Ms = Percentile(msValues, 50); + s.P75Ms = Percentile(msValues, 75); + s.P90Ms = Percentile(msValues, 90); + s.P99Ms = Percentile(msValues, 99); + s.P999Ms = Percentile(msValues, 99.9); + s.P9999Ms = Percentile(msValues, 99.99); + s.P99999Ms = Percentile(msValues, 99.999); + s.P100Ms = msValues.back(); + return s; +} + +} // namespace + +namespace Azure { namespace Perf { + + void LatencyCollector::Record(std::chrono::nanoseconds duration) + { + std::lock_guard lock(m_mutex); + m_samples.push_back(Sample{duration, std::string{}}); + } + + void LatencyCollector::Record(std::string const& callType, std::chrono::nanoseconds duration) + { + std::lock_guard lock(m_mutex); + m_samples.push_back(Sample{duration, callType}); + } + + void LatencyCollector::Reset() + { + std::lock_guard lock(m_mutex); + m_samples.clear(); + } + + LatencyCollector::Summary 
LatencyCollector::Summarize() const + { + std::vector snapshot; + { + std::lock_guard lock(m_mutex); + snapshot = m_samples; + } + std::vector msValues; + msValues.reserve(snapshot.size()); + for (auto const& s : snapshot) + { + msValues.push_back(NanosToMs(s.Duration)); + } + return SummaryFromMs(msValues); + } + + std::vector> + LatencyCollector::SummarizeByCallType() const + { + std::vector snapshot; + { + std::lock_guard lock(m_mutex); + snapshot = m_samples; + } + std::map> buckets; + for (auto const& s : snapshot) + { + buckets[s.CallType].push_back(NanosToMs(s.Duration)); + } + std::vector> result; + for (auto& kv : buckets) + { + result.emplace_back(kv.first, SummaryFromMs(kv.second)); + } + return result; + } + + std::vector LatencyCollector::Samples() const + { + std::lock_guard lock(m_mutex); + return m_samples; + } + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/options.cpp b/sdk/core/perf/src/options.cpp index 44921c84dc..c666711faf 100644 --- a/sdk/core/perf/src/options.cpp +++ b/sdk/core/perf/src/options.cpp @@ -16,7 +16,10 @@ void Azure::Perf::to_json(Azure::Core::Json::_internal::json& j, const GlobalTes {"Latency", p.Latency}, {"NoCleanup", p.NoCleanup}, {"Parallel", p.Parallel}, - {"Warmup", p.Warmup}}; + {"Warmup", p.Warmup}, + {"StatusInterval", p.StatusInterval}, + {"ResultsFile", p.ResultsFile.empty() ? "N/A" : p.ResultsFile}, + {"Sync", p.Sync}}; if (p.Port) { j["Port"] = p.Port.Value(); @@ -73,11 +76,22 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe "Number of iterations of main test loop. Default to 1.", 1}, {"JobStatistics", {"--statistics"}, "Print job statistics. Default to false", 1}, + // .NET-compatible bare-switch alias for --statistics. When present, sets + // JobStatistics=true regardless of any --statistics value parsed. 
+ {"JobStatisticsSwitch", + {"--job-statistics"}, + "Print job statistics (bare switch, matches .NET --job-statistics).", + 0}, {"Latency", {"-l", "--latency"}, "Track and print per-operation latency statistics. Default to false.", 1}, {"NoCleanup", {"--noclean"}, "Disables test clean up. Default to false.", 1}, + // .NET-compatible bare-switch alias for --noclean. + {"NoCleanupSwitch", + {"--no-cleanup"}, + "Disables test clean up (bare switch, matches .NET --no-cleanup).", + 0}, {"Parallel", {"-p", "--parallel"}, "Number of operations to execute in parallel. Default to 1.", @@ -88,5 +102,13 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe {"Sync", {"-y", "--sync"}, "Runs sync version of test, not implemented", 0}, {"TestProxies", {"-x", "--test-proxies"}, "URIs of TestProxy Servers (separated by ';')", 1}, {"Warmup", {"-w", "--warmup"}, "Duration of warmup in seconds. Default to 5 seconds.", 1}, + {"StatusInterval", + {"--status-interval"}, + "Interval in seconds between live status lines. Default to 1.", + 1}, + {"ResultsFile", + {"--results-file"}, + "Write per-operation results ({Time, Size}) as JSON to this file. Requires --latency.", + 1}, }; } diff --git a/sdk/core/perf/src/process_stats.cpp b/sdk/core/perf/src/process_stats.cpp new file mode 100644 index 0000000000..eee159b341 --- /dev/null +++ b/sdk/core/perf/src/process_stats.cpp @@ -0,0 +1,264 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include "azure/perf/process_stats.hpp" + +#include + +#if defined(AZ_PLATFORM_WINDOWS) +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#if !defined(NOMINMAX) +#define NOMINMAX +#endif +#include +// psapi.h must follow windows.h +#include +#elif defined(AZ_PLATFORM_LINUX) +#include +#include +#include +#include +#elif defined(AZ_PLATFORM_MAC) +#include +#include +#endif + +#include + +namespace Azure { namespace Perf { + + ProcessStatsSampler::ProcessStatsSampler(std::chrono::milliseconds interval) + : m_interval(interval) + { + } + + ProcessStatsSampler::~ProcessStatsSampler() { Stop(); } + + void ProcessStatsSampler::Start() + { + if (m_thread.joinable()) + { + return; + } + m_stop.store(false); + { + std::lock_guard lock(m_mutex); + m_startTime = std::chrono::steady_clock::now(); + m_baselineCpuSeconds = SampleCpuSeconds(); + m_haveBaseline = true; + m_lastCpuSeconds = m_baselineCpuSeconds; + m_sampleCount = 0; + m_memoryBytesSum = 0.0; + m_latest = Snapshot{}; + } + m_thread = std::thread(&ProcessStatsSampler::Run, this); + } + + void ProcessStatsSampler::Stop() + { + if (!m_thread.joinable()) + { + return; + } + m_stop.store(true); + m_thread.join(); + } + + void ProcessStatsSampler::Run() + { + auto previousCpuSeconds = m_baselineCpuSeconds; + auto previousTime = m_startTime; + while (!m_stop.load()) + { + std::this_thread::sleep_for(m_interval); + auto now = std::chrono::steady_clock::now(); + double cpuSeconds = SampleCpuSeconds(); + uint64_t mem = SampleResidentMemoryBytes(); + + double wall + = std::chrono::duration(now - previousTime).count(); + double cpuDelta = cpuSeconds - previousCpuSeconds; + // Clamp to avoid negative readings if a counter is non-monotonic on some platforms. + double cpuPct = (wall > 0) ? 
(std::max)(0.0, (cpuDelta / wall) * 100.0) : 0.0; + + { + std::lock_guard lock(m_mutex); + m_latest.CpuPercent = cpuPct; + m_latest.MemoryBytes = mem; + m_sampleCount += 1; + m_memoryBytesSum += static_cast(mem); + m_lastCpuSeconds = cpuSeconds; + } + previousCpuSeconds = cpuSeconds; + previousTime = now; + } + } + + ProcessStatsSampler::Snapshot ProcessStatsSampler::Latest() const + { + std::lock_guard lock(m_mutex); + return m_latest; + } + + ProcessStatsSampler::Snapshot ProcessStatsSampler::Average() const + { + std::lock_guard lock(m_mutex); + Snapshot avg; + auto now = std::chrono::steady_clock::now(); + double wall = std::chrono::duration(now - m_startTime).count(); + if (m_haveBaseline && wall > 0) + { + double cpuDelta = m_lastCpuSeconds - m_baselineCpuSeconds; + avg.CpuPercent = (std::max)(0.0, (cpuDelta / wall) * 100.0); + } + if (m_sampleCount > 0) + { + avg.MemoryBytes + = static_cast(m_memoryBytesSum / static_cast(m_sampleCount)); + } + return avg; + } + + void ProcessStatsSampler::Reset() + { + std::lock_guard lock(m_mutex); + m_startTime = std::chrono::steady_clock::now(); + m_baselineCpuSeconds = SampleCpuSeconds(); + m_haveBaseline = true; + m_lastCpuSeconds = m_baselineCpuSeconds; + m_sampleCount = 0; + m_memoryBytesSum = 0.0; + m_latest = Snapshot{}; + } + +#if defined(AZ_PLATFORM_WINDOWS) + double ProcessStatsSampler::SampleCpuSeconds() + { + FILETIME creation, exitTime, kernel, user; + if (!GetProcessTimes(GetCurrentProcess(), &creation, &exitTime, &kernel, &user)) + { + return 0.0; + } + auto toSeconds = [](FILETIME const& ft) { + ULARGE_INTEGER u; + u.LowPart = ft.dwLowDateTime; + u.HighPart = ft.dwHighDateTime; + // FILETIME is in 100-ns units. 
+ return static_cast(u.QuadPart) / 1.0e7; + }; + return toSeconds(kernel) + toSeconds(user); + } + + uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() + { + PROCESS_MEMORY_COUNTERS pmc; + if (!GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) + { + return 0; + } + return static_cast(pmc.WorkingSetSize); + } +#elif defined(AZ_PLATFORM_LINUX) + double ProcessStatsSampler::SampleCpuSeconds() + { + std::ifstream stat("/proc/self/stat"); + if (!stat.is_open()) + { + return 0.0; + } + std::string content; + std::getline(stat, content); + // /proc/self/stat fields are space-separated, but the second field (comm) may contain + // spaces and is wrapped in parentheses. Skip past it before splitting. + auto rp = content.rfind(')'); + if (rp == std::string::npos) + { + return 0.0; + } + std::istringstream iss(content.substr(rp + 1)); + std::string token; + // After the ')', the next field is field 3 ('state'); CPU times are fields 14 (utime) + // and 15 (stime), i.e. tokens 11 and 12 (0-indexed) of the remainder. 
+ unsigned long utime = 0, stime = 0; + for (int i = 0; i < 14; ++i) + { + if (!(iss >> token)) + { + return 0.0; + } + if (i == 11) + { + utime = std::stoul(token); + } + else if (i == 12) + { + stime = std::stoul(token); + } + } + long hz = sysconf(_SC_CLK_TCK); + if (hz <= 0) + { + hz = 100; + } + return static_cast(utime + stime) / static_cast(hz); + } + + uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() + { + std::ifstream status("/proc/self/status"); + if (!status.is_open()) + { + return 0; + } + std::string line; + while (std::getline(status, line)) + { + if (line.rfind("VmRSS:", 0) == 0) + { + std::istringstream iss(line.substr(6)); + unsigned long kb = 0; + std::string unit; + iss >> kb >> unit; + return static_cast(kb) * 1024ULL; + } + } + return 0; + } +#elif defined(AZ_PLATFORM_MAC) + double ProcessStatsSampler::SampleCpuSeconds() + { + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) != 0) + { + return 0.0; + } + double user = static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) / 1.0e6; + double sys = static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) / 1.0e6; + return user + sys; + } + + uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() + { + mach_task_basic_info info; + mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; + if (task_info( + mach_task_self(), + MACH_TASK_BASIC_INFO, + reinterpret_cast(&info), + &count) + != KERN_SUCCESS) + { + return 0; + } + return static_cast(info.resident_size); + } +#else + double ProcessStatsSampler::SampleCpuSeconds() { return 0.0; } + uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() { return 0; } +#endif + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/program.cpp b/sdk/core/perf/src/program.cpp index fac6dec3f7..cd864e4d3a 100644 --- a/sdk/core/perf/src/program.cpp +++ b/sdk/core/perf/src/program.cpp @@ -4,15 +4,22 @@ #include "azure/perf/program.hpp" #include "azure/perf/argagg.hpp" +#include "azure/perf/latency_stats.hpp" 
+#include "azure/perf/process_stats.hpp" +#include "azure/perf/result_output.hpp" +#include "azure/perf/versions.hpp" #include #include #include #include +#include #include #include +#include #include +#include #include namespace { @@ -151,13 +158,23 @@ inline void RunLoop( uint64_t& completedOperations, std::chrono::nanoseconds& lastCompletionTimes, bool latency, + Azure::Perf::LatencyCollector* latencyCollector, bool& isCancelled) { - (void)latency; auto start = std::chrono::system_clock::now(); while (!isCancelled) { - test.Run(context); + if (latency && latencyCollector != nullptr) + { + auto opStart = std::chrono::steady_clock::now(); + test.Run(context); + auto opEnd = std::chrono::steady_clock::now(); + latencyCollector->Record(opEnd - opStart); + } + else + { + test.Run(context); + } completedOperations += 1; lastCompletionTimes = std::chrono::system_clock::now() - start; } @@ -224,61 +241,94 @@ inline void RunTests( std::vector> const& tests, Azure::Perf::GlobalTestOptions const& options, std::string const& title, + Azure::Perf::ProcessStatsSampler* sampler, + Azure::Perf::LatencyCollector* latencyCollector, + Azure::Perf::RunSummary* outSummary, bool warmup = false) { - (void)title; auto parallelTestsCount = options.Parallel; auto durationInSeconds = warmup ? options.Warmup : options.Duration; - // auto jobStatistics = warmup ? false : options.JobStatistics; - // auto latency = warmup ? false : options.Latency; + auto recordLatency = warmup ? false : options.Latency; std::vector completedOperations(parallelTestsCount); std::vector lastCompletionTimes(parallelTestsCount); + // Per-iteration reset: clear the latency collector so each iteration produces an + // independent summary, matching the Go perf-framework lifecycle. 
+ if (recordLatency && latencyCollector != nullptr) + { + latencyCollector->Reset(); + } + if (!warmup && sampler != nullptr) + { + sampler->Reset(); + } + /********************* Progress Reporter ******************************/ Azure::Core::Context progressToken; uint64_t lastCompleted = 0; - auto progressThread = std::thread( - [&title, &completedOperations, &lastCompletionTimes, &lastCompleted, &progressToken]() { - std::cout << std::endl - << "=== " << title << " ===" << std::endl - << "Current\t\tTotal\t\tAverage" << std::endl; - while (!progressToken.IsCancelled()) - { - using namespace std::chrono_literals; - std::this_thread::sleep_for(1000ms); - auto total = Sum(completedOperations); - auto current = total - lastCompleted; - auto avg = Sum(ZipAvg(completedOperations, lastCompletionTimes)); - lastCompleted = total; - std::cout << current << "\t\t" << total << "\t\t" << avg << std::endl; - } - }); + int statusInterval = (options.StatusInterval > 0) ? options.StatusInterval : 1; + auto progressThread = std::thread([&title, + &completedOperations, + &lastCompletionTimes, + &lastCompleted, + &progressToken, + sampler, + statusInterval]() { + std::cout << std::endl + << "=== " << title << " ===" << std::endl + << "Current\t\tTotal\t\tAverage\t\tCPU\t\tMemory(MiB)" << std::endl; + while (!progressToken.IsCancelled()) + { + std::this_thread::sleep_for(std::chrono::seconds(statusInterval)); + auto total = Sum(completedOperations); + auto current = total - lastCompleted; + auto avg = Sum(ZipAvg(completedOperations, lastCompletionTimes)); + lastCompleted = total; + double cpuPct = 0; + double memMiB = 0; + if (sampler != nullptr) + { + auto snap = sampler->Latest(); + cpuPct = snap.CpuPercent; + memMiB = static_cast(snap.MemoryBytes) / (1024.0 * 1024.0); + } + std::cout << current << "\t\t" << total << "\t\t" << avg << "\t\t" << cpuPct << "\t\t" + << memMiB << std::endl; + } + }); /********************* parallel test creation ******************************/ 
std::vector tasks(tests.size()); auto deadLineSeconds = std::chrono::seconds(durationInSeconds); for (size_t index = 0; index != tests.size(); index++) { - tasks[index] = std::thread( - [index, &tests, &completedOperations, &lastCompletionTimes, &deadLineSeconds, &context]() { - bool isCancelled = false; - // Azure::Context is not good performer for checking cancellation inside the test loop - auto manualCancellation = std::thread([&deadLineSeconds, &isCancelled] { - std::this_thread::sleep_for(deadLineSeconds); - isCancelled = true; - }); - - RunLoop( - context, - *tests[index], - completedOperations[index], - lastCompletionTimes[index], - false, - isCancelled); - - manualCancellation.join(); - }); + tasks[index] = std::thread([index, + &tests, + &completedOperations, + &lastCompletionTimes, + &deadLineSeconds, + &context, + latencyCollector, + recordLatency]() { + bool isCancelled = false; + // Azure::Context is not good performer for checking cancellation inside the test loop + auto manualCancellation = std::thread([&deadLineSeconds, &isCancelled] { + std::this_thread::sleep_for(deadLineSeconds); + isCancelled = true; + }); + + RunLoop( + context, + *tests[index], + completedOperations[index], + lastCompletionTimes[index], + recordLatency, + latencyCollector, + isCancelled); + + manualCancellation.join(); + }); } // Wait for all tests to complete setUp for (auto& t : tasks) @@ -297,13 +347,74 @@ inline void RunTests( auto secondsPerOperation = 1 / operationsPerSecond; auto weightedAverageSeconds = totalOperations / operationsPerSecond; + // Append `, NN.NN% CPU` inside the parens to match the .NET results-line format that + // perf-automation downstream parsers may key on. The leading `(...) ops/s` substring is + // preserved verbatim so Cpp.cs's existing ops/s regex still matches. 
+ double resultsCpuPercent = 0; + if (sampler != nullptr) + { + resultsCpuPercent = sampler->Average().CpuPercent; + } std::cout << std::endl << "Completed " << FormatNumber(totalOperations, false) << " operations in a weighted-average of " << FormatNumber(weightedAverageSeconds, false) << "s (" - << FormatNumber(operationsPerSecond) << " ops/s, " << secondsPerOperation << " s/op)" - << std::endl + << FormatNumber(operationsPerSecond) << " ops/s, " << secondsPerOperation << " s/op, " + << resultsCpuPercent << "% CPU)" << std::endl << std::endl; + + if (!warmup && outSummary != nullptr) + { + outSummary->TotalOperations = totalOperations; + outSummary->OperationsPerSecond = operationsPerSecond; + outSummary->SecondsPerOperation = secondsPerOperation; + outSummary->WeightedAverageSeconds = weightedAverageSeconds; + if (sampler != nullptr) + { + auto avg = sampler->Average(); + outSummary->AverageCpuPercent = avg.CpuPercent; + outSummary->AverageMemoryBytes = avg.MemoryBytes; + } + if (recordLatency && latencyCollector != nullptr) + { + outSummary->Latency = latencyCollector->Summarize(); + outSummary->LatencyByCallType = latencyCollector->SummarizeByCallType(); + + auto const& s = outSummary->Latency; + if (s.Count > 0) + { + // Match the .NET Azure.Test.Perf latency distribution exactly: + // format string is `{percentile,7:N3}% {ms,8:N2}ms` — i.e., 7-char-wide + // percentile with 3 decimals, then "% ", then 8-char-wide latency with + // 2 decimals, then "ms". Reproduce the format here for byte-near parity. 
+ struct Row + { + double Pct; + double Ms; + }; + Row rows[] = { + {50.0, s.P50Ms}, + {75.0, s.P75Ms}, + {90.0, s.P90Ms}, + {99.0, s.P99Ms}, + {99.9, s.P999Ms}, + {99.99, s.P9999Ms}, + {99.999, s.P99999Ms}, + {100.0, s.P100Ms}, + }; + std::cout << "=== Latency Distribution ===" << std::endl; + for (auto const& row : rows) + { + std::ostringstream pctSs; + pctSs << std::fixed << std::setprecision(3) << row.Pct; + std::ostringstream msSs; + msSs << std::fixed << std::setprecision(2) << row.Ms; + std::cout << std::right << std::setw(7) << pctSs.str() << "% " + << std::right << std::setw(8) << msSs.str() << "ms" << std::endl; + } + } + } + } } } // namespace @@ -327,7 +438,6 @@ void Azure::Perf::Program::Run( // Parse args only to get the test name first auto testMetadata = GetTestMetadata(tests, argc, argv); - auto const& testGenerator = testMetadata->Factory; if (testMetadata == nullptr) { // Wrong input. Print what are the options. @@ -335,6 +445,7 @@ void Azure::Perf::Program::Run( return; } + auto const& testGenerator = testMetadata->Factory; // Initial test to get it's options, we can use a dummy parser results argagg::parser_results argResults; @@ -375,6 +486,13 @@ void Azure::Perf::Program::Run( } } + /******************** Always-on CPU/memory sampler ****************/ + Azure::Perf::ProcessStatsSampler sampler; + sampler.Start(); + + /******************** Per-run latency collector (when --latency) ****************/ + Azure::Perf::LatencyCollector latencyCollector; + /******************** Global Set up ******************************/ std::cout << std::endl << "=== Global Setup ===" << std::endl; test->GlobalSetup(); @@ -419,18 +537,70 @@ void Azure::Perf::Program::Run( /******************** WarmUp ******************************/ if (options.Warmup) { - RunTests(context, parallelTest, options, "Warmup", true); + RunTests(context, parallelTest, options, "Warmup", &sampler, nullptr, nullptr, true); } /******************** Tests ******************************/ 
std::string iterationInfo; + Azure::Perf::RunSummary finalSummary; + finalSummary.TestName = testMetadata->Name; + finalSummary.Parallel = options.Parallel; + finalSummary.DurationSeconds = options.Duration; + finalSummary.Warmup = options.Warmup; + finalSummary.Iterations = options.Iterations; for (int iteration = 0; iteration < options.Iterations; iteration++) { if (iteration > 0) { iterationInfo.append(FormatNumber(iteration)); } - RunTests(context, parallelTest, options, "Test" + iterationInfo); + RunTests( + context, + parallelTest, + options, + "Test" + iterationInfo, + &sampler, + options.Latency ? &latencyCollector : nullptr, + &finalSummary); + } + + /******************** End-of-run artifacts ************************/ + sampler.Stop(); + + if (options.Latency && !options.ResultsFile.empty()) + { + // Match the .NET `--results-file` shape: an array of OperationResult { Time, Size }. + // Time is per-op latency in ms; Size is taken from the test's --size option when + // present, otherwise -1 (mirroring .NET's `(options as SizeOptions)?.Size ?? -1`). + int64_t opSize = -1; + try + { + if (argResults["Size"]) + { + opSize = argResults["Size"].as(); + } + } + catch (std::exception const&) + { + opSize = -1; + } + + std::vector ops; + auto samples = latencyCollector.Samples(); + ops.reserve(samples.size()); + for (auto const& s : samples) + { + Azure::Perf::OperationResult r; + r.Time = std::chrono::duration(s.Duration).count(); + r.Size = opSize; + ops.push_back(std::move(r)); + } + Azure::Perf::WriteResultsFile(options.ResultsFile, ops); + } + + if (options.JobStatistics) + { + Azure::Perf::PrintJobStatistics(finalSummary); } std::cout << std::endl << "=== Pre-Cleanup ===" << std::endl; @@ -468,4 +638,8 @@ void Azure::Perf::Program::Run( } test->GlobalCleanup(); } + + // Match .NET PerfProgram.cs: the assembly / runtime versions block is the LAST thing + // printed, after Cleanup, so automation parsers see it as the run terminator. 
+ Azure::Perf::PrintVersionsBlock(); } diff --git a/sdk/core/perf/src/result_output.cpp b/sdk/core/perf/src/result_output.cpp new file mode 100644 index 0000000000..344655bce9 --- /dev/null +++ b/sdk/core/perf/src/result_output.cpp @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "azure/perf/result_output.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace { + +using Azure::Core::Json::_internal::json; +using Azure::Perf::OperationResult; +using Azure::Perf::RunSummary; + +bool TryOpen(std::ofstream& f, std::string const& path) +{ + f.open(path, std::ios::out | std::ios::trunc); + if (!f.is_open()) + { + std::cerr << "warning: failed to open output file " << path << std::endl; + return false; + } + return true; +} + +// ISO-8601 UTC timestamp matching .NET's DateTime.ToString("O") JSON serialization, +// which emits fractional seconds at 100-nanosecond (7-digit) resolution. +std::string IsoUtcNow() +{ + using namespace std::chrono; + auto now = system_clock::now(); + auto secs = time_point_cast(now); + // 100-nanosecond ticks within the current second, matching .NET DateTime "fffffff". + // system_clock typically has microsecond resolution on Windows; pad with trailing zeros. + auto ticks = duration_cast>>(now - secs).count(); + std::time_t tt = system_clock::to_time_t(secs); + std::tm tm {}; +#if defined(_WIN32) + gmtime_s(&tm, &tt); +#else + gmtime_r(&tt, &tm); +#endif + std::ostringstream os; + os << std::put_time(&tm, "%Y-%m-%dT%H:%M:%S") << "." << std::setw(7) << std::setfill('0') + << ticks << "Z"; + return os.str(); +} + +} // namespace + +namespace Azure { namespace Perf { + + void WriteResultsFile(std::string const& path, std::vector const& results) + { + if (path.empty()) + { + return; + } + std::ofstream f; + if (!TryOpen(f, path)) + { + return; + } + // Match the .NET Azure.Test.Perf OperationResult JSON shape exactly: + // [ { "Time": , "Size": }, ... 
] + json arr = json::array(); + for (auto const& r : results) + { + arr.push_back(json{{"Time", r.Time}, {"Size", r.Size}}); + } + f << arr.dump(2) << std::endl; + } + + void PrintJobStatistics(RunSummary const& summary) + { + // Match the .NET BenchmarkOutput shape AND key order exactly so perf-automation's + // downstream parser sees the same fields as for .NET runs: + // { "Metadata": [ { Source, Name, ShortDescription, LongDescription, Format } ], + // "Measurements": [ { Timestamp, Name, Value } ] } + // We serialize manually because nlohmann::json sorts object keys alphabetically by + // default; .NET emits keys in declaration order. + std::ostringstream os; + os << "{\"Metadata\":[{" + << "\"Source\":\"PerfStress\"," + << "\"Name\":\"perfstress/throughput\"," + << "\"ShortDescription\":\"Throughput (ops/sec)\"," + << "\"LongDescription\":\"Throughput (ops/sec)\"," + << "\"Format\":\"n2\"" + << "}],\"Measurements\":[{" + << "\"Timestamp\":\"" << IsoUtcNow() << "\"," + << "\"Name\":\"perfstress/throughput\"," + << "\"Value\":" << json(summary.OperationsPerSecond).dump() + << "}]}"; + std::cout << "#StartJobStatistics" << std::endl; + std::cout << os.str() << std::endl; + std::cout << "#EndJobStatistics" << std::endl; + } + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/versions.cpp b/sdk/core/perf/src/versions.cpp new file mode 100644 index 0000000000..7995541bbc --- /dev/null +++ b/sdk/core/perf/src/versions.cpp @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "azure/perf/versions.hpp" + +#include +#include + +namespace { + +std::string CompilerInfo() +{ + std::ostringstream os; +#if defined(__clang__) + os << "clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__; +#elif defined(__GNUC__) + os << "gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." 
<< __GNUC_PATCHLEVEL__; +#elif defined(_MSC_VER) + os << "MSVC " << _MSC_VER; +#else + os << "unknown"; +#endif + return os.str(); +} + +std::string LanguageStandard() +{ + std::ostringstream os; +#if defined(__cplusplus) + os << "C++ __cplusplus=" << __cplusplus; +#else + os << "C++ unknown"; +#endif + return os.str(); +} + +} // namespace + +namespace Azure { namespace Perf { + + void PrintVersionsBlock( + std::vector> const& injectedVersions) + { + std::cout << std::endl << "=== Versions ===" << std::endl; + std::cout << "Compiler: " << CompilerInfo() << std::endl; + std::cout << "Language: " << LanguageStandard() << std::endl; + for (auto const& kv : injectedVersions) + { + if (kv.second.empty()) + { + continue; + } + std::cout << kv.first << ": " << kv.second << std::endl; + } + } + +}} // namespace Azure::Perf diff --git a/sdk/core/perf/test/CMakeLists.txt b/sdk/core/perf/test/CMakeLists.txt index bf213d5ab2..a9120a2079 100644 --- a/sdk/core/perf/test/CMakeLists.txt +++ b/sdk/core/perf/test/CMakeLists.txt @@ -19,7 +19,10 @@ include(GoogleTest) add_executable ( azure-perf-unit-test + src/latency_stats_test.cpp + src/process_stats_test.cpp src/random_stream_test.cpp + src/result_output_test.cpp ) if (MSVC) diff --git a/sdk/core/perf/test/src/latency_stats_test.cpp b/sdk/core/perf/test/src/latency_stats_test.cpp new file mode 100644 index 0000000000..402b385d9d --- /dev/null +++ b/sdk/core/perf/test/src/latency_stats_test.cpp @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include + +#include +#include +#include + +#include + +using namespace std::chrono; +using Azure::Perf::LatencyCollector; + +TEST(latency_stats, percentiles) +{ + LatencyCollector c; + // Insert 1..100 ms; percentiles should land near the respective ranks. 
+ for (int i = 1; i <= 100; ++i) + { + c.Record(milliseconds(i)); + } + auto s = c.Summarize(); + EXPECT_EQ(s.Count, 100u); + EXPECT_NEAR(s.P50Ms, 50.0, 1.5); + EXPECT_NEAR(s.P75Ms, 75.0, 1.5); + EXPECT_NEAR(s.P90Ms, 90.0, 1.5); + EXPECT_NEAR(s.P99Ms, 99.0, 1.5); + // p99.9/p99.99/p99.999 on a 100-sample input all collapse near the top of the range. + EXPECT_GE(s.P999Ms, 99.0); + EXPECT_GE(s.P9999Ms, 99.0); + EXPECT_GE(s.P99999Ms, 99.0); + EXPECT_LE(s.P999Ms, 100.0); + EXPECT_LE(s.P9999Ms, 100.0); + EXPECT_LE(s.P99999Ms, 100.0); + EXPECT_NEAR(s.P100Ms, 100.0, 0.001); + EXPECT_NEAR(s.MeanMs, 50.5, 0.001); +} + +TEST(latency_stats, by_call_type) +{ + LatencyCollector c; + c.Record("A", milliseconds(10)); + c.Record("A", milliseconds(30)); + c.Record("B", milliseconds(50)); + auto byType = c.SummarizeByCallType(); + ASSERT_EQ(byType.size(), 2u); + EXPECT_EQ(byType[0].first, "A"); + EXPECT_EQ(byType[0].second.Count, 2u); + EXPECT_EQ(byType[1].first, "B"); + EXPECT_EQ(byType[1].second.Count, 1u); +} + +TEST(latency_stats, reset) +{ + LatencyCollector c; + c.Record(milliseconds(5)); + c.Reset(); + EXPECT_EQ(c.Summarize().Count, 0u); +} + +TEST(latency_stats, concurrent_record) +{ + LatencyCollector c; + constexpr int N = 8; + constexpr int Per = 1000; + std::vector threads; + for (int t = 0; t < N; ++t) + { + threads.emplace_back([&c]() { + for (int i = 0; i < Per; ++i) + { + c.Record(microseconds(100)); + } + }); + } + for (auto& th : threads) + { + th.join(); + } + EXPECT_EQ(c.Summarize().Count, static_cast(N * Per)); +} diff --git a/sdk/core/perf/test/src/process_stats_test.cpp b/sdk/core/perf/test/src/process_stats_test.cpp new file mode 100644 index 0000000000..37180b3eb1 --- /dev/null +++ b/sdk/core/perf/test/src/process_stats_test.cpp @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include + +#include +#include + +#include + +using Azure::Perf::ProcessStatsSampler; + +TEST(process_stats, start_stop) +{ + ProcessStatsSampler s(std::chrono::milliseconds(50)); + s.Start(); + // Burn a little CPU so there is something to sample. + auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(250); + volatile uint64_t x = 0; + while (std::chrono::steady_clock::now() < deadline) + { + for (int i = 0; i < 10000; ++i) + { + x += i; + } + } + s.Stop(); + auto avg = s.Average(); + // CPU and memory must be non-negative; we cannot assert tighter bounds in CI. + EXPECT_GE(avg.CpuPercent, 0.0); + // MemoryBytes is unsigned; just sanity-check accessor. + (void)avg.MemoryBytes; +} + +TEST(process_stats, reset_clears) +{ + ProcessStatsSampler s(std::chrono::milliseconds(50)); + s.Start(); + std::this_thread::sleep_for(std::chrono::milliseconds(150)); + s.Stop(); + s.Reset(); + auto avg = s.Average(); + EXPECT_DOUBLE_EQ(avg.CpuPercent, 0.0); + EXPECT_EQ(avg.MemoryBytes, 0u); +} diff --git a/sdk/core/perf/test/src/result_output_test.cpp b/sdk/core/perf/test/src/result_output_test.cpp new file mode 100644 index 0000000000..1efd4e7750 --- /dev/null +++ b/sdk/core/perf/test/src/result_output_test.cpp @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include + +#include +#include +#include +#include + +#include + +using Azure::Perf::OperationResult; +using Azure::Perf::RunSummary; + +namespace { +std::string TempPath(std::string const& tag) +{ + auto base = ::testing::TempDir(); + if (!base.empty() && base.back() != '/' && base.back() != '\\') + { +#ifdef _WIN32 + base += '\\'; +#else + base += '/'; +#endif + } + return base + "azure_perf_test_" + tag + ".json"; +} + +bool FileExists(std::string const& path) +{ + std::ifstream f(path.c_str()); + return f.good(); +} + +std::string Slurp(std::string const& path) +{ + std::ifstream f(path.c_str(), std::ios::binary); + std::ostringstream ss; + ss << f.rdbuf(); + return ss.str(); +} +} // namespace + +TEST(result_output, write_results_file_matches_dotnet_schema) +{ + auto path = TempPath("results"); + std::vector results; + results.push_back({12.5, 1024}); + results.push_back({8.0, 1024}); + Azure::Perf::WriteResultsFile(path, results); + EXPECT_TRUE(FileExists(path)); + auto contents = Slurp(path); + // .NET OperationResult JSON shape: { "Time": ..., "Size": ... }. + // Field names are PascalCase and must match the .NET reference framework exactly. + EXPECT_NE(contents.find("\"Time\""), std::string::npos); + EXPECT_NE(contents.find("\"Size\""), std::string::npos); + EXPECT_NE(contents.find("12.5"), std::string::npos); + EXPECT_NE(contents.find("1024"), std::string::npos); + // Schema must NOT contain the legacy / Go-style field names. + EXPECT_EQ(contents.find("\"operation\""), std::string::npos); + EXPECT_EQ(contents.find("\"latencyMs\""), std::string::npos); + EXPECT_EQ(contents.find("\"sizeBytes\""), std::string::npos); + std::remove(path.c_str()); +} + +TEST(result_output, print_job_statistics_matches_dotnet_envelope) +{ + RunSummary s; + s.OperationsPerSecond = 1234.5; + + // Capture std::cout. 
+ std::stringstream buffer; + auto* oldBuf = std::cout.rdbuf(buffer.rdbuf()); + Azure::Perf::PrintJobStatistics(s); + std::cout.rdbuf(oldBuf); + + auto out = buffer.str(); + EXPECT_NE(out.find("#StartJobStatistics"), std::string::npos); + EXPECT_NE(out.find("#EndJobStatistics"), std::string::npos); + // Match the .NET BenchmarkOutput envelope AND key order: + // { "Metadata": [...], "Measurements": [...] } — Metadata must appear before Measurements. + std::size_t metaPos = out.find("\"Metadata\""); + std::size_t measPos = out.find("\"Measurements\""); + EXPECT_NE(metaPos, std::string::npos); + EXPECT_NE(measPos, std::string::npos); + EXPECT_LT(metaPos, measPos); + EXPECT_NE(out.find("\"Source\""), std::string::npos); + EXPECT_NE(out.find("PerfStress"), std::string::npos); + EXPECT_NE(out.find("perfstress/throughput"), std::string::npos); + EXPECT_NE(out.find("1234.5"), std::string::npos); +} diff --git a/sdk/storage/azure-storage-blobs/CHANGELOG.md b/sdk/storage/azure-storage-blobs/CHANGELOG.md index 67c57e47f4..cd15b7a8b5 100644 --- a/sdk/storage/azure-storage-blobs/CHANGELOG.md +++ b/sdk/storage/azure-storage-blobs/CHANGELOG.md @@ -11,6 +11,7 @@ ### Other Changes - Updated the default concurrency for upload and download operations. It now scales with the number of hardware threads available, clamped between 8 and 96 (previously a fixed value of 5). +- Expanded the Storage Blobs performance test surface to align with the .NET and Go perf harnesses: added `--upload-method` (`buffer`/`stream`/`single`), `--download-method` (`buffer`/`stream`), `--block-size`, `--concurrency`, `--num-blobs`, and `--page-size` flags; added a memory-budget guard for buffer-mode tests; new test variants exercising streaming for multi-GiB payloads. 
## 12.18.0 (2026-06-11) diff --git a/sdk/storage/azure-storage-blobs/perf-tests.yml b/sdk/storage/azure-storage-blobs/perf-tests.yml index 0fd2a358b7..2113439fe3 100644 --- a/sdk/storage/azure-storage-blobs/perf-tests.yml +++ b/sdk/storage/azure-storage-blobs/perf-tests.yml @@ -20,6 +20,11 @@ Tests: - --size 10485760 --parallel 32 --token-credential - --size 1073741824 --parallel 1 --warmup 60 --duration 60 --token-credential - --size 1073741824 --parallel 8 --warmup 60 --duration 60 --token-credential + # New: streaming download for very large payloads (no contiguous buffer) + - --size 5368709120 --parallel 1 --warmup 60 --duration 60 --download-method stream + - --size 5368709120 --parallel 4 --warmup 60 --duration 60 --download-method stream + # New: tune chunk size / concurrency for buffer-mode download + - --size 1073741824 --parallel 4 --warmup 60 --duration 60 --block-size 16777216 --concurrency 16 - Test: upload Class: UploadBlob @@ -32,6 +37,13 @@ Tests: - --size 10485760 --parallel 32 --token-credential - --size 1073741824 --parallel 1 --warmup 60 --duration 60 --token-credential - --size 1073741824 --parallel 8 --warmup 60 --duration 60 --token-credential + # New: single-shot upload of a buffered payload + - --size 10485760 --parallel 32 --upload-method single + # New: streaming upload for very large payloads (no contiguous buffer) + - --size 5368709120 --parallel 1 --warmup 60 --duration 60 --upload-method stream + - --size 5368709120 --parallel 4 --warmup 60 --duration 60 --upload-method stream + # New: tune chunk size / concurrency for buffer-mode upload + - --size 1073741824 --parallel 4 --warmup 60 --duration 60 --block-size 16777216 --concurrency 16 - Test: list-blobs Class: ListBlob @@ -42,3 +54,6 @@ Tests: - --count 5 --parallel 64 --token-credential - --count 500 --parallel 32 --token-credential - --count 50000 --parallel 32 --warmup 60 --duration 60 --token-credential + # New: --num-blobs is the canonical alias of --count; exercise --page-size 
+ - --num-blobs 50000 --parallel 16 --warmup 60 --duration 60 --page-size 1000 + - --num-blobs 50000 --parallel 16 --warmup 60 --duration 60 --page-size 5000 diff --git a/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt b/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt index 607e6a0237..28a8728fdd 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt +++ b/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt @@ -21,6 +21,7 @@ set( inc/azure/storage/blobs/test/download_blob_test.hpp ${DOWNLOAD_WITH_LIBCURL} inc/azure/storage/blobs/test/list_blob_test.hpp + inc/azure/storage/blobs/test/memory_budget.hpp inc/azure/storage/blobs/test/upload_blob_test.hpp ) diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp index e1871135b7..ed0ba34bd0 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp @@ -10,9 +10,11 @@ #pragma once #include "azure/storage/blobs/test/blob_base_test.hpp" +#include "azure/storage/blobs/test/memory_budget.hpp" #include #include +#include #include #include @@ -23,10 +25,25 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { /** * @brief A test to measure downloading a blob. * + * @details `--download-method` chooses between: + * - `buffer` (default, preserves existing behavior): allocate a contiguous buffer and + * call `DownloadTo(buffer, size)`. Guarded by a `size * parallel` memory-budget + * check. + * - `stream`: stream the response with `Download()` and drain its body stream without + * materializing the payload in RAM. Use for multi-GiB sizes. + * + * `--block-size` and `--concurrency` are forwarded to `DownloadBlobToOptions` for the + * `buffer` method. 
*/ class DownloadBlob : public Azure::Storage::Blobs::Test::BlobsTest { private: std::unique_ptr> m_downloadBuffer; + long m_size = 0; + std::string m_downloadMethod = "buffer"; + long m_blockSize = 0; + int m_concurrency = 0; + + static constexpr size_t StreamDrainBufferSize = 1024 * 1024; public: /** @@ -45,22 +62,64 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { // Call base to create blob client BlobsTest::Setup(); - long size = m_options.GetMandatoryOption("Size"); - - m_downloadBuffer = std::make_unique>(size); - - auto rawData = std::make_unique>(size); - auto content = Azure::Core::IO::MemoryBodyStream(*rawData); - m_blobClient->Upload(content); + m_size = m_options.GetMandatoryOption("Size"); + m_downloadMethod + = m_options.GetOptionOrDefault("DownloadMethod", "buffer"); + m_blockSize = m_options.GetOptionOrDefault("BlockSize", 0); + m_concurrency = m_options.GetOptionOrDefault("Concurrency", 0); + + if (m_downloadMethod == "buffer") + { + CheckMemoryBudget(static_cast(m_size), 1); + m_downloadBuffer = std::make_unique>(m_size); + } + else if (m_downloadMethod != "stream") + { + throw std::runtime_error( + "Invalid --download-method '" + m_downloadMethod + + "'. Expected one of: buffer, stream."); + } + + // Stage the blob with random data. Use the streaming RandomStream so very large + // sizes do not materialize a contiguous staging buffer. 
+      auto staging = Azure::Perf::RandomStream::Create(m_size);
+      m_blobClient->Upload(*staging);
     }
 
     /**
      * @brief Define the test
      *
      */
-    void Run(Azure::Core::Context const&) override
+    void Run(Azure::Core::Context const& context) override
     {
-      m_blobClient->DownloadTo(m_downloadBuffer->data(), m_downloadBuffer->size());
+      if (m_downloadMethod == "stream")
+      {
+        auto response = m_blobClient->Download({}, context);
+        auto& bodyStream = response.Value.BodyStream;
+        if (bodyStream)
+        {
+          thread_local std::vector<uint8_t> buffer(StreamDrainBufferSize); // heap: 1 MiB would overflow worker stacks
+          while (true)
+          {
+            auto read = bodyStream->Read(buffer.data(), buffer.size(), context);
+            if (read == 0)
+            {
+              break;
+            }
+          }
+        }
+        return;
+      }
+      Azure::Storage::Blobs::DownloadBlobToOptions opts;
+      if (m_blockSize > 0)
+      {
+        opts.TransferOptions.ChunkSize = m_blockSize;
+      }
+      if (m_concurrency > 0)
+      {
+        opts.TransferOptions.Concurrency = m_concurrency;
+      }
+      m_blobClient->DownloadTo(m_downloadBuffer->data(), m_downloadBuffer->size(), opts);
     }
 
     /**
@@ -77,7 +136,20 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test {
            {"--token-credential"},
            "Use a token credential to run the test. By default, a connection string is used.",
            0},
-          {"Size", {"--size"}, "Size of payload (in bytes)", 1, true}};
+          {"Size", {"--size"}, "Size of payload (in bytes)", 1, true},
+          {"DownloadMethod",
+           {"--download-method"},
+           "Download method: 'buffer' (default, contiguous buffer via DownloadTo) or "
+           "'stream' (drain the response BodyStream, no contiguous buffer).",
+           1},
+          {"BlockSize",
+           {"--block-size"},
+           "Chunk size (bytes) for buffer-mode DownloadTo. Default: client default.",
+           1},
+          {"Concurrency",
+           {"--concurrency"},
+           "Per-operation concurrency for buffer-mode DownloadTo. 
Default: client default.", + 1}}; } /** diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/list_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/list_blob_test.hpp index 99172b79a9..e20d6ac68a 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/list_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/list_blob_test.hpp @@ -26,6 +26,9 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { * */ class ListBlob : public Azure::Storage::Blobs::Test::BlobsTest { + private: + int m_pageSize = 0; + public: /** * @brief Construct a new ListBlob test. @@ -42,7 +45,18 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { { // Call base to create blob client BlobsTest::Setup(); - long count = m_options.GetMandatoryOption("Count"); + // --num-blobs is the canonical name (matches the Go perf harness); --count is kept + // for backward compatibility with existing test definitions. 
+ long count = 0; + if (m_options.HasOption("NumBlobs")) + { + count = m_options.GetMandatoryOption("NumBlobs"); + } + else + { + count = m_options.GetMandatoryOption("Count"); + } + m_pageSize = m_options.GetOptionOrDefault("PageSize", 0); auto rawData = std::make_unique>(1); auto content = Azure::Core::IO::MemoryBodyStream(*rawData); @@ -62,8 +76,13 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { */ void Run(Azure::Core::Context const& context) override { + Azure::Storage::Blobs::ListBlobsOptions opts; + if (m_pageSize > 0) + { + opts.PageSizeHint = m_pageSize; + } // Loop each page - auto page = m_containerClient->ListBlobs({}, context); + auto page = m_containerClient->ListBlobs(opts, context); for (; page.HasPage(); page.MoveToNextPage(context)) { // loop each blob @@ -87,7 +106,12 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { {"--token-credential"}, "Use a token credential to run the test. By default, a connection string is used.", 0}, - {"Count", {"--count"}, "Number of blobs to list", 1, true}}; + {"Count", {"--count"}, "Number of blobs to list (legacy alias of --num-blobs).", 1}, + {"NumBlobs", {"--num-blobs"}, "Number of blobs to list.", 1}, + {"PageSize", + {"--page-size"}, + "Server page size hint for ListBlobs. Default: server default.", + 1}}; } /** diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp new file mode 100644 index 0000000000..71f8ca9415 --- /dev/null +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp @@ -0,0 +1,103 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * @file + * @brief System-memory budget guard used by buffer-mode upload/download perf tests. 
+ * + * @remark Mirrors the memory-budget guard in the Go perf framework so buffer-mode tests + * fail fast with a clear message when `--size x --parallel` would exceed a fraction of + * the host's available memory, instead of triggering an OOM kill. + */ + +#pragma once + +#include + +#include +#include +#include + +#if defined(AZ_PLATFORM_WINDOWS) +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#if !defined(NOMINMAX) +#define NOMINMAX +#endif +#include +#elif defined(AZ_PLATFORM_LINUX) +#include +#elif defined(AZ_PLATFORM_MAC) +#include +#include +#endif + +namespace Azure { namespace Storage { namespace Blobs { namespace Test { + + /** + * @brief Get the total system memory in bytes. Returns 0 if it cannot be determined. + */ + inline uint64_t GetSystemMemoryBytes() + { +#if defined(AZ_PLATFORM_WINDOWS) + MEMORYSTATUSEX status; + status.dwLength = sizeof(status); + if (GlobalMemoryStatusEx(&status)) + { + return static_cast(status.ullTotalPhys); + } + return 0; +#elif defined(AZ_PLATFORM_LINUX) + long pages = sysconf(_SC_PHYS_PAGES); + long pageSize = sysconf(_SC_PAGE_SIZE); + if (pages > 0 && pageSize > 0) + { + return static_cast(pages) * static_cast(pageSize); + } + return 0; +#elif defined(AZ_PLATFORM_MAC) + int mib[2] = {CTL_HW, HW_MEMSIZE}; + uint64_t mem = 0; + size_t len = sizeof(mem); + if (sysctl(mib, 2, &mem, &len, nullptr, 0) == 0) + { + return mem; + } + return 0; +#else + return 0; +#endif + } + + /** + * @brief Throw `std::runtime_error` when `sizeBytes * parallel` would exceed + * `budgetFraction` of system memory. + * + * @param sizeBytes Per-task buffer size requested by the test. + * @param parallel Number of parallel tasks. + * @param budgetFraction Fraction of system memory we are willing to consume (default + * 0.8, matching the Go perf framework). 
+ */ + inline void CheckMemoryBudget(uint64_t sizeBytes, int parallel, double budgetFraction = 0.8) + { + uint64_t systemBytes = GetSystemMemoryBytes(); + if (systemBytes == 0 || parallel <= 0) + { + return; + } + uint64_t requested = sizeBytes * static_cast(parallel); + uint64_t budget = static_cast(static_cast(systemBytes) * budgetFraction); + if (requested > budget) + { + throw std::runtime_error( + "Requested buffer footprint " + std::to_string(requested) + " bytes (size " + + std::to_string(sizeBytes) + " x parallel " + std::to_string(parallel) + + ") exceeds " + std::to_string(static_cast(budgetFraction * 100)) + + "% of system memory (" + std::to_string(systemBytes) + + " bytes). Use --upload-method stream / --download-method stream, lower --size, " + "or lower --parallel."); + } + } + +}}}} // namespace Azure::Storage::Blobs::Test diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp index b313ff69d9..2d5d65a4f7 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp @@ -10,6 +10,7 @@ #pragma once #include "azure/storage/blobs/test/blob_base_test.hpp" +#include "azure/storage/blobs/test/memory_budget.hpp" #include #include @@ -24,11 +25,27 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { /** * @brief A test to measure uploading a blob. * + * @details Supports three upload methods selected via `--upload-method`: + * - `buffer` (default, preserves existing behavior): build a contiguous in-memory + * payload and call `BlockBlobClient::UploadFrom(buffer, size)`. Guarded by a + * `size * parallel` memory-budget check to avoid OOM kills. 
+ * - `stream`: do not materialize the payload; stream a circular `RandomStream` into + * `BlockBlobClient::Upload(BodyStream)`. Use for multi-GiB sizes. + * - `single`: same as `buffer` but uses the single-shot `Upload(BodyStream)` for the + * in-memory buffer (no chunked staging). Useful to compare buffered vs. chunked + * upload paths. + * + * `--block-size` and `--concurrency` are forwarded to `UploadBlockBlobFromOptions` for + * the `buffer` method. */ class UploadBlob : public Azure::Storage::Blobs::Test::BlobsTest { private: // C++ can upload and download from contiguous memory or file only std::vector m_uploadBuffer; + long m_size = 0; + std::string m_uploadMethod = "buffer"; + long m_blockSize = 0; + int m_concurrency = 0; public: /** @@ -47,8 +64,24 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { // Call base to create blob client BlobsTest::Setup(); - long size = m_options.GetMandatoryOption("Size"); - m_uploadBuffer = Azure::Perf::RandomStream::Create(size)->ReadToEnd(Azure::Core::Context{}); + m_size = m_options.GetMandatoryOption("Size"); + m_uploadMethod = m_options.GetOptionOrDefault("UploadMethod", "buffer"); + m_blockSize = m_options.GetOptionOrDefault("BlockSize", 0); + m_concurrency = m_options.GetOptionOrDefault("Concurrency", 0); + + if (m_uploadMethod == "buffer" || m_uploadMethod == "single") + { + // Allocates a contiguous buffer; guard against OOM on huge sizes. + CheckMemoryBudget(static_cast(m_size), 1); + m_uploadBuffer + = Azure::Perf::RandomStream::Create(m_size)->ReadToEnd(Azure::Core::Context{}); + } + else if (m_uploadMethod != "stream") + { + throw std::runtime_error( + "Invalid --upload-method '" + m_uploadMethod + + "'. 
Expected one of: buffer, stream, single."); + } } /** @@ -57,7 +90,29 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { */ void Run(Azure::Core::Context const&) override { - m_blobClient->UploadFrom(m_uploadBuffer.data(), m_uploadBuffer.size()); + if (m_uploadMethod == "stream") + { + auto stream = Azure::Perf::RandomStream::Create(m_size); + m_blobClient->Upload(*stream); + return; + } + if (m_uploadMethod == "single") + { + auto stream = Azure::Core::IO::MemoryBodyStream(m_uploadBuffer); + m_blobClient->Upload(stream); + return; + } + // Default: buffer (chunked via UploadFrom). + Azure::Storage::Blobs::UploadBlockBlobFromOptions opts; + if (m_blockSize > 0) + { + opts.TransferOptions.ChunkSize = m_blockSize; + } + if (m_concurrency > 0) + { + opts.TransferOptions.Concurrency = m_concurrency; + } + m_blobClient->UploadFrom(m_uploadBuffer.data(), m_uploadBuffer.size(), opts); } /** @@ -73,7 +128,21 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { {"--token-credential"}, "Use a token credential to run the test. By default, a connection string is used.", 0}, - {"Size", {"--size", "-s"}, "Size of payload (in bytes)", 1, true}}; + {"Size", {"--size", "-s"}, "Size of payload (in bytes)", 1, true}, + {"UploadMethod", + {"--upload-method"}, + "Upload method: 'buffer' (default, chunked UploadFrom), 'stream' (Upload " + "BodyStream from a circular RandomStream, no contiguous buffer), or 'single' " + "(single-shot Upload of an in-memory buffer).", + 1}, + {"BlockSize", + {"--block-size"}, + "Chunk size (bytes) for buffer-mode UploadFrom. Default: client default.", + 1}, + {"Concurrency", + {"--concurrency"}, + "Per-operation concurrency for buffer-mode UploadFrom. 
Default: client default.", + 1}}; } /** From 0a9cca84e1cde3a3538c32b04683d779588cbeb5 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 28 Jun 2026 13:39:10 +1000 Subject: [PATCH 2/8] Perf framework: drop versions.hpp / versions.cpp The `=== Versions ===` block didn't actually mirror .NET's PerfProgram.PrintAssemblyVersions: it printed compiler / __cplusplus strings rather than runtime + Azure assembly versions, no caller ever populated the `injectedVersions` extension point, and the data the perf-automation pipeline consumes (the per-test `VCPKG_*_VERSION` lines and the throughput / latency / BenchmarkOutput contracts) is unaffected. The module produced output that no parser reads and that isn't faithful to the framework it claims parity with, so drop it. Also revert the azure-storage-blobs CHANGELOG entry from the previous commit: the perf harness is internal and isn't customer-facing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/core/perf/CMakeLists.txt | 2 - sdk/core/perf/inc/azure/perf.hpp | 1 - sdk/core/perf/inc/azure/perf/versions.hpp | 30 ----------- sdk/core/perf/src/program.cpp | 5 -- sdk/core/perf/src/versions.cpp | 57 -------------------- sdk/storage/azure-storage-blobs/CHANGELOG.md | 1 - 6 files changed, 96 deletions(-) delete mode 100644 sdk/core/perf/inc/azure/perf/versions.hpp delete mode 100644 sdk/core/perf/src/versions.cpp diff --git a/sdk/core/perf/CMakeLists.txt b/sdk/core/perf/CMakeLists.txt index 81ae94ea1b..7a0f358ebc 100644 --- a/sdk/core/perf/CMakeLists.txt +++ b/sdk/core/perf/CMakeLists.txt @@ -27,7 +27,6 @@ set( inc/azure/perf/test.hpp inc/azure/perf/test_metadata.hpp inc/azure/perf/test_options.hpp - inc/azure/perf/versions.hpp ) set( @@ -40,7 +39,6 @@ set( src/program.cpp src/random_stream.cpp src/result_output.cpp - src/versions.cpp ) add_library(azure-perf ${AZURE_PERFORMANCE_HEADER} ${AZURE_PERFORMANCE_SOURCE}) diff --git a/sdk/core/perf/inc/azure/perf.hpp b/sdk/core/perf/inc/azure/perf.hpp index 
bf815b4e78..12ee138464 100644 --- a/sdk/core/perf/inc/azure/perf.hpp +++ b/sdk/core/perf/inc/azure/perf.hpp @@ -21,4 +21,3 @@ #include "azure/perf/test.hpp" #include "azure/perf/test_metadata.hpp" #include "azure/perf/test_options.hpp" -#include "azure/perf/versions.hpp" diff --git a/sdk/core/perf/inc/azure/perf/versions.hpp b/sdk/core/perf/inc/azure/perf/versions.hpp deleted file mode 100644 index ab82890e91..0000000000 --- a/sdk/core/perf/inc/azure/perf/versions.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -/** - * @file - * @brief Print the `=== Versions ===` block at end of a run. - * - */ - -#pragma once - -#include -#include -#include - -namespace Azure { namespace Perf { - - /** - * @brief Print the `=== Versions ===` block to stdout, mirroring the Go perf framework - * output. Lists the compiler/toolchain, optional CMake-injected vcpkg port versions, - * and any caller-supplied package versions. - * - * @param injectedVersions Additional `(name, version)` pairs to include in the block. - * Storage perf executables already print their own `VCPKG_..._VERSION` lines for the - * perf-automation tool; pass anything else worth recording here. - */ - void PrintVersionsBlock( - std::vector> const& injectedVersions = {}); - -}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/program.cpp b/sdk/core/perf/src/program.cpp index cd864e4d3a..9b0007e108 100644 --- a/sdk/core/perf/src/program.cpp +++ b/sdk/core/perf/src/program.cpp @@ -7,7 +7,6 @@ #include "azure/perf/latency_stats.hpp" #include "azure/perf/process_stats.hpp" #include "azure/perf/result_output.hpp" -#include "azure/perf/versions.hpp" #include #include @@ -638,8 +637,4 @@ void Azure::Perf::Program::Run( } test->GlobalCleanup(); } - - // Match .NET PerfProgram.cs: the assembly / runtime versions block is the LAST thing - // printed, after Cleanup, so automation parsers see it as the run terminator. 
- Azure::Perf::PrintVersionsBlock(); } diff --git a/sdk/core/perf/src/versions.cpp b/sdk/core/perf/src/versions.cpp deleted file mode 100644 index 7995541bbc..0000000000 --- a/sdk/core/perf/src/versions.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -#include "azure/perf/versions.hpp" - -#include -#include - -namespace { - -std::string CompilerInfo() -{ - std::ostringstream os; -#if defined(__clang__) - os << "clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__; -#elif defined(__GNUC__) - os << "gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__; -#elif defined(_MSC_VER) - os << "MSVC " << _MSC_VER; -#else - os << "unknown"; -#endif - return os.str(); -} - -std::string LanguageStandard() -{ - std::ostringstream os; -#if defined(__cplusplus) - os << "C++ __cplusplus=" << __cplusplus; -#else - os << "C++ unknown"; -#endif - return os.str(); -} - -} // namespace - -namespace Azure { namespace Perf { - - void PrintVersionsBlock( - std::vector> const& injectedVersions) - { - std::cout << std::endl << "=== Versions ===" << std::endl; - std::cout << "Compiler: " << CompilerInfo() << std::endl; - std::cout << "Language: " << LanguageStandard() << std::endl; - for (auto const& kv : injectedVersions) - { - if (kv.second.empty()) - { - continue; - } - std::cout << kv.first << ": " << kv.second << std::endl; - } - } - -}} // namespace Azure::Perf diff --git a/sdk/storage/azure-storage-blobs/CHANGELOG.md b/sdk/storage/azure-storage-blobs/CHANGELOG.md index cd15b7a8b5..67c57e47f4 100644 --- a/sdk/storage/azure-storage-blobs/CHANGELOG.md +++ b/sdk/storage/azure-storage-blobs/CHANGELOG.md @@ -11,7 +11,6 @@ ### Other Changes - Updated the default concurrency for upload and download operations. It now scales with the number of hardware threads available, clamped between 8 and 96 (previously a fixed value of 5). 
-- Expanded the Storage Blobs performance test surface to align with the .NET and Go perf harnesses: added `--upload-method` (`buffer`/`stream`/`single`), `--download-method` (`buffer`/`stream`), `--block-size`, `--concurrency`, `--num-blobs`, and `--page-size` flags; added a memory-budget guard for buffer-mode tests; new test variants exercising streaming for multi-GiB payloads. ## 12.18.0 (2026-06-11) From 3eef5ec8a7b2f9464111c340c298ec307cbce4a3 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 28 Jun 2026 13:55:21 +1000 Subject: [PATCH 3/8] Storage Blobs perf: drop memory_budget.hpp The guard threw a friendly std::runtime_error when `--size x --parallel` would exceed 80% of system memory, instead of letting buffer-mode allocations OOM-kill the process. Assuming perf runs target hosts with enough memory for the configured size, this is dead defensive code: drop the header and the two `CheckMemoryBudget` calls in upload_blob_test.hpp / download_blob_test.hpp. Oversized buffer-mode runs now fail with std::bad_alloc / OS OOM as they would natively. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../test/perf/CMakeLists.txt | 1 - .../storage/blobs/test/download_blob_test.hpp | 2 - .../storage/blobs/test/memory_budget.hpp | 103 ------------------ .../storage/blobs/test/upload_blob_test.hpp | 3 - 4 files changed, 109 deletions(-) delete mode 100644 sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp diff --git a/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt b/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt index 28a8728fdd..607e6a0237 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt +++ b/sdk/storage/azure-storage-blobs/test/perf/CMakeLists.txt @@ -21,7 +21,6 @@ set( inc/azure/storage/blobs/test/download_blob_test.hpp ${DOWNLOAD_WITH_LIBCURL} inc/azure/storage/blobs/test/list_blob_test.hpp - inc/azure/storage/blobs/test/memory_budget.hpp inc/azure/storage/blobs/test/upload_blob_test.hpp ) diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp index ed0ba34bd0..1058cfb945 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp @@ -10,7 +10,6 @@ #pragma once #include "azure/storage/blobs/test/blob_base_test.hpp" -#include "azure/storage/blobs/test/memory_budget.hpp" #include #include @@ -70,7 +69,6 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { if (m_downloadMethod == "buffer") { - CheckMemoryBudget(static_cast(m_size), 1); m_downloadBuffer = std::make_unique>(m_size); } else if (m_downloadMethod != "stream") diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp 
b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp deleted file mode 100644 index 71f8ca9415..0000000000 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/memory_budget.hpp +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -/** - * @file - * @brief System-memory budget guard used by buffer-mode upload/download perf tests. - * - * @remark Mirrors the memory-budget guard in the Go perf framework so buffer-mode tests - * fail fast with a clear message when `--size x --parallel` would exceed a fraction of - * the host's available memory, instead of triggering an OOM kill. - */ - -#pragma once - -#include - -#include -#include -#include - -#if defined(AZ_PLATFORM_WINDOWS) -#if !defined(WIN32_LEAN_AND_MEAN) -#define WIN32_LEAN_AND_MEAN -#endif -#if !defined(NOMINMAX) -#define NOMINMAX -#endif -#include -#elif defined(AZ_PLATFORM_LINUX) -#include -#elif defined(AZ_PLATFORM_MAC) -#include -#include -#endif - -namespace Azure { namespace Storage { namespace Blobs { namespace Test { - - /** - * @brief Get the total system memory in bytes. Returns 0 if it cannot be determined. 
- */ - inline uint64_t GetSystemMemoryBytes() - { -#if defined(AZ_PLATFORM_WINDOWS) - MEMORYSTATUSEX status; - status.dwLength = sizeof(status); - if (GlobalMemoryStatusEx(&status)) - { - return static_cast(status.ullTotalPhys); - } - return 0; -#elif defined(AZ_PLATFORM_LINUX) - long pages = sysconf(_SC_PHYS_PAGES); - long pageSize = sysconf(_SC_PAGE_SIZE); - if (pages > 0 && pageSize > 0) - { - return static_cast(pages) * static_cast(pageSize); - } - return 0; -#elif defined(AZ_PLATFORM_MAC) - int mib[2] = {CTL_HW, HW_MEMSIZE}; - uint64_t mem = 0; - size_t len = sizeof(mem); - if (sysctl(mib, 2, &mem, &len, nullptr, 0) == 0) - { - return mem; - } - return 0; -#else - return 0; -#endif - } - - /** - * @brief Throw `std::runtime_error` when `sizeBytes * parallel` would exceed - * `budgetFraction` of system memory. - * - * @param sizeBytes Per-task buffer size requested by the test. - * @param parallel Number of parallel tasks. - * @param budgetFraction Fraction of system memory we are willing to consume (default - * 0.8, matching the Go perf framework). - */ - inline void CheckMemoryBudget(uint64_t sizeBytes, int parallel, double budgetFraction = 0.8) - { - uint64_t systemBytes = GetSystemMemoryBytes(); - if (systemBytes == 0 || parallel <= 0) - { - return; - } - uint64_t requested = sizeBytes * static_cast(parallel); - uint64_t budget = static_cast(static_cast(systemBytes) * budgetFraction); - if (requested > budget) - { - throw std::runtime_error( - "Requested buffer footprint " + std::to_string(requested) + " bytes (size " - + std::to_string(sizeBytes) + " x parallel " + std::to_string(parallel) - + ") exceeds " + std::to_string(static_cast(budgetFraction * 100)) - + "% of system memory (" + std::to_string(systemBytes) - + " bytes). 
Use --upload-method stream / --download-method stream, lower --size, " - "or lower --parallel."); - } - } - -}}}} // namespace Azure::Storage::Blobs::Test diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp index 2d5d65a4f7..0e27f7cea6 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/upload_blob_test.hpp @@ -10,7 +10,6 @@ #pragma once #include "azure/storage/blobs/test/blob_base_test.hpp" -#include "azure/storage/blobs/test/memory_budget.hpp" #include #include @@ -71,8 +70,6 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { if (m_uploadMethod == "buffer" || m_uploadMethod == "single") { - // Allocates a contiguous buffer; guard against OOM on huge sizes. - CheckMemoryBudget(static_cast(m_size), 1); m_uploadBuffer = Azure::Perf::RandomStream::Create(m_size)->ReadToEnd(Azure::Core::Context{}); } From 70862b3f81876bde4ce2149371c1f048c959486d Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 28 Jun 2026 13:59:25 +1000 Subject: [PATCH 4/8] Perf: fix stack-overflow risk and stale CPU baseline after sampler reset Addresses two Copilot review comments on PR #7201 that are real bugs: 1. download_blob_test.hpp: the streaming-download per-op loop declared `uint8_t buffer[1024*1024]` on the stack. On Windows the default thread stack is 1 MiB, so a single call already overflows; high `--parallel` makes it worse. Replace with a function-local `static thread_local std::vector`: each worker thread allocates the 1 MiB drain buffer once on the heap and reuses it across operations. 2. 
process_stats.cpp: `ProcessStatsSampler::Reset()` updated members under the mutex but the sampler thread's `Run()` had already cached `previousCpuSeconds` / `previousTime` in locals, so the first sample after reset computed cpuDelta / wall against the pre-reset baseline and reported a wrong CPU%. Reset now stops and restarts the thread, which forces `Run()` to re-read the fresh baselines. Verified: 9/9 perf unit tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/core/perf/src/process_stats.cpp | 12 ++++-------- .../azure/storage/blobs/test/download_blob_test.hpp | 9 +++++---- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/sdk/core/perf/src/process_stats.cpp b/sdk/core/perf/src/process_stats.cpp index eee159b341..fc7ff997e8 100644 --- a/sdk/core/perf/src/process_stats.cpp +++ b/sdk/core/perf/src/process_stats.cpp @@ -123,14 +123,10 @@ namespace Azure { namespace Perf { void ProcessStatsSampler::Reset() { - std::lock_guard lock(m_mutex); - m_startTime = std::chrono::steady_clock::now(); - m_baselineCpuSeconds = SampleCpuSeconds(); - m_haveBaseline = true; - m_lastCpuSeconds = m_baselineCpuSeconds; - m_sampleCount = 0; - m_memoryBytesSum = 0.0; - m_latest = Snapshot{}; + // Stop the sampler thread first so we can re-prime baselines without racing the + // Run() loop, which caches previous* in locals at thread start. 
+ Stop(); + Start(); } #if defined(AZ_PLATFORM_WINDOWS) diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp index 1058cfb945..fba97bfef1 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp @@ -26,8 +26,7 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { * * @details `--download-method` chooses between: * - `buffer` (default, preserves existing behavior): allocate a contiguous buffer and - * call `DownloadTo(buffer, size)`. Guarded by a `size * parallel` memory-budget - * check. + * call `DownloadTo(buffer, size)`. * - `stream`: stream the response with `Download()` and drain its body stream without * materializing the payload in RAM. Use for multi-GiB sizes. * @@ -96,10 +95,12 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { auto& bodyStream = response.Value.BodyStream; if (bodyStream) { - uint8_t buffer[StreamDrainBufferSize]; + // Drain into a thread-local heap buffer; a stack buffer this large would + // overflow the default Windows thread stack (1 MiB) under high --parallel. 
+ static thread_local std::vector buffer(StreamDrainBufferSize); while (true) { - auto read = bodyStream->Read(buffer, sizeof(buffer), context); + auto read = bodyStream->Read(buffer.data(), buffer.size(), context); if (read == 0) { break; From 74545576da5cb92e09bea9717dbe5683fa5f2e16 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 28 Jun 2026 14:12:29 +1000 Subject: [PATCH 5/8] Perf: fix CI validations (ASCII, clang-format, cspell) - Replace em-dashes with -- in two comments - Add 'perfstress' to cspell dictionary (used in BenchmarkDotNet-compatible metric name to match .NET Azure.Test.Perf output) - Apply clang-format to perf framework files Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .vscode/cspell.json | 1 + sdk/core/perf/src/process_stats.cpp | 8 ++------ sdk/core/perf/src/program.cpp | 6 +++--- sdk/core/perf/src/result_output.cpp | 9 ++++----- sdk/core/perf/test/src/result_output_test.cpp | 2 +- .../inc/azure/storage/blobs/test/download_blob_test.hpp | 3 +-- 6 files changed, 12 insertions(+), 17 deletions(-) diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 4f298a27d7..042ece778f 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -226,6 +226,7 @@ "PCERT", "PBYTE", "pdbs", + "perfstress", "phoebusm", "Piotrowski", "pkcs", diff --git a/sdk/core/perf/src/process_stats.cpp b/sdk/core/perf/src/process_stats.cpp index fc7ff997e8..237b564f36 100644 --- a/sdk/core/perf/src/process_stats.cpp +++ b/sdk/core/perf/src/process_stats.cpp @@ -77,8 +77,7 @@ namespace Azure { namespace Perf { double cpuSeconds = SampleCpuSeconds(); uint64_t mem = SampleResidentMemoryBytes(); - double wall - = std::chrono::duration(now - previousTime).count(); + double wall = std::chrono::duration(now - previousTime).count(); double cpuDelta = cpuSeconds - previousCpuSeconds; // Clamp to avoid negative readings if a counter is non-monotonic on some platforms. double cpuPct = (wall > 0) ? 
(std::max)(0.0, (cpuDelta / wall) * 100.0) : 0.0; @@ -242,10 +241,7 @@ namespace Azure { namespace Perf { mach_task_basic_info info; mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; if (task_info( - mach_task_self(), - MACH_TASK_BASIC_INFO, - reinterpret_cast(&info), - &count) + mach_task_self(), MACH_TASK_BASIC_INFO, reinterpret_cast(&info), &count) != KERN_SUCCESS) { return 0; diff --git a/sdk/core/perf/src/program.cpp b/sdk/core/perf/src/program.cpp index 9b0007e108..d7f6f1f3e9 100644 --- a/sdk/core/perf/src/program.cpp +++ b/sdk/core/perf/src/program.cpp @@ -383,7 +383,7 @@ inline void RunTests( if (s.Count > 0) { // Match the .NET Azure.Test.Perf latency distribution exactly: - // format string is `{percentile,7:N3}% {ms,8:N2}ms` — i.e., 7-char-wide + // format string is `{percentile,7:N3}% {ms,8:N2}ms` -- i.e., 7-char-wide // percentile with 3 decimals, then "% ", then 8-char-wide latency with // 2 decimals, then "ms". Reproduce the format here for byte-near parity. struct Row @@ -408,8 +408,8 @@ inline void RunTests( pctSs << std::fixed << std::setprecision(3) << row.Pct; std::ostringstream msSs; msSs << std::fixed << std::setprecision(2) << row.Ms; - std::cout << std::right << std::setw(7) << pctSs.str() << "% " - << std::right << std::setw(8) << msSs.str() << "ms" << std::endl; + std::cout << std::right << std::setw(7) << pctSs.str() << "% " << std::right + << std::setw(8) << msSs.str() << "ms" << std::endl; } } } diff --git a/sdk/core/perf/src/result_output.cpp b/sdk/core/perf/src/result_output.cpp index 344655bce9..9e21f6aee7 100644 --- a/sdk/core/perf/src/result_output.cpp +++ b/sdk/core/perf/src/result_output.cpp @@ -40,15 +40,15 @@ std::string IsoUtcNow() // system_clock typically has microsecond resolution on Windows; pad with trailing zeros. 
auto ticks = duration_cast>>(now - secs).count(); std::time_t tt = system_clock::to_time_t(secs); - std::tm tm {}; + std::tm tm{}; #if defined(_WIN32) gmtime_s(&tm, &tt); #else gmtime_r(&tt, &tm); #endif std::ostringstream os; - os << std::put_time(&tm, "%Y-%m-%dT%H:%M:%S") << "." << std::setw(7) << std::setfill('0') - << ticks << "Z"; + os << std::put_time(&tm, "%Y-%m-%dT%H:%M:%S") << "." << std::setw(7) << std::setfill('0') << ticks + << "Z"; return os.str(); } @@ -95,8 +95,7 @@ namespace Azure { namespace Perf { << "}],\"Measurements\":[{" << "\"Timestamp\":\"" << IsoUtcNow() << "\"," << "\"Name\":\"perfstress/throughput\"," - << "\"Value\":" << json(summary.OperationsPerSecond).dump() - << "}]}"; + << "\"Value\":" << json(summary.OperationsPerSecond).dump() << "}]}"; std::cout << "#StartJobStatistics" << std::endl; std::cout << os.str() << std::endl; std::cout << "#EndJobStatistics" << std::endl; diff --git a/sdk/core/perf/test/src/result_output_test.cpp b/sdk/core/perf/test/src/result_output_test.cpp index 1efd4e7750..95bac92b9f 100644 --- a/sdk/core/perf/test/src/result_output_test.cpp +++ b/sdk/core/perf/test/src/result_output_test.cpp @@ -80,7 +80,7 @@ TEST(result_output, print_job_statistics_matches_dotnet_envelope) EXPECT_NE(out.find("#StartJobStatistics"), std::string::npos); EXPECT_NE(out.find("#EndJobStatistics"), std::string::npos); // Match the .NET BenchmarkOutput envelope AND key order: - // { "Metadata": [...], "Measurements": [...] } — Metadata must appear before Measurements. + // { "Metadata": [...], "Measurements": [...] } -- Metadata must appear before Measurements. 
std::size_t metaPos = out.find("\"Metadata\""); std::size_t measPos = out.find("\"Measurements\""); EXPECT_NE(metaPos, std::string::npos); diff --git a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp index fba97bfef1..9686c5f4ee 100644 --- a/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp +++ b/sdk/storage/azure-storage-blobs/test/perf/inc/azure/storage/blobs/test/download_blob_test.hpp @@ -61,8 +61,7 @@ namespace Azure { namespace Storage { namespace Blobs { namespace Test { BlobsTest::Setup(); m_size = m_options.GetMandatoryOption("Size"); - m_downloadMethod - = m_options.GetOptionOrDefault("DownloadMethod", "buffer"); + m_downloadMethod = m_options.GetOptionOrDefault("DownloadMethod", "buffer"); m_blockSize = m_options.GetOptionOrDefault("BlockSize", 0); m_concurrency = m_options.GetOptionOrDefault("Concurrency", 0); From ce302f22ae52e7961dc7abbffad9b2e13a9f1832 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Tue, 30 Jun 2026 21:37:26 +1000 Subject: [PATCH 6/8] Address review feedback: drop --sync, --job-statistics alias, and CPU/memory sampler Per @jalauzon-msft review on PR #7201: - Remove --sync (parsed-and-ignored option had no behavior). PerfAutomation is updated separately to set NoSync=true for the Cpp language so it never appends --sync to test arguments. - Remove the --job-statistics bare-switch alias; keep --statistics <0|1> which is what perf-automation actually invokes. - Remove the CPU/memory sampler (ProcessStatsSampler) and the associated ' Memory(MiB)' / '% CPU' columns; perf-automation tracks the process itself, so per-run sampling in C++ added complexity without value. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/core/perf/CMakeLists.txt | 2 - sdk/core/perf/inc/azure/perf.hpp | 1 - sdk/core/perf/inc/azure/perf/options.hpp | 6 - .../perf/inc/azure/perf/process_stats.hpp | 107 -------- .../perf/inc/azure/perf/result_output.hpp | 3 - sdk/core/perf/src/arg_parser.cpp | 9 - sdk/core/perf/src/options.cpp | 11 +- sdk/core/perf/src/process_stats.cpp | 256 ------------------ sdk/core/perf/src/program.cpp | 49 +--- sdk/core/perf/test/CMakeLists.txt | 1 - sdk/core/perf/test/src/process_stats_test.cpp | 45 --- 11 files changed, 8 insertions(+), 482 deletions(-) delete mode 100644 sdk/core/perf/inc/azure/perf/process_stats.hpp delete mode 100644 sdk/core/perf/src/process_stats.cpp delete mode 100644 sdk/core/perf/test/src/process_stats_test.cpp diff --git a/sdk/core/perf/CMakeLists.txt b/sdk/core/perf/CMakeLists.txt index 7a0f358ebc..a138164474 100644 --- a/sdk/core/perf/CMakeLists.txt +++ b/sdk/core/perf/CMakeLists.txt @@ -20,7 +20,6 @@ set( inc/azure/perf/dynamic_test_options.hpp inc/azure/perf/latency_stats.hpp inc/azure/perf/options.hpp - inc/azure/perf/process_stats.hpp inc/azure/perf/program.hpp inc/azure/perf/random_stream.hpp inc/azure/perf/result_output.hpp @@ -35,7 +34,6 @@ set( src/base_test.cpp src/latency_stats.cpp src/options.cpp - src/process_stats.cpp src/program.cpp src/random_stream.cpp src/result_output.cpp diff --git a/sdk/core/perf/inc/azure/perf.hpp b/sdk/core/perf/inc/azure/perf.hpp index 12ee138464..038a1eedfd 100644 --- a/sdk/core/perf/inc/azure/perf.hpp +++ b/sdk/core/perf/inc/azure/perf.hpp @@ -14,7 +14,6 @@ #include "azure/perf/dynamic_test_options.hpp" #include "azure/perf/latency_stats.hpp" #include "azure/perf/options.hpp" -#include "azure/perf/process_stats.hpp" #include "azure/perf/program.hpp" #include "azure/perf/random_stream.hpp" #include "azure/perf/result_output.hpp" diff --git a/sdk/core/perf/inc/azure/perf/options.hpp b/sdk/core/perf/inc/azure/perf/options.hpp index 
2629afd15c..e635b79998 100644 --- a/sdk/core/perf/inc/azure/perf/options.hpp +++ b/sdk/core/perf/inc/azure/perf/options.hpp @@ -124,12 +124,6 @@ namespace Azure { namespace Perf { */ std::string ResultsFile; - /** - * @brief Runs the sync version of the test. Not currently implemented for C++. - * - */ - bool Sync = false; - /** * @brief Create an array of the performance framework options. * diff --git a/sdk/core/perf/inc/azure/perf/process_stats.hpp b/sdk/core/perf/inc/azure/perf/process_stats.hpp deleted file mode 100644 index 440f0e9392..0000000000 --- a/sdk/core/perf/inc/azure/perf/process_stats.hpp +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -/** - * @file - * @brief Cross-platform CPU and resident-memory sampler for the perf framework. - * - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace Azure { namespace Perf { - - /** - * @brief Periodically samples process-wide CPU% and resident memory in a background - * thread. Snapshots the running average and the last instantaneous values. - * - * @remark Designed to match the always-on sampler added in the Go perf framework, so - * the live status line and the run summary expose `CPU` (percent) and `Memory(MiB)` - * columns across all language SDKs. - * - */ - class ProcessStatsSampler { - public: - /** - * @brief A point-in-time snapshot of CPU usage and resident memory. - * - */ - struct Snapshot - { - /// CPU percent of all cores combined, e.g. 250.0 means 2.5 cores busy. Never negative. - double CpuPercent = 0.0; - /// Resident memory in bytes (Working Set / RSS). - uint64_t MemoryBytes = 0; - }; - - /** - * @brief Construct a sampler with a fixed sample interval. - * - * @param interval Time between samples. Defaults to 1 second. 
- */ - explicit ProcessStatsSampler(std::chrono::milliseconds interval = std::chrono::seconds(1)); - - ~ProcessStatsSampler(); - - ProcessStatsSampler(ProcessStatsSampler const&) = delete; - ProcessStatsSampler& operator=(ProcessStatsSampler const&) = delete; - - /** - * @brief Start sampling in a background thread. Safe to call multiple times; later - * calls are no-ops while a sampler thread is running. - */ - void Start(); - - /** - * @brief Stop sampling. Joins the background thread. Safe to call multiple times. - */ - void Stop(); - - /** - * @brief Get the most recent sample (CPU percent, memory bytes). - * - */ - Snapshot Latest() const; - - /** - * @brief Get the average CPU% and average memory bytes across all samples taken so - * far. CPU% is computed from cumulative CPU-seconds against wall-clock seconds; memory - * is the arithmetic mean of all samples. - * - */ - Snapshot Average() const; - - /** - * @brief Reset all accumulated samples. Useful between iterations. - * - */ - void Reset(); - - private: - void Run(); - static double SampleCpuSeconds(); - static uint64_t SampleResidentMemoryBytes(); - - std::chrono::milliseconds m_interval; - std::atomic m_stop{false}; - std::thread m_thread; - - mutable std::mutex m_mutex; - // Sampling state - bool m_haveBaseline = false; - double m_baselineCpuSeconds = 0.0; - std::chrono::steady_clock::time_point m_startTime; - Snapshot m_latest; - // Running averages - uint64_t m_sampleCount = 0; - double m_memoryBytesSum = 0.0; - double m_lastCpuSeconds = 0.0; - }; - -}} // namespace Azure::Perf diff --git a/sdk/core/perf/inc/azure/perf/result_output.hpp b/sdk/core/perf/inc/azure/perf/result_output.hpp index c9e6978efb..7c78bffb89 100644 --- a/sdk/core/perf/inc/azure/perf/result_output.hpp +++ b/sdk/core/perf/inc/azure/perf/result_output.hpp @@ -10,7 +10,6 @@ #pragma once #include "azure/perf/latency_stats.hpp" -#include "azure/perf/process_stats.hpp" #include #include @@ -34,8 +33,6 @@ namespace Azure { namespace 
Perf { double WeightedAverageSeconds = 0; double OperationsPerSecond = 0; double SecondsPerOperation = 0; - double AverageCpuPercent = 0; - uint64_t AverageMemoryBytes = 0; LatencyCollector::Summary Latency; std::vector> LatencyByCallType; }; diff --git a/sdk/core/perf/src/arg_parser.cpp b/sdk/core/perf/src/arg_parser.cpp index a674e70f98..36ff216981 100644 --- a/sdk/core/perf/src/arg_parser.cpp +++ b/sdk/core/perf/src/arg_parser.cpp @@ -70,11 +70,6 @@ Azure::Perf::GlobalTestOptions Azure::Perf::Program::ArgParser::Parse( { options.JobStatistics = parsedArgs["JobStatistics"].as(); } - // .NET-compatible bare-switch alias --job-statistics; presence implies true. - if (parsedArgs["JobStatisticsSwitch"]) - { - options.JobStatistics = true; - } if (parsedArgs["Latency"]) { options.Latency = parsedArgs["Latency"].as(); @@ -121,10 +116,6 @@ Azure::Perf::GlobalTestOptions Azure::Perf::Program::ArgParser::Parse( { options.ResultsFile = parsedArgs["ResultsFile"].as(); } - if (parsedArgs["Sync"]) - { - options.Sync = true; - } return options; } diff --git a/sdk/core/perf/src/options.cpp b/sdk/core/perf/src/options.cpp index c666711faf..ecc239f544 100644 --- a/sdk/core/perf/src/options.cpp +++ b/sdk/core/perf/src/options.cpp @@ -18,8 +18,7 @@ void Azure::Perf::to_json(Azure::Core::Json::_internal::json& j, const GlobalTes {"Parallel", p.Parallel}, {"Warmup", p.Warmup}, {"StatusInterval", p.StatusInterval}, - {"ResultsFile", p.ResultsFile.empty() ? "N/A" : p.ResultsFile}, - {"Sync", p.Sync}}; + {"ResultsFile", p.ResultsFile.empty() ? 
"N/A" : p.ResultsFile}}; if (p.Port) { j["Port"] = p.Port.Value(); @@ -59,7 +58,6 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe [Option('p', "parallel", Default = 1, HelpText = "Number of operations to execute in parallel")] [Option("port", HelpText = "Port to redirect HTTP requests")] [Option('r', "rate", HelpText = "Target throughput (ops/sec)")] - [Option("sync", HelpText = "Runs sync version of test")] -- Not supported [Option('w', "warmup", Default = 5, HelpText = "Duration of warmup in seconds")] [Option('x', "proxy", Default = "", HelpText = "Proxy server")] */ @@ -76,12 +74,6 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe "Number of iterations of main test loop. Default to 1.", 1}, {"JobStatistics", {"--statistics"}, "Print job statistics. Default to false", 1}, - // .NET-compatible bare-switch alias for --statistics. When present, sets - // JobStatistics=true regardless of any --statistics value parsed. - {"JobStatisticsSwitch", - {"--job-statistics"}, - "Print job statistics (bare switch, matches .NET --job-statistics).", - 0}, {"Latency", {"-l", "--latency"}, "Track and print per-operation latency statistics. Default to false.", @@ -99,7 +91,6 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe {"Port", {"--port"}, "Port to redirect HTTP requests. Default to no redirection.", 1}, {"Rate", {"-r", "--rate"}, "Target throughput (ops/sec). Default to no throughput.", 1}, - {"Sync", {"-y", "--sync"}, "Runs sync version of test, not implemented", 0}, {"TestProxies", {"-x", "--test-proxies"}, "URIs of TestProxy Servers (separated by ';')", 1}, {"Warmup", {"-w", "--warmup"}, "Duration of warmup in seconds. Default to 5 seconds.", 1}, {"StatusInterval", diff --git a/sdk/core/perf/src/process_stats.cpp b/sdk/core/perf/src/process_stats.cpp deleted file mode 100644 index 237b564f36..0000000000 --- a/sdk/core/perf/src/process_stats.cpp +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) Microsoft Corporation. 
-// Licensed under the MIT License. - -#include "azure/perf/process_stats.hpp" - -#include - -#if defined(AZ_PLATFORM_WINDOWS) -#if !defined(WIN32_LEAN_AND_MEAN) -#define WIN32_LEAN_AND_MEAN -#endif -#if !defined(NOMINMAX) -#define NOMINMAX -#endif -#include -// psapi.h must follow windows.h -#include -#elif defined(AZ_PLATFORM_LINUX) -#include -#include -#include -#include -#elif defined(AZ_PLATFORM_MAC) -#include -#include -#endif - -#include - -namespace Azure { namespace Perf { - - ProcessStatsSampler::ProcessStatsSampler(std::chrono::milliseconds interval) - : m_interval(interval) - { - } - - ProcessStatsSampler::~ProcessStatsSampler() { Stop(); } - - void ProcessStatsSampler::Start() - { - if (m_thread.joinable()) - { - return; - } - m_stop.store(false); - { - std::lock_guard lock(m_mutex); - m_startTime = std::chrono::steady_clock::now(); - m_baselineCpuSeconds = SampleCpuSeconds(); - m_haveBaseline = true; - m_lastCpuSeconds = m_baselineCpuSeconds; - m_sampleCount = 0; - m_memoryBytesSum = 0.0; - m_latest = Snapshot{}; - } - m_thread = std::thread(&ProcessStatsSampler::Run, this); - } - - void ProcessStatsSampler::Stop() - { - if (!m_thread.joinable()) - { - return; - } - m_stop.store(true); - m_thread.join(); - } - - void ProcessStatsSampler::Run() - { - auto previousCpuSeconds = m_baselineCpuSeconds; - auto previousTime = m_startTime; - while (!m_stop.load()) - { - std::this_thread::sleep_for(m_interval); - auto now = std::chrono::steady_clock::now(); - double cpuSeconds = SampleCpuSeconds(); - uint64_t mem = SampleResidentMemoryBytes(); - - double wall = std::chrono::duration(now - previousTime).count(); - double cpuDelta = cpuSeconds - previousCpuSeconds; - // Clamp to avoid negative readings if a counter is non-monotonic on some platforms. - double cpuPct = (wall > 0) ? 
(std::max)(0.0, (cpuDelta / wall) * 100.0) : 0.0; - - { - std::lock_guard lock(m_mutex); - m_latest.CpuPercent = cpuPct; - m_latest.MemoryBytes = mem; - m_sampleCount += 1; - m_memoryBytesSum += static_cast(mem); - m_lastCpuSeconds = cpuSeconds; - } - previousCpuSeconds = cpuSeconds; - previousTime = now; - } - } - - ProcessStatsSampler::Snapshot ProcessStatsSampler::Latest() const - { - std::lock_guard lock(m_mutex); - return m_latest; - } - - ProcessStatsSampler::Snapshot ProcessStatsSampler::Average() const - { - std::lock_guard lock(m_mutex); - Snapshot avg; - auto now = std::chrono::steady_clock::now(); - double wall = std::chrono::duration(now - m_startTime).count(); - if (m_haveBaseline && wall > 0) - { - double cpuDelta = m_lastCpuSeconds - m_baselineCpuSeconds; - avg.CpuPercent = (std::max)(0.0, (cpuDelta / wall) * 100.0); - } - if (m_sampleCount > 0) - { - avg.MemoryBytes - = static_cast(m_memoryBytesSum / static_cast(m_sampleCount)); - } - return avg; - } - - void ProcessStatsSampler::Reset() - { - // Stop the sampler thread first so we can re-prime baselines without racing the - // Run() loop, which caches previous* in locals at thread start. - Stop(); - Start(); - } - -#if defined(AZ_PLATFORM_WINDOWS) - double ProcessStatsSampler::SampleCpuSeconds() - { - FILETIME creation, exitTime, kernel, user; - if (!GetProcessTimes(GetCurrentProcess(), &creation, &exitTime, &kernel, &user)) - { - return 0.0; - } - auto toSeconds = [](FILETIME const& ft) { - ULARGE_INTEGER u; - u.LowPart = ft.dwLowDateTime; - u.HighPart = ft.dwHighDateTime; - // FILETIME is in 100-ns units. 
- return static_cast(u.QuadPart) / 1.0e7; - }; - return toSeconds(kernel) + toSeconds(user); - } - - uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() - { - PROCESS_MEMORY_COUNTERS pmc; - if (!GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc))) - { - return 0; - } - return static_cast(pmc.WorkingSetSize); - } -#elif defined(AZ_PLATFORM_LINUX) - double ProcessStatsSampler::SampleCpuSeconds() - { - std::ifstream stat("/proc/self/stat"); - if (!stat.is_open()) - { - return 0.0; - } - std::string content; - std::getline(stat, content); - // /proc/self/stat fields are space-separated, but the second field (comm) may contain - // spaces and is wrapped in parentheses. Skip past it before splitting. - auto rp = content.rfind(')'); - if (rp == std::string::npos) - { - return 0.0; - } - std::istringstream iss(content.substr(rp + 1)); - std::string token; - // After the ')', the next field is field 3 ('state'); CPU times are fields 14 (utime) - // and 15 (stime), i.e. tokens 12 and 13 (0-indexed) of the remainder. 
- unsigned long utime = 0, stime = 0; - for (int i = 0; i < 14; ++i) - { - if (!(iss >> token)) - { - return 0.0; - } - if (i == 11) - { - utime = std::stoul(token); - } - else if (i == 12) - { - stime = std::stoul(token); - } - } - long hz = sysconf(_SC_CLK_TCK); - if (hz <= 0) - { - hz = 100; - } - return static_cast(utime + stime) / static_cast(hz); - } - - uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() - { - std::ifstream status("/proc/self/status"); - if (!status.is_open()) - { - return 0; - } - std::string line; - while (std::getline(status, line)) - { - if (line.rfind("VmRSS:", 0) == 0) - { - std::istringstream iss(line.substr(6)); - unsigned long kb = 0; - std::string unit; - iss >> kb >> unit; - return static_cast(kb) * 1024ULL; - } - } - return 0; - } -#elif defined(AZ_PLATFORM_MAC) - double ProcessStatsSampler::SampleCpuSeconds() - { - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) != 0) - { - return 0.0; - } - double user = static_cast(ru.ru_utime.tv_sec) - + static_cast(ru.ru_utime.tv_usec) / 1.0e6; - double sys = static_cast(ru.ru_stime.tv_sec) - + static_cast(ru.ru_stime.tv_usec) / 1.0e6; - return user + sys; - } - - uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() - { - mach_task_basic_info info; - mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; - if (task_info( - mach_task_self(), MACH_TASK_BASIC_INFO, reinterpret_cast(&info), &count) - != KERN_SUCCESS) - { - return 0; - } - return static_cast(info.resident_size); - } -#else - double ProcessStatsSampler::SampleCpuSeconds() { return 0.0; } - uint64_t ProcessStatsSampler::SampleResidentMemoryBytes() { return 0; } -#endif - -}} // namespace Azure::Perf diff --git a/sdk/core/perf/src/program.cpp b/sdk/core/perf/src/program.cpp index d7f6f1f3e9..06f0132ca1 100644 --- a/sdk/core/perf/src/program.cpp +++ b/sdk/core/perf/src/program.cpp @@ -5,7 +5,6 @@ #include "azure/perf/argagg.hpp" #include "azure/perf/latency_stats.hpp" -#include "azure/perf/process_stats.hpp" 
#include "azure/perf/result_output.hpp" #include @@ -240,7 +239,6 @@ inline void RunTests( std::vector> const& tests, Azure::Perf::GlobalTestOptions const& options, std::string const& title, - Azure::Perf::ProcessStatsSampler* sampler, Azure::Perf::LatencyCollector* latencyCollector, Azure::Perf::RunSummary* outSummary, bool warmup = false) @@ -258,10 +256,6 @@ inline void RunTests( { latencyCollector->Reset(); } - if (!warmup && sampler != nullptr) - { - sampler->Reset(); - } /********************* Progress Reporter ******************************/ Azure::Core::Context progressToken; @@ -272,11 +266,10 @@ inline void RunTests( &lastCompletionTimes, &lastCompleted, &progressToken, - sampler, statusInterval]() { std::cout << std::endl << "=== " << title << " ===" << std::endl - << "Current\t\tTotal\t\tAverage\t\tCPU\t\tMemory(MiB)" << std::endl; + << "Current\t\tTotal\t\tAverage" << std::endl; while (!progressToken.IsCancelled()) { std::this_thread::sleep_for(std::chrono::seconds(statusInterval)); @@ -284,16 +277,7 @@ inline void RunTests( auto current = total - lastCompleted; auto avg = Sum(ZipAvg(completedOperations, lastCompletionTimes)); lastCompleted = total; - double cpuPct = 0; - double memMiB = 0; - if (sampler != nullptr) - { - auto snap = sampler->Latest(); - cpuPct = snap.CpuPercent; - memMiB = static_cast(snap.MemoryBytes) / (1024.0 * 1024.0); - } - std::cout << current << "\t\t" << total << "\t\t" << avg << "\t\t" << cpuPct << "\t\t" - << memMiB << std::endl; + std::cout << current << "\t\t" << total << "\t\t" << avg << std::endl; } }); @@ -346,20 +330,14 @@ inline void RunTests( auto secondsPerOperation = 1 / operationsPerSecond; auto weightedAverageSeconds = totalOperations / operationsPerSecond; - // Append `, NN.NN% CPU` inside the parens to match the .NET results-line format that - // perf-automation downstream parsers may key on. The leading `(...) ops/s` substring is - // preserved verbatim so Cpp.cs's existing ops/s regex still matches. 
- double resultsCpuPercent = 0; - if (sampler != nullptr) - { - resultsCpuPercent = sampler->Average().CpuPercent; - } + // Match the established `Completed N operations in a weighted-average of Ts (X ops/s, + // Y s/op)` line format that downstream tools (Cpp.cs's ops/s regex) key off. std::cout << std::endl << "Completed " << FormatNumber(totalOperations, false) << " operations in a weighted-average of " << FormatNumber(weightedAverageSeconds, false) << "s (" - << FormatNumber(operationsPerSecond) << " ops/s, " << secondsPerOperation << " s/op, " - << resultsCpuPercent << "% CPU)" << std::endl + << FormatNumber(operationsPerSecond) << " ops/s, " << secondsPerOperation << " s/op)" + << std::endl << std::endl; if (!warmup && outSummary != nullptr) @@ -368,12 +346,6 @@ inline void RunTests( outSummary->OperationsPerSecond = operationsPerSecond; outSummary->SecondsPerOperation = secondsPerOperation; outSummary->WeightedAverageSeconds = weightedAverageSeconds; - if (sampler != nullptr) - { - auto avg = sampler->Average(); - outSummary->AverageCpuPercent = avg.CpuPercent; - outSummary->AverageMemoryBytes = avg.MemoryBytes; - } if (recordLatency && latencyCollector != nullptr) { outSummary->Latency = latencyCollector->Summarize(); @@ -485,10 +457,6 @@ void Azure::Perf::Program::Run( } } - /******************** Always-on CPU/memory sampler ****************/ - Azure::Perf::ProcessStatsSampler sampler; - sampler.Start(); - /******************** Per-run latency collector (when --latency) ****************/ Azure::Perf::LatencyCollector latencyCollector; @@ -536,7 +504,7 @@ void Azure::Perf::Program::Run( /******************** WarmUp ******************************/ if (options.Warmup) { - RunTests(context, parallelTest, options, "Warmup", &sampler, nullptr, nullptr, true); + RunTests(context, parallelTest, options, "Warmup", nullptr, nullptr, true); } /******************** Tests ******************************/ @@ -558,14 +526,11 @@ void Azure::Perf::Program::Run( 
parallelTest, options, "Test" + iterationInfo, - &sampler, options.Latency ? &latencyCollector : nullptr, &finalSummary); } /******************** End-of-run artifacts ************************/ - sampler.Stop(); - if (options.Latency && !options.ResultsFile.empty()) { // Match the .NET `--results-file` shape: an array of OperationResult { Time, Size }. diff --git a/sdk/core/perf/test/CMakeLists.txt b/sdk/core/perf/test/CMakeLists.txt index a9120a2079..d764cfefc2 100644 --- a/sdk/core/perf/test/CMakeLists.txt +++ b/sdk/core/perf/test/CMakeLists.txt @@ -20,7 +20,6 @@ include(GoogleTest) add_executable ( azure-perf-unit-test src/latency_stats_test.cpp - src/process_stats_test.cpp src/random_stream_test.cpp src/result_output_test.cpp ) diff --git a/sdk/core/perf/test/src/process_stats_test.cpp b/sdk/core/perf/test/src/process_stats_test.cpp deleted file mode 100644 index 37180b3eb1..0000000000 --- a/sdk/core/perf/test/src/process_stats_test.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -#include - -#include -#include - -#include - -using Azure::Perf::ProcessStatsSampler; - -TEST(process_stats, start_stop) -{ - ProcessStatsSampler s(std::chrono::milliseconds(50)); - s.Start(); - // Burn a little CPU so there is something to sample. - auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(250); - volatile uint64_t x = 0; - while (std::chrono::steady_clock::now() < deadline) - { - for (int i = 0; i < 10000; ++i) - { - x += i; - } - } - s.Stop(); - auto avg = s.Average(); - // CPU and memory must be non-negative; we cannot assert tighter bounds in CI. - EXPECT_GE(avg.CpuPercent, 0.0); - // MemoryBytes is unsigned; just sanity-check accessor. 
- (void)avg.MemoryBytes; -} - -TEST(process_stats, reset_clears) -{ - ProcessStatsSampler s(std::chrono::milliseconds(50)); - s.Start(); - std::this_thread::sleep_for(std::chrono::milliseconds(150)); - s.Stop(); - s.Reset(); - auto avg = s.Average(); - EXPECT_DOUBLE_EQ(avg.CpuPercent, 0.0); - EXPECT_EQ(avg.MemoryBytes, 0u); -} From d15a4158ebb410d8ef9fe95f18198ce76724e908 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Tue, 30 Jun 2026 21:43:01 +1000 Subject: [PATCH 7/8] perf: accept --sync as a no-op flag for cross-language CLI compatibility PerfAutomation appends '--sync' to test runs for sync-only languages. C++ has no async variant, but the driver still passes --sync, so the perf binary must accept it. Register --sync as a bare switch that is parsed and intentionally ignored, with no corresponding Sync field on GlobalTestOptions (so it doesn't show up in the JSON options dump or anywhere else). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/core/perf/src/options.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/core/perf/src/options.cpp b/sdk/core/perf/src/options.cpp index ecc239f544..c80ee0c27e 100644 --- a/sdk/core/perf/src/options.cpp +++ b/sdk/core/perf/src/options.cpp @@ -91,6 +91,10 @@ std::vector Azure::Perf::GlobalTestOptions::GetOptionMe {"Port", {"--port"}, "Port to redirect HTTP requests. Default to no redirection.", 1}, {"Rate", {"-r", "--rate"}, "Target throughput (ops/sec). Default to no throughput.", 1}, + // Accepted for cross-language CLI compatibility (perf-automation appends --sync for + // sync-only languages). C++ is sync-only and has no async variant, so the flag is + // parsed and intentionally ignored. + {"Sync", {"-y", "--sync"}, "Accepted for compatibility; ignored (C++ is sync-only).", 0}, {"TestProxies", {"-x", "--test-proxies"}, "URIs of TestProxy Servers (separated by ';')", 1}, {"Warmup", {"-w", "--warmup"}, "Duration of warmup in seconds. 
Default to 5 seconds.", 1}, {"StatusInterval", From b8b54c9b05b49839566c956bb171e58ddcdc2ae0 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Tue, 30 Jun 2026 23:22:49 +1000 Subject: [PATCH 8/8] Revert perf-tests.yml additions to avoid confusion The new --upload-method/--download-method/--block-size/--concurrency/--page-size flags are available on the binaries but should be tuned per host, not baked into the CI matrix. Keep perf-tests.yml at the pre-PR baseline. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/storage/azure-storage-blobs/perf-tests.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/sdk/storage/azure-storage-blobs/perf-tests.yml b/sdk/storage/azure-storage-blobs/perf-tests.yml index 2113439fe3..0fd2a358b7 100644 --- a/sdk/storage/azure-storage-blobs/perf-tests.yml +++ b/sdk/storage/azure-storage-blobs/perf-tests.yml @@ -20,11 +20,6 @@ Tests: - --size 10485760 --parallel 32 --token-credential - --size 1073741824 --parallel 1 --warmup 60 --duration 60 --token-credential - --size 1073741824 --parallel 8 --warmup 60 --duration 60 --token-credential - # New: streaming download for very large payloads (no contiguous buffer) - - --size 5368709120 --parallel 1 --warmup 60 --duration 60 --download-method stream - - --size 5368709120 --parallel 4 --warmup 60 --duration 60 --download-method stream - # New: tune chunk size / concurrency for buffer-mode download - - --size 1073741824 --parallel 4 --warmup 60 --duration 60 --block-size 16777216 --concurrency 16 - Test: upload Class: UploadBlob @@ -37,13 +32,6 @@ Tests: - --size 10485760 --parallel 32 --token-credential - --size 1073741824 --parallel 1 --warmup 60 --duration 60 --token-credential - --size 1073741824 --parallel 8 --warmup 60 --duration 60 --token-credential - # New: single-shot upload of a buffered payload - - --size 10485760 --parallel 32 --upload-method single - # New: streaming upload for very large payloads (no contiguous buffer) - - --size 
5368709120 --parallel 1 --warmup 60 --duration 60 --upload-method stream - - --size 5368709120 --parallel 4 --warmup 60 --duration 60 --upload-method stream - # New: tune chunk size / concurrency for buffer-mode upload - - --size 1073741824 --parallel 4 --warmup 60 --duration 60 --block-size 16777216 --concurrency 16 - Test: list-blobs Class: ListBlob @@ -54,6 +42,3 @@ Tests: - --count 5 --parallel 64 --token-credential - --count 500 --parallel 32 --token-credential - --count 50000 --parallel 32 --warmup 60 --duration 60 --token-credential - # New: --num-blobs is the canonical alias of --count; exercise --page-size - - --num-blobs 50000 --parallel 16 --warmup 60 --duration 60 --page-size 1000 - - --num-blobs 50000 --parallel 16 --warmup 60 --duration 60 --page-size 5000