diff --git a/src/iceberg/logging/cerr_logger.cc b/src/iceberg/logging/cerr_logger.cc index 0856d963f..4e4c98fcf 100644 --- a/src/iceberg/logging/cerr_logger.cc +++ b/src/iceberg/logging/cerr_logger.cc @@ -39,20 +39,20 @@ std::string_view Basename(std::string_view path) noexcept { } /// \brief Format a record into a single newline-terminated line. -std::string FormatLine(const LogMessage& message) { +std::string FormatLine(const LogRecord& record) { auto now = std::chrono::floor(std::chrono::system_clock::now()); return std::format("{:%Y-%m-%dT%H:%M:%S}Z {} [{}] [{}:{}] {}\n", now, - ToString(message.level), OsThreadId(), - Basename(message.location.file_name()), message.location.line(), - message.message); + ToString(record.level), OsThreadId(), + Basename(record.location.file_name()), record.location.line(), + record.message); } } // namespace -void CerrLogger::Log(LogMessage&& message) noexcept { +void CerrLogger::Log(const LogRecord& record) noexcept { try { - std::string line = FormatLine(message); + std::string line = FormatLine(record); std::lock_guard lock(mutex_); std::cerr << line; } catch (...) { diff --git a/src/iceberg/logging/cerr_logger.h b/src/iceberg/logging/cerr_logger.h index 334f20391..e9a023349 100644 --- a/src/iceberg/logging/cerr_logger.h +++ b/src/iceberg/logging/cerr_logger.h @@ -44,7 +44,7 @@ class ICEBERG_EXPORT CerrLogger : public Logger { bool ShouldLog(LogLevel level) const noexcept override { return level >= level_.load(std::memory_order_relaxed); } - void Log(LogMessage&& message) noexcept override; + void Log(const LogRecord& record) noexcept override; void SetLevel(LogLevel level) noexcept override { level_.store(level, std::memory_order_relaxed); } diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index a8db67ca9..f19c878bc 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include @@ -36,7 +38,7 @@ namespace { class NoopLogger final : public Logger { public: bool ShouldLog(LogLevel /*level*/) const noexcept override { return false; } - void Log(LogMessage&& /*message*/) noexcept override {} + void Log(const LogRecord& /*record*/) noexcept override {} void SetLevel(LogLevel /*level*/) noexcept override {} LogLevel level() const noexcept override { return LogLevel::kOff; } bool IsNoop() const override { return true; } @@ -178,22 +180,25 @@ void RestoreThreadOverride(std::shared_ptr prev) noexcept { cache->override_ = std::move(prev); } +std::string& FormatBuffer() noexcept { + static thread_local std::string buf; + return buf; +} + void Emit(Logger& logger, LogLevel level, const std::source_location& location, - std::string&& message) { - logger.Log(LogMessage{.level = level, - .message = std::move(message), - .location = location, - .attributes = {}}); + std::string_view message) { + logger.Log(LogRecord{ + .level = level, .message = message, .location = location, .attributes = {}}); } void EmitFormatError(Logger& logger, LogLevel level, const std::source_location& location) noexcept { - // Fixed short literal (<= 15 bytes, fits SSO on libstdc++/libc++/MSVC -> no heap - // allocation), no std::format, no retry. Cannot throw or recurse. - logger.Log(LogMessage{.level = level, - .message = std::string(""), - .location = location, - .attributes = {}}); + // Fixed string-view literal: no std::format, no allocation, no buffer -- safe to + // emit even under OOM. No retry, cannot throw or recurse. + logger.Log(LogRecord{.level = level, + .message = std::string_view(""), + .location = location, + .attributes = {}}); } } // namespace internal diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 66f81501a..37a4a33d9 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -29,8 +29,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -54,13 +56,26 @@ struct ICEBERG_EXPORT LogAttribute { std::string value; }; -/// \brief A single log record handed to a Logger. +/// \brief A non-owning view of one log record, as handed to Logger::Log(). /// -/// The formatted message is owned (moved in by the logging macros), so a sink -/// may safely retain the record beyond the Log() call. The member set must not -/// depend on the build's logging backend (the spdlog backend never appears here). -/// Use LogMessage::Builder for a readable way to assemble one, especially with -/// structured attributes. +/// `message` and `attributes` are borrowed and valid ONLY for the duration of the +/// Log() call -- they typically reference a reused per-thread format buffer. A sink +/// that consumes the record synchronously (the in-tree CerrLogger/SpdLogger) uses +/// them directly with no allocation; a sink that RETAINS the record (async, or the +/// test CapturingLogger) must deep-copy into owned storage (e.g. a LogMessage). +struct ICEBERG_EXPORT LogRecord { + LogLevel level = LogLevel::kOff; + std::string_view message; + std::source_location location = std::source_location::current(); + std::span attributes; +}; + +/// \brief An owned log record: the result of LogMessage::Builder and the type a +/// retaining sink copies into. Converts implicitly to a (non-owning) LogRecord +/// that views this object's storage, so `logger->Log(builder.Build())` works. +/// +/// The member set must not depend on the build's logging backend (the spdlog +/// backend never appears here). struct ICEBERG_EXPORT LogMessage { LogLevel level = LogLevel::kOff; std::string message; @@ -68,6 +83,17 @@ struct ICEBERG_EXPORT LogMessage { std::vector attributes; class Builder; + + /// \brief View this owned record as a LogRecord (no copy). The view is valid as + /// long as this LogMessage lives. + LogRecord AsRecord() const noexcept { + return LogRecord{.level = level, + .message = message, + .location = location, + .attributes = attributes}; + } + // NOLINTNEXTLINE(google-explicit-constructor): implicit so logger->Log(msg) works + operator LogRecord() const noexcept { return AsRecord(); } }; /// \brief Fluent builder for LogMessage, the easy path to attach structured @@ -165,8 +191,12 @@ class ICEBERG_EXPORT Logger { /// \brief Cheap check whether a record at \p level would be emitted. virtual bool ShouldLog(LogLevel level) const noexcept = 0; - /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. - virtual void Log(LogMessage&& message) noexcept = 0; + /// \brief Emit one (already-formatted) record. Must not throw. + /// + /// The record's `message`/`attributes` are non-owning views valid only for this + /// call; a sink that retains the record must deep-copy them. An owned LogMessage + /// converts implicitly, so `logger->Log(builder.Build())` still works. + virtual void Log(const LogRecord& record) noexcept = 0; /// \brief Set the minimum level this logger emits. virtual void SetLevel(LogLevel level) noexcept = 0; @@ -283,12 +313,22 @@ namespace internal { /// statement. ICEBERG_EXPORT const std::shared_ptr& CurrentLogger() noexcept; -/// \brief Build a LogMessage from the already-formatted text and dispatch it. +/// \brief Wrap the already-formatted text in a LogRecord view and dispatch it. /// -/// Declared ICEBERG_EXPORT because the logging macros expand into this call in -/// consumer translation units. +/// \p message is borrowed (typically the per-thread FormatBuffer) and must outlive +/// the call. Declared ICEBERG_EXPORT because the logging macros expand into this +/// call in consumer translation units. ICEBERG_EXPORT void Emit(Logger& logger, LogLevel level, - const std::source_location& location, std::string&& message); + const std::source_location& location, std::string_view message); + +/// \brief A reused per-thread scratch buffer for formatting a log line. +/// +/// One std::string per thread, cleared (capacity retained) before each format, so +/// repeated logging on a thread does no heap allocation after the buffer warms up. +/// Holds no Logger references, so it is independent of the CurrentLogger teardown +/// machinery. Not reentrant: a sink must not log from within Log() (already UB per +/// the Logger no-reentrancy contract) or it would clobber a format in progress. +ICEBERG_EXPORT std::string& FormatBuffer() noexcept; /// \brief Emit a fixed fallback record when formatting threw. /// @@ -337,9 +377,14 @@ template void FormatAndEmit(Logger& logger, LogLevel level, const std::source_location& loc, std::format_string fmt, Args&&... args) noexcept { if (!logger.ShouldLog(level)) return; + std::string& buf = FormatBuffer(); + buf.clear(); // retains capacity -> no heap allocation after warmup try { - Emit(logger, level, loc, std::format(fmt, std::forward(args)...)); - } catch (...) { + // format_to with a std::format_string keeps compile-time format checking; the + // result lives in the reused buffer and is passed to Emit as a view. + std::format_to(std::back_inserter(buf), fmt, std::forward(args)...); + Emit(logger, level, loc, buf); + } catch (const std::exception&) { EmitFormatError(logger, level, loc); } } diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index e4d85bc98..49a35504c 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -108,6 +108,10 @@ add_iceberg_test(logging_test log_level_test.cc logger_test.cc) +# Separate binary: replaces global operator new to count allocations, so it must +# not be linked with other tests. +add_iceberg_test(logging_zero_alloc_test SOURCES logging_zero_alloc_test.cc) + add_iceberg_test(expression_test SOURCES aggregate_test.cc diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h index f3999195e..596b76594 100644 --- a/src/iceberg/test/logging_test_helpers.h +++ b/src/iceberg/test/logging_test_helpers.h @@ -36,9 +36,15 @@ class CapturingLogger : public Logger { return level >= level_.load(std::memory_order_relaxed); } - void Log(LogMessage&& message) noexcept override { + // Retaining sink: deep-copy the non-owning view into an owned LogMessage so the + // record outlives the Log() call and tests can inspect it. + void Log(const LogRecord& record) noexcept override { std::lock_guard lock(mutex_); - records_.push_back(std::move(message)); + records_.push_back( + LogMessage{.level = record.level, + .message = std::string(record.message), + .location = record.location, + .attributes = {record.attributes.begin(), record.attributes.end()}}); } void SetLevel(LogLevel level) noexcept override { diff --git a/src/iceberg/test/logging_zero_alloc_test.cc b/src/iceberg/test/logging_zero_alloc_test.cc new file mode 100644 index 000000000..9fec1bc8e --- /dev/null +++ b/src/iceberg/test/logging_zero_alloc_test.cc @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Proves the format -> dispatch path allocates no heap memory after the per-thread +// FormatBuffer warms up. Lives in its own test binary because it replaces the +// global operator new/delete with an allocation counter; the counter is only armed +// inside the measured window so the rest of the program is unaffected. + +#include +#include +#include +#include +#include +#include + +#include + +#include "iceberg/logging/log_level.h" +#include "iceberg/logging/logger.h" +#include "iceberg/test/logging_test_helpers.h" + +namespace { + +std::atomic g_armed{false}; +std::atomic g_allocs{0}; + +void Count() noexcept { + if (g_armed.load(std::memory_order_relaxed)) { + g_allocs.fetch_add(1, std::memory_order_relaxed); + } +} + +} // namespace + +// Global allocation-function replacement (program-wide, single definition). +void* operator new(std::size_t n) { + Count(); + void* p = std::malloc(n != 0 ? n : 1); + if (p == nullptr) throw std::bad_alloc(); + return p; +} +void* operator new[](std::size_t n) { + Count(); + void* p = std::malloc(n != 0 ? n : 1); + if (p == nullptr) throw std::bad_alloc(); + return p; +} +void operator delete(void* p) noexcept { std::free(p); } +void operator delete[](void* p) noexcept { std::free(p); } +void operator delete(void* p, std::size_t) noexcept { std::free(p); } +void operator delete[](void* p, std::size_t) noexcept { std::free(p); } + +namespace iceberg { + +namespace { + +/// \brief Synchronous sink that consumes the record without allocating: it reads +/// the message view's size and discards it. +class CountingSink : public Logger { + public: + bool ShouldLog(LogLevel /*level*/) const noexcept override { return true; } + void Log(const LogRecord& record) noexcept override { total_ += record.message.size(); } + void SetLevel(LogLevel /*level*/) noexcept override {} + LogLevel level() const noexcept override { return LogLevel::kTrace; } + std::size_t total() const { return total_; } + + private: + std::size_t total_ = 0; +}; + +} // namespace + +// After the thread-local FormatBuffer has grown, formatting + dispatching a +// same-size message must perform zero heap allocations. +TEST(LoggingZeroAllocTest, FormatDispatchPathDoesNotAllocateAfterWarmup) { + auto sink = std::make_shared(); + ScopedDefaultLogger guard(sink); + const std::string payload(256, 'x'); // well past SSO + + // Warm up: first format grows FormatBuffer to >= payload size (this allocates). + Log(LogLevel::kInfo, "{}", payload); + + g_allocs.store(0, std::memory_order_relaxed); + g_armed.store(true, std::memory_order_relaxed); + Log(LogLevel::kInfo, "{}", payload); // reuses the buffer -> no allocation + g_armed.store(false, std::memory_order_relaxed); + + EXPECT_EQ(g_allocs.load(std::memory_order_relaxed), 0u); + EXPECT_GT(sink->total(), 0u); // the record actually reached the sink +} + +// Negative control: the counter actually observes heap allocations, so the zero +// above is meaningful and not a dead probe. +TEST(LoggingZeroAllocTest, CounterDetectsAllocation) { + g_allocs.store(0, std::memory_order_relaxed); + g_armed.store(true, std::memory_order_relaxed); + std::string forced(256, 'y'); // a fresh >SSO string must hit the heap + volatile char escape = forced[0]; + (void)escape; + g_armed.store(false, std::memory_order_relaxed); + + EXPECT_GT(g_allocs.load(std::memory_order_relaxed), 0u); +} + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index ba0f6230d..1b7d115a7 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -68,6 +68,8 @@ iceberg_tests = { 'logger_test.cc', ), }, + # Separate binary: replaces global operator new to count allocations. + 'logging_zero_alloc_test': {'sources': files('logging_zero_alloc_test.cc')}, 'expression_test': { 'sources': files( 'aggregate_test.cc',