From 5b1a757304d93f06b8ec88050ff71cd247be3f3b Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 2 Jun 2026 16:49:41 +0900 Subject: [PATCH 1/2] Fix mismatched sINT/dINT log field types Seven boolean/counter fields were declared dINT, but their marshal functions write a single int, and proxy_protocol_version (ppv) was declared dINT while it actually marshals a string. The dINT type wrongly excludes these fields from log filters and aggregates, and the ppv mislabeling misrepresents variable-length string bytes as two fixed ints to any type-driven consumer. Retype the single-int fields as sINT and ppv as STRING so the declared type matches what each marshal function emits. --- include/proxy/logging/LogField.h | 8 ++++---- src/proxy/logging/Log.cc | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/proxy/logging/LogField.h b/include/proxy/logging/LogField.h index d69f675034f..27a1fb6b209 100644 --- a/include/proxy/logging/LogField.h +++ b/include/proxy/logging/LogField.h @@ -96,10 +96,10 @@ class LogField using VarUnmarshalFunc = std::variant; enum Type { - sINT = 0, - dINT, - STRING, - IP, ///< IP Address. + sINT = 0, ///< Single INT + dINT, ///< Double INT + STRING, ///< String + IP, ///< IP Address N_TYPES }; diff --git a/src/proxy/logging/Log.cc b/src/proxy/logging/Log.cc index 9a0ebd1eb11..2176a62dfc6 100644 --- a/src/proxy/logging/Log.cc +++ b/src/proxy/logging/Log.cc @@ -538,17 +538,17 @@ Log::init_fields() global_field_list.add(field, false); field_symbol_hash.emplace("cqcl", field); - field = new LogField("client_req_tcp_reused", "cqtr", LogField::dINT, &LogAccess::marshal_client_req_tcp_reused, + field = new LogField("client_req_tcp_reused", "cqtr", LogField::sINT, &LogAccess::marshal_client_req_tcp_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqtr", field); - field = new LogField("client_req_is_ssl", "cqssl", LogField::dINT, &LogAccess::marshal_client_req_is_ssl, + field = new LogField("client_req_is_ssl", "cqssl", LogField::sINT, &LogAccess::marshal_client_req_is_ssl, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssl", field); - field = new LogField("client_req_ssl_reused", "cqssr", LogField::dINT, &LogAccess::marshal_client_req_ssl_reused, + field = new LogField("client_req_ssl_reused", "cqssr", LogField::sINT, &LogAccess::marshal_client_req_ssl_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssr", field); @@ -812,7 +812,7 @@ Log::init_fields() global_field_list.add(field, false); field_symbol_hash.emplace("pqssl", field); - field = new LogField("proxy_req_ssl_reused", "pqssr", LogField::dINT, &LogAccess::marshal_proxy_req_ssl_reused, + field = new LogField("proxy_req_ssl_reused", "pqssr", LogField::sINT, &LogAccess::marshal_proxy_req_ssl_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqssr", field); @@ -993,17 +993,17 @@ Log::init_fields() global_field_list.add(field, false); field_symbol_hash.emplace("ctid", field); - field = new LogField("cache_read_retry_attempts", "crra", LogField::dINT, &LogAccess::marshal_cache_read_retries, + field = new LogField("cache_read_retry_attempts", "crra", LogField::sINT, &LogAccess::marshal_cache_read_retries, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("crra", field); - field = new LogField("cache_write_retry_attempts", "cwra", LogField::dINT, &LogAccess::marshal_cache_write_retries, + field = new LogField("cache_write_retry_attempts", "cwra", LogField::sINT, &LogAccess::marshal_cache_write_retries, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cwra", field); - field = new LogField("cache_collapsed_connection_success", "cccs", LogField::dINT, + field = new LogField("cache_collapsed_connection_success", "cccs", LogField::sINT, &LogAccess::marshal_cache_collapsed_connection_success, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cccs", field); @@ -1018,7 +1018,7 @@ Log::init_fields() global_field_list.add(field, false); field_symbol_hash.emplace("ctpd", field); - field = new LogField("proxy_protocol_version", "ppv", LogField::dINT, &LogAccess::marshal_proxy_protocol_version, + field = new LogField("proxy_protocol_version", "ppv", LogField::STRING, &LogAccess::marshal_proxy_protocol_version, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("ppv", field); From 8358d716f589bb00eb9e63f705966e620a821d23 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 2 Jun 2026 14:07:31 +0900 Subject: [PATCH 2/2] Self-describing binary log format (LogBuffer v3) Publish each field's type in a per-segment schema so a generic reader can decode a .blog from the file alone, without an embedded ATS symbol-to-type table that must track the writer in lockstep. The per-field code is LogField::Type serialized directly (now an enum class : uint8_t with INVALID=0 reserved and sINT..IP = 1..4 as the frozen wire codes); a static_assert pins the values. This relies on each field's declared type matching its marshalled framing, which the parent commit ("Fix mismatched sINT/dINT log field types") establishes. Readers (LogBufferIterator, logcat, logstats, the ASCII output paths) accept both v2 and v3 segments, sizing the header read to the on-disk version, so a v3 build keeps decoding logs written by an older one. Integer values stay in host byte order, as in v2 (no endianness change). The public TSLogType enum is given the same values as LogField::Type so TSLogFieldRegister can static_cast between them; static_asserts in InkAPI.cc (the only TU that sees both) pin the alignment so a future reorder fails to compile. The writer version is per-LogObject: logging.yaml "binary_log_version: 2" pins a binary log to the pre-v3 layout (no schema, shorter header) so a not-yet-upgraded downstream parser keeps working during a migration; the default is v3. Decoding untrusted .blog input is bounded: LogBufferIterator validates data_offset and each entry against the segment, and the JSON decoder validates the schema offset alignment and cross-checks field_count against the symbol list. --- doc/admin-guide/files/logging.yaml.en.rst | 34 ++ .../command-line/traffic_logcat.en.rst | 9 + .../command-line/traffic_logstats.en.rst | 3 + .../binary-log-v3-format.en.rst | 216 ++++++++++++ .../logging-architecture/index.en.rst | 1 + include/proxy/logging/LogBuffer.h | 110 +++++- include/proxy/logging/LogField.h | 22 +- include/proxy/logging/LogFormat.h | 7 + include/proxy/logging/LogObject.h | 17 +- include/ts/apidefs.h.in | 6 +- src/api/InkAPI.cc | 13 + src/proxy/logging/CMakeLists.txt | 4 + src/proxy/logging/Log.cc | 324 +++++++++--------- src/proxy/logging/LogAccess.cc | 1 + src/proxy/logging/LogBuffer.cc | 136 +++++++- src/proxy/logging/LogField.cc | 35 +- src/proxy/logging/LogFile.cc | 29 +- src/proxy/logging/LogFilter.cc | 10 +- src/proxy/logging/LogFormat.cc | 2 +- src/proxy/logging/LogObject.cc | 3 +- src/proxy/logging/YamlLogConfig.cc | 18 +- .../logging/unit-tests/test_LogBuffer.cc | 178 ++++++++++ src/traffic_logcat/CMakeLists.txt | 11 +- src/traffic_logcat/LogEntryJson.cc | 247 +++++++++++++ src/traffic_logcat/LogEntryJson.h | 48 +++ src/traffic_logcat/logcat.cc | 57 ++- .../unit-tests/test_LogEntryJson.cc | 320 +++++++++++++++++ src/traffic_logstats/logstats.cc | 22 +- .../gold_tests/logging/binary_log_v3.test.py | 145 ++++++++ .../logging/gold/binary_log_v3_ascii.gold | 3 + .../logging/gold/binary_log_v3_json.gold | 3 + 31 files changed, 1785 insertions(+), 249 deletions(-) create mode 100644 doc/developer-guide/logging-architecture/binary-log-v3-format.en.rst create mode 100644 src/proxy/logging/unit-tests/test_LogBuffer.cc create mode 100644 src/traffic_logcat/LogEntryJson.cc create mode 100644 src/traffic_logcat/LogEntryJson.h create mode 100644 src/traffic_logcat/unit-tests/test_LogEntryJson.cc create mode 100644 tests/gold_tests/logging/binary_log_v3.test.py create mode 100644 tests/gold_tests/logging/gold/binary_log_v3_ascii.gold create mode 100644 tests/gold_tests/logging/gold/binary_log_v3_json.gold diff --git a/doc/admin-guide/files/logging.yaml.en.rst b/doc/admin-guide/files/logging.yaml.en.rst index 6bc7d222d36..095f4649e4b 100644 --- a/doc/admin-guide/files/logging.yaml.en.rst +++ b/doc/admin-guide/files/logging.yaml.en.rst @@ -289,6 +289,15 @@ filename string The name of the logfile relative to the defau format string a string with a valid named format specification. header string If present, emitted as the first line of each new log file. +binary_log_version number For ``binary`` logs only: the on-disk segment + format version, ``2`` or ``3`` (default + ``3``). Version 3 is self-describing (embeds a + per-field type schema so a generic reader can + decode the file without an ATS symbol table); + use ``2`` to emit the legacy layout for + downstream parsers that do not yet understand + version 3. Ignored for ``ascii`` and + ``ascii_pipe``. rolling_enabled *see below* Determines the type of log rolling to use (or whether to disable rolling). Overrides :ts:cv:`proxy.config.log.rolling_enabled`. @@ -389,3 +398,28 @@ matched the REFRESH_HIT filter we created. format: summaryfmt filters: - refreshhitfilter + +The following is an example of a binary log. Binary logs are written in the +self-describing version 3 format by default, so ``traffic_logcat`` and +``traffic_logstats`` (and any reader built from the +:ref:`v3 format specification `) can decode the +``minimal.blog`` file without an embedded copy of the field table: + +.. code:: yaml + + logs: + - mode: binary + filename: minimal + format: minimalfmt + +To keep emitting the older version 2 layout for a downstream parser that does +not yet understand version 3, pin the object with ``binary_log_version``. This +key only applies to ``binary`` logs and defaults to ``3``: + +.. code:: yaml + + logs: + - mode: binary + filename: minimal_legacy + format: minimalfmt + binary_log_version: 2 diff --git a/doc/appendices/command-line/traffic_logcat.en.rst b/doc/appendices/command-line/traffic_logcat.en.rst index 953f9391f3f..8bda118a71b 100644 --- a/doc/appendices/command-line/traffic_logcat.en.rst +++ b/doc/appendices/command-line/traffic_logcat.en.rst @@ -33,6 +33,9 @@ Description To analyze a binary log file using standard tools, you must first convert it to ASCII. :program:`traffic_logcat` does exactly that. +:program:`traffic_logcat` reads both version 2 and version 3 binary log +segments. See :ref:`binary-log-v3-format` for the self-describing v3 format. + Options ======= @@ -74,6 +77,12 @@ Attempts to transform the input to Squid format, if possible. Attempt to transform the input to Netscape Extended-2 format, if possible. +.. option:: -j, --json + +Emits each entry as a JSON object, decoded directly from the self-describing +v3 field-type schema (see :ref:`binary-log-v3-format`). Requires version 3 +binary logs; version 2 segments lack the schema and are skipped with a note. + .. option:: -T, --debug_tags .. option:: -w, --overwrite_output diff --git a/doc/appendices/command-line/traffic_logstats.en.rst b/doc/appendices/command-line/traffic_logstats.en.rst index 481c3d240be..9a2aca5a183 100644 --- a/doc/appendices/command-line/traffic_logstats.en.rst +++ b/doc/appendices/command-line/traffic_logstats.en.rst @@ -35,6 +35,9 @@ produce metrics for total and per origin requests. Currently, this utility only supports parsing and processing the Squid binary log format, or a custom format that is compatible with the initial log fields of the Squid format. +Both version 2 and version 3 binary log segments are supported. See +:ref:`binary-log-v3-format` for the self-describing v3 format. + Output can either be a human readable text file, or a JSON format. Parsing can be done incrementally, and :program:`traffic_logstats` supports restarting where it left off previously (state is stored in an external file). This is diff --git a/doc/developer-guide/logging-architecture/binary-log-v3-format.en.rst b/doc/developer-guide/logging-architecture/binary-log-v3-format.en.rst new file mode 100644 index 00000000000..49f7a37aa75 --- /dev/null +++ b/doc/developer-guide/logging-architecture/binary-log-v3-format.en.rst @@ -0,0 +1,216 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../../common.defs + +.. _binary-log-v3-format: + +Self-Describing Binary Log Format (v3) +************************************** + +This page specifies the on-disk format of a binary log segment, version 3, in +enough detail to implement a decoder *without* the Traffic Server source tree. +A version 3 segment is **self-describing**: every field's type is published in +the segment header, so a generic reader can decode each entry by dispatching on +a small, stable set of type codes — no embedded copy of the ATS symbol-to-type +table is required. + +Motivation +========== + +In version 2, a segment header carries the field *symbols* (``fmt_fieldlist``, +e.g. ``"chi cqu pssc"``) and a printf-style *template* (``fmt_printf``) but +**not** the field types. To decode an entry a reader had to already know the +type of each symbol, because the value encodings are only self-delimiting once +the type is known (``IP`` is variable length, for example). That coupled every +out-of-tree parser to the exact ATS build that wrote the log. + +Version 3 adds one thing: a per-segment **field-type schema** that lists the +wire type of every field, in field order. Decoding then needs only the symbols +(as keys) and the schema (for types). + +Segment layout +============== + +A ``.blog`` file is a stream of segments, each a serialized ``LogBuffer``: + +:: + + LogBufferHeader (per segment) + cookie = 0xaceface + version = 3 + format_type, byte_count, entry_count, timestamps, flags, signature + fmt_name_offset + fmt_fieldlist_offset -> "chi cqu pssc ..." (symbols, space separated) + fmt_printf_offset -> "% % ..." + src_hostname_offset, log_filename_offset + data_offset -> first entry + fmt_fieldtypes_offset -> field-type schema (NEW in v3) + [ LogEntryHeader | field0 field1 field2 ... ] x entry_count + LogEntryHeader: timestamp(8) timestamp_usec(4) entry_len(4) + fields: concatenated in fieldlist order, no per-field tags + +All ``*_offset`` members are byte offsets from the start of the segment (the +address of the ``LogBufferHeader``). ``fmt_fieldtypes_offset`` is appended +**after** ``data_offset`` so that the layout through ``data_offset`` is +byte-identical to version 2; a value of ``0`` means the schema is absent (e.g. +a text-format segment, or a version 2 segment). + +Field-type schema +================= + +At ``fmt_fieldtypes_offset`` the segment stores: + +:: + + uint16_t field_count; // == number of symbols in fmt_fieldlist + uint8_t type_code[field_count]; // one type code per field, in order + +``type_code[i]`` is the type of the i-th field, which corresponds to the i-th +symbol in ``fmt_fieldlist`` and the i-th value in each entry. The ``uint16_t`` +``field_count`` prefix is written in **host byte order**, like the rest of +``LogBufferHeader``. The blob is padded along with the header to an 8-byte +boundary. + +The schema carries no independent version of its own: the segment ``version`` +(``3`` here) governs this layout, so a future schema change rides the same +``LOG_SEGMENT_VERSION`` bump rather than a second, separate counter. + +Stable type codes +================= + +The type codes are the values of the in-tree ``LogField::Type`` enumeration, +serialized directly. They are part of the published format and are +**append-only**: codes are never renumbered or reused. + +==== ========= =========================================================== +Code Name Wire encoding +==== ========= =========================================================== +0 INVALID Reserved. Not emitted by a correct writer; a reader that + meets it -- or any code it does not recognize -- cannot + determine the field length and must stop decoding the entry. +1 sINT A single ``int64_t``, fixed 8 bytes, **host byte order**. +2 dINT Two ``int64_t`` (16 bytes), host byte order. Used for + values stored as two integers, e.g. HTTP version + major/minor. +3 STRING NUL-terminated bytes, then padded to an 8-byte boundary. +4 IP ``uint16_t`` address family followed by a family-sized + address, then padded to an 8-byte boundary (see below). +==== ========= =========================================================== + +The code reflects how the value is *framed* on disk, i.e. how a reader walks +(or skips) it -- not what the value means. (The ``sINT``/``dINT`` names are an +ATS-internal distinction; on the wire ``sINT`` is one 8-byte integer and +``dINT`` is two consecutive ones.) How a consumer *renders* a value -- mapping +a cache-result integer to ``TCP_HIT``, or a ``dINT`` to ``1.1`` -- is layered +on top by the consumer and is not part of the wire format. + +Value encodings +=============== + +sINT + An ``int64_t`` occupying exactly 8 bytes, in **host byte order** (as in + version 2). Integer values are not endianness-normalized, so a ``.blog`` is + not portable across hosts of differing endianness; cross-architecture + portability is future work. + +dINT + Two consecutive ``sINT`` values: 16 bytes total, in host byte order. Used + where one log field is stored as two integers, such as an HTTP version + (major then minor). The reference decoder renders it as a JSON array, e.g. + ``[1,1]``; turning that into ``1.1`` is a consumer concern. + +STRING + The string bytes followed by a single NUL, then zero padding up to the next + 8-byte boundary. The on-wire length is therefore + ``align_up(strlen + 1, 8)``. An empty/absent string is written as ``"-"``. + +IP + A ``uint16_t`` address family in host byte order, then: + + .. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Family + - Following bytes + * - ``AF_INET`` (IPv4) + - 4-byte ``in_addr`` + * - ``AF_INET6`` (IPv6) + - 16-byte ``in6_addr`` + * - ``AF_UNIX`` + - fixed-size path buffer + * - ``AF_UNSPEC`` / other + - no address bytes + + The whole field is padded to the next 8-byte boundary. Because the length + depends on the family byte *inside* the value, only a reader that knows the + field is an ``IP`` (from the schema) can compute its size — which is exactly + why the schema is required to skip or decode unknown fields safely. + +Decoding an entry +================= + +Given a segment, a generic decoder: + +#. Reads ``field_count`` and the ``type_code[]`` array from the schema at + ``fmt_fieldtypes_offset``. +#. Splits ``fmt_fieldlist`` into ``field_count`` whitespace-separated symbols. +#. For each entry (located via ``data_offset`` and walked using + ``LogEntryHeader::entry_len``), reads the fields left to right, using + ``type_code[i]`` to pick the encoding above and advance the read cursor. + +The reference implementation is ``log_entry_to_json()`` +(``src/traffic_logcat/LogEntryJson.cc``), which renders an entry as a JSON +object using only the symbols and the schema — it does not consult the global +field table. It is exposed by :program:`traffic_logcat`'s ``-j``/``--json`` +option. For example, a three-field entry decodes to: + +:: + + {"chi":"192.0.2.10","cqu":"GET /index.html","pssc":200} + +.. note:: + + Some integer fields hold coded values (cache result, hierarchy, finish + status, etc.). The binary format stores the raw integer; mapping it to a + mnemonic such as ``TCP_HIT`` is a presentation concern left to the consumer. + +Compatibility +============= + +* **New reader, old file (v3 reader, v2 file):** supported. The readers shipped + with Traffic Server accept the inclusive version range + ``[2, 3]`` and size the header read to the on-disk version, so a v2 segment + (which has no ``fmt_fieldtypes_offset``) still decodes. Its ASCII output is + produced from ``fmt_fieldlist`` + ``fmt_printf`` exactly as before. +* **Old reader, new file (v2 reader, v3 file):** a reader built before v3 + support gates on the version and will skip v3 segments. v3 logs therefore + require tooling from a release that understands v3. As an escape hatch, a + binary log object can be pinned to the version 2 layout with + ``binary_log_version: 2`` in :file:`logging.yaml`, so a not-yet-upgraded + downstream parser keeps working during a migration. +* The text/Squid/CLF ASCII output paths are unchanged: the schema is additive + and ignored when rendering ASCII. + +.. note:: + + v3 does not change integer endianness: field values, the integers in + ``LogBufferHeader`` / ``LogEntryHeader``, and the ``IP`` family word are all + written in host byte order, as in v2. A ``.blog`` is therefore not portable + across hosts of differing endianness; cross-architecture portability is + future work. diff --git a/doc/developer-guide/logging-architecture/index.en.rst b/doc/developer-guide/logging-architecture/index.en.rst index 16b6fd8bbdf..542e05ad5fa 100644 --- a/doc/developer-guide/logging-architecture/index.en.rst +++ b/doc/developer-guide/logging-architecture/index.en.rst @@ -26,3 +26,4 @@ Logging Architecture :maxdepth: 2 architecture.en + binary-log-v3-format.en diff --git a/include/proxy/logging/LogBuffer.h b/include/proxy/logging/LogBuffer.h index 4f8295ae444..836a6d5447d 100644 --- a/include/proxy/logging/LogBuffer.h +++ b/include/proxy/logging/LogBuffer.h @@ -30,12 +30,19 @@ #include "proxy/logging/LogLimits.h" #include "proxy/logging/LogAccess.h" +#include + class LogObject; class LogConfig; class LogBufferIterator; #define LOG_SEGMENT_COOKIE 0xaceface -#define LOG_SEGMENT_VERSION 2 +#define LOG_SEGMENT_VERSION 3 + +// Oldest segment version this build can still read. Readers accept the +// inclusive range [LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION] so +// that a new build keeps decoding logs written by an older one. +#define LOG_SEGMENT_VERSION_MIN_SUPPORTED 2 #if defined(__linux__) #define LB_DEFAULT_ALIGN 512 @@ -56,6 +63,35 @@ struct LogEntryHeader { uint32_t entry_len; }; +/*------------------------------------------------------------------------- + LogFieldTypeSchema + + Self-describing field-type table for v3 segments, written once per segment at + LogBufferHeader::fmt_fieldtypes_offset (alongside fmt_fieldlist). It lets a + generic reader decode every field from the file alone, dispatching on the + LogField::Type codes with no embedded ATS symbol->type table. + + On-wire layout: + + uint16_t field_count; // == number of symbols in fmt_fieldlist + uint8_t type_code[field_count]; // LogField::Type, in fieldlist order + + No independent schema version: the segment's LOG_SEGMENT_VERSION governs this + layout. All integers are host byte order, like the rest of LogBufferHeader; + the blob is padded with the header to 8-byte alignment. + -------------------------------------------------------------------------*/ + +struct LogFieldTypeSchema { + uint16_t field_count; + // Immediately followed by uint8_t type_code[field_count]. + + const uint8_t * + type_codes() const + { + return reinterpret_cast(this) + sizeof(LogFieldTypeSchema); + } +}; + /*------------------------------------------------------------------------- LogBufferHeader @@ -87,14 +123,52 @@ struct LogBufferHeader { uint32_t data_offset; // offset to start of data entry // section + // NEW in v3: offset to the LogFieldTypeSchema blob, or 0 if absent. Appended + // after data_offset so the layout through data_offset is byte-identical to + // v2; v2 readers ignore it and v3 readers tolerate v2 segments lacking it. + uint32_t fmt_fieldtypes_offset; + // some helper functions to return the header strings char *fmt_fieldlist(); char *fmt_printf(); + char *fmt_fieldtypes(); // v3 field-type schema blob; nullptr for v2 segments char *src_hostname(); char *log_filename(); }; +/** Whether this build can read a segment of the given @a version. + + The single source of truth for the supported range: readers accept the + inclusive range [LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION] so + a new build keeps decoding logs written by an older one. +*/ +inline bool +log_segment_version_supported(unsigned version) +{ + return LOG_SEGMENT_VERSION_MIN_SUPPORTED <= version && version <= LOG_SEGMENT_VERSION; +} + +/** On-disk size of LogBufferHeader for a given segment version. + + Raw readers that read the header by size (rather than via the data_offset + field) must size the read to the version on disk: v3 appended + fmt_fieldtypes_offset after data_offset, so a v2 segment's header is + shorter. Reading a v2 segment with the (larger) v3 struct size would + consume bytes belonging to the data section. + + @return the header size in bytes, or 0 if @a version is unsupported. +*/ +inline size_t +log_buffer_header_size(unsigned version) +{ + if (!log_segment_version_supported(version)) { + return 0; + } + // v2 stops at data_offset; v3 and later include fmt_fieldtypes_offset. + return version >= 3 ? sizeof(LogBufferHeader) : offsetof(LogBufferHeader, fmt_fieldtypes_offset); +} + union LB_State { LB_State() : ival(0) {} LB_State(LB_State &vs) { ival = vs.ival; } @@ -193,6 +267,7 @@ class LogBuffer static size_t max_entry_bytes(); static int to_ascii(LogEntryHeader *entry, LogFormatType type, char *buf, int max_len, const char *symbol_str, char *printf_str, unsigned buffer_version, const char *alt_format = nullptr, LogEscapeType escape_type = LOG_ESCAPE_NONE); + static int resolve_custom_entry(LogFieldList *fieldlist, char *printf_str, char *read_from, char *write_to, int write_to_len, long timestamp, long timestamp_us, unsigned buffer_version, LogFieldList *alt_fieldlist = nullptr, char *alt_printf_str = nullptr, LogEscapeType escape_type = LOG_ESCAPE_NONE); @@ -239,6 +314,7 @@ class LogBuffer // private functions size_t _add_buffer_header(const LogConfig *cfg); unsigned add_header_str(const char *str, char *buf_ptr, unsigned buf_len); + unsigned add_field_type_schema(const LogFieldList *fieldlist, char *buf_ptr, unsigned buf_len); void freeLogBuffer(); // -- member functions that are not allowed -- @@ -296,6 +372,7 @@ class LogBufferIterator private: char *m_next; + char *m_buffer_end; // one past the last readable byte of the segment unsigned m_iter_entry_count; unsigned m_buffer_entry_count; @@ -311,21 +388,30 @@ class LogBufferIterator -------------------------------------------------------------------------*/ inline LogBufferIterator::LogBufferIterator(LogBufferHeader *header) - : m_next(nullptr), m_iter_entry_count(0), m_buffer_entry_count(0) + : m_next(nullptr), m_buffer_end(nullptr), m_iter_entry_count(0), m_buffer_entry_count(0) { ink_assert(header); - switch (header->version) { - case LOG_SEGMENT_VERSION: - m_next = (char *)header + header->data_offset; - m_buffer_entry_count = header->entry_count; - break; - - default: + // Entry iteration is identical across v2/v3 (v3 only appended a header field + // after data_offset). Accept the whole supported range. + if (log_segment_version_supported(header->version)) { + // Bound the data section against byte_count so a corrupt/hostile data_offset + // (a .blog read by logcat/logstats may be untrusted) can't make next() + // dereference outside the buffer. data_offset sits past the header, and must + // be 8-byte aligned so the int64 reads in each entry are well-aligned. + size_t header_size = log_buffer_header_size(header->version); + if (header->data_offset >= header_size && header->data_offset <= header->byte_count && + header->data_offset % INK_MIN_ALIGN == 0) { + m_next = reinterpret_cast(header) + header->data_offset; + m_buffer_end = reinterpret_cast(header) + header->byte_count; + m_buffer_entry_count = header->entry_count; + } + // else: bad data_offset -- leave m_next null (next() yields nothing). The + // file readers report it; the iterator stays silent to avoid log spam. + } else { Note("Invalid LogBuffer version %d in LogBufferIterator; " - "current version is %d", - header->version, LOG_SEGMENT_VERSION); - break; + "supported versions are %d-%d", + header->version, LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION); } } diff --git a/include/proxy/logging/LogField.h b/include/proxy/logging/LogField.h index 27a1fb6b209..1fbb3dc2168 100644 --- a/include/proxy/logging/LogField.h +++ b/include/proxy/logging/LogField.h @@ -95,12 +95,22 @@ class LogField using VarUnmarshalFuncSliceOnly = std::variant; using VarUnmarshalFunc = std::variant; - enum Type { - sINT = 0, ///< Single INT - dINT, ///< Double INT - STRING, ///< String - IP, ///< IP Address - N_TYPES + /** Field value type. + + These values are also the on-wire type codes of the v3 binary log schema + (see LogBufferHeader::fmt_fieldtypes() and the v3 format docs under + doc/developer-guide/logging-architecture/), so they are a published + contract: append before N_TYPES, never renumber. 0 is reserved for INVALID + so a zero-filled schema byte can't pass as a real type. The code describes + only framing (how to walk/skip a field), never the value's meaning. + */ + enum class Type : uint8_t { + INVALID = 0, ///< Reserved: never written; a reader treats 0 (or any unknown code) as unframmable and stops. + sINT = 1, ///< one int64_t, 8 bytes (host byte order). + dINT = 2, ///< two int64_t (16 bytes), e.g. HTTP version major/minor. + STRING = 3, ///< NUL-terminated, 8-byte padded. + IP = 4, ///< uint16_t family + family-sized address, 8-byte padded. + N_TYPES = 5, ///< Internal bound (asserts / name table); NOT a wire code. }; enum Container { diff --git a/include/proxy/logging/LogFormat.h b/include/proxy/logging/LogFormat.h index 15bb04c5977..431bfa4fd83 100644 --- a/include/proxy/logging/LogFormat.h +++ b/include/proxy/logging/LogFormat.h @@ -115,6 +115,13 @@ class LogFormat : public RefCountObjInHeap { return m_field_count; } + // Read-only view of the parsed fields, in marshalling (on-wire) order. Used + // by the v3 binary log writer to publish each field's wire type. + const LogFieldList & + field_list() const + { + return m_field_list; + } long interval() const { diff --git a/include/proxy/logging/LogObject.h b/include/proxy/logging/LogObject.h index 24e668e43cf..2b774aa8c5d 100644 --- a/include/proxy/logging/LogObject.h +++ b/include/proxy/logging/LogObject.h @@ -96,7 +96,8 @@ class LogObject : public RefCountObjInHeap LogObject(LogConfig *cfg, const LogFormat *format, const char *log_dir, const char *basename, LogFileFormat file_format, const char *header, Log::RollingEnabledValues rolling_enabled, int flush_threads, int rolling_interval_sec = 0, int rolling_offset_hr = 0, int rolling_size_mb = 0, bool auto_created = false, int rolling_max_count = 0, - int rolling_min_count = 0, bool reopen_after_rolling = false, int pipe_buffer_size = 0, bool m_fast = false); + int rolling_min_count = 0, bool reopen_after_rolling = false, int pipe_buffer_size = 0, bool m_fast = false, + unsigned binary_log_version = LOG_SEGMENT_VERSION); ~LogObject() override; void add_filter(LogFilter *filter, bool copy = true); @@ -219,6 +220,15 @@ class LogObject : public RefCountObjInHeap return m_flags; } + // On-disk binary segment version (logging.yaml "binary_log_version"); 2 emits + // the pre-v3 layout. Binary logs only; defaults to the current version. Set at + // construction (before the first buffer header is stamped), not afterward. + inline unsigned + get_binary_log_version() const + { + return m_binary_log_version; + } + void rename(char *new_name); inline bool @@ -259,8 +269,9 @@ class LogObject : public RefCountObjInHeap // could not be used because of // name conflicts - unsigned int m_flags; // diverse object flags (see above) - uint64_t m_signature; // INK_MD5 signature for object + unsigned int m_flags; // diverse object flags (see above) + uint64_t m_signature; // INK_MD5 signature for object + unsigned m_binary_log_version = LOG_SEGMENT_VERSION; // on-disk binary segment version (logging.yaml "binary_log_version") Log::RollingEnabledValues m_rolling_enabled; int m_flush_threads; // number of flush threads diff --git a/include/ts/apidefs.h.in b/include/ts/apidefs.h.in index bce145c8a49..8527fc9ae58 100644 --- a/include/ts/apidefs.h.in +++ b/include/ts/apidefs.h.in @@ -1650,10 +1650,10 @@ struct TSResponseAction { }; enum TSLogType { - TS_LOG_TYPE_INT, + TS_LOG_TYPE_INT = 1, ///< LogField::Type::sINT // DINT is omitted from the public API for now, until we decide whether we keep the type - TS_LOG_TYPE_STRING = 2, - TS_LOG_TYPE_ADDR = 3, + TS_LOG_TYPE_STRING = 3, ///< LogField::Type::STRING + TS_LOG_TYPE_ADDR = 4, ///< LogField::Type::IP }; /* -------------------------------------------------------------------------- diff --git a/src/api/InkAPI.cc b/src/api/InkAPI.cc index 22ec85e8cb2..b26836e81df 100644 --- a/src/api/InkAPI.cc +++ b/src/api/InkAPI.cc @@ -9053,6 +9053,19 @@ TSLogFieldRegister(std::string_view name, std::string_view symbol, TSLogType typ } } + // TSLogType mirrors LogField::Type's values and is static_cast to it below. + // apidefs.h.in can't reference LogField::Type, so pin the alignment here (the + // only TU that sees both) -- a reorder then fails to compile. + static_assert(static_cast(TS_LOG_TYPE_INT) == static_cast(LogField::Type::sINT)); + static_assert(static_cast(TS_LOG_TYPE_STRING) == static_cast(LogField::Type::STRING)); + static_assert(static_cast(TS_LOG_TYPE_ADDR) == static_cast(LogField::Type::IP)); + + // Reject anything outside the public set before the cast, so a bad plugin + // value can't become an INVALID/out-of-range LogField::Type and trip the ctor. + if (type != TS_LOG_TYPE_INT && type != TS_LOG_TYPE_STRING && type != TS_LOG_TYPE_ADDR) { + return TS_ERROR; + } + LogField *field = new LogField( name.data(), symbol.data(), static_cast(type), [marshal_cb](void *sm, char *buf) -> int { return marshal_cb(reinterpret_cast(sm), buf); }, unmarshal_cb); diff --git a/src/proxy/logging/CMakeLists.txt b/src/proxy/logging/CMakeLists.txt index f897952ff2f..e345be520c5 100644 --- a/src/proxy/logging/CMakeLists.txt +++ b/src/proxy/logging/CMakeLists.txt @@ -68,6 +68,10 @@ if(BUILD_TESTING) add_executable(test_LogAccess unit-tests/test_LogAccess.cc) target_link_libraries(test_LogAccess ts::logging ts::configmanager ts::inkevent records Catch2::Catch2WithMain) add_catch2_test(NAME test_LogAccess COMMAND test_LogAccess) + + add_executable(test_LogBuffer unit-tests/test_LogBuffer.cc) + target_link_libraries(test_LogBuffer ts::logging ts::configmanager ts::inkevent records Catch2::Catch2WithMain) + add_catch2_test(NAME test_LogBuffer COMMAND test_LogBuffer) endif() clang_tidy_check(logging) diff --git a/src/proxy/logging/Log.cc b/src/proxy/logging/Log.cc index 2176a62dfc6..50d7ddf29ab 100644 --- a/src/proxy/logging/Log.cc +++ b/src/proxy/logging/Log.cc @@ -328,341 +328,347 @@ Log::init_fields() LogField::init_milestone_container(); // client -> proxy fields - field = new LogField("client_host_ip", "chi", LogField::IP, &LogAccess::marshal_client_host_ip, &LogAccess::unmarshal_ip_to_str); + field = + new LogField("client_host_ip", "chi", LogField::Type::IP, &LogAccess::marshal_client_host_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("chi", field); - field = - new LogField("client_host_port", "chp", LogField::sINT, &LogAccess::marshal_client_host_port, &LogAccess::unmarshal_int_to_str); + field = new LogField("client_host_port", "chp", LogField::Type::sINT, &LogAccess::marshal_client_host_port, + &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("chp", field); - field = - new LogField("client_host_ip_hex", "chih", LogField::IP, &LogAccess::marshal_client_host_ip, &LogAccess::unmarshal_ip_to_hex); + field = new LogField("client_host_ip_hex", "chih", LogField::Type::IP, &LogAccess::marshal_client_host_ip, + &LogAccess::unmarshal_ip_to_hex); global_field_list.add(field, false); field_symbol_hash.emplace("chih", field); - field = new LogField("client_host_ip_verified", "chiv", LogField::IP, &LogAccess::marshal_client_host_ip_verified, + field = new LogField("client_host_ip_verified", "chiv", LogField::Type::IP, &LogAccess::marshal_client_host_ip_verified, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("chiv", field); // remote client (Not necessarily the requesting client IP - See proxy protocol) - field = new LogField("remote_host_ip", "rchi", LogField::IP, &LogAccess::marshal_remote_host_ip, &LogAccess::unmarshal_ip_to_str); + field = + new LogField("remote_host_ip", "rchi", LogField::Type::IP, &LogAccess::marshal_remote_host_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("rchi", field); - field = new LogField("remote_host_port", "rchp", LogField::sINT, &LogAccess::marshal_remote_host_port, + field = new LogField("remote_host_port", "rchp", LogField::Type::sINT, &LogAccess::marshal_remote_host_port, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("rchp", field); - field = - new LogField("remote_host_ip_hex", "rchh", LogField::IP, &LogAccess::marshal_remote_host_ip, &LogAccess::unmarshal_ip_to_hex); + field = new LogField("remote_host_ip_hex", "rchh", LogField::Type::IP, &LogAccess::marshal_remote_host_ip, + &LogAccess::unmarshal_ip_to_hex); global_field_list.add(field, false); field_symbol_hash.emplace("rchh", field); // interface ip - field = - new LogField("host_interface_ip", "hii", LogField::IP, &LogAccess::marshal_host_interface_ip, &LogAccess::unmarshal_ip_to_str); + field = new LogField("host_interface_ip", "hii", LogField::Type::IP, &LogAccess::marshal_host_interface_ip, + &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("hii", field); - field = new LogField("host_interface_ip_hex", "hiih", LogField::IP, &LogAccess::marshal_host_interface_ip, + field = new LogField("host_interface_ip_hex", "hiih", LogField::Type::IP, &LogAccess::marshal_host_interface_ip, &LogAccess::unmarshal_ip_to_hex); global_field_list.add(field, false); field_symbol_hash.emplace("hiih", field); // interface ip end - field = new LogField("client_auth_user_name", "caun", LogField::STRING, &LogAccess::marshal_client_auth_user_name, + field = new LogField("client_auth_user_name", "caun", LogField::Type::STRING, &LogAccess::marshal_client_auth_user_name, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("caun", field); - field = new LogField("plugin_identity_id", "piid", LogField::sINT, &LogAccess::marshal_plugin_identity_id, + field = new LogField("plugin_identity_id", "piid", LogField::Type::sINT, &LogAccess::marshal_plugin_identity_id, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("piid", field); - field = new LogField("plugin_identity_tag", "pitag", LogField::STRING, &LogAccess::marshal_plugin_identity_tag, + field = new LogField("plugin_identity_tag", "pitag", LogField::Type::STRING, &LogAccess::marshal_plugin_identity_tag, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("pitag", field); - field = new LogField("client_req_timestamp_sec", "cqts", LogField::sINT, &LogAccess::marshal_client_req_timestamp_sec, + field = new LogField("client_req_timestamp_sec", "cqts", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_sec, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqts", field); - field = new LogField("client_req_timestamp_hex_sec", "cqth", LogField::sINT, &LogAccess::marshal_client_req_timestamp_sec, + field = new LogField("client_req_timestamp_hex_sec", "cqth", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_sec, &LogAccess::unmarshal_int_to_str_hex); global_field_list.add(field, false); field_symbol_hash.emplace("cqth", field); - field = new LogField("client_req_timestamp_squid", "cqtq", LogField::sINT, &LogAccess::marshal_client_req_timestamp_ms, + field = new LogField("client_req_timestamp_squid", "cqtq", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_ms, &LogAccess::unmarshal_ttmsf); global_field_list.add(field, false); field_symbol_hash.emplace("cqtq", field); - field = new LogField("client_req_timestamp_netscape", "cqtn", LogField::sINT, &LogAccess::marshal_client_req_timestamp_sec, + field = new LogField("client_req_timestamp_netscape", "cqtn", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_sec, &LogAccess::unmarshal_int_to_netscape_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqtn", field); - field = new LogField("client_req_timestamp_date", "cqtd", LogField::sINT, &LogAccess::marshal_client_req_timestamp_sec, + field = new LogField("client_req_timestamp_date", "cqtd", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_sec, &LogAccess::unmarshal_int_to_date_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqtd", field); - field = new LogField("client_req_timestamp_time", "cqtt", LogField::sINT, &LogAccess::marshal_client_req_timestamp_sec, + field = new LogField("client_req_timestamp_time", "cqtt", LogField::Type::sINT, &LogAccess::marshal_client_req_timestamp_sec, &LogAccess::unmarshal_int_to_time_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqtt", field); - field = new LogField("client_req_http_method", "cqhm", LogField::STRING, &LogAccess::marshal_client_req_http_method, + field = new LogField("client_req_http_method", "cqhm", LogField::Type::STRING, &LogAccess::marshal_client_req_http_method, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqhm", field); - field = new LogField("client_req_url", "cqu", LogField::STRING, &LogAccess::marshal_client_req_url, &LogAccess::unmarshal_str, - &LogAccess::set_client_req_url); + field = new LogField("client_req_url", "cqu", LogField::Type::STRING, &LogAccess::marshal_client_req_url, + &LogAccess::unmarshal_str, &LogAccess::set_client_req_url); global_field_list.add(field, false); field_symbol_hash.emplace("cqu", field); - field = new LogField("client_req_url", "pqu", LogField::STRING, &LogAccess::marshal_client_req_url, &LogAccess::unmarshal_str, - &LogAccess::set_client_req_url); + field = new LogField("client_req_url", "pqu", LogField::Type::STRING, &LogAccess::marshal_client_req_url, + &LogAccess::unmarshal_str, &LogAccess::set_client_req_url); global_field_list.add(field, false); field_symbol_hash.emplace("pqu", field); - field = new LogField("client_req_url_canonical", "cquc", LogField::STRING, &LogAccess::marshal_client_req_url_canon, + field = new LogField("client_req_url_canonical", "cquc", LogField::Type::STRING, &LogAccess::marshal_client_req_url_canon, &LogAccess::unmarshal_str, &LogAccess::set_client_req_url_canon); global_field_list.add(field, false); field_symbol_hash.emplace("cquc", field); - field = new LogField("client_req_url_canonical", "pquc", LogField::STRING, &LogAccess::marshal_client_req_url_canon, + field = new LogField("client_req_url_canonical", "pquc", LogField::Type::STRING, &LogAccess::marshal_client_req_url_canon, &LogAccess::unmarshal_str, &LogAccess::set_client_req_url_canon); global_field_list.add(field, false); field_symbol_hash.emplace("pquc", field); - field = - new LogField("client_req_unmapped_url_canonical", "cquuc", LogField::STRING, &LogAccess::marshal_client_req_unmapped_url_canon, - &LogAccess::unmarshal_str, &LogAccess::set_client_req_unmapped_url_canon); + field = new LogField("client_req_unmapped_url_canonical", "cquuc", LogField::Type::STRING, + &LogAccess::marshal_client_req_unmapped_url_canon, &LogAccess::unmarshal_str, + &LogAccess::set_client_req_unmapped_url_canon); global_field_list.add(field, false); field_symbol_hash.emplace("cquuc", field); - field = new LogField("client_req_unmapped_url_path", "cquup", LogField::STRING, &LogAccess::marshal_client_req_unmapped_url_path, - &LogAccess::unmarshal_str, &LogAccess::set_client_req_unmapped_url_path); + field = + new LogField("client_req_unmapped_url_path", "cquup", LogField::Type::STRING, &LogAccess::marshal_client_req_unmapped_url_path, + &LogAccess::unmarshal_str, &LogAccess::set_client_req_unmapped_url_path); global_field_list.add(field, false); field_symbol_hash.emplace("cquup", field); - field = new LogField("client_req_unmapped_url_host", "cquuh", LogField::STRING, &LogAccess::marshal_client_req_unmapped_url_host, - &LogAccess::unmarshal_str, &LogAccess::set_client_req_unmapped_url_host); + field = + new LogField("client_req_unmapped_url_host", "cquuh", LogField::Type::STRING, &LogAccess::marshal_client_req_unmapped_url_host, + &LogAccess::unmarshal_str, &LogAccess::set_client_req_unmapped_url_host); global_field_list.add(field, false); field_symbol_hash.emplace("cquuh", field); - field = new LogField("client_req_url_scheme", "cqus", LogField::STRING, &LogAccess::marshal_client_req_url_scheme, + field = new LogField("client_req_url_scheme", "cqus", LogField::Type::STRING, &LogAccess::marshal_client_req_url_scheme, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqus", field); - field = new LogField("client_req_url_scheme", "pqus", LogField::STRING, &LogAccess::marshal_client_req_url_scheme, + field = new LogField("client_req_url_scheme", "pqus", LogField::Type::STRING, &LogAccess::marshal_client_req_url_scheme, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqus", field); - field = new LogField("client_req_url_path", "cqup", LogField::STRING, &LogAccess::marshal_client_req_url_path, + field = new LogField("client_req_url_path", "cqup", LogField::Type::STRING, &LogAccess::marshal_client_req_url_path, &LogAccess::unmarshal_str, &LogAccess::set_client_req_url_path); global_field_list.add(field, false); field_symbol_hash.emplace("cqup", field); - field = new LogField("client_req_url_path", "pqup", LogField::STRING, &LogAccess::marshal_client_req_url_path, + field = new LogField("client_req_url_path", "pqup", LogField::Type::STRING, &LogAccess::marshal_client_req_url_path, &LogAccess::unmarshal_str, &LogAccess::set_client_req_url_path); global_field_list.add(field, false); field_symbol_hash.emplace("pqup", field); - field = new LogField("client_req_protocol_version", "cqpv", LogField::STRING, &LogAccess::marshal_client_req_protocol_version, - &LogAccess::unmarshal_str); + field = new LogField("client_req_protocol_version", "cqpv", LogField::Type::STRING, + &LogAccess::marshal_client_req_protocol_version, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqpv", field); - field = new LogField("server_req_protocol_version", "sqpv", LogField::STRING, &LogAccess::marshal_server_req_protocol_version, - &LogAccess::unmarshal_str); + field = new LogField("server_req_protocol_version", "sqpv", LogField::Type::STRING, + &LogAccess::marshal_server_req_protocol_version, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("sqpv", field); - field = new LogField("client_req_header_len", "cqhl", LogField::sINT, &LogAccess::marshal_client_req_header_len, + field = new LogField("client_req_header_len", "cqhl", LogField::Type::sINT, &LogAccess::marshal_client_req_header_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqhl", field); - field = new LogField("client_req_squid_len", "cqql", LogField::sINT, &LogAccess::marshal_client_req_squid_len, + field = new LogField("client_req_squid_len", "cqql", LogField::Type::sINT, &LogAccess::marshal_client_req_squid_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqql", field); - field = new LogField("cache_lookup_url_canonical", "cluc", LogField::STRING, &LogAccess::marshal_cache_lookup_url_canon, + field = new LogField("cache_lookup_url_canonical", "cluc", LogField::Type::STRING, &LogAccess::marshal_cache_lookup_url_canon, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cluc", field); - field = new LogField("cache_key_hash", "ckh", LogField::STRING, &LogAccess::marshal_cache_key_hash, &LogAccess::unmarshal_str); + field = + new LogField("cache_key_hash", "ckh", LogField::Type::STRING, &LogAccess::marshal_cache_key_hash, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("ckh", field); - field = new LogField("client_sni_server_name", "cssn", LogField::STRING, &LogAccess::marshal_client_sni_server_name, + field = new LogField("client_sni_server_name", "cssn", LogField::Type::STRING, &LogAccess::marshal_client_sni_server_name, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cssn", field); - field = new LogField("client_ssl_cert_provided", "cscert", LogField::STRING, &LogAccess::marshal_client_provided_cert, + field = new LogField("client_ssl_cert_provided", "cscert", LogField::Type::STRING, &LogAccess::marshal_client_provided_cert, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cscert", field); - field = new LogField("proxy_ssl_cert_provided", "pscert", LogField::STRING, &LogAccess::marshal_proxy_provided_cert, + field = new LogField("proxy_ssl_cert_provided", "pscert", LogField::Type::STRING, &LogAccess::marshal_proxy_provided_cert, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pscert", field); - field = new LogField("process_uuid", "puuid", LogField::STRING, &LogAccess::marshal_process_uuid, &LogAccess::unmarshal_str); + field = + new LogField("process_uuid", "puuid", LogField::Type::STRING, &LogAccess::marshal_process_uuid, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("puuid", field); - field = - new LogField("process_snowflake_id", "psfid", LogField::STRING, &LogAccess::marshal_process_sfid, &LogAccess::unmarshal_str); + field = new LogField("process_snowflake_id", "psfid", LogField::Type::STRING, &LogAccess::marshal_process_sfid, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("psfid", field); - field = new LogField("client_req_content_len", "cqcl", LogField::sINT, &LogAccess::marshal_client_req_content_len, + field = new LogField("client_req_content_len", "cqcl", LogField::Type::sINT, &LogAccess::marshal_client_req_content_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqcl", field); - field = new LogField("client_req_tcp_reused", "cqtr", LogField::sINT, &LogAccess::marshal_client_req_tcp_reused, + field = new LogField("client_req_tcp_reused", "cqtr", LogField::Type::sINT, &LogAccess::marshal_client_req_tcp_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqtr", field); - field = new LogField("client_req_is_ssl", "cqssl", LogField::sINT, &LogAccess::marshal_client_req_is_ssl, + field = new LogField("client_req_is_ssl", "cqssl", LogField::Type::sINT, &LogAccess::marshal_client_req_is_ssl, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssl", field); - field = new LogField("client_req_ssl_reused", "cqssr", LogField::sINT, &LogAccess::marshal_client_req_ssl_reused, + field = new LogField("client_req_ssl_reused", "cqssr", LogField::Type::sINT, &LogAccess::marshal_client_req_ssl_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssr", field); - field = new LogField("client_req_is_internal", "cqint", LogField::sINT, &LogAccess::marshal_client_req_is_internal, + field = new LogField("client_req_is_internal", "cqint", LogField::Type::sINT, &LogAccess::marshal_client_req_is_internal, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqint", field); - field = new LogField("client_req_mptcp", "cqmpt", LogField::sINT, &LogAccess::marshal_client_req_mptcp_state, + field = new LogField("client_req_mptcp", "cqmpt", LogField::Type::sINT, &LogAccess::marshal_client_req_mptcp_state, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqmpt", field); - field = new LogField("client_sec_protocol", "cqssv", LogField::STRING, &LogAccess::marshal_client_security_protocol, + field = new LogField("client_sec_protocol", "cqssv", LogField::Type::STRING, &LogAccess::marshal_client_security_protocol, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssv", field); - field = new LogField("client_cipher_suite", "cqssc", LogField::STRING, &LogAccess::marshal_client_security_cipher_suite, + field = new LogField("client_cipher_suite", "cqssc", LogField::Type::STRING, &LogAccess::marshal_client_security_cipher_suite, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssc", field); - field = - new LogField("client_curve", "cqssu", LogField::STRING, &LogAccess::marshal_client_security_curve, &LogAccess::unmarshal_str); + field = new LogField("client_curve", "cqssu", LogField::Type::STRING, &LogAccess::marshal_client_security_curve, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssu", field); - field = - new LogField("client_group", "cqssg", LogField::STRING, &LogAccess::marshal_client_security_group, &LogAccess::unmarshal_str); + field = new LogField("client_group", "cqssg", LogField::Type::STRING, &LogAccess::marshal_client_security_group, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssg", field); - field = - new LogField("client_sec_alpn", "cqssa", LogField::STRING, &LogAccess::marshal_client_security_alpn, &LogAccess::unmarshal_str); + field = new LogField("client_sec_alpn", "cqssa", LogField::Type::STRING, &LogAccess::marshal_client_security_alpn, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cqssa", field); Ptr finish_status_map = make_ptr(new LogFieldAliasTable); finish_status_map->init(N_LOG_FINISH_CODE_TYPES, LOG_FINISH_FIN, "FIN", LOG_FINISH_INTR, "INTR", LOG_FINISH_TIMEOUT, "TIMEOUT"); - field = new LogField("client_finish_status_code", "cfsc", LogField::sINT, &LogAccess::marshal_client_finish_status_code, + field = new LogField("client_finish_status_code", "cfsc", LogField::Type::sINT, &LogAccess::marshal_client_finish_status_code, &LogAccess::unmarshal_finish_status, make_alias_map(finish_status_map)); global_field_list.add(field, false); field_symbol_hash.emplace("cfsc", field); - field = - new LogField("client_req_id", "crid", LogField::sINT, &LogAccess::marshal_client_req_id, &LogAccess::unmarshal_int_to_str); + field = new LogField("client_req_id", "crid", LogField::Type::sINT, &LogAccess::marshal_client_req_id, + &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("crid", field); - field = - new LogField("client_req_uuid", "cruuid", LogField::STRING, &LogAccess::marshal_client_req_uuid, &LogAccess::unmarshal_str); + field = new LogField("client_req_uuid", "cruuid", LogField::Type::STRING, &LogAccess::marshal_client_req_uuid, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("cruuid", field); - field = new LogField("client_rx_error_code", "crec", LogField::STRING, &LogAccess::marshal_client_rx_error_code, + field = new LogField("client_rx_error_code", "crec", LogField::Type::STRING, &LogAccess::marshal_client_rx_error_code, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("crec", field); - field = new LogField("client_tx_error_code", "ctec", LogField::STRING, &LogAccess::marshal_client_tx_error_code, + field = new LogField("client_tx_error_code", "ctec", LogField::Type::STRING, &LogAccess::marshal_client_tx_error_code, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("ctec", field); - field = new LogField("client_request_all_header_fields", "cqah", LogField::STRING, + field = new LogField("client_request_all_header_fields", "cqah", LogField::Type::STRING, &LogAccess::marshal_client_req_all_header_fields, &LogUtils::unmarshalMimeHdr); global_field_list.add(field, false); field_symbol_hash.emplace("cqah", field); // proxy -> client fields - field = new LogField("proxy_resp_content_type", "psct", LogField::STRING, &LogAccess::marshal_proxy_resp_content_type, + field = new LogField("proxy_resp_content_type", "psct", LogField::Type::STRING, &LogAccess::marshal_proxy_resp_content_type, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("psct", field); - field = new LogField("proxy_resp_reason_phrase", "prrp", LogField::STRING, &LogAccess::marshal_proxy_resp_reason_phrase, + field = new LogField("proxy_resp_reason_phrase", "prrp", LogField::Type::STRING, &LogAccess::marshal_proxy_resp_reason_phrase, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("prrp", field); - field = new LogField("proxy_resp_squid_len", "psql", LogField::sINT, &LogAccess::marshal_proxy_resp_squid_len, + field = new LogField("proxy_resp_squid_len", "psql", LogField::Type::sINT, &LogAccess::marshal_proxy_resp_squid_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("psql", field); - field = new LogField("proxy_resp_content_len", "pscl", LogField::sINT, &LogAccess::marshal_proxy_resp_content_len, + field = new LogField("proxy_resp_content_len", "pscl", LogField::Type::sINT, &LogAccess::marshal_proxy_resp_content_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pscl", field); - field = new LogField("proxy_resp_content_len_hex", "psch", LogField::sINT, &LogAccess::marshal_proxy_resp_content_len, + field = new LogField("proxy_resp_content_len_hex", "psch", LogField::Type::sINT, &LogAccess::marshal_proxy_resp_content_len, &LogAccess::unmarshal_int_to_str_hex); global_field_list.add(field, false); field_symbol_hash.emplace("psch", field); - field = new LogField("proxy_resp_status_code", "pssc", LogField::sINT, &LogAccess::marshal_proxy_resp_status_code, + field = new LogField("proxy_resp_status_code", "pssc", LogField::Type::sINT, &LogAccess::marshal_proxy_resp_status_code, &LogAccess::unmarshal_http_status); global_field_list.add(field, false); field_symbol_hash.emplace("pssc", field); - field = new LogField("proxy_response_status_code_setter", "prscs", LogField::STRING, &LogAccess::marshal_status_plugin_entry, - &LogAccess::unmarshal_str); + field = new LogField("proxy_response_status_code_setter", "prscs", LogField::Type::STRING, + &LogAccess::marshal_status_plugin_entry, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("prscs", field); - field = new LogField("proxy_resp_header_len", "pshl", LogField::sINT, &LogAccess::marshal_proxy_resp_header_len, + field = new LogField("proxy_resp_header_len", "pshl", LogField::Type::sINT, &LogAccess::marshal_proxy_resp_header_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pshl", field); - field = new LogField("proxy_finish_status_code", "pfsc", LogField::sINT, &LogAccess::marshal_proxy_finish_status_code, + field = new LogField("proxy_finish_status_code", "pfsc", LogField::Type::sINT, &LogAccess::marshal_proxy_finish_status_code, &LogAccess::unmarshal_finish_status, make_alias_map(finish_status_map)); global_field_list.add(field, false); field_symbol_hash.emplace("pfsc", field); @@ -710,58 +716,59 @@ Log::init_fields() SQUID_MISS_ERROR, "MISS_ERROR", SQUID_MISS_CACHE_BYPASS, "MISS_CACHE_BYPASS", SQUID_HIT_MISS_INVALID_ASSIGNED_CODE, "INVALID_CODE"); - field = new LogField("cache_result_code", "crc", LogField::sINT, &LogAccess::marshal_cache_result_code, + field = new LogField("cache_result_code", "crc", LogField::Type::sINT, &LogAccess::marshal_cache_result_code, &LogAccess::unmarshal_cache_code, make_alias_map(cache_code_map)); global_field_list.add(field, false); field_symbol_hash.emplace("crc", field); // Reuse the unmarshalling code from crc - field = new LogField("cache_result_subcode", "crsc", LogField::sINT, &LogAccess::marshal_cache_result_subcode, + field = new LogField("cache_result_subcode", "crsc", LogField::Type::sINT, &LogAccess::marshal_cache_result_subcode, &LogAccess::unmarshal_cache_code, make_alias_map(cache_subcode_map)); global_field_list.add(field, false); field_symbol_hash.emplace("crsc", field); - field = new LogField("cache_hit_miss", "chm", LogField::sINT, &LogAccess::marshal_cache_hit_miss, + field = new LogField("cache_hit_miss", "chm", LogField::Type::sINT, &LogAccess::marshal_cache_hit_miss, &LogAccess::unmarshal_cache_hit_miss, make_alias_map(cache_hit_miss_map)); global_field_list.add(field, false); field_symbol_hash.emplace("chm", field); - field = new LogField("proxy_response_all_header_fields", "psah", LogField::STRING, + field = new LogField("proxy_response_all_header_fields", "psah", LogField::Type::STRING, &LogAccess::marshal_proxy_resp_all_header_fields, &LogUtils::unmarshalMimeHdr); global_field_list.add(field, false); field_symbol_hash.emplace("psah", field); // proxy -> server fields - field = new LogField("proxy_req_header_len", "pqhl", LogField::sINT, &LogAccess::marshal_proxy_req_header_len, + field = new LogField("proxy_req_header_len", "pqhl", LogField::Type::sINT, &LogAccess::marshal_proxy_req_header_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqhl", field); - field = new LogField("proxy_req_squid_len", "pqql", LogField::sINT, &LogAccess::marshal_proxy_req_squid_len, + field = new LogField("proxy_req_squid_len", "pqql", LogField::Type::sINT, &LogAccess::marshal_proxy_req_squid_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqql", field); - field = new LogField("proxy_req_content_len", "pqcl", LogField::sINT, &LogAccess::marshal_proxy_req_content_len, + field = new LogField("proxy_req_content_len", "pqcl", LogField::Type::sINT, &LogAccess::marshal_proxy_req_content_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqcl", field); - field = new LogField("proxy_req_server_ip", "pqsi", LogField::IP, &LogAccess::marshal_proxy_req_server_ip, + field = new LogField("proxy_req_server_ip", "pqsi", LogField::Type::IP, &LogAccess::marshal_proxy_req_server_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqsi", field); - field = new LogField("proxy_req_server_port", "pqsp", LogField::sINT, &LogAccess::marshal_proxy_req_server_port, + field = new LogField("proxy_req_server_port", "pqsp", LogField::Type::sINT, &LogAccess::marshal_proxy_req_server_port, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqsp", field); - field = new LogField("next_hop_ip", "nhi", LogField::IP, &LogAccess::marshal_next_hop_ip, &LogAccess::unmarshal_ip_to_str); + field = new LogField("next_hop_ip", "nhi", LogField::Type::IP, &LogAccess::marshal_next_hop_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("nhi", field); - field = new LogField("next_hop_port", "nhp", LogField::IP, &LogAccess::marshal_next_hop_port, &LogAccess::unmarshal_int_to_str); + field = + new LogField("next_hop_port", "nhp", LogField::Type::IP, &LogAccess::marshal_next_hop_port, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("nhp", field); @@ -789,155 +796,159 @@ Log::init_fields() SquidHierarchyCode::TIMEOUT_TIMEOUT_SSL_PARENT_MISS, "TIMEOUT_TIMEOUT_SSL_PARENT_MISS", SquidHierarchyCode::INVALID_ASSIGNED_CODE, "INVALID_ASSIGNED_CODE"); - field = new LogField("proxy_hierarchy_route", "phr", LogField::sINT, &LogAccess::marshal_proxy_hierarchy_route, + field = new LogField("proxy_hierarchy_route", "phr", LogField::Type::sINT, &LogAccess::marshal_proxy_hierarchy_route, &LogAccess::unmarshal_hierarchy, make_alias_map(hierarchy_map)); global_field_list.add(field, false); field_symbol_hash.emplace("phr", field); - field = new LogField("proxy_host_name", "phn", LogField::STRING, &LogAccess::marshal_proxy_host_name, &LogAccess::unmarshal_str); + field = + new LogField("proxy_host_name", "phn", LogField::Type::STRING, &LogAccess::marshal_proxy_host_name, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("phn", field); - field = new LogField("proxy_host_ip", "phi", LogField::IP, &LogAccess::marshal_proxy_host_ip, &LogAccess::unmarshal_ip_to_str); + field = + new LogField("proxy_host_ip", "phi", LogField::Type::IP, &LogAccess::marshal_proxy_host_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("phi", field); - field = - new LogField("proxy_host_port", "php", LogField::sINT, &LogAccess::marshal_proxy_host_port, &LogAccess::unmarshal_int_to_str); + field = new LogField("proxy_host_port", "php", LogField::Type::sINT, &LogAccess::marshal_proxy_host_port, + &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("php", field); - field = new LogField("proxy_req_is_ssl", "pqssl", LogField::sINT, &LogAccess::marshal_proxy_req_is_ssl, + field = new LogField("proxy_req_is_ssl", "pqssl", LogField::Type::sINT, &LogAccess::marshal_proxy_req_is_ssl, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqssl", field); - field = new LogField("proxy_req_ssl_reused", "pqssr", LogField::sINT, &LogAccess::marshal_proxy_req_ssl_reused, + field = new LogField("proxy_req_ssl_reused", "pqssr", LogField::Type::sINT, &LogAccess::marshal_proxy_req_ssl_reused, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("pqssr", field); - field = new LogField("proxy_request_all_header_fields", "pqah", LogField::STRING, &LogAccess::marshal_proxy_req_all_header_fields, - &LogUtils::unmarshalMimeHdr); + field = new LogField("proxy_request_all_header_fields", "pqah", LogField::Type::STRING, + &LogAccess::marshal_proxy_req_all_header_fields, &LogUtils::unmarshalMimeHdr); global_field_list.add(field, false); field_symbol_hash.emplace("pqah", field); // server -> proxy fields - field = new LogField("server_host_ip", "shi", LogField::IP, &LogAccess::marshal_server_host_ip, &LogAccess::unmarshal_ip_to_str); + field = + new LogField("server_host_ip", "shi", LogField::Type::IP, &LogAccess::marshal_server_host_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("shi", field); - field = - new LogField("server_host_name", "shn", LogField::STRING, &LogAccess::marshal_server_host_name, &LogAccess::unmarshal_str); + field = new LogField("server_host_name", "shn", LogField::Type::STRING, &LogAccess::marshal_server_host_name, + &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("shn", field); - field = new LogField("server_resp_status_code", "sssc", LogField::sINT, &LogAccess::marshal_server_resp_status_code, + field = new LogField("server_resp_status_code", "sssc", LogField::Type::sINT, &LogAccess::marshal_server_resp_status_code, &LogAccess::unmarshal_http_status); global_field_list.add(field, false); field_symbol_hash.emplace("sssc", field); - field = new LogField("server_resp_content_len", "sscl", LogField::sINT, &LogAccess::marshal_server_resp_content_len, + field = new LogField("server_resp_content_len", "sscl", LogField::Type::sINT, &LogAccess::marshal_server_resp_content_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("sscl", field); - field = new LogField("server_resp_header_len", "sshl", LogField::sINT, &LogAccess::marshal_server_resp_header_len, + field = new LogField("server_resp_header_len", "sshl", LogField::Type::sINT, &LogAccess::marshal_server_resp_header_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("sshl", field); - field = new LogField("server_resp_squid_len", "ssql", LogField::sINT, &LogAccess::marshal_server_resp_squid_len, + field = new LogField("server_resp_squid_len", "ssql", LogField::Type::sINT, &LogAccess::marshal_server_resp_squid_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ssql", field); - field = new LogField("server_resp_http_version", "sshv", LogField::dINT, &LogAccess::marshal_server_resp_http_version, + field = new LogField("server_resp_http_version", "sshv", LogField::Type::dINT, &LogAccess::marshal_server_resp_http_version, &LogAccess::unmarshal_http_version); global_field_list.add(field, false); field_symbol_hash.emplace("sshv", field); - field = new LogField("milestones_csv", "mstsms", LogField::STRING, &LogAccess::marshal_milestones_csv, &LogAccess::unmarshal_str); + field = + new LogField("milestones_csv", "mstsms", LogField::Type::STRING, &LogAccess::marshal_milestones_csv, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("mstsms", field); - field = new LogField("server_resp_time", "stms", LogField::sINT, &LogAccess::marshal_server_resp_time_ms, + field = new LogField("server_resp_time", "stms", LogField::Type::sINT, &LogAccess::marshal_server_resp_time_ms, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("stms", field); - field = new LogField("server_resp_time_hex", "stmsh", LogField::sINT, &LogAccess::marshal_server_resp_time_ms, + field = new LogField("server_resp_time_hex", "stmsh", LogField::Type::sINT, &LogAccess::marshal_server_resp_time_ms, &LogAccess::unmarshal_int_to_str_hex); global_field_list.add(field, false); field_symbol_hash.emplace("stmsh", field); - field = new LogField("server_resp_time_fractional", "stmsf", LogField::sINT, &LogAccess::marshal_server_resp_time_ms, + field = new LogField("server_resp_time_fractional", "stmsf", LogField::Type::sINT, &LogAccess::marshal_server_resp_time_ms, &LogAccess::unmarshal_ttmsf); global_field_list.add(field, false); field_symbol_hash.emplace("stmsf", field); - field = new LogField("server_resp_time_sec", "sts", LogField::sINT, &LogAccess::marshal_server_resp_time_s, + field = new LogField("server_resp_time_sec", "sts", LogField::Type::sINT, &LogAccess::marshal_server_resp_time_s, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("sts", field); - field = new LogField("server_transact_count", "sstc", LogField::sINT, &LogAccess::marshal_server_transact_count, + field = new LogField("server_transact_count", "sstc", LogField::Type::sINT, &LogAccess::marshal_server_transact_count, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("sstc", field); - field = new LogField("server_unavailable_retry_count", "surc", LogField::sINT, &LogAccess::marshal_server_unavailable_retry_count, - &LogAccess::unmarshal_int_to_str); + field = new LogField("server_unavailable_retry_count", "surc", LogField::Type::sINT, + &LogAccess::marshal_server_unavailable_retry_count, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("surc", field); - field = new LogField("server_simple_retry_count", "ssrc", LogField::sINT, &LogAccess::marshal_server_simple_retry_count, + field = new LogField("server_simple_retry_count", "ssrc", LogField::Type::sINT, &LogAccess::marshal_server_simple_retry_count, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ssrc", field); - field = new LogField("server_connect_attempts", "sca", LogField::sINT, &LogAccess::marshal_server_connect_attempts, + field = new LogField("server_connect_attempts", "sca", LogField::Type::sINT, &LogAccess::marshal_server_connect_attempts, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("sca", field); - field = new LogField("origin_response_all_header_fields", "ssah", LogField::STRING, + field = new LogField("origin_response_all_header_fields", "ssah", LogField::Type::STRING, &LogAccess::marshal_server_resp_all_header_fields, &LogUtils::unmarshalMimeHdr); global_field_list.add(field, false); field_symbol_hash.emplace("ssah", field); - field = new LogField("cached_resp_status_code", "csssc", LogField::sINT, &LogAccess::marshal_cache_resp_status_code, + field = new LogField("cached_resp_status_code", "csssc", LogField::Type::sINT, &LogAccess::marshal_cache_resp_status_code, &LogAccess::unmarshal_http_status); global_field_list.add(field, false); field_symbol_hash.emplace("csssc", field); - field = new LogField("cached_resp_content_len", "csscl", LogField::sINT, &LogAccess::marshal_cache_resp_content_len, + field = new LogField("cached_resp_content_len", "csscl", LogField::Type::sINT, &LogAccess::marshal_cache_resp_content_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("csscl", field); - field = new LogField("cached_resp_header_len", "csshl", LogField::sINT, &LogAccess::marshal_cache_resp_header_len, + field = new LogField("cached_resp_header_len", "csshl", LogField::Type::sINT, &LogAccess::marshal_cache_resp_header_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("csshl", field); - field = new LogField("cached_resp_squid_len", "cssql", LogField::sINT, &LogAccess::marshal_cache_resp_squid_len, + field = new LogField("cached_resp_squid_len", "cssql", LogField::Type::sINT, &LogAccess::marshal_cache_resp_squid_len, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cssql", field); - field = new LogField("cached_resp_http_version", "csshv", LogField::dINT, &LogAccess::marshal_cache_resp_http_version, + field = new LogField("cached_resp_http_version", "csshv", LogField::Type::dINT, &LogAccess::marshal_cache_resp_http_version, &LogAccess::unmarshal_http_version); global_field_list.add(field, false); field_symbol_hash.emplace("csshv", field); - field = new LogField("cache_origin_response_all_header_fields", "cssah", LogField::STRING, + field = new LogField("cache_origin_response_all_header_fields", "cssah", LogField::Type::STRING, &LogAccess::marshal_cache_resp_all_header_fields, &LogUtils::unmarshalMimeHdr); global_field_list.add(field, false); field_symbol_hash.emplace("cssah", field); - field = new LogField("client_retry_after_time", "crat", LogField::sINT, &LogAccess::marshal_client_retry_after_time, + field = new LogField("client_retry_after_time", "crat", LogField::Type::sINT, &LogAccess::marshal_client_retry_after_time, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("crat", field); @@ -947,118 +958,119 @@ Log::init_fields() Ptr cache_write_code_map = make_ptr(new LogFieldAliasTable); cache_write_code_map->init(N_LOG_CACHE_WRITE_TYPES, LOG_CACHE_WRITE_NONE, "-", LOG_CACHE_WRITE_LOCK_MISSED, "WL_MISS", LOG_CACHE_WRITE_LOCK_ABORTED, "INTR", LOG_CACHE_WRITE_ERROR, "ERR", LOG_CACHE_WRITE_COMPLETE, "FIN"); - field = new LogField("cache_write_result", "cwr", LogField::sINT, &LogAccess::marshal_cache_write_code, + field = new LogField("cache_write_result", "cwr", LogField::Type::sINT, &LogAccess::marshal_cache_write_code, &LogAccess::unmarshal_cache_write_code, make_alias_map(cache_write_code_map)); global_field_list.add(field, false); field_symbol_hash.emplace("cwr", field); - field = new LogField("cache_write_transform_result", "cwtr", LogField::sINT, &LogAccess::marshal_cache_write_transform_code, + field = new LogField("cache_write_transform_result", "cwtr", LogField::Type::sINT, &LogAccess::marshal_cache_write_transform_code, &LogAccess::unmarshal_cache_write_code, make_alias_map(cache_write_code_map)); global_field_list.add(field, false); field_symbol_hash.emplace("cwtr", field); // other fields - field = new LogField("transfer_time_ms", "ttms", LogField::sINT, &LogAccess::marshal_transfer_time_ms, + field = new LogField("transfer_time_ms", "ttms", LogField::Type::sINT, &LogAccess::marshal_transfer_time_ms, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ttms", field); - field = new LogField("transfer_time_ms_hex", "ttmsh", LogField::sINT, &LogAccess::marshal_transfer_time_ms, + field = new LogField("transfer_time_ms_hex", "ttmsh", LogField::Type::sINT, &LogAccess::marshal_transfer_time_ms, &LogAccess::unmarshal_int_to_str_hex); global_field_list.add(field, false); field_symbol_hash.emplace("ttmsh", field); - field = new LogField("transfer_time_ms_fractional", "ttmsf", LogField::sINT, &LogAccess::marshal_transfer_time_ms, + field = new LogField("transfer_time_ms_fractional", "ttmsf", LogField::Type::sINT, &LogAccess::marshal_transfer_time_ms, &LogAccess::unmarshal_ttmsf); global_field_list.add(field, false); field_symbol_hash.emplace("ttmsf", field); - field = - new LogField("transfer_time_sec", "tts", LogField::sINT, &LogAccess::marshal_transfer_time_s, &LogAccess::unmarshal_int_to_str); + field = new LogField("transfer_time_sec", "tts", LogField::Type::sINT, &LogAccess::marshal_transfer_time_s, + &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("tts", field); - field = new LogField("file_size", "fsiz", LogField::sINT, &LogAccess::marshal_file_size, &LogAccess::unmarshal_int_to_str); + field = new LogField("file_size", "fsiz", LogField::Type::sINT, &LogAccess::marshal_file_size, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("fsiz", field); - field = new LogField("client_connection_id", "ccid", LogField::sINT, &LogAccess::marshal_client_http_connection_id, + field = new LogField("client_connection_id", "ccid", LogField::Type::sINT, &LogAccess::marshal_client_http_connection_id, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ccid", field); - field = new LogField("client_transaction_id", "ctid", LogField::sINT, &LogAccess::marshal_client_http_transaction_id, + field = new LogField("client_transaction_id", "ctid", LogField::Type::sINT, &LogAccess::marshal_client_http_transaction_id, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ctid", field); - field = new LogField("cache_read_retry_attempts", "crra", LogField::sINT, &LogAccess::marshal_cache_read_retries, + field = new LogField("cache_read_retry_attempts", "crra", LogField::Type::sINT, &LogAccess::marshal_cache_read_retries, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("crra", field); - field = new LogField("cache_write_retry_attempts", "cwra", LogField::sINT, &LogAccess::marshal_cache_write_retries, + field = new LogField("cache_write_retry_attempts", "cwra", LogField::Type::sINT, &LogAccess::marshal_cache_write_retries, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cwra", field); - field = new LogField("cache_collapsed_connection_success", "cccs", LogField::sINT, + field = new LogField("cache_collapsed_connection_success", "cccs", LogField::Type::sINT, &LogAccess::marshal_cache_collapsed_connection_success, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("cccs", field); - field = new LogField("client_transaction_priority_weight", "ctpw", LogField::sINT, + field = new LogField("client_transaction_priority_weight", "ctpw", LogField::Type::sINT, &LogAccess::marshal_client_http_transaction_priority_weight, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ctpw", field); - field = new LogField("client_transaction_priority_dependence", "ctpd", LogField::sINT, + field = new LogField("client_transaction_priority_dependence", "ctpd", LogField::Type::sINT, &LogAccess::marshal_client_http_transaction_priority_dependence, &LogAccess::unmarshal_int_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ctpd", field); - field = new LogField("proxy_protocol_version", "ppv", LogField::STRING, &LogAccess::marshal_proxy_protocol_version, + field = new LogField("proxy_protocol_version", "ppv", LogField::Type::STRING, &LogAccess::marshal_proxy_protocol_version, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("ppv", field); - field = new LogField("proxy_protocol_src_ip", "pps", LogField::IP, &LogAccess::marshal_proxy_protocol_src_ip, + field = new LogField("proxy_protocol_src_ip", "pps", LogField::Type::IP, &LogAccess::marshal_proxy_protocol_src_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ppsip", field); - field = new LogField("proxy_protocol_dst_ip", "ppd", LogField::IP, &LogAccess::marshal_proxy_protocol_dst_ip, + field = new LogField("proxy_protocol_dst_ip", "ppd", LogField::Type::IP, &LogAccess::marshal_proxy_protocol_dst_ip, &LogAccess::unmarshal_ip_to_str); global_field_list.add(field, false); field_symbol_hash.emplace("ppdip", field); - field = new LogField("proxy_protocol_authority", "ppa", LogField::IP, &LogAccess::marshal_proxy_protocol_authority, + field = new LogField("proxy_protocol_authority", "ppa", LogField::Type::IP, &LogAccess::marshal_proxy_protocol_authority, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("ppa", field); - field = new LogField("proxy_protocol_tls_cipher", "pptc", LogField::STRING, &LogAccess::marshal_proxy_protocol_tls_cipher, + field = new LogField("proxy_protocol_tls_cipher", "pptc", LogField::Type::STRING, &LogAccess::marshal_proxy_protocol_tls_cipher, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("pptc", field); - field = new LogField("proxy_protocol_tls_version", "pptv", LogField::STRING, &LogAccess::marshal_proxy_protocol_tls_version, + field = new LogField("proxy_protocol_tls_version", "pptv", LogField::Type::STRING, &LogAccess::marshal_proxy_protocol_tls_version, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("pptv", field); - field = new LogField("proxy_protocol_tls_group", "pptg", LogField::STRING, &LogAccess::marshal_proxy_protocol_tls_group, + field = new LogField("proxy_protocol_tls_group", "pptg", LogField::Type::STRING, &LogAccess::marshal_proxy_protocol_tls_group, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("pptg", field); - field = new LogField("version_build_number", "vbn", LogField::STRING, &LogAccess::marshal_version_build_number, + field = new LogField("version_build_number", "vbn", LogField::Type::STRING, &LogAccess::marshal_version_build_number, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("vbn", field); - field = new LogField("version_string", "vs", LogField::STRING, &LogAccess::marshal_version_string, &LogAccess::unmarshal_str); + field = + new LogField("version_string", "vs", LogField::Type::STRING, &LogAccess::marshal_version_string, &LogAccess::unmarshal_str); global_field_list.add(field, false); field_symbol_hash.emplace("vs", field); diff --git a/src/proxy/logging/LogAccess.cc b/src/proxy/logging/LogAccess.cc index 2bb9ed83cfb..50410190382 100644 --- a/src/proxy/logging/LogAccess.cc +++ b/src/proxy/logging/LogAccess.cc @@ -1075,6 +1075,7 @@ LogAccess::unmarshal_http_version(char **buf, char *dest, int len) DBG_UNMARSHAL_DEST_OVERRUN return -1; } + p += res2; int val_len = p - val_buf; if (val_len < len) { diff --git a/src/proxy/logging/LogBuffer.cc b/src/proxy/logging/LogBuffer.cc index eb69c37c1d0..13e63255d18 100644 --- a/src/proxy/logging/LogBuffer.cc +++ b/src/proxy/logging/LogBuffer.cc @@ -88,6 +88,16 @@ LogBufferHeader::fmt_printf() return addr; } +char * +LogBufferHeader::fmt_fieldtypes() +{ + char *addr = nullptr; + if (fmt_fieldtypes_offset) { + addr = reinterpret_cast(this) + fmt_fieldtypes_offset; + } + return addr; +} + char * LogBufferHeader::src_hostname() { @@ -384,6 +394,61 @@ LogBuffer::add_header_str(const char *str, char *buf_ptr, unsigned buf_len) return len; } +/*------------------------------------------------------------------------- + LogBuffer::add_field_type_schema + + Write the v3 LogFieldTypeSchema blob (field_count, then one LogField::Type + code per field, in field-list order) at buf_ptr. Returns bytes written, or 0 + if the field list is empty or the blob would not fit (caller then leaves + fmt_fieldtypes_offset at 0). + -------------------------------------------------------------------------*/ +// No tail padding, so type codes start right after the uint16_t field_count. +static_assert(sizeof(LogFieldTypeSchema) == sizeof(uint16_t), "LogFieldTypeSchema wire layout must not gain padding"); + +// LogField::Type is serialized directly as the wire code; pin the values so a +// reorder fails to compile rather than corrupting every v3 log. +static_assert(static_cast(LogField::Type::INVALID) == 0 && static_cast(LogField::Type::sINT) == 1 && + static_cast(LogField::Type::dINT) == 2 && static_cast(LogField::Type::STRING) == 3 && + static_cast(LogField::Type::IP) == 4 && static_cast(LogField::Type::N_TYPES) == 5, + "LogField::Type values are the v3 .blog wire codes and must not change"); + +unsigned +LogBuffer::add_field_type_schema(const LogFieldList *fieldlist, char *buf_ptr, unsigned buf_len) +{ + if (fieldlist == nullptr) { + return 0; + } + + unsigned field_count = 0; + for (LogField *f = fieldlist->first(); f != nullptr; f = fieldlist->next(f)) { + ++field_count; + } + if (field_count == 0) { + return 0; + } + + // field_count is published as a uint16_t; refuse to emit a truncated schema. + if (field_count > UINT16_MAX) { + return 0; + } + + unsigned blob_len = sizeof(LogFieldTypeSchema) + field_count; // one uint8_t code per field + if (blob_len > buf_len) { + return 0; + } + + LogFieldTypeSchema *schema = reinterpret_cast(buf_ptr); + schema->field_count = static_cast(field_count); + + uint8_t *codes = reinterpret_cast(buf_ptr) + sizeof(LogFieldTypeSchema); + unsigned i = 0; + for (LogField *f = fieldlist->first(); f != nullptr; f = fieldlist->next(f)) { + codes[i++] = static_cast(f->type()); + } + + return blob_len; +} + size_t LogBuffer::_add_buffer_header(const LogConfig *cfg) { @@ -392,10 +457,18 @@ LogBuffer::_add_buffer_header(const LogConfig *cfg) // // initialize the header // - LogFormat *fmt = m_owner->m_format.get(); + LogFormat *fmt = m_owner->m_format.get(); + + // Per-LogObject on-disk version (logging.yaml "binary_log_version"); v2 emits + // the pre-v3 layout. Clamp defensively against bad config. + unsigned version = m_owner->get_binary_log_version(); + if (!log_segment_version_supported(version)) { + version = LOG_SEGMENT_VERSION; + } + m_header = reinterpret_cast(m_buffer); m_header->cookie = LOG_SEGMENT_COOKIE; - m_header->version = LOG_SEGMENT_VERSION; + m_header->version = version; m_header->format_type = fmt->type(); m_header->entry_count = 0; m_header->low_timestamp = LogUtils::timestamp(); @@ -412,13 +485,16 @@ LogBuffer::_add_buffer_header(const LogConfig *cfg) // size of the buffer header. // - header_len = sizeof(LogBufferHeader); // at least ... + // Size the header to the version: a v2 segment omits the trailing + // fmt_fieldtypes_offset word, else version-sized reads misframe it. + header_len = log_buffer_header_size(version); - m_header->fmt_name_offset = 0; - m_header->fmt_fieldlist_offset = 0; - m_header->fmt_printf_offset = 0; - m_header->src_hostname_offset = 0; - m_header->log_filename_offset = 0; + m_header->fmt_name_offset = 0; + m_header->fmt_fieldlist_offset = 0; + m_header->fmt_printf_offset = 0; + m_header->src_hostname_offset = 0; + m_header->log_filename_offset = 0; + m_header->fmt_fieldtypes_offset = 0; if (fmt->name()) { m_header->fmt_name_offset = header_len; @@ -440,6 +516,18 @@ LogBuffer::_add_buffer_header(const LogConfig *cfg) m_header->log_filename_offset = header_len; header_len += add_header_str(m_owner->get_base_filename(), &m_buffer[header_len], m_size - header_len); } + // v3: publish the per-field types (paired with fmt_fieldlist) so a generic + // reader needs no ATS symbol->type table. Skipped for v2 and text logs. The + // schema leads with a uint16, so align its offset off the NUL-terminated + // header strings before writing. + if (version >= 3 && fmt->fieldlist()) { + size_t schema_off = INK_ALIGN(header_len, alignof(LogFieldTypeSchema)); + if (unsigned schema_len = add_field_type_schema(&fmt->field_list(), &m_buffer[schema_off], m_size - schema_off); + schema_len > 0) { + m_header->fmt_fieldtypes_offset = schema_off; + header_len = schema_off + schema_len; + } + } // update the rest of the header fields; make sure the header_len is // correctly aligned, so that the first record will start on a legal // alignment mark. @@ -772,16 +860,28 @@ LogBufferList::get() LogEntryHeader * LogBufferIterator::next() { - LogEntryHeader *ret_val = nullptr; - LogEntryHeader *entry = reinterpret_cast(m_next); - - if (entry) { - if (m_iter_entry_count < m_buffer_entry_count) { - m_next += entry->entry_len; - ++m_iter_entry_count; - ret_val = entry; - } + if (m_next == nullptr || m_iter_entry_count >= m_buffer_entry_count) { + return nullptr; } - return ret_val; + // The entry header must fit before we read entry_len. (m_next <= m_buffer_end + // holds: the ctor bounds the start and we only advance by validated lengths.) + if (static_cast(m_buffer_end - m_next) < sizeof(LogEntryHeader)) { + return nullptr; + } + + LogEntryHeader *entry = reinterpret_cast(m_next); + + // entry_len must cover its header (forward progress), stay in the segment, + // and be 8-byte aligned so the next entry's int64 reads stay aligned; guards + // a hostile entry_len in an untrusted .blog. + if (entry->entry_len < sizeof(LogEntryHeader) || entry->entry_len > static_cast(m_buffer_end - m_next) || + entry->entry_len % INK_MIN_ALIGN != 0) { + return nullptr; + } + + m_next += entry->entry_len; + ++m_iter_entry_count; + + return entry; } diff --git a/src/proxy/logging/LogField.cc b/src/proxy/logging/LogField.cc index f1397865542..8bd3c176aaa 100644 --- a/src/proxy/logging/LogField.cc +++ b/src/proxy/logging/LogField.cc @@ -261,7 +261,7 @@ LogField::LogField(const char *name, const char *symbol, Type type, MarshalFunc { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); ink_assert(m_marshal_func != (MarshalFunc) nullptr); m_time_field = (strcmp(m_symbol, "cqts") == 0 || strcmp(m_symbol, "cqth") == 0 || strcmp(m_symbol, "cqtq") == 0 || @@ -287,7 +287,7 @@ LogField::LogField(const char *name, const char *symbol, Type type, MarshalFunc { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); ink_assert(m_marshal_func != (MarshalFunc) nullptr); ink_assert(m_alias_map); @@ -316,7 +316,7 @@ LogField::LogField(const char *name, const char *symbol, Type type, CustomMarsha { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); m_time_field = (strcmp(m_symbol, "cqts") == 0 || strcmp(m_symbol, "cqth") == 0 || strcmp(m_symbol, "cqtq") == 0 || strcmp(m_symbol, "cqtn") == 0 || strcmp(m_symbol, "cqtd") == 0 || strcmp(m_symbol, "cqtt") == 0); @@ -365,7 +365,7 @@ LogField::milestones_from_m_name(TSMilestonesType *ms1, TSMilestonesType *ms2) LogField::LogField(const char *field, Container container) : m_name(ats_strdup(field)), m_symbol(ats_strdup(container_names[container])), - m_type(LogField::STRING), + m_type(LogField::Type::STRING), m_container(container), m_marshal_func(nullptr), m_unmarshal_func(nullptr), @@ -380,7 +380,7 @@ LogField::LogField(const char *field, Container container) { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); m_time_field = (strcmp(m_symbol, "cqts") == 0 || strcmp(m_symbol, "cqth") == 0 || strcmp(m_symbol, "cqtq") == 0 || strcmp(m_symbol, "cqtn") == 0 || strcmp(m_symbol, "cqtd") == 0 || strcmp(m_symbol, "cqtt") == 0); @@ -402,7 +402,7 @@ LogField::LogField(const char *field, Container container) case ICFG: m_unmarshal_func = &(LogAccess::unmarshal_int_to_str); - m_type = LogField::sINT; + m_type = LogField::Type::sINT; break; case RECORD: @@ -415,7 +415,7 @@ LogField::LogField(const char *field, Container container) Note("Invalid milestone name in LogField ctor: %s", m_name); } m_unmarshal_func = &(LogAccess::unmarshal_int_to_str); - m_type = LogField::sINT; + m_type = LogField::Type::sINT; break; case MSDMS: { @@ -424,7 +424,7 @@ LogField::LogField(const char *field, Container container) Note("Invalid milestone range in LogField ctor: %s", m_name); } m_unmarshal_func = &(LogAccess::unmarshal_milestone_diff); - m_type = LogField::sINT; + m_type = LogField::Type::sINT; break; } @@ -437,7 +437,7 @@ LogField::LogField(const char *symbol, std::vector header_fields, s std::optional fallback_default) : m_name(ats_strdup(symbol)), m_symbol(ats_strdup(symbol)), - m_type(LogField::STRING), + m_type(LogField::Type::STRING), m_container(NO_CONTAINER), m_marshal_func(nullptr), m_unmarshal_func(&(LogAccess::unmarshal_str)), @@ -455,7 +455,7 @@ LogField::LogField(const char *symbol, std::vector header_fields, s { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); ink_assert(!m_fallback_header_fields.empty()); ink_assert(!(m_fallback_field && m_fallback_default.has_value())); } @@ -484,7 +484,7 @@ LogField::LogField(const LogField &rhs) { ink_assert(m_name != nullptr); ink_assert(m_symbol != nullptr); - ink_assert(m_type >= 0 && m_type < N_TYPES); + ink_assert(m_type > Type::INVALID && m_type < Type::N_TYPES); } /*------------------------------------------------------------------------- @@ -763,14 +763,15 @@ LogField::unmarshal(char **buf, char *dest, int len, LogEscapeType escape_type) void LogField::display(FILE *fd) { - static const char *names[LogField::N_TYPES] = {"sINT", "dINT", "STR", "IP"}; + // Indexed by LogField::Type; index 0 is the reserved INVALID slot. + static const char *names[static_cast(LogField::Type::N_TYPES)] = {"INVALID", "sINT", "dINT", "STR", "IP"}; if (is_field_fallback()) { - fprintf(fd, " %30s %10s %5s\n", m_name, "fallback", names[m_type]); + fprintf(fd, " %30s %10s %5s\n", m_name, "fallback", names[static_cast(m_type)]); return; } - fprintf(fd, " %30s %10s %5s\n", m_name, m_symbol, names[m_type]); + fprintf(fd, " %30s %10s %5s\n", m_name, m_symbol, names[static_cast(m_type)]); } /*------------------------------------------------------------------------- @@ -1019,7 +1020,7 @@ LogFieldList::add(LogField *field, bool copy) m_field_list.enqueue(field); } - if (field->type() == LogField::sINT) { + if (field->type() == LogField::Type::sINT) { m_marshal_len += INK_MIN_ALIGN; } } @@ -1029,7 +1030,7 @@ LogFieldList::remove(LogField *field) { ink_assert(field != nullptr); - if (field->type() == LogField::sINT) { + if (field->type() == LogField::Type::sINT) { m_marshal_len -= INK_MIN_ALIGN; } m_field_list.remove(field); @@ -1074,7 +1075,7 @@ LogFieldList::marshal_len(LogAccess *lad) { int bytes = 0; for (LogField *f = first(); f; f = next(f)) { - if (f->type() != LogField::sINT) { + if (f->type() != LogField::Type::sINT) { const int len = f->marshal_len(lad); ink_release_assert(len >= INK_MIN_ALIGN); bytes += len; diff --git a/src/proxy/logging/LogFile.cc b/src/proxy/logging/LogFile.cc index cbc2c4b910c..682f04f295c 100644 --- a/src/proxy/logging/LogFile.cc +++ b/src/proxy/logging/LogFile.cc @@ -518,18 +518,16 @@ LogFile::write_ascii_logbuffer(LogBufferHeader *buffer_header, int fd, const cha char *fieldlist_str; char *printf_str; - switch (buffer_header->version) { - case LOG_SEGMENT_VERSION: - format_type = static_cast(buffer_header->format_type); - + // v3 is identical to v2 for ASCII output: it still uses fmt_fieldlist and + // fmt_printf; the extra field-type schema is additive and ignored here. + if (log_segment_version_supported(buffer_header->version)) { + format_type = static_cast(buffer_header->format_type); fieldlist_str = buffer_header->fmt_fieldlist(); printf_str = buffer_header->fmt_printf(); - break; - - default: + } else { Note("Invalid LogBuffer version %d in write_ascii_logbuffer; " - "current version is %d", - buffer_header->version, LOG_SEGMENT_VERSION); + "supported versions are %d-%d", + buffer_header->version, LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION); return 0; } @@ -584,17 +582,16 @@ LogFile::write_ascii_logbuffer3(LogBufferHeader *buffer_header, const char *alt_ char *printf_str; char *ascii_buffer; - switch (buffer_header->version) { - case LOG_SEGMENT_VERSION: + // Same v2/v3 handling as write_ascii_logbuffer: the v3 schema is additive + // and not needed for ASCII output. + if (log_segment_version_supported(buffer_header->version)) { format_type = static_cast(buffer_header->format_type); fieldlist_str = buffer_header->fmt_fieldlist(); printf_str = buffer_header->fmt_printf(); - break; - - default: + } else { Note("Invalid LogBuffer version %d in write_ascii_logbuffer; " - "current version is %d", - buffer_header->version, LOG_SEGMENT_VERSION); + "supported versions are %d-%d", + buffer_header->version, LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION); return 0; } diff --git a/src/proxy/logging/LogFilter.cc b/src/proxy/logging/LogFilter.cc index 74fbc377e76..9579a21881f 100644 --- a/src/proxy/logging/LogFilter.cc +++ b/src/proxy/logging/LogFilter.cc @@ -171,24 +171,24 @@ LogFilter::parse(const char *name, Action action, const char *condition) LogFilter *filter; switch (field_type) { - case LogField::sINT: + case LogField::Type::sINT: filter = new LogFilterInt(name, logfield.get(), action, oper, val_str); break; - case LogField::dINT: + case LogField::Type::dINT: Error("Invalid field type (double int); cannot create filter '%s'", name); return nullptr; - case LogField::STRING: + case LogField::Type::STRING: filter = new LogFilterString(name, logfield.get(), action, oper, val_str); break; - case LogField::IP: + case LogField::Type::IP: filter = new LogFilterIP(name, logfield.get(), action, oper, val_str); break; default: - Error("Unknown logging field type %d; cannot create filter '%s'", field_type, name); + Error("Unknown logging field type %d; cannot create filter '%s'", static_cast(field_type), name); return nullptr; } diff --git a/src/proxy/logging/LogFormat.cc b/src/proxy/logging/LogFormat.cc index 1864c0189b9..8313b0963d6 100644 --- a/src/proxy/logging/LogFormat.cc +++ b/src/proxy/logging/LogFormat.cc @@ -485,7 +485,7 @@ LogFormat::parse_symbol_string(const char *symbol_string, LogFieldList *field_li Note("Invalid field symbol %s used in aggregate " "operation", name); - } else if (f->type() != LogField::sINT) { + } else if (f->type() != LogField::Type::sINT) { Note("Only single integer field types may be aggregated"); } else { LogField *new_f = new LogField(*f); diff --git a/src/proxy/logging/LogObject.cc b/src/proxy/logging/LogObject.cc index 03e1316a17c..bb9586740b7 100644 --- a/src/proxy/logging/LogObject.cc +++ b/src/proxy/logging/LogObject.cc @@ -102,10 +102,11 @@ LogBufferManager::preproc_buffers(LogBufferSink *sink) LogObject::LogObject(LogConfig *cfg, const LogFormat *format, const char *log_dir, const char *basename, LogFileFormat file_format, const char *header, Log::RollingEnabledValues rolling_enabled, int flush_threads, int rolling_interval_sec, int rolling_offset_hr, int rolling_size_mb, bool /* auto_created ATS_UNUSED */, int rolling_max_count, - int rolling_min_count, bool reopen_after_rolling, int pipe_buffer_size, bool fast) + int rolling_min_count, bool reopen_after_rolling, int pipe_buffer_size, bool fast, unsigned binary_log_version) : m_alt_filename(nullptr), m_flags(0), m_signature(0), + m_binary_log_version(binary_log_version), m_flush_threads(flush_threads), m_rolling_interval_sec(rolling_interval_sec), m_rolling_offset_hr(rolling_offset_hr), diff --git a/src/proxy/logging/YamlLogConfig.cc b/src/proxy/logging/YamlLogConfig.cc index ee88b2f724c..eb76226db74 100644 --- a/src/proxy/logging/YamlLogConfig.cc +++ b/src/proxy/logging/YamlLogConfig.cc @@ -133,6 +133,7 @@ std::set valid_log_object_keys = {"filename", "rolling_max_count", "rolling_allow_empty", "pipe_buffer_size", + "binary_log_version", "fast"}; LogObject * @@ -233,11 +234,26 @@ YamlLogConfig::decodeLogObject(const YAML::Node &node) } } + // On-disk binary segment version (default: current). 2 emits the pre-v3 + // layout for parsers that don't yet understand v3; binary logs only. + int binary_log_version = LOG_SEGMENT_VERSION; + if (node["binary_log_version"]) { + binary_log_version = node["binary_log_version"].as(); + if (!log_segment_version_supported(binary_log_version)) { + Warning("Invalid binary_log_version '%d' (supported %d-%d); using %d", binary_log_version, LOG_SEGMENT_VERSION_MIN_SUPPORTED, + LOG_SEGMENT_VERSION, LOG_SEGMENT_VERSION); + binary_log_version = LOG_SEGMENT_VERSION; + } else if (file_type != LOG_FILE_BINARY) { + Warning("binary_log_version only applies to binary logs; ignoring for this object"); + } + } + auto logObject = new LogObject(cfg, fmt, cfg->logfile_dir, filename.c_str(), file_type, header.c_str(), static_cast(obj_rolling_enabled), cfg->preproc_threads, obj_rolling_interval_sec, obj_rolling_offset_hr, obj_rolling_size_mb, /* auto_created */ false, /* rolling_max_count */ obj_rolling_max_count, /* rolling_min_count */ obj_rolling_min_count, - /* reopen_after_rolling */ obj_rolling_allow_empty > 0, pipe_buffer_size, fast); + /* reopen_after_rolling */ obj_rolling_allow_empty > 0, pipe_buffer_size, fast, + static_cast(binary_log_version)); // Generate LogDeletingInfo entry for later use std::string ext; diff --git a/src/proxy/logging/unit-tests/test_LogBuffer.cc b/src/proxy/logging/unit-tests/test_LogBuffer.cc new file mode 100644 index 00000000000..4c019e78900 --- /dev/null +++ b/src/proxy/logging/unit-tests/test_LogBuffer.cc @@ -0,0 +1,178 @@ +/** @file + + Unit tests for the self-describing v3 binary log format (LogBuffer v3). + + Covers the core (in-tree) pieces: that each field's LogField::Type matches + its on-wire framing, the marshal/unmarshal INT round-trip, the writer's + field-list-to-schema mapping, and the version-aware header sizing. The JSON + reference decoder lives in traffic_logcat and is tested by test_LogEntryJson. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include + +#include "proxy/logging/LogBuffer.h" +#include "proxy/logging/LogAccess.h" +#include "proxy/logging/LogField.h" +#include "proxy/logging/LogFormat.h" +#include "proxy/logging/Log.h" + +#include "tscore/ink_align.h" + +#include +#include +#include +#include + +TEST_CASE("LogField::Type reflects each field's on-wire framing", "[logging][v3]") +{ + Log::init_fields(); + + // Each field's declared type must match its on-wire framing, since the schema + // serializes Type directly: pssc/crc single ints, cqu string, chi IP, sshv a + // pair (HTTP version), ppv a string (after the sINT/dINT type fixes). + LogFormat fmt("v3wt", "% % % % % %"); + REQUIRE(fmt.valid()); + + std::vector types; + for (LogField *f = fmt.field_list().first(); f != nullptr; f = fmt.field_list().next(f)) { + types.push_back(f->type()); + } + + REQUIRE(types == (std::vector{LogField::Type::sINT, LogField::Type::sINT, LogField::Type::STRING, + LogField::Type::IP, LogField::Type::dINT, LogField::Type::STRING})); +} + +TEST_CASE("marshal_int / unmarshal_int round-trip", "[logging][v3]") +{ + alignas(int64_t) char buf[INK_MIN_ALIGN] = {}; + LogAccess::marshal_int(buf, 0x0102030405060708LL); + + char *p = buf; + CHECK(LogAccess::unmarshal_int(&p) == 0x0102030405060708LL); + CHECK(p == buf + INK_MIN_ALIGN); +} + +TEST_CASE("v3 schema maps each field to its type in field-list order", "[logging][v3]") +{ + Log::init_fields(); + + // chi -> IP, cqu -> STRING, pssc -> sINT (see Log::init_fields). + LogFormat fmt("v3test", "% % %"); + REQUIRE(fmt.valid()); + + const LogFieldList &fields = fmt.field_list(); + std::vector symbols; + std::vector types; + for (LogField *f = fields.first(); f != nullptr; f = fields.next(f)) { + symbols.emplace_back(f->symbol()); + types.push_back(f->type()); + } + + REQUIRE(symbols == (std::vector{"chi", "cqu", "pssc"})); + REQUIRE(types == (std::vector{LogField::Type::IP, LogField::Type::STRING, LogField::Type::sINT})); +} + +TEST_CASE("log_buffer_header_size sizes the header per version", "[logging][v3]") +{ + CHECK(log_buffer_header_size(2) == offsetof(LogBufferHeader, fmt_fieldtypes_offset)); + CHECK(log_buffer_header_size(3) == sizeof(LogBufferHeader)); + CHECK(log_buffer_header_size(1) == 0); // below the supported range + CHECK(log_buffer_header_size(4) == 0); // above the supported range +} + +TEST_CASE("LogBufferIterator walks a well-formed segment then stops", "[logging][v3]") +{ + alignas(8) char storage[256] = {}; + auto *h = reinterpret_cast(storage); + h->version = LOG_SEGMENT_VERSION; + h->data_offset = sizeof(LogBufferHeader); + h->entry_count = 1; + + auto *e = reinterpret_cast(storage + h->data_offset); + const uint32_t len = INK_ALIGN_DEFAULT(sizeof(LogEntryHeader) + INK_MIN_ALIGN); + e->entry_len = len; + h->byte_count = h->data_offset + len; + + LogBufferIterator iter(h); + CHECK(iter.next() == e); + CHECK(iter.next() == nullptr); +} + +TEST_CASE("LogBufferIterator rejects an out-of-segment data_offset", "[logging][v3]") +{ + // A corrupt/hostile data_offset past the segment must not let next() + // dereference outside the buffer (.blog read by logcat/logstats is untrusted). + alignas(8) char storage[256] = {}; + auto *h = reinterpret_cast(storage); + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(storage); + h->entry_count = 1; + h->data_offset = sizeof(storage) + 64; // points past the segment + + LogBufferIterator iter(h); + CHECK(iter.next() == nullptr); +} + +TEST_CASE("LogBufferIterator stops on an entry_len that overruns the segment", "[logging][v3]") +{ + alignas(8) char storage[256] = {}; + auto *h = reinterpret_cast(storage); + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(storage); + h->entry_count = 1; + h->data_offset = sizeof(LogBufferHeader); + + auto *e = reinterpret_cast(storage + h->data_offset); + e->entry_len = sizeof(storage); // from data_offset this runs past byte_count + + LogBufferIterator iter(h); + CHECK(iter.next() == nullptr); +} + +TEST_CASE("LogBufferIterator rejects a misaligned data_offset", "[logging][v3]") +{ + // A non-8-aligned data_offset would make the entry's int64 reads misaligned. + alignas(8) char storage[256] = {}; + auto *h = reinterpret_cast(storage); + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(storage); + h->entry_count = 1; + h->data_offset = sizeof(LogBufferHeader) + 4; // in range but not 8-aligned + + LogBufferIterator iter(h); + CHECK(iter.next() == nullptr); +} + +TEST_CASE("LogBufferIterator stops on a misaligned entry_len", "[logging][v3]") +{ + alignas(8) char storage[256] = {}; + auto *h = reinterpret_cast(storage); + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(storage); + h->entry_count = 1; + h->data_offset = sizeof(LogBufferHeader); + + auto *e = reinterpret_cast(storage + h->data_offset); + e->entry_len = sizeof(LogEntryHeader) + 4; // >= header and in range, but not 8-aligned + + LogBufferIterator iter(h); + CHECK(iter.next() == nullptr); +} diff --git a/src/traffic_logcat/CMakeLists.txt b/src/traffic_logcat/CMakeLists.txt index 17f835a9ca4..25fc3d81de4 100644 --- a/src/traffic_logcat/CMakeLists.txt +++ b/src/traffic_logcat/CMakeLists.txt @@ -15,8 +15,17 @@ # ####################### -add_executable(traffic_logcat logcat.cc) +add_executable(traffic_logcat logcat.cc LogEntryJson.cc) target_link_libraries(traffic_logcat PRIVATE ts::logging ts::tscore ts::diagsconfig ts::inkevent) install(TARGETS traffic_logcat) +if(BUILD_TESTING) + add_executable(test_LogEntryJson unit-tests/test_LogEntryJson.cc LogEntryJson.cc) + target_include_directories(test_LogEntryJson PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_link_libraries( + test_LogEntryJson PRIVATE ts::logging ts::configmanager ts::inkevent records Catch2::Catch2WithMain + ) + add_catch2_test(NAME test_LogEntryJson COMMAND test_LogEntryJson) +endif() + clang_tidy_check(traffic_logcat) diff --git a/src/traffic_logcat/LogEntryJson.cc b/src/traffic_logcat/LogEntryJson.cc new file mode 100644 index 00000000000..32410abfb8a --- /dev/null +++ b/src/traffic_logcat/LogEntryJson.cc @@ -0,0 +1,247 @@ +/** @file + + Reference decoder for the self-describing v3 binary log format. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "LogEntryJson.h" + +#include "proxy/logging/LogBuffer.h" +#include "proxy/logging/LogAccess.h" +#include "proxy/logging/LogField.h" + +// Deliberately not including the global field table (proxy/logging/Log.h): +// decoding must depend only on the segment's schema (see LogEntryJson.h). + +#include "tscore/ink_inet.h" +#include "tscore/ink_align.h" + +#include +#include +#include +#include + +namespace +{ +// fmt_fieldlist symbols are comma-separated (e.g. "chi,cqu,pssc"); tolerate +// spaces too. +bool +is_field_sep(char c) +{ + return c == ',' || c == ' '; +} +} // namespace + +int +log_entry_to_json(LogEntryHeader *entry, LogBufferHeader *header, char *buf, int buf_len) +{ + if (entry == nullptr || header == nullptr || buf == nullptr || buf_len <= 0) { + return -1; + } + + // [seg_start, seg_end) is the only readable region (byte_count == segment size). + char *seg_start = reinterpret_cast(header); + char *seg_end = seg_start + header->byte_count; + + // v3 decode needs the field-type schema and the symbols. + char *schema_blob = header->fmt_fieldtypes(); + char *symbols = header->fmt_fieldlist(); + if (schema_blob == nullptr || symbols == nullptr) { + return -1; + } + // Both must lie within the segment, including the fixed schema prefix. + if (schema_blob < seg_start || schema_blob + sizeof(LogFieldTypeSchema) > seg_end || symbols < seg_start || symbols >= seg_end) { + return -1; + } + + // field_count is a uint16 that may sit at an unaligned offset (the writer + // places the schema right after the NUL-terminated header strings), so read + // it byte-wise rather than through the struct. + uint16_t fc16 = 0; + memcpy(&fc16, schema_blob, sizeof(fc16)); + const unsigned field_count = fc16; + const uint8_t *codes = reinterpret_cast(schema_blob) + sizeof(LogFieldTypeSchema); + if (reinterpret_cast(codes) + field_count > seg_end) { + return -1; + } + + // field_count must match the symbol count; a mismatch is a corrupt segment. + unsigned symbol_count = 0; + bool in_token = false; + for (const char *p = symbols; p < seg_end && *p != '\0'; ++p) { + if (is_field_sep(*p)) { + in_token = false; + } else if (!in_token) { + in_token = true; + ++symbol_count; + } + } + if (symbol_count != field_count) { + return -1; + } + + // Clamp value reads to this entry (or the segment, if entry_len is bogus). + char *entry_start = reinterpret_cast(entry); + if (entry_start < seg_start || entry_start + sizeof(LogEntryHeader) > seg_end) { + return -1; + } + char *read_from = entry_start + sizeof(LogEntryHeader); + char *read_end = entry_start + entry->entry_len; + if (read_end < read_from || read_end > seg_end) { + read_end = seg_end; + } + + int written = 0; + + // Append n bytes, keeping one byte in reserve for the trailing NUL. + auto put = [&](const char *src, int n) -> bool { + if (n < 0 || written + n >= buf_len) { + return false; + } + memcpy(buf + written, src, n); + written += n; + return true; + }; + auto put_ch = [&](char c) -> bool { return put(&c, 1); }; + + if (!put_ch('{')) { + return -1; + } + + const char *sym = symbols; + for (unsigned i = 0; i < field_count; ++i) { + // Next symbol token (comma-separated in fmt_fieldlist), bounded by the segment. + while (sym < seg_end && is_field_sep(*sym)) { + ++sym; + } + const char *sym_start = sym; + while (sym < seg_end && *sym != '\0' && !is_field_sep(*sym)) { + ++sym; + } + int sym_len = static_cast(sym - sym_start); + + if (i > 0 && !put_ch(',')) { + return -1; + } + if (!put_ch('"') || !put(sym_start, sym_len) || !put_ch('"') || !put_ch(':')) { + return -1; + } + + // Dispatch on the framing type only, never the symbol; no field semantics + // (see the contract in LogEntryJson.h). + switch (static_cast(codes[i])) { + case LogField::Type::sINT: { + if (read_from + INK_MIN_ALIGN > read_end) { + return -1; + } + int64_t v = LogAccess::unmarshal_int(&read_from); + char num[24]; // INT64_MIN is 20 digits + sign + NUL + int n = snprintf(num, sizeof(num), "%" PRId64, v); + if (!put(num, n)) { + return -1; + } + break; + } + case LogField::Type::STRING: { + // NUL-terminated, 8-byte padded; require the terminator and padded field + // within the entry. + char *nul = static_cast(memchr(read_from, '\0', static_cast(read_end - read_from))); + if (nul == nullptr) { + return -1; + } + const char *s = read_from; + size_t padded_len = INK_ALIGN_DEFAULT(static_cast(nul - read_from) + 1); + if (read_from + padded_len > read_end) { + return -1; + } + read_from += padded_len; + if (!put_ch('"')) { + return -1; + } + for (const char *p = s; p < nul; ++p) { + // Minimal JSON escaping for structural characters. + if (*p == '"' || *p == '\\') { + if (!put_ch('\\')) { + return -1; + } + } + if (!put_ch(*p)) { + return -1; + } + } + if (!put_ch('"')) { + return -1; + } + break; + } + case LogField::Type::IP: { + // uint16 family + family-sized address, 8-byte padded. + if (read_from + sizeof(LogFieldIp) > read_end) { + return -1; + } + const LogFieldIp *ip = reinterpret_cast(read_from); + size_t len = sizeof(LogFieldIp); + if (ip->_family == AF_INET) { + len = sizeof(LogFieldIp4); + } else if (ip->_family == AF_INET6) { + len = sizeof(LogFieldIp6); + } else if (ip->_family == AF_UNIX) { + len = sizeof(LogFieldUn); + } + if (read_from + INK_ALIGN_DEFAULT(len) > read_end) { + return -1; + } + char ip_str[INET6_ADDRSTRLEN + 1] = {0}; + int n = LogAccess::unmarshal_ip_to_str(&read_from, ip_str, sizeof(ip_str) - 1); + if (n < 0) { + return -1; + } + if (!put_ch('"') || !put(ip_str, n) || !put_ch('"')) { + return -1; + } + break; + } + case LogField::Type::dINT: { + // Two int64s (16 bytes), rendered as a JSON array; meaning is the consumer's. + if (read_from + 2 * INK_MIN_ALIGN > read_end) { + return -1; + } + int64_t first = LogAccess::unmarshal_int(&read_from); + int64_t second = LogAccess::unmarshal_int(&read_from); + char num[48]; // two int64s, brackets, comma, NUL + int n = snprintf(num, sizeof(num), "[%" PRId64 ",%" PRId64 "]", first, second); + if (!put(num, n)) { + return -1; + } + break; + } + default: + // INVALID or an unknown future type: length unknown, so we can't advance + // safely. Bail rather than desync the entry. + return -1; + } + } + + if (!put_ch('}')) { + return -1; + } + buf[written] = '\0'; + return written; +} diff --git a/src/traffic_logcat/LogEntryJson.h b/src/traffic_logcat/LogEntryJson.h new file mode 100644 index 00000000000..7c26c1e46a4 --- /dev/null +++ b/src/traffic_logcat/LogEntryJson.h @@ -0,0 +1,48 @@ +/** @file + + Reference decoder for the self-describing v3 binary log format. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +struct LogEntryHeader; +struct LogBufferHeader; + +/** Self-describing decode of a single v3 binary-log entry to JSON. + + The reference an out-of-tree decoder mirrors against the v3 spec + (doc/developer-guide/logging-architecture/binary-log-v3-format.en.rst): + dispatches purely on the schema's LogField::Type codes, with no global field + table or symbol->type map. + + Framing, not semantics: each value is emitted in its type's generic form + (sINT/dINT as JSON number(s), STRING as text, IP as address string). Do NOT + render field semantics here -- alias maps ("TCP_HIT"), date/hex variants, + slicing, or anything symbol-specific; the symbol is only the JSON key. That + is what keeps this a faithful file-only reference. + + The segment may be untrusted, so every read is bounded; a malformed segment + yields -1 rather than an out-of-bounds read. + + @return bytes written to @a buf (excluding the trailing NUL), or -1 on + error, on a v2 segment lacking the schema, or on insufficient space. +*/ +int log_entry_to_json(LogEntryHeader *entry, LogBufferHeader *header, char *buf, int buf_len); diff --git a/src/traffic_logcat/logcat.cc b/src/traffic_logcat/logcat.cc index 95857696b7c..70d586d5da2 100644 --- a/src/traffic_logcat/logcat.cc +++ b/src/traffic_logcat/logcat.cc @@ -45,6 +45,8 @@ #include "proxy/logging/LogUtils.h" #include "proxy/logging/Log.h" +#include "LogEntryJson.h" + namespace { @@ -54,6 +56,7 @@ int follow_flag = 0; int clf_flag = 0; int elf_flag = 0; int elf2_flag = 0; +int json_flag = 0; int auto_filenames = 0; int overwrite_existing_file = 0; char output_file[1024]; @@ -69,6 +72,7 @@ const ArgumentDescription argument_descriptions[] = { {"debug_tags", 'T', "Colon-Separated Debug Tags", "S1023", error_tags, NULL, NULL}, {"overwrite_output", 'w', "Overwrite existing output file(s)", "T", &overwrite_existing_file, NULL, NULL}, {"elf2", '2', "Convert to Extended2 Logging Format", "T", &elf2_flag, NULL, NULL}, + {"json", 'j', "Convert v3 binary logs to JSON", "T", &json_flag, NULL, NULL}, HELP_ARGUMENT_DESCRIPTION(), VERSION_ARGUMENT_DESCRIPTION(), RUNROOT_ARGUMENT_DESCRIPTION() @@ -120,6 +124,34 @@ follow_rotate(const char *input_file, ino_t old_inode_num) } } +// Emit every entry of a v3 segment as a line of JSON. v2 segments lack the +// field-type schema needed for self-describing decode, so they are skipped +// with a note. Returns the number of entries written. +int +write_json_logbuffer(LogBufferHeader *header, int out_fd) +{ + if (header->fmt_fieldtypes() == nullptr) { + fprintf(stderr, "JSON output requires a v3 binary log with a field-type schema; skipping segment.\n"); + return 0; + } + + LogBufferIterator iter(header); + LogEntryHeader *entry; + char line[LOG_MAX_FORMATTED_LINE]; + int count = 0; + + while ((entry = iter.next()) != nullptr) { + int n = log_entry_to_json(entry, header, line, sizeof(line) - 1); + if (n > 0) { + line[n] = '\n'; + if (write(out_fd, line, n + 1) > 0) { + ++count; + } + } + } + return count; +} + int process_file(int in_fd, int out_fd) { @@ -136,7 +168,6 @@ process_file(int in_fd, int out_fd) // cookie and the version number. // unsigned first_read_size = sizeof(uint32_t) + sizeof(uint32_t); - unsigned header_size = sizeof(LogBufferHeader); LogBufferHeader *header = (LogBufferHeader *)&buffer[0]; nread = read(in_fd, buffer, first_read_size); @@ -150,6 +181,17 @@ process_file(int in_fd, int out_fd) fprintf(stderr, "Bad LogBuffer!\n"); return 1; } + + // The header size on disk depends on the segment version (v3 grew the + // struct), so size the read from the version we just read rather than from + // sizeof(LogBufferHeader). This keeps v2 files readable by a v3 build. + // + unsigned header_size = static_cast(log_buffer_header_size(header->version)); + if (header_size == 0) { + fprintf(stderr, "Unsupported LogBuffer version %u!\n", header->version); + return 1; + } + // read the rest of the header // unsigned second_read_size = header_size - first_read_size; @@ -171,6 +213,13 @@ process_file(int in_fd, int out_fd) fprintf(stderr, "Buffer too large! byte_count=%d\n", byte_count); return 1; } + // Guard the unsigned subtraction below: a corrupt/truncated segment whose + // byte_count is smaller than its own header would otherwise wrap to a huge + // read size. + if (byte_count < header_size) { + fprintf(stderr, "Bad LogBuffer! byte_count=%u < header_size=%u\n", byte_count, header_size); + return 1; + } buffer_bytes = byte_count - header_size; if (buffer_bytes == 0) { return 0; @@ -202,9 +251,11 @@ process_file(int in_fd, int out_fd) // line // const char *alt_format = nullptr; - // convert the buffer to ascii entries and place onto stdout + // convert the buffer to ascii (or JSON) entries and place onto stdout // - if (header->fmt_fieldlist()) { + if (json_flag) { + write_json_logbuffer(header, out_fd); + } else if (header->fmt_fieldlist()) { LogFile::write_ascii_logbuffer(header, out_fd, ".", alt_format); } else { // TODO investigate why this buffer goes wonky diff --git a/src/traffic_logcat/unit-tests/test_LogEntryJson.cc b/src/traffic_logcat/unit-tests/test_LogEntryJson.cc new file mode 100644 index 00000000000..9391c26c5f9 --- /dev/null +++ b/src/traffic_logcat/unit-tests/test_LogEntryJson.cc @@ -0,0 +1,320 @@ +/** @file + + Unit tests for the v3 binary-log JSON reference decoder (log_entry_to_json). + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include + +#include "LogEntryJson.h" + +#include "proxy/logging/LogBuffer.h" +#include "proxy/logging/LogAccess.h" +#include "proxy/logging/LogField.h" + +#include "tscore/ink_inet.h" +#include "tscore/ink_align.h" + +#include +#include +#include +#include + +using namespace std::literals; + +namespace +{ +// Hand-build a minimal in-memory v3 segment containing a single entry, then +// decode it with log_entry_to_json. The decoder is exercised with no global +// field table: it sees only the symbols and the field-type schema, exactly as +// an out-of-tree reader would. +struct V3Segment { + alignas(16) char storage[1024] = {}; + + LogBufferHeader * + header() + { + return reinterpret_cast(storage); + } +}; + +// Offsets chosen to clear sizeof(LogBufferHeader) and stay 8-byte aligned so +// that the int64 marshalling below is well-aligned. +constexpr unsigned SYM_OFF = 256; +constexpr unsigned SCHEMA_OFF = 320; +constexpr unsigned DATA_OFF = 384; + +void +init_segment(V3Segment &seg, const char *symbols, const std::vector &codes) +{ + LogBufferHeader *h = seg.header(); + h->cookie = LOG_SEGMENT_COOKIE; + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(seg.storage); // the whole hand-built segment is readable + h->fmt_fieldlist_offset = SYM_OFF; + h->fmt_fieldtypes_offset = SCHEMA_OFF; + h->data_offset = DATA_OFF; + + memcpy(seg.storage + SYM_OFF, symbols, strlen(symbols) + 1); + + auto *schema = reinterpret_cast(seg.storage + SCHEMA_OFF); + schema->field_count = static_cast(codes.size()); + + auto *type_codes = const_cast(schema->type_codes()); + for (size_t i = 0; i < codes.size(); ++i) { + type_codes[i] = static_cast(codes[i]); + } +} +} // namespace + +TEST_CASE("v3 generic decode round-trip with IPv4, STRING, INT", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "chi,cqu,pssc", {LogField::Type::IP, LogField::Type::STRING, LogField::Type::sINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + entry->timestamp = 1234; + entry->timestamp_usec = 5678; + + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + + IpEndpoint ep; + REQUIRE(0 == ats_ip_pton("192.0.2.10"sv, &ep.sa)); + w += LogAccess::marshal_ip(w, &ep.sa); + + const char *url = "GET /index.html"; + int slen = LogAccess::padded_strlen(url); + LogAccess::marshal_str(w, url, slen); + w += slen; + + LogAccess::marshal_int(w, 200); + w += INK_MIN_ALIGN; + + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[256]; + int n = log_entry_to_json(entry, seg.header(), out, sizeof(out)); + REQUIRE(n > 0); + CHECK(std::string(out, n) == R"({"chi":"192.0.2.10","cqu":"GET /index.html","pssc":200})"); +} + +TEST_CASE("v3 generic decode handles IPv6 and unspecified IP", "[logcat][v3]") +{ + auto decode_single_ip = [](sockaddr const *ip) -> std::string { + V3Segment seg; + init_segment(seg, "chi", {LogField::Type::IP}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + w += LogAccess::marshal_ip(w, ip); + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[256]; + int n = log_entry_to_json(entry, seg.header(), out, sizeof(out)); + REQUIRE(n > 0); + return std::string(out, n); + }; + + SECTION("IPv6") + { + IpEndpoint ep; + REQUIRE(0 == ats_ip_pton("2001:db8::1"sv, &ep.sa)); + CHECK(decode_single_ip(&ep.sa) == R"({"chi":"2001:db8::1"})"); + } + + SECTION("unspecified / null IP") + { + // marshal_ip(nullptr) records an AF_UNSPEC address; the decoder renders the + // existing "invalid address" sentinel. + CHECK(decode_single_ip(nullptr) == R"({"chi":"0"})"); + } +} + +TEST_CASE("v3 generic decode requires the field-type schema", "[logcat][v3]") +{ + V3Segment seg; + LogBufferHeader *h = seg.header(); + h->cookie = LOG_SEGMENT_COOKIE; + h->version = LOG_SEGMENT_VERSION; + h->byte_count = sizeof(seg.storage); + h->fmt_fieldlist_offset = SYM_OFF; + h->fmt_fieldtypes_offset = 0; // emulate a v2 segment: no schema + h->data_offset = DATA_OFF; + memcpy(seg.storage + SYM_OFF, "chi", 4); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char out[64]; + CHECK(log_entry_to_json(entry, h, out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode escapes JSON structural characters in strings", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "msg", {LogField::Type::STRING}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + + const char *msg = "he\"llo\\x"; // contains a quote and a backslash + int slen = LogAccess::padded_strlen(msg); + LogAccess::marshal_str(w, msg, slen); + w += slen; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[256]; + int n = log_entry_to_json(entry, seg.header(), out, sizeof(out)); + REQUIRE(n > 0); + CHECK(std::string(out, n) == R"({"msg":"he\"llo\\x"})"); +} + +TEST_CASE("v3 generic decode rejects an unknown type code", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "x", {static_cast(99)}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + LogAccess::marshal_int(w, 1); + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[64]; + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode returns -1 when the output buffer is too small", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "pssc", {LogField::Type::sINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + LogAccess::marshal_int(w, 200); + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[4]; // too small for {"pssc":200} + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode rejects a field_count larger than the entry data", "[logcat][v3]") +{ + V3Segment seg; + // Schema claims three INT fields, but the entry only contains one. + init_segment(seg, "a,b,c", {LogField::Type::sINT, LogField::Type::sINT, LogField::Type::sINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + LogAccess::marshal_int(w, 1); + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); // room for one INT only + + char out[256]; + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode rejects field_count not matching the symbol list", "[logcat][v3]") +{ + V3Segment seg; + // Two symbols, but the schema claims a single field: the counts disagree, so + // the segment is malformed and must be refused (not decoded with garbage keys). + init_segment(seg, "a,b", {LogField::Type::sINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + LogAccess::marshal_int(w, 1); + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[64]; + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode reads a dINT field (16 bytes)", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "sshv", {LogField::Type::dINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + + LogAccess::marshal_int(w, 1); // major + w += INK_MIN_ALIGN; + LogAccess::marshal_int(w, 1); // minor + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[256]; + int n = log_entry_to_json(entry, seg.header(), out, sizeof(out)); + REQUIRE(n > 0); + CHECK(std::string(out, n) == R"({"sshv":[1,1]})"); +} + +TEST_CASE("v3 generic decode rejects a truncated dINT field", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "sshv", {LogField::Type::dINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + + LogAccess::marshal_int(w, 1); + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); // only one int, not two + + char out[64]; + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode rejects an unterminated string field", "[logcat][v3]") +{ + V3Segment seg; + init_segment(seg, "s", {LogField::Type::STRING}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + memset(w, 'A', 8); // no NUL within the entry window + entry->entry_len = static_cast((w - reinterpret_cast(entry)) + 8); + + char out[64]; + CHECK(log_entry_to_json(entry, seg.header(), out, sizeof(out)) == -1); +} + +TEST_CASE("v3 generic decode emits raw values, never field semantics", "[logcat][v3]") +{ + // Pins "framing, not semantics" (see LogEntryJson.h). crc is alias-mapped to + // "TCP_HIT" and cqts to a date by the ASCII path; the schema-driven decoder + // dispatches on the framing type (sINT) and must emit the raw integers. + V3Segment seg; + init_segment(seg, "crc,cqts", {LogField::Type::sINT, LogField::Type::sINT}); + + auto *entry = reinterpret_cast(seg.storage + DATA_OFF); + char *w = reinterpret_cast(entry) + sizeof(LogEntryHeader); + LogAccess::marshal_int(w, 2); // a cache-result code; "TCP_HIT"-like in ASCII + w += INK_MIN_ALIGN; + LogAccess::marshal_int(w, 1700000000); // a timestamp; a date string in ASCII + w += INK_MIN_ALIGN; + entry->entry_len = static_cast(w - reinterpret_cast(entry)); + + char out[256]; + int n = log_entry_to_json(entry, seg.header(), out, sizeof(out)); + REQUIRE(n > 0); + CHECK(std::string(out, n) == R"({"crc":2,"cqts":1700000000})"); +} diff --git a/src/traffic_logstats/logstats.cc b/src/traffic_logstats/logstats.cc index 8904ded0a8f..783eaa81943 100644 --- a/src/traffic_logstats/logstats.cc +++ b/src/traffic_logstats/logstats.cc @@ -1831,12 +1831,15 @@ process_file(int in_fd, off_t offset, unsigned max_age) } Dbg(dbg_ctl_logstats, "LogBuffer version %d, current = %d", header->version, LOG_SEGMENT_VERSION); - if (header->version != LOG_SEGMENT_VERSION) { + size_t header_size = log_buffer_header_size(header->version); + if (header_size == 0) { + Dbg(dbg_ctl_logstats, "Unsupported LogBuffer version %d (supported %d-%d)", header->version, + LOG_SEGMENT_VERSION_MIN_SUPPORTED, LOG_SEGMENT_VERSION); return 1; } - // read the rest of the header - unsigned second_read_size = sizeof(LogBufferHeader) - first_read_size; + // read the rest of the header (sized to the on-disk version, not sizeof) + unsigned second_read_size = header_size - first_read_size; nread = read(in_fd, &buffer[first_read_size], second_read_size); if (!nread || EOF == nread) { Dbg(dbg_ctl_logstats, "Second read of header failed (attempted %d bytes at offset %d, got nothing), errno=%d.", @@ -1850,8 +1853,15 @@ process_file(int in_fd, off_t offset, unsigned max_age) return 1; } - buffer_bytes = header->byte_count - sizeof(LogBufferHeader); - if (buffer_bytes <= 0 || (unsigned int)buffer_bytes > (sizeof(buffer) - sizeof(LogBufferHeader))) { + // Guard the unsigned subtraction: a corrupt segment whose byte_count is + // smaller than its own header would otherwise wrap to a huge payload size. + if (header->byte_count < header_size) { + Dbg(dbg_ctl_logstats, "Header byte count [%d] < header size [%zu]", header->byte_count, header_size); + return 1; + } + + buffer_bytes = header->byte_count - header_size; + if (buffer_bytes <= 0 || (unsigned int)buffer_bytes > (sizeof(buffer) - header_size)) { Dbg(dbg_ctl_logstats, "Buffer payload [%d] is wrong.", buffer_bytes); return 1; } @@ -1860,7 +1870,7 @@ process_file(int in_fd, off_t offset, unsigned max_age) int total_read = 0; int read_tries_remaining = MAX_READ_TRIES; // since the data will be old anyway, let's only try a few times. do { - nread = read(in_fd, &buffer[sizeof(LogBufferHeader) + total_read], buffer_bytes - total_read); + nread = read(in_fd, &buffer[header_size + total_read], buffer_bytes - total_read); if (EOF == nread || !nread) { // just bail on error Dbg(dbg_ctl_logstats, "Read failed while reading log buffer, wanted %d bytes, nread=%d, errno=%d", buffer_bytes - total_read, nread, errno); diff --git a/tests/gold_tests/logging/binary_log_v3.test.py b/tests/gold_tests/logging/binary_log_v3.test.py new file mode 100644 index 00000000000..f5ab22c5d57 --- /dev/null +++ b/tests/gold_tests/logging/binary_log_v3.test.py @@ -0,0 +1,145 @@ +''' +End-to-end test of the self-describing v3 binary log format. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +Test.Summary = ''' +Write a v3 binary log and read it back with traffic_logcat (ASCII + JSON), +and prove the per-LogObject binary_log_version:2 writer is still readable. +''' + +# chi resolves to the client address; a Unix-domain-socket curl changes it. +Test.SkipIf(Condition.CurlUsingUnixDomainSocket()) + + +class BinaryLogV3Test: + ''' Drive traffic into a v3 binary log, then decode it with traffic_logcat + and compare the ASCII and JSON output against gold files. + ''' + + # A counter to give each ATS process a unique name. + __ts_counter = 1 + + # Number of transactions (and therefore log entries) to generate. + num_requests = 3 + + # One format exercising all four wire types: + # chi = client IP -> IP -> "127.0.0.1" + # cqu = request URL -> STRING -> "http://127.0.0.1:/get" + # pssc = response status -> sINT -> "200" + # sshv = resp HTTP version -> dINT -> "HTTP/1.1" (ASCII) / [1,1] (JSON) + # The dynamic listen port in cqu is masked with `` in the gold files. + log_format = '% % % %' + + def __init__(self): + # Origin returning a plain 200 over HTTP/1.1 (so sshv == HTTP/1.1). + self.httpbin = Test.MakeHttpBinServer("httpbin") + self.ts = self.__configure_traffic_server() + self.__configure_traffic_test_run() + self.__configure_decode_test_runs() + + def __configure_traffic_server(self): + ''' Configure ATS with the ASCII, v2, and v3 binary log objects. + + Return: + The traffic_server process. + ''' + name = f"ts{BinaryLogV3Test.__ts_counter}" + BinaryLogV3Test.__ts_counter += 1 + ts = Test.MakeATSProcess(name) + + ts.Disk.records_config.update( + { + # No caching: every request reaches the origin, so sshv is populated. + 'proxy.config.http.cache.http': 0, + # Flush quickly so the .blog is on disk shortly after the requests. + 'proxy.config.log.max_secs_per_buffer': 1, + 'proxy.config.log.periodic_tasks_interval': 1, + }) + + ts.Disk.remap_config.AddLine(f'map http://127.0.0.1:{ts.Variables.port}/ http://127.0.0.1:{self.httpbin.Variables.Port}/') + + ts.Disk.logging_yaml.AddLines( + f''' + logging: + formats: + - name: custom_fmt + format: "{self.log_format}" + logs: + - filename: v2 + format: custom_fmt + mode: binary + binary_log_version: 2 + - filename: v3 + format: custom_fmt + mode: binary + binary_log_version: 3 + - filename: ascii + format: custom_fmt + mode: ascii + '''.split("\n")) + + logdir = ts.Variables.LOGDIR + self.v2_blog = os.path.join(logdir, 'v2.blog') + self.v3_blog = os.path.join(logdir, 'v3.blog') + self.ascii_log = os.path.join(logdir, 'ascii.log') + return ts + + def __configure_traffic_test_run(self): + ''' Generate num_requests identical origin-backed transactions. ''' + url = f'http://127.0.0.1:{self.ts.Variables.port}/get' + for i in range(BinaryLogV3Test.num_requests): + tr = Test.AddTestRun(f'Generate binary log traffic #{i + 1}') + if i == 0: + tr.Processes.Default.StartBefore(self.httpbin) + tr.Processes.Default.StartBefore(self.ts) + tr.MakeCurlCommand(f'--http1.1 "{url}"', ts=self.ts) + tr.Processes.Default.ReturnCode = 0 + + def __configure_decode_test_runs(self): + ''' Decode the binary logs with traffic_logcat and compare to gold. ''' + logcat = os.path.join(self.ts.Variables.BINDIR, 'traffic_logcat') + + # Wait until the ASCII companion has every entry; the binary logs are + # written in the same periodic flush, so they are complete by then. + Test.AddAwaitFileContainsTestRun('Await the logs to flush.', self.ascii_log, r'/get', BinaryLogV3Test.num_requests) + + # The binary_log_version:2 log decodes to the same ASCII as v3. + tr = Test.AddTestRun('Decode the v2 binary log to ASCII') + tr.Processes.Default.Command = f'{logcat} {self.v2_blog}' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.Streams.stdout = 'gold/binary_log_v3_ascii.gold' + + # traffic_logcat decodes the v3 .blog to the expected ASCII. + tr = Test.AddTestRun('Decode the v3 binary log to ASCII') + tr.Processes.Default.Command = f'{logcat} {self.v3_blog}' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.Streams.stdout = 'gold/binary_log_v3_ascii.gold' + + # traffic_logcat -j decodes the v3 .blog to the expected JSON. + tr = Test.AddTestRun('Decode the v3 binary log to JSON') + tr.Processes.Default.Command = f'{logcat} -j {self.v3_blog}' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.Streams.stdout = 'gold/binary_log_v3_json.gold' + + +# +# Run the test. +# +BinaryLogV3Test() diff --git a/tests/gold_tests/logging/gold/binary_log_v3_ascii.gold b/tests/gold_tests/logging/gold/binary_log_v3_ascii.gold new file mode 100644 index 00000000000..98126a2c981 --- /dev/null +++ b/tests/gold_tests/logging/gold/binary_log_v3_ascii.gold @@ -0,0 +1,3 @@ +127.0.0.1 http://127.0.0.1:``/get 200 HTTP/1.1 +127.0.0.1 http://127.0.0.1:``/get 200 HTTP/1.1 +127.0.0.1 http://127.0.0.1:``/get 200 HTTP/1.1 diff --git a/tests/gold_tests/logging/gold/binary_log_v3_json.gold b/tests/gold_tests/logging/gold/binary_log_v3_json.gold new file mode 100644 index 00000000000..3889f468174 --- /dev/null +++ b/tests/gold_tests/logging/gold/binary_log_v3_json.gold @@ -0,0 +1,3 @@ +{"chi":"127.0.0.1","cqu":"http://127.0.0.1:``/get","pssc":200,"sshv":[1,1]} +{"chi":"127.0.0.1","cqu":"http://127.0.0.1:``/get","pssc":200,"sshv":[1,1]} +{"chi":"127.0.0.1","cqu":"http://127.0.0.1:``/get","pssc":200,"sshv":[1,1]}