Skip to content

Commit cd91d5d

Browse files
committed
Add new JSON log formatter QW_LOG_FORMAT=DDG
The existing JSON formatter (`QW_LOG_FORMAT=json`) produces deeply nested JSON with separate span objects. This makes logs hard to search and filter. The new DDG formatter outputs a flat JSON object with four fields: - `timestamp`: RFC 3339 - `level`: log level (INFO, WARN, ERROR, ...) - `service`: crate name extracted from the tracing target - `message`: the full text-formatted log line (level, target, spans, fields) The message reuses tracing's standard Full formatter, so it matches the regular text output minus the timestamp.
1 parent 845d9a7 commit cd91d5d

1 file changed

Lines changed: 298 additions & 14 deletions

File tree

quickwit/quickwit-cli/src/logger.rs

Lines changed: 298 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -174,30 +174,33 @@ fn time_formatter() -> UtcTime<Vec<BorrowedFormatItem<'static>>> {
174174
enum EventFormat<'a> {
175175
Full(Format<Full, UtcTime<Vec<BorrowedFormatItem<'a>>>>),
176176
Json(Format<Json>),
177+
Ddg(DdgFormat),
177178
}
178179

179180
impl EventFormat<'_> {
180-
/// Gets the log format from the environment variable `QW_LOG_FORMAT`. Returns a JSON
181-
/// formatter if the variable is set to `json`, otherwise returns a full formatter.
181+
/// Gets the log format from the environment variable `QW_LOG_FORMAT`.
182182
fn get_from_env() -> Self {
183-
if get_from_env_opt::<String>("QW_LOG_FORMAT", false)
184-
.map(|log_format| log_format.eq_ignore_ascii_case("json"))
185-
.unwrap_or(false)
183+
match get_from_env_opt::<String>("QW_LOG_FORMAT", false)
184+
.as_deref()
185+
.map(str::to_ascii_lowercase)
186+
.as_deref()
186187
{
187-
let json_format = tracing_subscriber::fmt::format().json();
188-
EventFormat::Json(json_format)
189-
} else {
190-
let full_format = tracing_subscriber::fmt::format()
191-
.with_target(true)
192-
.with_timer(time_formatter());
193-
194-
EventFormat::Full(full_format)
188+
Some("json") => EventFormat::Json(tracing_subscriber::fmt::format().json()),
189+
Some("ddg") => EventFormat::Ddg(DdgFormat::new()),
190+
_ => {
191+
let full_format = tracing_subscriber::fmt::format()
192+
.with_target(true)
193+
.with_timer(time_formatter());
194+
EventFormat::Full(full_format)
195+
}
195196
}
196197
}
197198

198199
fn format_fields(&self) -> FieldFormat {
199200
match self {
200-
EventFormat::Full(_) => FieldFormat::Default(DefaultFields::new()),
201+
EventFormat::Full(_) | EventFormat::Ddg(_) => {
202+
FieldFormat::Default(DefaultFields::new())
203+
}
201204
EventFormat::Json(_) => FieldFormat::Json(JsonFields::new()),
202205
}
203206
}
@@ -217,10 +220,70 @@ where
217220
match self {
218221
EventFormat::Full(format) => format.format_event(ctx, writer, event),
219222
EventFormat::Json(format) => format.format_event(ctx, writer, event),
223+
EventFormat::Ddg(format) => format.format_event(ctx, writer, event),
224+
}
225+
}
226+
}
227+
228+
/// Outputs JSON with `timestamp`, `level`, `service`, and `message` fields.
229+
/// The `service` is extracted from the tracing target (crate name).
230+
/// The `message` is formatted using the regular text formatter (level, target, spans, fields).
231+
///
232+
/// Example output:
233+
/// ```json
234+
/// {"timestamp":"2025-03-23T14:30:45Z","level":"INFO","service":"quickwit_search","message":"INFO quickwit_search: hello"}
235+
/// ```
236+
struct DdgFormat {
237+
text_format: Format<Full, ()>,
238+
}
239+
240+
impl DdgFormat {
241+
fn new() -> Self {
242+
Self {
243+
text_format: tracing_subscriber::fmt::format()
244+
.with_target(true)
245+
.without_time(),
220246
}
221247
}
222248
}
223249

250+
impl<S, N> FormatEvent<S, N> for DdgFormat
251+
where
252+
S: Subscriber + for<'a> LookupSpan<'a>,
253+
N: for<'a> FormatFields<'a> + 'static,
254+
{
255+
fn format_event(
256+
&self,
257+
ctx: &FmtContext<'_, S, N>,
258+
mut writer: Writer<'_>,
259+
event: &Event<'_>,
260+
) -> fmt::Result {
261+
// Render the event as text using the Full formatter (without timestamp)
262+
let mut message = String::with_capacity(256);
263+
self.text_format
264+
.format_event(ctx, Writer::new(&mut message), event)?;
265+
let message = message.trim();
266+
267+
// Timestamp (RFC 3339)
268+
let timestamp = time::OffsetDateTime::now_utc()
269+
.format(&time::format_description::well_known::Rfc3339)
270+
.map_err(|_| fmt::Error)?;
271+
272+
let level = event.metadata().level().as_str();
273+
274+
// Extract crate name from the target (e.g. "quickwit_cli::logger" -> "quickwit_cli")
275+
let target = event.metadata().target();
276+
let service = target.split("::").next().unwrap_or(target);
277+
278+
// Write JSON with properly escaped message
279+
let escaped_message = serde_json::to_string(message).map_err(|_| fmt::Error)?;
280+
writeln!(
281+
writer,
282+
r#"{{"timestamp":"{timestamp}","level":"{level}","service":"{service}","message":{escaped_message}}}"#
283+
)
284+
}
285+
}
286+
224287
enum FieldFormat {
225288
Default(DefaultFields),
226289
Json(JsonFields),
@@ -363,3 +426,224 @@ pub(super) mod jemalloc_profiled {
363426
))
364427
}
365428
}
429+
430+
#[cfg(test)]
431+
mod tests {
432+
use std::sync::{Arc, Mutex};
433+
434+
use tracing_subscriber::layer::SubscriberExt;
435+
436+
use super::*;
437+
438+
/// A shared buffer writer for capturing log output in tests.
439+
#[derive(Clone, Default)]
440+
struct TestMakeWriter(Arc<Mutex<Vec<u8>>>);
441+
442+
impl TestMakeWriter {
443+
fn get_string(&self) -> String {
444+
String::from_utf8(self.0.lock().unwrap().clone()).unwrap()
445+
}
446+
}
447+
448+
impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for TestMakeWriter {
449+
type Writer = TestWriter;
450+
451+
fn make_writer(&'a self) -> Self::Writer {
452+
TestWriter(self.0.clone())
453+
}
454+
}
455+
456+
struct TestWriter(Arc<Mutex<Vec<u8>>>);
457+
458+
impl std::io::Write for TestWriter {
459+
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
460+
self.0.lock().unwrap().write_all(buf)?;
461+
Ok(buf.len())
462+
}
463+
464+
fn flush(&mut self) -> std::io::Result<()> {
465+
Ok(())
466+
}
467+
}
468+
469+
/// Sets up a subscriber with `DdgFormat` and captures the output.
470+
fn capture_ddg_log<F: FnOnce()>(f: F) -> serde_json::Value {
471+
let writer = TestMakeWriter::default();
472+
let subscriber = tracing_subscriber::registry().with(
473+
tracing_subscriber::fmt::layer()
474+
.event_format(DdgFormat::new())
475+
.fmt_fields(FieldFormat::Default(DefaultFields::new()))
476+
.with_ansi(false)
477+
.with_writer(writer.clone()),
478+
);
479+
tracing::subscriber::with_default(subscriber, f);
480+
let output = writer.get_string();
481+
serde_json::from_str(&output).expect("output should be valid JSON")
482+
}
483+
484+
const TARGET: &str = "quickwit_cli::logger::tests";
485+
486+
#[test]
487+
fn test_ddg_format_has_expected_fields() {
488+
let json = capture_ddg_log(|| tracing::info!("hello"));
489+
let obj = json.as_object().unwrap();
490+
assert_eq!(obj.len(), 4, "{obj:?}");
491+
assert!(obj.contains_key("timestamp"));
492+
assert!(obj.contains_key("level"));
493+
assert!(obj.contains_key("service"));
494+
assert!(obj.contains_key("message"));
495+
}
496+
497+
#[test]
498+
fn test_ddg_format_basic_message() {
499+
let json = capture_ddg_log(|| tracing::info!("hello world"));
500+
assert_eq!(json["level"], "INFO");
501+
assert_eq!(json["service"], "quickwit_cli");
502+
assert_eq!(
503+
json["message"].as_str().unwrap(),
504+
format!("INFO {TARGET}: hello world")
505+
);
506+
}
507+
508+
#[test]
509+
fn test_ddg_format_with_fields() {
510+
let json = capture_ddg_log(|| {
511+
tracing::info!(key = "value", count = 42, "processing request");
512+
});
513+
assert_eq!(
514+
json["message"].as_str().unwrap(),
515+
format!("INFO {TARGET}: processing request key=\"value\" count=42")
516+
);
517+
}
518+
519+
#[test]
520+
fn test_ddg_format_with_span() {
521+
let json = capture_ddg_log(|| {
522+
let span = tracing::info_span!("my_span", id = 123);
523+
let _guard = span.enter();
524+
tracing::info!("inside span");
525+
});
526+
assert_eq!(
527+
json["message"].as_str().unwrap(),
528+
format!("INFO my_span{{id=123}}: {TARGET}: inside span")
529+
);
530+
}
531+
532+
/// Captures raw text output using the production Full formatter (with timestamp, no ANSI).
533+
fn capture_full_log<F: FnOnce()>(f: F) -> String {
534+
let writer = TestMakeWriter::default();
535+
let full_format = tracing_subscriber::fmt::format()
536+
.with_target(true)
537+
.with_timer(time_formatter());
538+
let subscriber = tracing_subscriber::registry().with(
539+
tracing_subscriber::fmt::layer()
540+
.event_format(full_format)
541+
.fmt_fields(DefaultFields::new())
542+
.with_ansi(false)
543+
.with_writer(writer.clone()),
544+
);
545+
tracing::subscriber::with_default(subscriber, f);
546+
writer.get_string().trim_end().to_string()
547+
}
548+
549+
#[test]
550+
fn test_ddg_format_with_nested_spans() {
551+
let make_event = || {
552+
let outer = tracing::info_span!("outer", req_id = 42);
553+
let _outer_guard = outer.enter();
554+
let inner = tracing::info_span!("inner", step = "parse");
555+
let _inner_guard = inner.enter();
556+
tracing::info!("deep inside");
557+
};
558+
559+
// Compare DDG message against production Full formatter output.
560+
// The only difference is the leading timestamp.
561+
let full_output = capture_full_log(make_event);
562+
let json = capture_ddg_log(make_event);
563+
let ddg_message = json["message"].as_str().unwrap();
564+
565+
// Full output: "2025-03-23T14:30:45.123Z INFO outer{...}: target: deep inside"
566+
// DDG message: "INFO outer{...}: target: deep inside"
567+
// The timestamp adds one extra space of padding, so we trim both and compare.
568+
let full_without_timestamp = full_output
569+
.find(" ")
570+
.map(|pos| &full_output[pos..])
571+
.unwrap_or(&full_output);
572+
assert_eq!(
573+
ddg_message.trim_start(),
574+
full_without_timestamp.trim_start(),
575+
);
576+
577+
assert_eq!(
578+
ddg_message,
579+
format!("INFO outer{{req_id=42}}:inner{{step=\"parse\"}}: {TARGET}: deep inside")
580+
);
581+
}
582+
583+
#[test]
584+
fn test_ddg_format_escapes_special_chars() {
585+
let json = capture_ddg_log(|| {
586+
tracing::info!(r#"hello "world" with\backslash"#);
587+
});
588+
assert_eq!(
589+
json["message"].as_str().unwrap(),
590+
format!(r#"INFO {TARGET}: hello "world" with\backslash"#)
591+
);
592+
}
593+
594+
#[test]
595+
fn test_ddg_format_escapes_newlines() {
596+
let json = capture_ddg_log(|| {
597+
tracing::info!("line1\nline2\ttab");
598+
});
599+
assert_eq!(
600+
json["message"].as_str().unwrap(),
601+
format!("INFO {TARGET}: line1\nline2\ttab")
602+
);
603+
}
604+
605+
#[test]
606+
fn test_ddg_format_levels() {
607+
for (expected_level, log_fn) in [
608+
(
609+
"WARN",
610+
Box::new(|| tracing::warn!("w")) as Box<dyn FnOnce()>,
611+
),
612+
("ERROR", Box::new(|| tracing::error!("e"))),
613+
("INFO", Box::new(|| tracing::info!("i"))),
614+
] {
615+
let json = capture_ddg_log(log_fn);
616+
assert_eq!(json["level"], expected_level);
617+
}
618+
}
619+
620+
#[test]
621+
fn test_ddg_format_timestamp_is_rfc3339() {
622+
let json = capture_ddg_log(|| tracing::info!("hello"));
623+
let ts = json["timestamp"].as_str().unwrap();
624+
time::OffsetDateTime::parse(ts, &time::format_description::well_known::Rfc3339)
625+
.unwrap_or_else(|err| panic!("timestamp is not valid RFC 3339: {ts}: {err}"));
626+
}
627+
628+
#[test]
629+
fn test_ddg_format_with_bool_and_float_fields() {
630+
let json = capture_ddg_log(|| {
631+
tracing::info!(enabled = true, ratio = 0.75, "status check");
632+
});
633+
assert_eq!(
634+
json["message"].as_str().unwrap(),
635+
format!("INFO {TARGET}: status check enabled=true ratio=0.75")
636+
);
637+
}
638+
639+
#[test]
640+
fn test_ddg_format_fields_only() {
641+
let json = capture_ddg_log(|| {
642+
tracing::info!(action = "ping");
643+
});
644+
assert_eq!(
645+
json["message"].as_str().unwrap(),
646+
format!("INFO {TARGET}: action=\"ping\"")
647+
);
648+
}
649+
}

0 commit comments

Comments
 (0)