From 77f60a77e7e3423df712208eb4907a1356bd2c8f Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Mon, 22 Jun 2026 17:06:38 +0200 Subject: [PATCH] fix(tools): collapse intra-file duplicate typedefs in base header dedup_headers only removed definitions from child headers that were byte-identical to ones already present in the base header. It never deduplicated definitions within the base header itself, so cbindgen output that emits the same profiling type from two crate boundaries left duplicate typedefs in the merged common.h. Two cases survived and broke consumers compiling with -Werror -Wtypedef-redefinition (C11): 1. A bare forward declaration "typedef struct X X;" coexisting with the full-body definition "typedef struct X { ... } X;" (e.g. ddog_prof_EncodedProfile, ddog_prof_StringId, OpaqueStringId). 2. An identical pointer typedef emitted twice whose doc comments differ, so the existing exact-string dedup kept both (ddog_prof_StringId2, ddog_prof_MappingId2, ddog_prof_FunctionId2). Add a final pass over the assembled base header that drops a forward struct/union/enum declaration when a full-body definition of the same name exists, and removes later duplicates of an identical typedef statement regardless of differing comments. This makes the generated common.h clean by construction and removes the need for downstream post-processing workarounds. --- tools/src/lib.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/tools/src/lib.rs b/tools/src/lib.rs index e07b143cf4..43106b45bf 100644 --- a/tools/src/lib.rs +++ b/tools/src/lib.rs @@ -111,6 +111,117 @@ pub mod headers { new_content_parts } + /// Strip an optional leading block comment (`/** ... */`) and surrounding + /// whitespace from a definition span, returning just the statement text + /// with trailing whitespace removed. + /// + /// This lets two definitions that only differ by their doc comment be + /// compared by their statement alone. + fn statement_text(def: &str) -> &str { + let s = def.trim_start(); + let s = if s.starts_with("/*") { + match s.find("*/") { + Some(end) => s[end + 2..].trim_start(), + None => s, + } + } else { + s + }; + s.trim_end() + } + + fn is_ident(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'_') + } + + /// The identifier a typedef introduces, i.e. the last identifier before the + /// terminating semicolon. Works for `typedef ... NAME;`, pointer typedefs + /// (`typedef struct X *NAME;`) and full-body typedefs (`... } NAME;`). + fn typedef_name(stmt: &str) -> Option<&str> { + let s = stmt.trim_end().strip_suffix(';')?.trim_end(); + let start = s + .rfind(|c: char| !(c.is_ascii_alphanumeric() || c == '_')) + .map(|i| i + 1) + .unwrap_or(0); + let name = &s[start..]; + is_ident(name).then_some(name) + } + + /// If the statement is a full-body struct/union/enum typedef + /// (`typedef struct X { ... } X;`), return the name it defines. + fn bodied_typedef_name(stmt: &str) -> Option<&str> { + if !stmt.contains('{') { + return None; + } + if !(stmt.starts_with("typedef struct") + || stmt.starts_with("typedef union") + || stmt.starts_with("typedef enum")) + { + return None; + } + typedef_name(stmt) + } + + /// If the statement is a bare forward declaration of the form + /// `typedef struct X X;` (the two identifiers being equal, no body), + /// return the name `X`. + fn forward_decl_name(stmt: &str) -> Option<&str> { + if stmt.contains('{') { + return None; + } + let rest = stmt.strip_prefix("typedef")?.trim_start(); + let rest = ["struct ", "union ", "enum "] + .iter() + .find_map(|kw| rest.strip_prefix(kw))? + .trim_start(); + let body = rest.strip_suffix(';')?; + let mut tokens = body.split_whitespace(); + let first = tokens.next()?; + let second = tokens.next()?; + if tokens.next().is_some() { + return None; + } + (first == second && is_ident(first)).then_some(first) + } + + /// Remove intra-file duplicate typedefs left behind in the base header. + /// + /// cbindgen can emit the same type from multiple crate boundaries, and the + /// child-vs-base deduplication above only collapses byte-identical + /// definitions. Two cases slip through and break consumers compiling with + /// `-Werror -Wtypedef-redefinition`: + /// + /// 1. A bare forward declaration `typedef struct X X;` that coexists with the full-body + /// definition `typedef struct X { ... } X;`. The forward declaration is redundant and is + /// dropped. + /// 2. Two identical typedef statements whose doc comments differ (so they are not + /// byte-identical). The later occurrence is dropped. + fn dedup_base_typedefs(content: &str) -> String { + let defs = collect_definitions(content); + + let bodied_names: HashSet<&str> = defs + .iter() + .filter_map(|d| bodied_typedef_name(statement_text(d.str))) + .collect(); + + let mut seen: HashSet<&str> = HashSet::new(); + let mut result = String::with_capacity(content.len()); + let mut pos = 0; + for d in &defs { + let stmt = statement_text(d.str); + let is_typedef = stmt.starts_with("typedef"); + let drop = is_typedef + && (forward_decl_name(stmt).is_some_and(|name| bodied_names.contains(name)) + || !seen.insert(stmt)); + if drop { + result.push_str(&content[pos..d.start]); + pos = d.end; + } + } + result.push_str(&content[pos..]); + result + } + pub fn dedup_headers(base: &str, headers: &[&str]) { let mut unique_child_defs: Vec = Vec::new(); let mut present = HashSet::new(); @@ -154,7 +265,14 @@ pub mod headers { base_new_parts.push(child_def); } base_new_parts.push(&base_header_content[base_defs.last().unwrap().end..]); - write_parts(&mut BufWriter::new(&base_header), &base_new_parts).unwrap(); + + // Definitions moved in from child headers (and any already present in + // the base) can introduce intra-file duplicate typedefs that the + // child-vs-base pass above does not catch. Collapse them so the base + // header compiles under `-Werror -Wtypedef-redefinition`. + let merged_base: String = base_new_parts.concat(); + let deduped_base = dedup_base_typedefs(&merged_base); + write_parts(&mut BufWriter::new(&base_header), &[&deduped_base]).unwrap(); } #[cfg(test)] @@ -335,5 +453,88 @@ typedef struct ddog_Vec_U8 { " ); } + + #[test] + fn forward_decl_is_dropped_when_full_body_exists() { + // The forward declaration may appear either before or after the + // full-body definition; both must be collapsed to the body. + let before = r"typedef struct ddog_prof_EncodedProfile ddog_prof_EncodedProfile; + +typedef struct ddog_prof_EncodedProfile { + struct ddog_prof_EncodedProfile *inner; +} ddog_prof_EncodedProfile; +"; + assert_eq!( + dedup_base_typedefs(before), + r"typedef struct ddog_prof_EncodedProfile { + struct ddog_prof_EncodedProfile *inner; +} ddog_prof_EncodedProfile; +" + ); + + let after = r"typedef struct OpaqueStringId { + uint32_t offset; +} OpaqueStringId; + +typedef struct OpaqueStringId OpaqueStringId; +"; + // The blank line that separated the two definitions is retained. + assert_eq!( + dedup_base_typedefs(after), + "typedef struct OpaqueStringId {\n uint32_t offset;\n} OpaqueStringId;\n\n" + ); + } + + #[test] + fn opaque_forward_decl_is_kept_without_body() { + // An opaque type only has a forward declaration; it must be kept. + let input = + "typedef struct ddog_OpaqueCancellationToken ddog_OpaqueCancellationToken;\n"; + assert_eq!(dedup_base_typedefs(input), input); + } + + #[test] + fn distinct_alias_typedef_is_kept() { + // `typedef struct A B;` with A != B is a real alias, not a + // redundant forward declaration, even if `A` has a body. + let input = r"typedef struct ddog_OpaqueCancellationToken { + uint32_t _0; +} ddog_OpaqueCancellationToken; + +typedef struct ddog_OpaqueCancellationToken ddog_prof_TokioCancellationToken; +"; + assert_eq!(dedup_base_typedefs(input), input); + } + + #[test] + fn exact_duplicate_typedef_with_differing_comment_is_dropped() { + // Same pointer typedef emitted twice, once bare and once with a doc + // comment. The first occurrence (and its comment, if any) is kept, + // the later duplicate is removed. + let input = r"typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2; + +/** + * A handle to a function. + */ +typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2; +"; + assert_eq!( + dedup_base_typedefs(input), + "typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2;\n\n" + ); + } + + #[test] + fn unrelated_typedefs_are_untouched() { + let input = r"typedef uint64_t ddog_QueueId; + +typedef struct ddog_Vec_U8 { + const uint8_t *ptr; +} ddog_Vec_U8; + +typedef struct ddog_Slice_U8 *ddog_Slice_U8Handle; +"; + assert_eq!(dedup_base_typedefs(input), input); + } } } /* Headers */