diff --git a/tools/src/lib.rs b/tools/src/lib.rs index e07b143cf4..43106b45bf 100644 --- a/tools/src/lib.rs +++ b/tools/src/lib.rs @@ -111,6 +111,117 @@ pub mod headers { new_content_parts } + /// Strip an optional leading block comment (`/** ... */`) and surrounding + /// whitespace from a definition span, returning just the statement text + /// with trailing whitespace removed. + /// + /// This lets two definitions that only differ by their doc comment be + /// compared by their statement alone. + fn statement_text(def: &str) -> &str { + let s = def.trim_start(); + let s = if s.starts_with("/*") { + match s.find("*/") { + Some(end) => s[end + 2..].trim_start(), + None => s, + } + } else { + s + }; + s.trim_end() + } + + fn is_ident(s: &str) -> bool { + !s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'_') + } + + /// The identifier a typedef introduces, i.e. the last identifier before the + /// terminating semicolon. Works for `typedef ... NAME;`, pointer typedefs + /// (`typedef struct X *NAME;`) and full-body typedefs (`... } NAME;`). + fn typedef_name(stmt: &str) -> Option<&str> { + let s = stmt.trim_end().strip_suffix(';')?.trim_end(); + let start = s + .rfind(|c: char| !(c.is_ascii_alphanumeric() || c == '_')) + .map(|i| i + 1) + .unwrap_or(0); + let name = &s[start..]; + is_ident(name).then_some(name) + } + + /// If the statement is a full-body struct/union/enum typedef + /// (`typedef struct X { ... } X;`), return the name it defines. + fn bodied_typedef_name(stmt: &str) -> Option<&str> { + if !stmt.contains('{') { + return None; + } + if !(stmt.starts_with("typedef struct") + || stmt.starts_with("typedef union") + || stmt.starts_with("typedef enum")) + { + return None; + } + typedef_name(stmt) + } + + /// If the statement is a bare forward declaration of the form + /// `typedef struct X X;` (the two identifiers being equal, no body), + /// return the name `X`. + fn forward_decl_name(stmt: &str) -> Option<&str> { + if stmt.contains('{') { + return None; + } + let rest = stmt.strip_prefix("typedef")?.trim_start(); + let rest = ["struct ", "union ", "enum "] + .iter() + .find_map(|kw| rest.strip_prefix(kw))? + .trim_start(); + let body = rest.strip_suffix(';')?; + let mut tokens = body.split_whitespace(); + let first = tokens.next()?; + let second = tokens.next()?; + if tokens.next().is_some() { + return None; + } + (first == second && is_ident(first)).then_some(first) + } + + /// Remove intra-file duplicate typedefs left behind in the base header. + /// + /// cbindgen can emit the same type from multiple crate boundaries, and the + /// child-vs-base deduplication above only collapses byte-identical + /// definitions. Two cases slip through and break consumers compiling with + /// `-Werror -Wtypedef-redefinition`: + /// + /// 1. A bare forward declaration `typedef struct X X;` that coexists with the full-body + /// definition `typedef struct X { ... } X;`. The forward declaration is redundant and is + /// dropped. + /// 2. Two identical typedef statements whose doc comments differ (so they are not + /// byte-identical). The later occurrence is dropped. + fn dedup_base_typedefs(content: &str) -> String { + let defs = collect_definitions(content); + + let bodied_names: HashSet<&str> = defs + .iter() + .filter_map(|d| bodied_typedef_name(statement_text(d.str))) + .collect(); + + let mut seen: HashSet<&str> = HashSet::new(); + let mut result = String::with_capacity(content.len()); + let mut pos = 0; + for d in &defs { + let stmt = statement_text(d.str); + let is_typedef = stmt.starts_with("typedef"); + let drop = is_typedef + && (forward_decl_name(stmt).is_some_and(|name| bodied_names.contains(name)) + || !seen.insert(stmt)); + if drop { + result.push_str(&content[pos..d.start]); + pos = d.end; + } + } + result.push_str(&content[pos..]); + result + } + pub fn dedup_headers(base: &str, headers: &[&str]) { let mut unique_child_defs: Vec = Vec::new(); let mut present = HashSet::new(); @@ -154,7 +265,14 @@ pub mod headers { base_new_parts.push(child_def); } base_new_parts.push(&base_header_content[base_defs.last().unwrap().end..]); - write_parts(&mut BufWriter::new(&base_header), &base_new_parts).unwrap(); + + // Definitions moved in from child headers (and any already present in + // the base) can introduce intra-file duplicate typedefs that the + // child-vs-base pass above does not catch. Collapse them so the base + // header compiles under `-Werror -Wtypedef-redefinition`. + let merged_base: String = base_new_parts.concat(); + let deduped_base = dedup_base_typedefs(&merged_base); + write_parts(&mut BufWriter::new(&base_header), &[&deduped_base]).unwrap(); } #[cfg(test)] @@ -335,5 +453,88 @@ typedef struct ddog_Vec_U8 { " ); } + + #[test] + fn forward_decl_is_dropped_when_full_body_exists() { + // The forward declaration may appear either before or after the + // full-body definition; both must be collapsed to the body. + let before = r"typedef struct ddog_prof_EncodedProfile ddog_prof_EncodedProfile; + +typedef struct ddog_prof_EncodedProfile { + struct ddog_prof_EncodedProfile *inner; +} ddog_prof_EncodedProfile; +"; + assert_eq!( + dedup_base_typedefs(before), + r"typedef struct ddog_prof_EncodedProfile { + struct ddog_prof_EncodedProfile *inner; +} ddog_prof_EncodedProfile; +" + ); + + let after = r"typedef struct OpaqueStringId { + uint32_t offset; +} OpaqueStringId; + +typedef struct OpaqueStringId OpaqueStringId; +"; + // The blank line that separated the two definitions is retained. + assert_eq!( + dedup_base_typedefs(after), + "typedef struct OpaqueStringId {\n uint32_t offset;\n} OpaqueStringId;\n\n" + ); + } + + #[test] + fn opaque_forward_decl_is_kept_without_body() { + // An opaque type only has a forward declaration; it must be kept. + let input = + "typedef struct ddog_OpaqueCancellationToken ddog_OpaqueCancellationToken;\n"; + assert_eq!(dedup_base_typedefs(input), input); + } + + #[test] + fn distinct_alias_typedef_is_kept() { + // `typedef struct A B;` with A != B is a real alias, not a + // redundant forward declaration, even if `A` has a body. + let input = r"typedef struct ddog_OpaqueCancellationToken { + uint32_t _0; +} ddog_OpaqueCancellationToken; + +typedef struct ddog_OpaqueCancellationToken ddog_prof_TokioCancellationToken; +"; + assert_eq!(dedup_base_typedefs(input), input); + } + + #[test] + fn exact_duplicate_typedef_with_differing_comment_is_dropped() { + // Same pointer typedef emitted twice, once bare and once with a doc + // comment. The first occurrence (and its comment, if any) is kept, + // the later duplicate is removed. + let input = r"typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2; + +/** + * A handle to a function. + */ +typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2; +"; + assert_eq!( + dedup_base_typedefs(input), + "typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2;\n\n" + ); + } + + #[test] + fn unrelated_typedefs_are_untouched() { + let input = r"typedef uint64_t ddog_QueueId; + +typedef struct ddog_Vec_U8 { + const uint8_t *ptr; +} ddog_Vec_U8; + +typedef struct ddog_Slice_U8 *ddog_Slice_U8Handle; +"; + assert_eq!(dedup_base_typedefs(input), input); + } } } /* Headers */