Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 202 additions & 1 deletion tools/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,117 @@ pub mod headers {
new_content_parts
}

/// Strip an optional leading block comment (`/** ... */`) and surrounding
/// whitespace from a definition span, returning just the statement text
/// with trailing whitespace removed.
///
/// This lets two definitions that only differ by their doc comment be
/// compared by their statement alone.
fn statement_text(def: &str) -> &str {
let s = def.trim_start();
let s = if s.starts_with("/*") {
match s.find("*/") {
Some(end) => s[end + 2..].trim_start(),
None => s,
Comment on lines +123 to +125

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
match s.find("*/") {
Some(end) => s[end + 2..].trim_start(),
None => s,
s.find("*/").map(|end| s[end + 2..].trim_start()).unwrap_or(s);

}
} else {
s
};
s.trim_end()
}

fn is_ident(s: &str) -> bool {
!s.is_empty() && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'_')

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a token to be a valid C identifier, the first character must NOT be a digit, which isn't included in this test. I don't remember in C but in Rust for example this could match 0usize which isn't an identifier.

}

/// The identifier a typedef introduces, i.e. the last identifier before the
/// terminating semicolon. Works for `typedef ... NAME;`, pointer typedefs
/// (`typedef struct X *NAME;`) and full-body typedefs (`... } NAME;`).
fn typedef_name(stmt: &str) -> Option<&str> {
let s = stmt.trim_end().strip_suffix(';')?.trim_end();
let start = s
.rfind(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
.map(|i| i + 1)
.unwrap_or(0);
let name = &s[start..];
is_ident(name).then_some(name)
}

/// If the statement is a full-body struct/union/enum typedef
/// (`typedef struct X { ... } X;`), return the name it defines.
fn bodied_typedef_name(stmt: &str) -> Option<&str> {
if !stmt.contains('{') {
return None;
}
if !(stmt.starts_with("typedef struct")
|| stmt.starts_with("typedef union")
|| stmt.starts_with("typedef enum"))
{
return None;
}
typedef_name(stmt)
}

/// If the statement is a bare forward declaration of the form
/// `typedef struct X X;` (the two identifiers being equal, no body),
/// return the name `X`.
fn forward_decl_name(stmt: &str) -> Option<&str> {
if stmt.contains('{') {
return None;
}
let rest = stmt.strip_prefix("typedef")?.trim_start();
let rest = ["struct ", "union ", "enum "]
.iter()
.find_map(|kw| rest.strip_prefix(kw))?
.trim_start();
let body = rest.strip_suffix(';')?;
let mut tokens = body.split_whitespace();
let first = tokens.next()?;
let second = tokens.next()?;
if tokens.next().is_some() {
return None;
}
(first == second && is_ident(first)).then_some(first)
}

/// Remove intra-file duplicate typedefs left behind in the base header.
///
/// cbindgen can emit the same type from multiple crate boundaries, and the
/// child-vs-base deduplication above only collapses byte-identical
/// definitions. Two cases slip through and break consumers compiling with
/// `-Werror -Wtypedef-redefinition`:
///
/// 1. A bare forward declaration `typedef struct X X;` that coexists with the full-body
/// definition `typedef struct X { ... } X;`. The forward declaration is redundant and is
/// dropped.
/// 2. Two identical typedef statements whose doc comments differ (so they are not
/// byte-identical). The later occurrence is dropped.
fn dedup_base_typedefs(content: &str) -> String {
let defs = collect_definitions(content);

let bodied_names: HashSet<&str> = defs
.iter()
.filter_map(|d| bodied_typedef_name(statement_text(d.str)))
.collect();

let mut seen: HashSet<&str> = HashSet::new();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not entirely clear if what seen includes going forward. I would suggest to give it a more explicit name (I suppose it's the typedefs that are not bodied?)

let mut result = String::with_capacity(content.len());
let mut pos = 0;
for d in &defs {
let stmt = statement_text(d.str);
let is_typedef = stmt.starts_with("typedef");
let drop = is_typedef
&& (forward_decl_name(stmt).is_some_and(|name| bodied_names.contains(name))
|| !seen.insert(stmt));
if drop {
result.push_str(&content[pos..d.start]);
pos = d.end;
}
}
result.push_str(&content[pos..]);
result
}

pub fn dedup_headers(base: &str, headers: &[&str]) {
let mut unique_child_defs: Vec<String> = Vec::new();
let mut present = HashSet::new();
Expand Down Expand Up @@ -154,7 +265,14 @@ pub mod headers {
base_new_parts.push(child_def);
}
base_new_parts.push(&base_header_content[base_defs.last().unwrap().end..]);
write_parts(&mut BufWriter::new(&base_header), &base_new_parts).unwrap();

// Definitions moved in from child headers (and any already present in
// the base) can introduce intra-file duplicate typedefs that the
// child-vs-base pass above does not catch. Collapse them so the base
// header compiles under `-Werror -Wtypedef-redefinition`.
let merged_base: String = base_new_parts.concat();
let deduped_base = dedup_base_typedefs(&merged_base);
write_parts(&mut BufWriter::new(&base_header), &[&deduped_base]).unwrap();
}

#[cfg(test)]
Expand Down Expand Up @@ -335,5 +453,88 @@ typedef struct ddog_Vec_U8 {
"
);
}

#[test]
fn forward_decl_is_dropped_when_full_body_exists() {
// The forward declaration may appear either before or after the
// full-body definition; both must be collapsed to the body.
let before = r"typedef struct ddog_prof_EncodedProfile ddog_prof_EncodedProfile;

typedef struct ddog_prof_EncodedProfile {
struct ddog_prof_EncodedProfile *inner;
} ddog_prof_EncodedProfile;
";
assert_eq!(
dedup_base_typedefs(before),
r"typedef struct ddog_prof_EncodedProfile {
struct ddog_prof_EncodedProfile *inner;
} ddog_prof_EncodedProfile;
"
);

let after = r"typedef struct OpaqueStringId {
uint32_t offset;
} OpaqueStringId;

typedef struct OpaqueStringId OpaqueStringId;
";
// The blank line that separated the two definitions is retained.
assert_eq!(
dedup_base_typedefs(after),
"typedef struct OpaqueStringId {\n uint32_t offset;\n} OpaqueStringId;\n\n"
);
}

#[test]
fn opaque_forward_decl_is_kept_without_body() {
// An opaque type only has a forward declaration; it must be kept.
let input =
"typedef struct ddog_OpaqueCancellationToken ddog_OpaqueCancellationToken;\n";
assert_eq!(dedup_base_typedefs(input), input);
}

#[test]
fn distinct_alias_typedef_is_kept() {
// `typedef struct A B;` with A != B is a real alias, not a
// redundant forward declaration, even if `A` has a body.
let input = r"typedef struct ddog_OpaqueCancellationToken {
uint32_t _0;
} ddog_OpaqueCancellationToken;

typedef struct ddog_OpaqueCancellationToken ddog_prof_TokioCancellationToken;
";
assert_eq!(dedup_base_typedefs(input), input);
}

#[test]
fn exact_duplicate_typedef_with_differing_comment_is_dropped() {
// Same pointer typedef emitted twice, once bare and once with a doc
// comment. The first occurrence (and its comment, if any) is kept,
// the later duplicate is removed.
let input = r"typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2;

/**
* A handle to a function.
*/
typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2;
";
assert_eq!(
dedup_base_typedefs(input),
"typedef struct ddog_prof_Function2 *ddog_prof_FunctionId2;\n\n"
);
}

#[test]
fn unrelated_typedefs_are_untouched() {
let input = r"typedef uint64_t ddog_QueueId;

typedef struct ddog_Vec_U8 {
const uint8_t *ptr;
} ddog_Vec_U8;

typedef struct ddog_Slice_U8 *ddog_Slice_U8Handle;
";
assert_eq!(dedup_base_typedefs(input), input);
}
}
} /* Headers */
Loading