Skip to content

Commit 3642b54

Browse files
Produce annotated pointer constants in incremental SMT2 traces
When the incremental SMT2 backend returns pointer values via get(), decompose the bit-vector into object ID and offset, look up the base expression in the object map, and produce annotated_pointer_constant_exprt. This enables the trace printer to display symbolic pointer expressions (e.g., string literals, address-of) instead of raw integer addresses. Integer addresses (null object with nonzero offset) are displayed as typecast-to-pointer rather than NULL + offset. Fixes: #8067 Co-authored-by: Kiro <kiro-agent@users.noreply.github.com>
1 parent 6df204e commit 3642b54

7 files changed

Lines changed: 451 additions & 7 deletions

File tree

regression/cbmc/trace-strings/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE no-new-smt
1+
CORE
22
main.c
33
--trace
44
^EXIT=10$

regression/cbmc/trace_address_arithmetic1/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE no-new-smt
1+
CORE
22
main.c
33
--trace
44
^EXIT=10$

regression/cbmc/xml-trace2/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE no-new-smt
1+
CORE
22
main.c
33
--xml-ui
44
<full_lhs_value( binary="[01]+")?>\{ 14, 0 \}</full_lhs_value>

src/solvers/smt2_incremental/construct_value_expr_from_smt.cpp

Lines changed: 227 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,110 @@
11
// Author: Diffblue Ltd.
22

33
#include <util/arith_tools.h>
4+
#include <util/byte_operators.h>
45
#include <util/c_types.h>
6+
#include <util/config.h>
57
#include <util/namespace.h>
68
#include <util/pointer_expr.h>
9+
#include <util/pointer_offset_size.h>
10+
#include <util/simplify_expr.h>
711
#include <util/std_expr.h>
812
#include <util/std_types.h>
913
#include <util/type.h>
1014

1115
#include <solvers/smt2_incremental/ast/smt_terms.h>
1216
#include <solvers/smt2_incremental/construct_value_expr_from_smt.h>
1317

18+
/// \brief Build a symbolic pointer expression from the given
19+
/// \p object_expr, \p offset, and target pointer \p type, following the
20+
/// same approach as pointer_logict::pointer_expr in pointer_logic.cpp.
21+
static exprt build_pointer_expr(
22+
const exprt &object_expr,
23+
const mp_integer &offset,
24+
const pointer_typet &type,
25+
const namespacet &ns)
26+
{
27+
typet subtype = type.base_type();
28+
// Void pointers with an offset are treated as char pointers, matching
29+
// GCC behavior for pointer arithmetic on void pointers.
30+
if(subtype.id() == ID_empty)
31+
subtype = char_type();
32+
if(object_expr.id() == ID_string_constant)
33+
{
34+
subtype = object_expr.type();
35+
// A string constant must be array-typed with fixed size.
36+
const array_typet &array_type = to_array_type(object_expr.type());
37+
mp_integer array_size =
38+
numeric_cast_v<mp_integer>(to_constant_expr(array_type.size()));
39+
if(array_size > offset)
40+
{
41+
to_array_type(subtype).size() =
42+
from_integer(array_size - offset, array_type.size().type());
43+
}
44+
}
45+
auto deep_object_opt =
46+
get_subexpression_at_offset(object_expr, offset, subtype, ns);
47+
if(!deep_object_opt.has_value())
48+
return nil_exprt{};
49+
exprt deep_object = deep_object_opt.value();
50+
simplify(deep_object, ns);
51+
if(
52+
deep_object.id() != ID_byte_extract_little_endian &&
53+
deep_object.id() != ID_byte_extract_big_endian)
54+
{
55+
return typecast_exprt::conditional_cast(
56+
address_of_exprt(deep_object), type);
57+
}
58+
59+
const byte_extract_exprt &be = to_byte_extract_expr(deep_object);
60+
const address_of_exprt base(be.op());
61+
if(be.offset() == 0)
62+
return typecast_exprt::conditional_cast(base, type);
63+
64+
const auto object_size = pointer_offset_size(be.op().type(), ns);
65+
if(object_size.has_value() && *object_size <= 1)
66+
{
67+
return typecast_exprt::conditional_cast(
68+
plus_exprt(base, from_integer(offset, pointer_diff_type())), type);
69+
}
70+
else if(object_size.has_value() && offset % *object_size == 0)
71+
{
72+
return typecast_exprt::conditional_cast(
73+
plus_exprt(
74+
base, from_integer(offset / *object_size, pointer_diff_type())),
75+
type);
76+
}
77+
else
78+
{
79+
return typecast_exprt::conditional_cast(
80+
plus_exprt(
81+
typecast_exprt(base, pointer_type(char_type())),
82+
from_integer(offset, pointer_diff_type())),
83+
type);
84+
}
85+
}
86+
1487
class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort
1588
{
1689
private:
1790
const typet &type_to_construct;
1891
const namespacet &ns;
92+
const smt_object_mapt *object_map;
93+
const std::unordered_map<exprt, smt_identifier_termt, irep_hash>
94+
*expression_identifiers;
1995
std::optional<exprt> result;
2096

2197
explicit value_expr_from_smt_factoryt(
2298
const typet &type_to_construct,
23-
const namespacet &ns)
24-
: type_to_construct{type_to_construct}, ns{ns}, result{}
99+
const namespacet &ns,
100+
const smt_object_mapt *object_map = nullptr,
101+
const std::unordered_map<exprt, smt_identifier_termt, irep_hash>
102+
*expression_identifiers = nullptr)
103+
: type_to_construct{type_to_construct},
104+
ns{ns},
105+
object_map{object_map},
106+
expression_identifiers{expression_identifiers},
107+
result{}
25108
{
26109
}
27110

@@ -53,6 +136,10 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort
53136
{
54137
result = null_pointer_exprt{*pointer_type};
55138
}
139+
else if(object_map)
140+
{
141+
result = build_annotated_pointer(bit_vector_constant, *pointer_type);
142+
}
56143
else
57144
{
58145
// The reason we are manually constructing a constant_exprt here is a
@@ -97,6 +184,91 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort
97184
type_to_construct.pretty());
98185
}
99186

187+
/// \brief Build an annotated pointer constant from a bit-vector value
188+
/// using the object map to reconstruct the symbolic pointer expression.
189+
exprt build_annotated_pointer(
190+
const smt_bit_vector_constant_termt &bit_vector_constant,
191+
const pointer_typet &pointer_type)
192+
{
193+
const auto sort_width = bit_vector_constant.get_sort().bit_width();
194+
const irep_idt bvrep =
195+
integer2bvrep(bit_vector_constant.value(), sort_width);
196+
const std::size_t object_bits = config.bv_encoding.object_bits;
197+
const std::size_t pointer_width = pointer_type.get_width();
198+
INVARIANT(
199+
pointer_width > object_bits,
200+
"Pointer width should be wider than object bits.");
201+
const std::size_t offset_bits = pointer_width - object_bits;
202+
203+
// Extract object ID from high bits.
204+
const mp_integer object_id = bit_vector_constant.value() >> offset_bits;
205+
206+
// Extract offset from low bits, sign-extending.
207+
mp_integer offset = bit_vector_constant.value() % power(2, offset_bits);
208+
if(offset >= power(2, offset_bits - 1))
209+
offset -= power(2, offset_bits);
210+
211+
// Null object with nonzero offset represents an integer address
212+
// (e.g., (int *)0xDEADBEEF).
213+
if(object_id == 0)
214+
{
215+
exprt symbolic =
216+
typecast_exprt{from_integer(offset, pointer_diff_type()), pointer_type};
217+
return annotated_pointer_constant_exprt{bvrep, symbolic};
218+
}
219+
220+
// Build reverse map from unique_id to base_expression.
221+
std::unordered_map<std::size_t, const decision_procedure_objectt *>
222+
id_to_object;
223+
for(const auto &entry : *object_map)
224+
id_to_object.emplace(entry.second.unique_id, &entry.second);
225+
226+
const auto it = id_to_object.find(numeric_cast_v<std::size_t>(object_id));
227+
if(it == id_to_object.end())
228+
{
229+
// Unknown object - fall back to plain constant.
230+
return constant_exprt{bvrep, pointer_type};
231+
}
232+
233+
const decision_procedure_objectt &object = *it->second;
234+
235+
// Invalid pointer object.
236+
if(object.base_expression == make_invalid_pointer_expr())
237+
return annotated_pointer_constant_exprt{
238+
bvrep, constant_exprt("INVALID", pointer_type)};
239+
240+
// Resolve the base expression through the expression_identifiers
241+
// reverse map. When string constants or other expressions are
242+
// substituted with symbol_exprt identifiers during SMT conversion,
243+
// the object_map tracks the substituted symbol rather than the
244+
// original expression. We reverse that lookup here to recover the
245+
// original expression for display in traces.
246+
exprt resolved_expr = object.base_expression;
247+
if(
248+
expression_identifiers &&
249+
expr_try_dynamic_cast<symbol_exprt>(object.base_expression))
250+
{
251+
const auto &sym = to_symbol_expr(object.base_expression);
252+
for(const auto &entry : *expression_identifiers)
253+
{
254+
if(entry.second.identifier() == sym.get_identifier())
255+
{
256+
resolved_expr = entry.first;
257+
break;
258+
}
259+
}
260+
}
261+
262+
exprt symbolic =
263+
build_pointer_expr(resolved_expr, offset, pointer_type, ns);
264+
if(symbolic.is_nil())
265+
{
266+
// Could not build symbolic expression - fall back.
267+
return constant_exprt{bvrep, pointer_type};
268+
}
269+
return annotated_pointer_constant_exprt{bvrep, symbolic};
270+
}
271+
100272
void
101273
visit(const smt_function_application_termt &function_application) override
102274
{
@@ -131,6 +303,37 @@ class value_expr_from_smt_factoryt : public smt_term_const_downcast_visitort
131303
INVARIANT(factory.result.has_value(), "Factory must result in expr value.");
132304
return *factory.result;
133305
}
306+
307+
/// \brief Overload accepting an object map for annotated pointer
308+
/// construction.
309+
static exprt make(
310+
const smt_termt &value_term,
311+
const typet &type_to_construct,
312+
const namespacet &ns,
313+
const smt_object_mapt &object_map)
314+
{
315+
value_expr_from_smt_factoryt factory{type_to_construct, ns, &object_map};
316+
value_term.accept(factory);
317+
INVARIANT(factory.result.has_value(), "Factory must result in expr value.");
318+
return *factory.result;
319+
}
320+
321+
/// \brief Overload accepting both an object map and expression identifiers
322+
/// for resolving substituted expressions in pointer annotation.
323+
static exprt make(
324+
const smt_termt &value_term,
325+
const typet &type_to_construct,
326+
const namespacet &ns,
327+
const smt_object_mapt &object_map,
328+
const std::unordered_map<exprt, smt_identifier_termt, irep_hash>
329+
&expression_identifiers)
330+
{
331+
value_expr_from_smt_factoryt factory{
332+
type_to_construct, ns, &object_map, &expression_identifiers};
333+
value_term.accept(factory);
334+
INVARIANT(factory.result.has_value(), "Factory must result in expr value.");
335+
return *factory.result;
336+
}
134337
};
135338

136339
exprt construct_value_expr_from_smt(
@@ -140,3 +343,25 @@ exprt construct_value_expr_from_smt(
140343
{
141344
return value_expr_from_smt_factoryt::make(value_term, type_to_construct, ns);
142345
}
346+
347+
exprt construct_value_expr_from_smt(
348+
const smt_termt &value_term,
349+
const typet &type_to_construct,
350+
const namespacet &ns,
351+
const smt_object_mapt &object_map)
352+
{
353+
return value_expr_from_smt_factoryt::make(
354+
value_term, type_to_construct, ns, object_map);
355+
}
356+
357+
exprt construct_value_expr_from_smt(
358+
const smt_termt &value_term,
359+
const typet &type_to_construct,
360+
const namespacet &ns,
361+
const smt_object_mapt &object_map,
362+
const std::unordered_map<exprt, smt_identifier_termt, irep_hash>
363+
&expression_identifiers)
364+
{
365+
return value_expr_from_smt_factoryt::make(
366+
value_term, type_to_construct, ns, object_map, expression_identifiers);
367+
}

src/solvers/smt2_incremental/construct_value_expr_from_smt.h

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55

66
#include <util/expr.h>
77

8-
class smt_termt;
8+
#include <solvers/smt2_incremental/ast/smt_terms.h>
9+
#include <solvers/smt2_incremental/object_tracking.h>
10+
911
class typet;
1012

1113
/// \brief Given a \p value_term and a \p type_to_construct, this function
@@ -32,4 +34,48 @@ exprt construct_value_expr_from_smt(
3234
const typet &type_to_construct,
3335
const namespacet &ns);
3436

37+
/// \brief Overload that accepts an \p object_map for constructing annotated
38+
/// pointer constants. When the type to construct is a pointer type, the
39+
/// object map is used to reconstruct a symbolic pointer expression from the
40+
/// encoded object ID and offset in the bit-vector value.
41+
/// \param value_term
42+
/// The SMT term encoding the value.
43+
/// \param type_to_construct
44+
/// The type which the constructed expr returned is expected to have.
45+
/// \param ns
46+
/// The namespace for type lookups.
47+
/// \param object_map
48+
/// Map from base expressions to tracked object information, used to
49+
/// reconstruct symbolic pointer expressions for trace display.
50+
exprt construct_value_expr_from_smt(
51+
const smt_termt &value_term,
52+
const typet &type_to_construct,
53+
const namespacet &ns,
54+
const smt_object_mapt &object_map);
55+
56+
/// \brief Overload that additionally accepts \p expression_identifiers to
57+
/// reverse identifier substitutions for pointer annotation. When the object
58+
/// map contains a symbol_exprt that was introduced by identifier substitution
59+
/// (e.g., for string constants mapped to array functions), this overload
60+
/// recovers the original expression for symbolic pointer display.
61+
/// \param value_term
62+
/// The SMT term encoding the value.
63+
/// \param type_to_construct
64+
/// The type which the constructed expr returned is expected to have.
65+
/// \param ns
66+
/// The namespace for type lookups.
67+
/// \param object_map
68+
/// Map from base expressions to tracked object information.
69+
/// \param expression_identifiers
70+
/// Map from original expressions to their SMT identifier terms, as used
71+
/// during expression-to-SMT conversion. This is used to reverse the
72+
/// substitution and recover the original expression for trace display.
73+
exprt construct_value_expr_from_smt(
74+
const smt_termt &value_term,
75+
const typet &type_to_construct,
76+
const namespacet &ns,
77+
const smt_object_mapt &object_map,
78+
const std::unordered_map<exprt, smt_identifier_termt, irep_hash>
79+
&expression_identifiers);
80+
3581
#endif // CPROVER_SOLVERS_SMT2_INCREMENTAL_CONSTRUCT_VALUE_EXPR_FROM_SMT_H

src/solvers/smt2_incremental/smt2_incremental_decision_procedure.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,11 @@ std::optional<exprt> smt2_incremental_decision_proceduret::get_expr(
516516
response.pretty()};
517517
}
518518
return construct_value_expr_from_smt(
519-
get_value_response->pairs()[0].get().value(), type, ns);
519+
get_value_response->pairs()[0].get().value(),
520+
type,
521+
ns,
522+
object_map,
523+
expression_identifiers);
520524
}
521525

522526
// This is a fall back which builds resulting expression based on getting the

0 commit comments

Comments
 (0)