39 auto original_goto_model =
42 if(!original_goto_model.has_value())
44 log.
error() <<
"Unable to read goto binary for linker script merging"
50 std::list<irep_idt> linker_defined_symbols;
52 linker_defined_symbols,
53 original_goto_model->symbol_table,
55 linker_def_outfile());
80 original_goto_model->symbol_table,
84 log.
error() <<
"Could not add linkerscript defs to symbol table"
89 original_goto_model->goto_functions.function_map.erase(
93 original_goto_model->symbol_table,
98 original_goto_model->symbol_table,
99 original_goto_model->goto_functions,
101 original_goto_model->goto_functions.update();
115 log.
error() <<
"Could not pointerize all linker-defined expressions"
122 *original_goto_model,
128 log.
error() <<
"Could not write linkerscript-augmented binary"
136 const std::string &elf_binary,
137 const std::string &goto_binary,
140 : elf_binary(elf_binary),
141 goto_binary(goto_binary),
143 log(message_handler),
144 replacement_predicates(
146 "address of array's first member",
151 [](
const exprt &expr) {
152 return expr.
id() == ID_address_of &&
153 expr.
type().
id() == ID_pointer &&
161 .
id() == ID_symbol &&
169 .
id() == ID_constant &&
173 .
id() == ID_signedbv;
180 [](
const exprt &expr) {
181 return expr.
id() == ID_address_of &&
182 expr.
type().
id() == ID_pointer &&
192 [](
const exprt &expr) {
193 return expr.
id() == ID_address_of &&
194 expr.
type().
id() == ID_pointer &&
206 [](
const exprt &expr) {
207 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
210 "pointer (does not need pointerizing)",
214 [](
const exprt &expr) {
215 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
231 std::list<symbol_exprt> to_pointerize;
234 if(to_pointerize.empty())
236 log.
debug() <<
"Pointerizing the symbol-table value of symbol "
242 if(to_pointerize.empty() && fail==0)
245 for(
const auto &sym : to_pointerize)
247 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
248 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
249 << sym.pretty() <<
"\n";
261 for(
exprt *insts : std::list<exprt *>(
262 {&(instruction.code_nonconst()), &(instruction.guard)}))
264 std::list<symbol_exprt> to_pointerize;
266 if(to_pointerize.empty())
270 if(to_pointerize.empty() && fail==0)
273 for(
const auto &sym : to_pointerize)
275 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
276 <<
"' in function " << gf.first <<
". Pretty:\n"
277 << sym.pretty() <<
"\n";
292 const std::string &shape)
294 auto it=linker_values.find(ident);
295 if(it==linker_values.end())
297 log.
error() <<
"Could not find a new expression for linker script-defined "
303 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
311 std::list<symbol_exprt> &to_pointerize,
315 for(
auto const &pair : linker_values)
318 if(!pattern.match(expr))
321 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
324 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
325 pattern.description());
327 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
329 if(result==to_pointerize.end())
336 to_pointerize.erase(result);
355 std::list<symbol_exprt> &to_pointerize)
const
357 for(
const auto &op : expr.
operands())
359 if(op.id()!=ID_symbol)
363 if(pair!=linker_values.end())
364 to_pointerize.push_back(sym_exp);
366 for(
const auto &op : expr.
operands())
371The current implementation of
this function is less precise than the
372 commented-out version below. To understand the difference between these
373 implementations, consider the following example:
375Suppose we have a section in the linker script, 100 bytes long, where the
376address of the symbol sec_start is the start of the section (value 4096) and the
377address of sec_end is the end of that section (value 4196).
379The current implementation synthesizes the
goto-version of the following C:
381 char __sec_array[100];
382 char *sec_start=(&__sec_array[0]);
383 char *sec_end=((&__sec_array[0])+100);
387This is imprecise
for the following reason: the actual address of the array and
388the pointers shall be some random CBMC-internal address, instead of being 4096
389and 4196. The linker script, on the other hand, would have specified the exact
390position of the section, and we even know what the actual values of sec_start
391and sec_end are from the
object file (these values are in the `addresses` list
392of the `
data` argument to
this function). If the correctness of the code depends
393on these actual values, then CBMCs model of the code is too imprecise to verify
396The commented-out version of
this function below synthesizes the following:
398 char *sec_start=4096;
402This code has both the actual addresses of the start and end of the section and
403tells CBMC that the intermediate region is valid. However, the allocated_memory
404macro does not currently allocate an actual
object at the address 4096, so
405symbolic execution can fail. In particular, the
'allocated memory' is part of
406__CPROVER_memory, which does not have a bounded size;
this means that (
for
407example) calls to memcpy or memset fail, because the first and third arguments
408do not have know n size. The commented-out implementation should be reinstated
411In either
case,
no other changes to the rest of the code (outside
this function)
412should be necessary. The rest of
this file converts expressions containing the
413linker-defined symbol into pointer types
if they were not already, and
this is
414the right behaviour
for both implementations.
418 const std::string &linker_script,
423 std::map<irep_idt, std::size_t> truncated_symbols;
426 bool has_end=d[
"has-end-symbol"].is_true();
430 << d[
"start-symbol"].value;
439 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
440 << array_size <<
" is too large to model. Truncating to "
452 std::ostringstream array_comment;
453 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
454 << array_size <<
" bytes";
467 array_sym.
type = array_type;
468 array_sym.
value = *zi;
471 bool failed = symbol_table.
add(array_sym);
483 start_sym.
name = d[
"start-symbol"].value;
485 start_sym.
value = array_start;
486 linker_values.emplace(
487 d[
"start-symbol"].value,
488 std::make_pair(start_sym.
symbol_expr(), array_start));
492 auto it = std::find_if(
495 [&d](
const jsont &add) {
496 return add[
"sym"].
value == d[
"start-symbol"].value;
500 log.error() <<
"Start: Could not find address corresponding to symbol '"
501 << d[
"start-symbol"].value <<
"' (start of section)"
507 std::ostringstream start_comment;
508 start_comment <<
"Pointer to beginning of object section '"
509 << d[
"section"].value <<
"'. Original address in object file"
510 <<
" is " << (*it)[
"val"].value;
514 auto start_sym_entry = symbol_table.
insert(start_sym);
515 if(!start_sym_entry.second)
516 start_sym_entry.first = start_sym;
520 plus_exprt array_end(array_start, array_size_expr);
526 end_sym.
name = d[
"end-symbol"].value;
528 end_sym.
value = array_end;
529 linker_values.emplace(
530 d[
"end-symbol"].value,
533 auto entry = std::find_if(
536 [&d](
const jsont &add) {
537 return add[
"sym"].
value == d[
"end-symbol"].value;
541 log.debug() <<
"Could not find address corresponding to symbol '"
542 << d[
"end-symbol"].value <<
"' (end of section)"
548 std::ostringstream end_comment;
549 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
550 <<
"'. Original address in object file"
551 <<
" is " << (*entry)[
"val"].value;
555 auto end_sym_entry = symbol_table.
insert(end_sym);
556 if(!end_sym_entry.second)
557 end_sym_entry.first = end_sym;
568 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
569 if(it!=linker_values.end())
576 const auto &pair=truncated_symbols.find(d[
"sym"].value);
577 if(pair==truncated_symbols.end())
578 symbol_value=d[
"val"].value;
582 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
584 <<
" because it corresponds to the size of a too-large section."
591 loc.
set_comment(
"linker script-defined symbol: char *"+
592 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
607 symbol.
value = rhs_tc;
609 linker_values.emplace(
629 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
631 f.add_source_location()=loc;
634 i.make_function_call(f);
635 initialize_instructions.push_front(i);
646 symbol_table.
add(sym);
653 loc.
set_comment(
"linker script-defined symbol: char *"+
654 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
666 linker_values.emplace(
667 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
670 assign.add_source_location()=loc;
672 assign_i.make_assignment(assign);
673 initialize_instructions.push_front(assign_i);
680 std::list<irep_idt> &linker_defined_symbols,
682 const std::string &out_file,
683 const std::string &def_out_file)
685 for(
auto const &pair : symbol_table.
symbols)
688 pair.second.is_extern && pair.second.value.is_nil() &&
691 linker_defined_symbols.push_back(pair.second.name);
695 std::ostringstream linker_def_str;
697 linker_defined_symbols.begin(),
698 linker_defined_symbols.end(),
699 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
700 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
704 std::ofstream linker_def_file(linker_def_infile());
705 linker_def_file << linker_def_str.str();
706 linker_def_file.close();
708 std::vector<std::string> argv=
712 "--object", out_file,
713 "--sym-file", linker_def_infile(),
714 "--out-file", def_out_file
718 argv.push_back(
"--very-verbose");
720 argv.push_back(
"--verbose");
723 for(std::size_t i=0; i<argv.size(); i++)
727 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
735 const std::list<irep_idt> &linker_defined_symbols,
739 for(
const auto &sym : linker_defined_symbols)
740 if(linker_values.find(sym)==linker_values.end())
744 <<
"' was declared extern but never given "
748 for(
const auto &pair : linker_values)
750 auto it=std::find(linker_defined_symbols.begin(),
751 linker_defined_symbols.end(), pair.first);
752 if(it==linker_defined_symbols.end())
756 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
757 <<
"either defined in the C source code, not declared extern in "
758 <<
"the C source code, or does not appear in the C source code"
767 if(!
data.is_object())
772 !(data_object.
find(
"regions") != data_object.
end() &&
773 data_object.
find(
"addresses") != data_object.
end() &&
774 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
783 return address.
find(
"val") != address.end() &&
784 address.find(
"sym") != address.end() &&
785 address[
"val"].is_number() && address[
"sym"].is_string();
795 return region.
find(
"start") != region.end() &&
796 region.find(
"size") != region.end() &&
797 region.find(
"annot") != region.end() &&
798 region.find(
"commt") != region.end() &&
799 region.find(
"start-symbol") != region.end() &&
800 region.find(
"has-end-symbol") != region.end() &&
801 region.find(
"section") != region.end() &&
802 region[
"start"].is_number() && region[
"size"].is_number() &&
803 region[
"annot"].is_string() &&
804 region[
"start-symbol"].is_string() &&
805 region[
"section"].is_string() && region[
"commt"].is_string() &&
806 ((region[
"has-end-symbol"].is_true() &&
807 region.find(
"end-symbol") != region.end() &&
808 region[
"end-symbol"].is_string()) ||
809 (region[
"has-end-symbol"].is_false() &&
810 region.find(
"size-symbol") != region.end() &&
811 region.find(
"end-symbol") == region.end() &&
812 region[
"size-symbol"].is_string()));
std::string array_name(const namespacet &ns, const exprt &expr)
unsignedbv_typet unsigned_int_type()
unsignedbv_typet size_type()
pointer_typet pointer_type(const typet &subtype)
bitvector_typet char_type()
Operator to return the address of an object.
std::string get_value(char option) const
virtual bool isset(char option) const
A codet representing an assignment in the program.
codet representation of a function call statement.
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
A constant literal expression.
void set_value(const irep_idt &value)
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Base class for all expressions.
typet & type()
Return the type of the expression.
const source_locationt & source_location() const
source_locationt & add_source_location()
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
goto_functionst goto_functions
GOTO functions.
This class represents an instruction in the GOTO intermediate representation.
A generic container class for the GOTO intermediate representation of one function.
instructionst instructions
The list of instructions in the goto program.
std::list< instructiont > instructionst
const irep_idt & id() const
iterator find(const std::string &key)
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
source_locationt source_location
message_handlert & get_message_handler()
mstreamt & warning() const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
The plus expression Associativity is not specified.
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
const irep_idt & get_identifier() const
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
source_locationt location
Source code location of definition of symbol.
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
typet type
Type of symbol.
irep_idt name
The unique identifier.
irep_idt pretty_name
Language-specific display name.
exprt value
Initial value of symbol.
Semantic type conversion.
static exprt conditional_cast(const exprt &expr, const typet &type)
Compile and link source and object files.
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
json_objectt & to_json_object(jsont &json)
json_arrayt & to_json_array(jsont &json)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const mp_integer string2integer(const std::string &n, unsigned base)
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
int run(const std::string &what, const std::vector< std::string > &argv)
#define CHECK_RETURN(CONDITION)
static optionalt< codet > static_lifetime_init(const irep_idt &identifier, symbol_tablet &symbol_table)
#define INITIALIZE_FUNCTION
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
unsigned safe_string2unsigned(const std::string &str, int base)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
static bool failed(bool error_indicator)