cprover
type2name.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Type Naming for C
4
5Author: Daniel Kroening, kroening@cs.cmu.edu
6
7\*******************************************************************/
8
11
12#include "type2name.h"
13
14#include <util/arith_tools.h>
15#include <util/c_types.h>
16#include <util/invariant.h>
17#include <util/namespace.h>
19#include <util/std_expr.h>
20#include <util/symbol.h>
21
22#include <regex>
23
24typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;
25
26static std::string type2name(
27 const typet &type,
28 const namespacet &ns,
29 symbol_numbert &symbol_number);
30
31static std::string type2name_tag(
32 const tag_typet &type,
33 const namespacet &ns,
34 symbol_numbert &symbol_number)
35{
36 const irep_idt &identifier = type.get_identifier();
37
38 const symbolt *symbol;
39
40 if(ns.lookup(identifier, symbol))
41 return "SYM#"+id2string(identifier)+"#";
42
43 assert(symbol && symbol->is_type);
44
45 if(symbol->type.id()!=ID_struct &&
46 symbol->type.id()!=ID_union)
47 return type2name(symbol->type, ns, symbol_number);
48
49 // assign each symbol a number when seen for the first time
50 std::pair<symbol_numbert::iterator, bool> entry=
51 symbol_number.insert(std::make_pair(
52 identifier,
53 std::make_pair(symbol_number.size(), true)));
54
55 std::string result = "SYM" +
57 '#' + std::to_string(entry.first->second.first);
58
59 // new entry, add definition
60 if(entry.second)
61 {
62 result+="={";
63 result+=type2name(symbol->type, ns, symbol_number);
64 result+='}';
65
66 entry.first->second.second=false;
67 }
68
69 return result;
70}
71
73 const typet &type,
74 const namespacet &ns)
75{
76 auto bits = pointer_offset_bits(type, ns);
77 CHECK_RETURN(bits.has_value());
78 return integer2string(*bits);
79}
80
81static std::string type2name(
82 const typet &type,
83 const namespacet &ns,
84 symbol_numbert &symbol_number)
85{
86 std::string result;
87
88 // qualifiers first
89 if(type.get_bool(ID_C_constant))
90 result+='c';
91
92 if(type.get_bool(ID_C_restricted))
93 result+='r';
94
95 if(type.get_bool(ID_C_volatile))
96 result+='v';
97
98 if(type.get_bool(ID_C_transparent_union))
99 result+='t';
100
101 if(type.get_bool(ID_C_noreturn))
102 result+='n';
103
104 // this isn't really a qualifier, but the linker needs to
105 // distinguish these - should likely be fixed in the linker instead
106 if(!type.source_location().get_function().empty())
107 result+='l';
108
109 if(type.id().empty())
110 throw "empty type encountered";
111 else if(type.id()==ID_empty)
112 result+='V';
113 else if(type.id()==ID_signedbv)
114 result+="S" + pointer_offset_bits_as_string(type, ns);
115 else if(type.id()==ID_unsignedbv)
116 result+="U" + pointer_offset_bits_as_string(type, ns);
117 else if(type.id()==ID_bool ||
118 type.id()==ID_c_bool)
119 result+='B';
120 else if(type.id()==ID_integer)
121 result+='I';
122 else if(type.id()==ID_real)
123 result+='R';
124 else if(type.id()==ID_complex)
125 result+='C';
126 else if(type.id()==ID_floatbv)
127 result+="F" + pointer_offset_bits_as_string(type, ns);
128 else if(type.id()==ID_fixedbv)
129 result+="X" + pointer_offset_bits_as_string(type, ns);
130 else if(type.id()==ID_natural)
131 result+='N';
132 else if(type.id()==ID_pointer)
133 {
134 if(type.get_bool(ID_C_reference))
135 result+='&';
136 else
137 result+='*';
138 }
139 else if(type.id()==ID_code)
140 {
141 const code_typet &t=to_code_type(type);
142 const code_typet::parameterst parameters=t.parameters();
143 result+=type2name(t.return_type(), ns, symbol_number)+"(";
144
145 for(code_typet::parameterst::const_iterator
146 it=parameters.begin();
147 it!=parameters.end();
148 it++)
149 {
150 if(it!=parameters.begin())
151 result+='|';
152 result+=type2name(it->type(), ns, symbol_number);
153 }
154
155 if(t.has_ellipsis())
156 {
157 if(!parameters.empty())
158 result+='|';
159 result+="...";
160 }
161
162 result+=")->";
163 result+=type2name(t.return_type(), ns, symbol_number);
164 }
165 else if(type.id()==ID_array)
166 {
167 const exprt &size = to_array_type(type).size();
168
169 if(size.id() == ID_symbol)
170 result += "ARR" + id2string(to_symbol_expr(size).get_identifier());
171 else
172 {
173 const auto size_int = numeric_cast<mp_integer>(size);
174
175 if(!size_int.has_value())
176 result += "ARR?";
177 else
178 result += "ARR" + integer2string(*size_int);
179 }
180 }
181 else if(
182 type.id() == ID_c_enum_tag || type.id() == ID_struct_tag ||
183 type.id() == ID_union_tag)
184 {
185 result += type2name_tag(to_tag_type(type), ns, symbol_number);
186 }
187 else if(type.id()==ID_struct ||
188 type.id()==ID_union)
189 {
190 const auto &struct_union_type = to_struct_union_type(type);
191
192 if(struct_union_type.is_incomplete())
193 {
194 if(type.id() == ID_struct)
195 result += "ST?";
196 else if(type.id() == ID_union)
197 result += "UN?";
198 }
199 else
200 {
201 if(type.id() == ID_struct)
202 result += "ST";
203 if(type.id() == ID_union)
204 result += "UN";
205 result += '[';
206 bool first = true;
207 for(const auto &c : struct_union_type.components())
208 {
209 if(!first)
210 result += '|';
211 else
212 first = false;
213
214 result += type2name(c.type(), ns, symbol_number);
215 const irep_idt &component_name = c.get_name();
216 CHECK_RETURN(!component_name.empty());
217 result += "'" + id2string(component_name) + "'";
218 }
219 result += ']';
220 }
221 }
222 else if(type.id()==ID_c_enum)
223 {
224 const c_enum_typet &t=to_c_enum_type(type);
225
226 if(t.is_incomplete())
227 result += "EN?";
228 else
229 {
230 result += "EN";
231 const c_enum_typet::memberst &members = t.members();
232 result += '[';
233 for(c_enum_typet::memberst::const_iterator it = members.begin();
234 it != members.end();
235 ++it)
236 {
237 if(it != members.begin())
238 result += '|';
239 result += id2string(it->get_value());
240 result += "'" + id2string(it->get_identifier()) + "'";
241 }
242 }
243 }
244 else if(type.id()==ID_c_bit_field)
245 result+="BF"+pointer_offset_bits_as_string(type, ns);
246 else if(type.id()==ID_vector)
247 result+="VEC"+type.get_string(ID_size);
248 else
249 throw "unknown type '"+type.id_string()+"' encountered";
250
251 if(type.has_subtype())
252 {
253 result+='{';
254 result +=
255 type2name(to_type_with_subtype(type).subtype(), ns, symbol_number);
256 result+='}';
257 }
258
259 if(type.has_subtypes())
260 {
261 result+='$';
262 for(const typet &subtype : to_type_with_subtypes(type).subtypes())
263 {
264 result += type2name(subtype, ns, symbol_number);
265 result+='|';
266 }
267 result[result.size()-1]='$';
268 }
269
270 return result;
271}
272
273std::string type2name(const typet &type, const namespacet &ns)
274{
275 symbol_numbert symbol_number;
276 return type2name(type, ns, symbol_number);
277}
278
283std::string type_name2type_identifier(const std::string &name)
284{
285 const auto replace_special_characters = [](const std::string &name) {
286 std::string result{};
287 for(char c : name)
288 {
289 switch(c)
290 {
291 case '*':
292 result += "_ptr_";
293 break;
294 case '{':
295 result += "_start_sub_";
296 break;
297 case '}':
298 result += "_end_sub_";
299 break;
300 default:
301 result += c;
302 break;
303 }
304 }
305 return result;
306 };
307 const auto replace_invalid_characters_with_underscore =
308 [](const std::string &identifier) {
309 static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
310 return std::regex_replace(identifier, non_alpha_numeric, "_");
311 };
312 const auto strip_leading_digits = [](const std::string &identifier) {
313 static const std::regex identifier_regex{
314 "[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"};
315 std::smatch match_results;
316 bool found = std::regex_search(identifier, match_results, identifier_regex);
317 POSTCONDITION(found);
318 return match_results.str(0);
319 };
320 return strip_leading_digits(replace_invalid_characters_with_underscore(
321 replace_special_characters(name)));
322}
323
324std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
325{
326 return type_name2type_identifier(type2name(type, ns));
327}
const c_enum_typet & to_c_enum_type(const typet &type)
Cast a typet to a c_enum_typet.
Definition: c_types.h:302
const exprt & size() const
Definition: std_types.h:790
The type of C enums.
Definition: c_types.h:217
const memberst & members() const
Definition: c_types.h:255
std::vector< c_enum_membert > memberst
Definition: c_types.h:253
bool is_incomplete() const
enum types may be incomplete
Definition: c_types.h:261
Base type of functions.
Definition: std_types.h:539
std::vector< parametert > parameterst
Definition: std_types.h:542
const parameterst & parameters() const
Definition: std_types.h:655
const typet & return_type() const
Definition: std_types.h:645
bool has_ellipsis() const
Definition: std_types.h:611
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Base class for all expressions.
Definition: expr.h:54
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:58
const std::string & id_string() const
Definition: irep.h:399
const irep_idt & id() const
Definition: irep.h:396
const std::string & get_string(const irep_idt &name) const
Definition: irep.h:409
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:138
const irep_idt & get_function() const
irep_idt get_tag() const
Definition: std_types.h:168
Symbol table entry.
Definition: symbol.h:28
bool is_type
Definition: symbol.h:61
typet type
Type of symbol.
Definition: symbol.h:31
A tag-based type, i.e., typet with an identifier.
Definition: std_types.h:396
const irep_idt & get_identifier() const
Definition: std_types.h:410
The type of an expression, extends irept.
Definition: type.h:29
bool has_subtypes() const
Definition: type.h:63
const source_locationt & source_location() const
Definition: type.h:74
bool has_subtype() const
Definition: type.h:66
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
optionalt< mp_integer > pointer_offset_bits(const typet &type, const namespacet &ns)
Pointer Logic.
static optionalt< smt_termt > get_identifier(const exprt &expr, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_handle_identifiers, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_identifiers)
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:744
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:832
const struct_union_typet & to_struct_union_type(const typet &type)
Cast a typet to a struct_union_typet.
Definition: std_types.h:214
const tag_typet & to_tag_type(const typet &type)
Cast a typet to a tag_typet.
Definition: std_types.h:434
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Symbol table entry.
static std::string pointer_offset_bits_as_string(const typet &type, const namespacet &ns)
Definition: type2name.cpp:72
std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
Constructs a string describing the given type, which can be used as part of a C identifier.
Definition: type2name.cpp:324
static std::string type2name(const typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:81
static std::string type2name_tag(const tag_typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:31
std::string type_name2type_identifier(const std::string &name)
type2name generates strings that aren't valid C identifiers If we want utilities like dump_c to work ...
Definition: type2name.cpp:283
std::unordered_map< irep_idt, std::pair< size_t, bool > > symbol_numbert
Definition: type2name.cpp:24
Type Naming for C.
const type_with_subtypest & to_type_with_subtypes(const typet &type)
Definition: type.h:221
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:177