cprover
mini_c_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Mini C Parser
4
5Author: Daniel Kroening, dkr@amazon.com
6
7\*******************************************************************/
8
11
12#include "mini_c_parser.h"
13
15#include <util/invariant.h>
16
17#include "cscanner.h"
18
// Opening part of the parser class body.
// NOTE(review): the class-head line (original line 19) is absent from this
// listing; the out-of-class definition of parse_brackets further down names
// the class mini_c_parsert.
20{
21public:
// NOTE(review): original line 22 is absent; the empty body below presumably
// belongs to the constructor -- confirm against the real file.
23 {
24 }
25
// Entry point of the parser: takes an input stream and returns a
// c_translation_unitt.
26 c_translation_unitt parse(std::istream &);
27
28protected:
// Position of the current token; peek() and consume_token() use it to index
// the member named tokens.
29 std::size_t token_index;
// Sequence-of-tokens type used by all parsing helpers of this class.
30 using tokenst = std::vector<ctokent>;
32
33 bool eof() const
34 {
35 return is_eof(peek());
36 }
37
43
/// Returns a reference to the current token, tokens[token_index], without
/// changing token_index.
// NOTE(review): original line 46 is absent from this listing, so the body
// shown here may be incomplete (the sibling overload starts with a
// PRECONDITION bounds check) -- confirm against the real file.
44 const ctokent &peek() const
45 {
47 return tokens[token_index];
48 }
49
50 const ctokent &peek(std::size_t how_many) const
51 {
52 PRECONDITION(token_index + how_many < tokens.size());
53 return tokens[token_index + how_many];
54 }
55
// NOTE(review): the signature (original line 56) and original lines 58-59 are
// absent from this listing; the index at the end of the page lists this
// member as consume_token() returning a const reference to ctokent.
// The visible statement returns the token at token_index and then advances
// token_index by one (post-increment).
57 {
60 return tokens[token_index++];
61 }
62
63 static bool is_storage_class(const ctokent &token)
64 {
65 return token == "auto" || token == "extern" || token == "static" ||
66 token == "register" || token == "_Thread_local";
67 }
68
69 static bool is_type_qualifier(const ctokent &token)
70 {
71 return token == "const" || token == "volatile" || token == "restrict" ||
72 token == "_Atomic";
73 }
74
// Consumes a run of skippable tokens, appending each consumed token to the
// given sequence (defined out of class below).
75 void skip_ws(tokenst &);
// If the current token equals open, consumes a balanced open/close group and
// appends every consumed token to dest (defined out of class below).
76 void parse_brackets(char open, char close, tokenst &dest);
// End of the parser class.
77};
78
79std::ostream &operator<<(std::ostream &out, const c_declarationt &declaration)
80{
81 for(const auto &t : declaration.pre_declarator)
82 out << t.text;
83
84 for(const auto &t : declaration.declarator)
85 out << t.text;
86
87 for(const auto &t : declaration.post_declarator)
88 out << t.text;
89
90 for(const auto &t : declaration.initializer)
91 out << t.text;
92
93 return out;
94}
95
96void c_declarationt::print(std::ostream &out) const
97{
98 if(!declarator.empty())
99 {
100 out << "DECLARATOR: ";
101 for(const auto &t : declarator)
102 out << t.text;
103 out << '\n';
104 }
105}
106
// NOTE(review): the signature (original line 107) is absent from this
// listing; the index at the end of the page lists a const member
// is_function() returning bool, which presumably is this body -- confirm.
// True iff post_declarator is non-empty and its first token equals the
// opening parenthesis character.
108{
109 return !post_declarator.empty() && post_declarator.front() == '(';
110}
111
// NOTE(review): the signature (original line 112) is absent from this
// listing; the index at the end of the page lists a const member has_body()
// returning bool, which presumably is this body -- confirm.
// True iff initializer is non-empty and its first token equals the opening
// curly brace character.
113{
114 return !initializer.empty() && initializer.front() == '{';
115}
116
// NOTE(review): the signature (original line 117) is absent from this
// listing; the index at the end of the page lists a const member
// declared_identifier() returning an optional ctokent, which presumably is
// this body -- confirm.
// Scans the declarator tokens in order and returns the first one for which
// is_identifier holds; returns a default-constructed (empty) result when
// there is none.
118{
119 for(auto &t : declarator)
120 if(is_identifier(t))
121 return t;
122 return {};
123}
124
// NOTE(review): the signature (original line 125) is absent from this
// listing; the loop body writes to a parameter named dest, and the class
// declares skip_ws taking a tokenst reference, so this is presumably
// mini_c_parsert::skip_ws -- confirm.
126{
// nothing to skip at end of input
127 if(eof())
128 return;
129
// Consume tokens for as long as the condition holds, appending each one to
// dest so that no input token is dropped.
// NOTE(review): original line 131 (the remainder of the loop condition) is
// absent from this listing; only the whitespace and comment checks are
// visible here.
130 while(is_ws(peek()) || is_comment(peek()) ||
132 {
133 dest.push_back(consume_token());
134 }
135}
136
137void mini_c_parsert::parse_brackets(char open, char close, tokenst &dest)
138{
139 if(eof() || peek() != open)
140 return;
141
142 std::size_t bracket_count = 0;
143 while(true)
144 {
145 if(eof())
146 throw invalid_source_file_exceptiont("expected " + std::string(1, close));
147
148 auto &token = consume_token();
149 dest.push_back(token);
150 if(token == open)
151 bracket_count++;
152 else if(token == close)
153 {
154 bracket_count--;
155 if(bracket_count == 0)
156 break; // done
157 }
158 }
159}
160
// NOTE(review): the signature (original line 161) is absent from this
// listing; the index at the end of the page lists parse_pre_declarator()
// returning tokenst, which presumably is this body -- confirm.
// Collects the tokens that precede the declarator and returns them. Skipped
// whitespace and similar tokens are also appended via skip_ws.
162{
163 // type qualifier
164 // storage class
165 // type
166 // '*'
167 tokenst result;
168
169 while(true)
170 {
171 skip_ws(result);
172
// end of input: return what has been collected so far
173 if(eof())
174 return result;
175
176 auto &token = peek();
177
// Qualifiers, storage classes, the pointer star, the listed built-in type
// keywords, inline and typedef are taken as they are.
// NOTE(review): the unsigned keyword is compared through token.text while
// all other keywords use the token comparison operator directly.
178 if(
179 is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
180 token == "int" || token == "signed" || token.text == "unsigned" ||
181 token == "char" || token == "short" || token == "long" ||
182 token == "float" || token == "double" || token == "inline" ||
183 token == "typedef")
184 {
185 result.push_back(consume_token());
186 }
// enum, struct and union: keyword, optional tag, optional braced body
187 else if(token == "enum" || token == "struct" || token == "union")
188 {
189 result.push_back(consume_token());
190
191 skip_ws(result);
192
193 // may be followed by a tag
194 if(!eof() && is_identifier(peek()))
195 result.push_back(consume_token());
196
197 skip_ws(result);
198
199 // may be followed by a body {...}
// parse_brackets returns without consuming anything when no brace follows
200 parse_brackets('{', '}', result);
201 }
202 else if(token == "__attribute__")
203 {
204 result.push_back(consume_token());
205 skip_ws(result);
206 // followed by (( ... ))
207 parse_brackets('(', ')', result);
208 }
209 else if(is_identifier(token))
210 {
211 // Might be typedef or the declarator.
212 // We look ahead for the next non-WS token to tell the difference.
// The scan starts one token after the identifier and stops at the first
// token that is not whitespace, a preprocessor directive or a comment.
// Nothing is consumed during the scan.
213 std::size_t index = 1;
214 while(true)
215 {
216 const auto &next_token = peek(index);
217 if(
218 is_ws(next_token) || is_preprocessor_directive(next_token) ||
219 is_comment(next_token))
220 index++;
221 else
222 break;
223 }
224
// An identifier that is followed by another identifier or by a star is
// treated as a type name; otherwise it is left unconsumed for the caller.
225 auto &next_token = peek(index);
226 if(!is_identifier(next_token) && next_token != '*')
227 {
228 // 'token' is the declarator
229 return result;
230 }
231 else
232 result.push_back(consume_token()); // it's a type
233 }
// a semicolon or an opening parenthesis ends the pre-declarator; neither is
// consumed here
234 else if(token == ';')
235 return result;
236 else if(token == '(') // function type, part of declarator
237 return result;
238 else
// NOTE(review): original line 239 is absent from this listing; the string
// below is presumably the argument of a throw of
// invalid_source_file_exceptiont, as in the other error paths -- confirm.
240 "expected a declaration but got '" + token.text + "'");
241 }
242}
243
// NOTE(review): the signature (original line 244) is absent from this
// listing; the index at the end of the page lists parse_declarator()
// returning tokenst, which presumably is this body -- confirm.
// Returns the declarator tokens: either a single identifier token or a
// complete parenthesised group. Returns an empty sequence at end of input or
// when the current token is a semicolon (which is not consumed).
// Throws invalid_source_file_exceptiont for any other token.
245{
246 // symbol
247 // ((...* symbol ...))
248
249 if(eof())
250 return {};
251
// no declarator present; the semicolon is left for the caller
252 if(peek() == ';')
253 return {};
254
255 if(peek() == '(')
256 {
// take the whole balanced parenthesised group as the declarator
257 tokenst result;
258 parse_brackets('(', ')', result);
259 return result;
260 }
261 else if(is_identifier(peek()))
262 {
263 return {consume_token()};
264 }
265 else
266 throw invalid_source_file_exceptiont("expected an identifier");
267}
268
// NOTE(review): the signature (original line 269) is absent from this
// listing; the index at the end of the page lists parse_post_declarator()
// returning tokenst, which presumably is this body -- confirm.
// Returns every token up to, but not including, the first semicolon, opening
// curly brace or equals sign; also stops at end of input.
270{
271 // consume everything until we see one of the following:
272 // 1) ';' (end of declaration)
273 // 2) '{' (function body)
274 // 3) '=' (initializer)
275
276 tokenst result;
277
278 while(true)
279 {
280 if(eof())
281 return result;
282
// the terminating token is only inspected, never consumed, in this function
283 if(peek() == ';' || peek() == '{' || peek() == '=')
284 return result;
285
286 result.push_back(consume_token());
287 }
288}
289
// NOTE(review): the signature (original line 290) is absent from this
// listing; the index at the end of the page lists parse_initializer()
// returning tokenst, which presumably is this body -- confirm.
// Consumes the final part of a declaration and returns its tokens. The
// current token selects the form:
//   equals sign  - everything up to and including the next semicolon
//   semicolon    - just that semicolon
//   curly brace  - a balanced braced group (function body)
// Returns an empty sequence at end of input. Throws
// invalid_source_file_exceptiont when the input ends inside an initializer
// or inside a function body.
291{
292 if(eof())
293 return {};
294 else if(peek() == '=')
295 {
296 tokenst result;
297 while(true)
298 {
299 if(eof())
300 throw invalid_source_file_exceptiont("expected an initializer");
301 auto &token = consume_token();
302 result.push_back(token);
// stops at the first semicolon token; bracket nesting is not tracked in
// this branch
303 if(token == ';')
304 return result;
305 }
306 }
307 else if(peek() == ';')
308 {
309 // done
310 return {consume_token()};
311 }
312 else if(peek() == '{')
313 {
314 // function body
315 tokenst result;
// number of braces currently open; the first consumed token is the opening
// brace, so the counter is at least one before any decrement
316 std::size_t bracket_count = 0;
317 while(true)
318 {
319 if(eof())
320 throw invalid_source_file_exceptiont("eof in function body");
321 auto &token = consume_token();
322 result.push_back(token);
323 if(token == '{')
324 bracket_count++;
325 else if(token == '}')
326 {
327 bracket_count--;
// the brace matching the first opening brace ends the body
328 if(bracket_count == 0)
329 return result;
330 }
331 }
332 }
333 else
// any other current token violates what this function expects of its caller
334 PRECONDITION(false);
335}
336
// NOTE(review): the signature (original line 337) and original lines 341,
// 343 and 344 are absent from this listing; the index at the end of the page
// lists parse_declaration() returning c_declarationt. The missing lines
// presumably fill the remaining parts of the result -- confirm.
// Builds one c_declarationt; the only visible step stores the tokens
// returned by parse_declarator in result.declarator.
338{
339 c_declarationt result;
340
342 result.declarator = parse_declarator();
345
346 return result;
347}
348
// NOTE(review): the signature (original line 349) is absent from this
// listing; the class declares parse taking an input stream and returning
// c_translation_unitt, which presumably is this body -- confirm.
// Tokenizes the stream named in and parses declarations until the
// end-of-file token is the current token.
350{
351 cscannert cscanner(in);
// ask the scanner to return whitespace and comment tokens as well
352 cscanner.return_WS_and_comments = true;
353 tokens = cscanner.get_tokens();
354 token_index = 0;
355
// an empty token vector yields an empty translation unit
356 if(tokens.empty())
357 return {};
358
// eof() and the look-ahead loops rely on a trailing end-of-file token
359 DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");
360
361 c_translation_unitt result;
362
363 while(!eof())
364 result.push_back(parse_declaration());
365
366 return result;
367}
368
// NOTE(review): the signature (original line 369) is absent from this
// listing; the index at the end of the page lists parse_c taking an input
// stream and returning c_translation_unitt, which presumably is this body
// -- confirm.
// Convenience wrapper: runs a temporary mini_c_parsert on the stream.
370{
371 return mini_c_parsert().parse(in);
372}
bool return_WS_and_comments
Definition: cscanner.h:31
std::vector< ctokent > get_tokens()
Definition: cscanner.cpp:41
Definition: ctoken.h:19
Thrown when we can't handle something in an input source file.
tokenst parse_declarator()
void parse_brackets(char open, char close, tokenst &dest)
const ctokent & peek(std::size_t how_many) const
c_translation_unitt parse(std::istream &)
void skip_ws(tokenst &)
std::size_t token_index
const ctokent & peek() const
static bool is_storage_class(const ctokent &token)
tokenst parse_post_declarator()
tokenst parse_pre_declarator()
std::vector< ctokent > tokenst
bool eof() const
c_declarationt parse_declaration()
tokenst parse_initializer()
const ctokent & consume_token()
static bool is_type_qualifier(const ctokent &token)
cscanner
static bool is_identifier(const ctokent &t)
Definition: ctoken.h:68
static bool is_comment(const ctokent &t)
Definition: ctoken.h:93
static bool is_preprocessor_directive(const ctokent &t)
Definition: ctoken.h:98
static bool is_ws(const ctokent &t)
Definition: ctoken.h:83
static bool is_eof(const ctokent &t)
Definition: ctoken.h:88
std::ostream & operator<<(std::ostream &out, const c_declarationt &declaration)
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
std::vector< c_declarationt > c_translation_unitt
Definition: mini_c_parser.h:38
nonstd::optional< T > optionalt
Definition: optional.h:35
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:510
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
bool has_body() const
bool is_function() const
void print(std::ostream &) const
tokenst post_declarator
Definition: mini_c_parser.h:29
tokenst initializer
Definition: mini_c_parser.h:30
optionalt< ctokent > declared_identifier() const
tokenst declarator
Definition: mini_c_parser.h:28
tokenst pre_declarator
Definition: mini_c_parser.h:27