tlx
split_quoted.cpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/string/split_quoted.cpp
3 *
4 * Part of tlx - http://panthema.net/tlx
5 *
6 * Copyright (C) 2016-2018 Timo Bingmann <tb@panthema.net>
7 *
8 * All rights reserved. Published under the Boost Software License, Version 1.0
9 ******************************************************************************/
10
12
13#include <stdexcept>
14
namespace tlx {

/*!
 * Split the given string at each separator character into distinct
 * substrings, where a field may be enclosed in quote characters.
 *
 * Parsing rules, as implemented below:
 *  - Separators found between fields are skipped, so runs of separators and
 *    leading/trailing separators do not produce empty entries.
 *  - A field whose first character is \p quote is a quoted field. It extends
 *    to the next unescaped \p quote, which must be followed directly by
 *    \p sep or by the end of the string. Quoted fields may be empty and may
 *    contain \p sep.
 *  - Inside a quoted field \p escape introduces an escape sequence:
 *    escape+quote -> quote, escape+escape -> escape, escape+'n' -> newline,
 *    escape+'r' -> carriage return, escape+'t' -> tab.
 *  - Any other field is unquoted and runs up to the next \p sep or the end
 *    of the string. Quote and escape characters inside an unquoted field are
 *    copied verbatim.
 *
 * \param str     string to split
 * \param sep     separator character between fields
 * \param quote   character that opens and closes a quoted field
 * \param escape  character that introduces an escape sequence inside a
 *                quoted field
 * \return        the fields in order of appearance
 *
 * \throws std::runtime_error if a quoted field is not terminated, if a
 * closing quote is followed by something other than \p sep or the end of the
 * string, if \p escape is the last character of the string, or if \p escape
 * is followed by an unsupported character.
 */
std::vector<std::string>
split_quoted(const std::string& str, char sep, char quote, char escape) {

    std::vector<std::string> out;

    std::string::const_iterator it = str.begin();
    // Accumulates the characters of the field currently being parsed.
    std::string entry;

    while (it != str.end())
    {
        if (*it == sep) {
            // skip separator outside of fields
            ++it;
        }
        else if (*it == quote) {
            // parse quoted entry: step over the opening quote
            ++it;

            while (true) {
                if (it == str.end()) {
                    throw std::runtime_error(
                              "unmatched end quote in split_quoted().");
                }
                else if (*it == quote) {
                    ++it;
                    if (it == str.end()) {
                        // last quote and end-of-line
                        out.emplace_back(std::move(entry));
                        return out;
                    }
                    else if (*it == sep) {
                        // quote + sep -> end of this entry
                        out.emplace_back(std::move(entry));
                        // A moved-from string is only "valid but
                        // unspecified"; reset it explicitly before it is
                        // reused for the next field.
                        entry.clear();
                        ++it;
                        break;
                    }
                    else {
                        throw std::runtime_error(
                                  std::string("extra quote enclosed in entry,"
                                              " followed by ") + *it);
                    }
                }
                else if (*it == escape) {
                    ++it;
                    if (it == str.end()) {
                        throw std::runtime_error(
                                  "escape as last character in string");
                    }
                    else if (*it == quote) {
                        // escape + quote -> quote
                        entry += *it;
                        ++it;
                    }
                    else if (*it == escape) {
                        // escape + escape -> escape
                        entry += *it;
                        ++it;
                    }
                    else if (*it == 'n') {
                        // escape + n -> new line
                        entry += '\n';
                        ++it;
                    }
                    else if (*it == 'r') {
                        // escape + r -> carriage return
                        entry += '\r';
                        ++it;
                    }
                    else if (*it == 't') {
                        // escape + t -> tab
                        entry += '\t';
                        ++it;
                    }
                    else {
                        // note the trailing space so that the offending
                        // character is separated from the message text
                        throw std::runtime_error(
                                  std::string("escape followed by "
                                              "unknown character ") + *it);
                    }
                }
                else {
                    // normal character
                    entry += *it;
                    ++it;
                }
            }
        }
        else {
            // parse unquoted entry
            while (true) {
                if (it == str.end()) {
                    // end-of-line
                    out.emplace_back(std::move(entry));
                    return out;
                }
                else if (*it == sep) {
                    // sep -> end of this entry
                    out.emplace_back(std::move(entry));
                    // reset the moved-from buffer before the next field
                    entry.clear();
                    ++it;
                    break;
                }
                else {
                    // normal character
                    entry += *it;
                    ++it;
                }
            }
        }
    }

    return out;
}

/*!
 * Split the given string into fields using a space as separator, a double
 * quote as quote character and a backslash as escape character.
 *
 * \param str  string to split
 * \return     the fields in order of appearance
 *
 * \throws std::runtime_error on malformed quoting or escaping, as raised by
 * the four-argument overload.
 */
std::vector<std::string> split_quoted(const std::string& str) {
    return split_quoted(str, ' ', '"', '\\');
}

} // namespace tlx
127
128/******************************************************************************/
std::vector< std::string > split_quoted(const std::string &str, char sep, char quote, char escape)
Split the given string at each separator character into distinct substrings.