summaryrefslogtreecommitdiffstats
path: root/src/lib/util/str.h
blob: 4e817d502359275ddaa40c558a6d76fbe9ce73f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef KEA_UTIL_STR_H
#define KEA_UTIL_STR_H

#include <exceptions/exceptions.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>

namespace isc {
namespace util {
namespace str {

/// @brief A Set of C++ Utilities for Manipulating Strings

///
/// @brief A standard string util exception that is thrown if getToken or
/// numToToken are called with bad input data
class StringTokenError : public Exception {
public:
    StringTokenError(const char* file, size_t line, const char* what)
        : isc::Exception(file, line, what) {
    }
};

/// @brief Trim leading and trailing spaces.
///
/// @details Returns a copy of the input string but with any leading
/// or trailing spaces or tabs removed.
///
/// @param input Input string to modify.
///
/// @return String with leading and trailing spaces removed.
std::string
trim(const std::string& input);

/// @brief Finds the "trimmed" end of a buffer
///
/// @details Works backward from the end of the buffer, looking for the first
/// character not equal to the trim value, and returns an iterator
/// pointing to that position.
///
/// @param begin - Forward iterator pointing to the beginning of the
/// buffer to trim.
/// @param end - Forward iterator pointing to the untrimmed end of
/// the buffer to trim.
/// @param trim_val - byte value to trim off
///
/// @return Iterator pointing the first character from the end of the
/// buffer not equal to the trim value.
template <typename Iterator>
Iterator
seekTrimmed(Iterator const& begin, Iterator end, uint8_t const trim_val) {
    while (end != begin && *(end - 1) == trim_val) {
        --end;
    }

    return (end);
}

/// @brief Split string into tokens.
///
/// @details Splits a string into tokens (the tokens being delimited by one or more of
/// the delimiter characters) and returns the tokens in a vector.
/// Adjacent delimiters are considered to be a single delimiter.
///
/// Special use cases are:
/// 1. The empty string is considered to be zero tokens.
/// 2. A string comprising nothing but delimiters
///    is considered to be zero tokens.
///
/// The reasoning behind this is that the string can be thought of
/// as having invisible leading and trailing delimiter characters.
/// Therefore both cases reduce to a set of contiguous delimiters,
/// which are considered a single delimiter (so getting rid of the
/// string). Optional escape allows to escape delimiter characters
/// (and *only* them and the escape character itself) using backslash.
///
/// We could use Boost for this, but this (simple) function eliminates
/// one dependency in the code.
///
/// @param text String to be split.  Passed by value as the internal
///             copy is altered during the processing.
/// @param delim Delimiter characters
/// @param escape Use backslash to escape delimiter characters
///
/// @return Vector of tokens.
std::vector<std::string>
tokens(const std::string& text, const std::string& delim = " \t\n", bool escape = false);

/// @brief Convert character to uppercase.
///
/// @details Used in uppercase() to pass as a parameter to std::transform().
/// The function std::toupper() can't be used as it takes an "int" as its
/// parameter; this confuses the template expansion mechanism because
/// dereferencing a string::iterator returns a char.
///
/// @param chr Character to be upper-cased.
///
/// @return Uppercase version of the input character.
char
toUpper(char const chr);

/// @brief Convert string to uppercase.
///
/// @param text String to be upper-cased.
void
uppercase(std::string& text);

/// @brief Convert character to lowercase.
///
/// @details Used in lowercase() to pass as a parameter to std::transform().
/// The function std::tolower() can't be used as it takes an "int" as its
/// parameter; this confuses the template expansion mechanism because
/// dereferencing a string::iterator returns a char.
///
/// @param chr Character to be lower-cased.
///
/// @return Lowercase version of the input character.
char
toLower(char const chr);

/// @brief Convert string to lowercase.
///
/// @param text String to be lower-cased.
void
lowercase(std::string& text);

/// @brief Converts a string in quotes into vector.
///
/// @details A converted string is first trimmed. If a trimmed string is in
/// quotes, the quotes are removed and the resulting string is copied
/// into a vector. If the string is not in quotes, an empty vector is
/// returned.
///
/// The resulting string is copied to a vector and returned.
///
/// This function is intended to be used by the server configuration
/// parsers to convert string values surrounded with quotes into
/// binary form.
///
/// @param quoted_string String to be converted.
///
/// @return Vector containing converted string or empty string if
/// input string didn't contain expected quote characters.
std::vector<uint8_t>
quotedStringToBinary(const std::string& quoted_string);

/// @brief Converts a string of separated hexadecimal digits
/// into a vector.
///
/// @details Octets may contain 1 or 2 digits. For example, using a colon
/// for a separator all of the following are valid:
///
/// - yy:yy:yy:yy:yy
/// - y:y:y:y:y
/// - y:yy:yy:y:y
///
/// If the decoded string doesn't match any of the supported formats,
/// an exception is thrown.
///
/// @param hex_string Input string.
/// @param sep character to use as a separator.
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeSeparatedHexString(const std::string& hex_string,
                         const std::string& sep,
                         std::vector<uint8_t>& binary);

/// @brief Converts a string of hexadecimal digits with colons into
///  a vector.
///
/// @param hex_string Input string.
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeColonSeparatedHexString(const std::string& hex_string, std::vector<uint8_t>& binary);

/// @brief Converts a formatted string of hexadecimal digits into
/// a vector.
///
/// @details This function supports the following formats:
///
/// - yy:yy:yy:yy or yy yy yy yy - octets delimited by colons or
/// spaces, see @c decodeSeparatedHexString
///
/// - yyyyyyyyyy
/// - 0xyyyyyyyyyy
///
/// If there is an odd number of hexadecimal digits in the input
/// string, the '0' is prepended to the string before decoding.
///
/// @param hex_string Input string.
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeFormattedHexString(const std::string& hex_string, std::vector<uint8_t>& binary);

/// @brief Forward declaration to the @c StringSanitizer implementation.
class StringSanitizerImpl;

/// @brief Type representing the pointer to the @c StringSanitizerImpl.
using StringSanitizerImplPtr = std::shared_ptr<StringSanitizerImpl>;

/// @brief Implements a regular expression based string scrubber.
class StringSanitizer {
public:
    /// @brief Constructor.
    ///
    /// @details Compiles the given character set into a regular expression, and
    /// retains the given character replacement. Thereafter, the instance
    /// may be used to scrub an arbitrary number of strings.
    ///
    /// @param char_set string containing a regular expression (POSIX
    /// extended syntax) that describes the characters to replace.  If you
    /// wanted to sanitize hostnames for example, you could specify the
    /// inversion of valid characters "[^A-Za-z0-9_-]".
    /// @param char_replacement string of one or more characters to use as the
    /// replacement for invalid characters.
    ///
    /// @throw BadValue if given an invalid regular expression.
    StringSanitizer(const std::string& char_set, const std::string& char_replacement);

    /// @brief Returns a scrubbed copy of a given string.
    ///
    /// @details Replaces all occurrences of characters described by the regular
    /// expression with the character replacement.
    ///
    /// @param original The string to be scrubbed.
    ///
    /// @throw Unexpected if an error occurs during scrubbing.
    std::string scrub(const std::string& original);

    /// @brief The maximum size for regex parameters.
    ///
    /// @note The regex engine is implemented using recursion and can cause
    /// stack overflow if the input data is too large. An arbitrary size of
    /// 4096 should be enough for all cases.
    static const uint32_t MAX_DATA_SIZE;

private:
    /// @brief Pointer to the @c StringSanitizerImpl.
    StringSanitizerImplPtr impl_;
};

/// @brief Type representing the pointer to the @c StringSanitizer.
using StringSanitizerPtr = std::unique_ptr<StringSanitizer>;

/// @brief Check if a string is printable.
///
/// @param content String to check for printable characters.
///
/// @return True if empty or contains only printable characters, False otherwise.
bool
isPrintable(const std::string& content);

/// @brief Check if a byte vector is printable.
///
/// @param content Vector to check for printable characters.
///
/// @return True if empty or contains only printable characters, False otherwise.
bool
isPrintable(const std::vector<uint8_t>& content);

/// @brief Dumps a buffer of bytes as a string of hexadecimal digits.
///
/// @param data Pointer to the data to dump.
/// @param length Number of bytes to dump. Caller should ensure the length
/// does not exceed the buffer.
std::string
dumpAsHex(const uint8_t* data, size_t length);

/// @brief Converts a double to a string with given precision
///
/// @param val double to convert
/// @param precision number of maximum number decimal places to output
/// @return string representation of val
std::string dumpDouble(double val, size_t precision = 5);

}  // namespace str
}  // namespace util
}  // namespace isc

#endif  // KEA_UTIL_STR_H