string_utilities.hpp
1: #ifndef STRING_UTILITIES_HPP
2: #define STRING_UTILITIES_HPP
3: /*------------------------------------------------------------------------------
4:
5: Author: Andy Rushton
6: Copyright: (c) Andy Rushton, 2004
7: License: BSD License, see ../docs/license.html
8:
9: Utilities for manipulating std::strings, missing from the STL or C++ libraries
10:
11: ------------------------------------------------------------------------------*/
12: #include "os_fixes.hpp"
13: #include "format_types.hpp"
14: #include "textio.hpp"
15: #include <string>
16: #include <vector>
17: #include <bitset>
18: #include <list>
19: #include <map>
20: #include <set>
21: #include <vector>
22: #include <stdexcept>
23: #include <time.h>
24:
25: ////////////////////////////////////////////////////////////////////////////////
26: // Conversions of Integer types to string
27: ////////////////////////////////////////////////////////////////////////////////
28:
29: // The radix (i.e. base) for these conversions can be any value from base 2 to base 36
30: // specifying any other radix causes std::invalid_argument to be thrown
31:
32: // The way in which the radix is displayed is defined in format_types.hpp
33: // If any other value is used, std::invalid_argument is thrown
34:
35: // The width argument specifies the number of numerical digits to use in the result
36: // This is a minimum - if the value requires more digits then it will be wider than the width argument
37: // However, if it is smaller, then it will be extended to the specified width
38: // Then, the radix display prefix is added to this width
39: // For example, using the hash representation of 0 in hex with width=4 gives: 16#0000 - so there's 4 digits in the number part
40:
// Integer-to-string conversions: one overload per built-in integer type, plus
// bool and const void*.
//   i (unnamed for the pointer overload) - the value to convert
//   radix   - base for the conversion; defaults to 10, or 16 for the const void* overload
//   display - radix_display_t selecting how the radix is shown; defaults to radix_c_style_or_hash
//   width   - minimum digit count of the numeric part (radix prefix not counted); defaults to 0
// Each overload returns the text as a std::string and is declared to throw only
// std::invalid_argument.
// NOTE(review): throw(...) dynamic exception specifications were deprecated in C++11
// and removed in C++17, so these declarations are ill-formed under -std=c++17 or later;
// any change must be applied to the matching definitions as well.
41: std::string to_string(bool i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
42: throw(std::invalid_argument);
43:
44: std::string to_string(short i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
45: throw(std::invalid_argument);
46:
47: std::string to_string(unsigned short i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
48: throw(std::invalid_argument);
49:
50: std::string to_string(int i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
51: throw(std::invalid_argument);
52:
53: std::string to_string(unsigned int i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
54: throw(std::invalid_argument);
55:
56: std::string to_string(long i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
57: throw(std::invalid_argument);
58:
59: std::string to_string(unsigned long i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
60: throw(std::invalid_argument);
61:
// Pointer overload: the pointer parameter is unnamed and the default radix is 16, not 10.
62: std::string to_string(const void*, unsigned radix = 16, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
63: throw(std::invalid_argument);
64:
65: ////////////////////////////////////////////////////////////////////////////////
66: // convert a real type to string
67: ////////////////////////////////////////////////////////////////////////////////
68:
69: // Only decimal radix is supported
70:
71: // The way in which the number is displayed is defined in format_types.hpp
72: // Using any other value for the display type causes std::invalid_argument to be thrown
73:
// Real-to-string conversions for float and double.
//   f         - the value to convert
//   display   - real_display_t selecting the notation; defaults to display_mixed
//   width     - defaults to 0 (NOTE(review): presumably a minimum field width - confirm)
//   precision - defaults to 6 (NOTE(review): presumably the number of digits of precision - confirm)
// Both return a std::string and are declared to throw only std::invalid_argument.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
74: std::string to_string(float f, real_display_t display = display_mixed, unsigned width = 0, unsigned precision = 6)
75: throw(std::invalid_argument);
76: std::string to_string(double f, real_display_t display = display_mixed, unsigned width = 0, unsigned precision = 6)
77: throw(std::invalid_argument);
78:
79: ////////////////////////////////////////////////////////////////////////////////
80: // Convert a string to string
81: ////////////////////////////////////////////////////////////////////////////////
82:
83: // this is necessary for completeness, e.g. for use in vector_to_string for vector<string>
// String overload of to_string: takes a std::string by const reference and returns a
// std::string by value. It carries no exception specification, unlike the numeric overloads.
84: std::string to_string(const std::string& value);
85:
86: // ditto for char*
// C-string overload: takes a const char* and returns a std::string by value.
// NOTE(review): handling of a null pointer is not visible in this header - confirm before relying on it.
87: std::string to_string(const char* value);
88:
89: ////////////////////////////////////////////////////////////////////////////////
90: // convert a string to a simple type
91: ////////////////////////////////////////////////////////////////////////////////
92:
93: // Convert a string to an integer type
94: // supports all the formats described above for the reverse conversion
95: // If the radix is set to zero, the conversions deduce the radix from the string representation
96: // So, 0b prefix is binary, 0 prefix is octal, 0x is hex and <base># prefix is my hash format
97: // A non-zero radix should be used when the string value has no radix information and is non-decimal
98: // e.g. the hex value FEDCBA has no indication that it is hex, so specify radix 16
99: // The radix must be either zero as explained above, or in the range 2 to 16
100: // Any other value will cause std::invalid_argument to be thrown
101:
// String-to-integer conversions, one per target type.
//   value - the text to convert, passed by const reference
//   radix - base of the text; the default 0 means "deduce the radix from the text's prefix"
// Each returns the converted value by value and is declared to throw only
// std::invalid_argument.
// NOTE(review): the comment block preceding these declarations limits a non-zero radix to
// 2..16, whereas the to_string overloads document 2..36 - confirm which range the
// implementation actually accepts.
// NOTE(review): how out-of-range or malformed text is reported is not visible in this header.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
102: bool to_bool(const std::string& value, unsigned radix = 0)
103: throw(std::invalid_argument);
104:
105: short to_short(const std::string& value, unsigned radix = 0)
106: throw(std::invalid_argument);
107:
108: unsigned short to_ushort(const std::string& value, unsigned radix = 0)
109: throw(std::invalid_argument);
110:
111: int to_int(const std::string& value, unsigned radix = 0)
112: throw(std::invalid_argument);
113:
114: unsigned int to_uint(const std::string& value, unsigned radix = 0)
115: throw(std::invalid_argument);
116:
117: long to_long(const std::string& value, unsigned radix = 0)
118: throw(std::invalid_argument);
119:
120: unsigned long to_ulong(const std::string& value, unsigned radix = 0)
121: throw(std::invalid_argument);
122:
// Pointer variant: returns a non-const void*.
123: void* to_void_star(const std::string& value, unsigned radix = 0)
124: throw(std::invalid_argument);
125:
126: // Convert a floating-point type
127:
// String-to-real conversions. Unlike the integer conversions there is no radix parameter.
//   value - the text to convert, passed by const reference
// Each returns the converted value and is declared to throw only std::invalid_argument.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
128: float to_float(const std::string& value)
129: throw(std::invalid_argument);
130:
131: double to_double(const std::string& value)
132: throw(std::invalid_argument);
133:
134: ////////////////////////////////////////////////////////////////////////////////
135: // template string conversions for pointers and STL containers
136: ////////////////////////////////////////////////////////////////////////////////
137: // Note: STLplus containers tend to have built-in string conversion functions consistent with these
138:
// String conversions for pointers and standard containers. These are declarations only.
// NOTE(review): the definitions presumably live in string_utilities.tpp, which this header
// includes just before the closing include guard - confirm.
// NOTE(review): the parameter names suggest that `separator` goes between elements,
// `pair_separator` between the two halves of a pair, and that `null_string` is the text
// for a null pointer with `prefix`/`suffix` wrapped around a non-null value. None of that
// is visible in this header, so confirm against the definitions.
// No parameter here has a default value, so callers must pass every argument.
// All functions take their input by const reference/pointer and return a std::string by value.
139: template <typename T>
140: std::string pointer_to_string(const T* value, const std::string& null_string, const std::string& prefix, const std::string& suffix);
141:
// N is the bitset size and is deduced from the argument.
142: template<size_t N>
143: std::string bitset_to_string(const std::bitset<N>& data);
144:
145: template<typename T>
146: std::string list_to_string(const std::list<T>& values, const std::string& separator);
147:
148: template<typename L, typename R>
149: std::string pair_to_string(const std::pair<L,R>& values, const std::string& separator);
150:
// K = key type, T = mapped type, P = comparison predicate. The allocator parameter is
// left at its default, so maps with a custom allocator will not match.
151: template<typename K, typename T, typename P>
152: std::string map_to_string(const std::map<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
153:
154: template<typename K, typename T, typename P>
155: std::string multimap_to_string(const std::multimap<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
156:
// K = key type, P = comparison predicate.
157: template<typename K, typename P>
158: std::string set_to_string(const std::set<K,P>& values, const std::string& separator);
159:
160: template<typename K, typename P>
161: std::string multiset_to_string(const std::multiset<K,P>& values, const std::string& separator);
162:
163: template<typename T>
164: std::string vector_to_string(const std::vector<T>& values, const std::string& separator);
165:
// Non-template overload of to_string for std::vector<bool>; it takes no separator argument.
166: std::string to_string(const std::vector<bool>& values);
167:
168: ////////////////////////////////////////////////////////////////////////////////
169: // Print routines for basic types
170: ////////////////////////////////////////////////////////////////////////////////
171:
172: // The convention is to have a print(str,val) for printing in-line (i.e. the
173: // value is on one line) and to have a print(str,val,indent) to print on a whole
174: // line, with indent before and newline after.
175:
176: // set the number of spaces to indent per indent step (i.e. the number of spaces = indent*indent_step)
177: // default is built-in and set to 2
// Setter for the indent step, i.e. the number of spaces written per indent level.
178: void set_indent_step(unsigned step);
// Takes no arguments and returns an unsigned.
// NOTE(review): presumably the current indent step - confirm against the definition.
179: unsigned indent_step(void);
180: // utility for printing the indent, called from within the second form of print
//   str    - the otext output device to write to
//   indent - the number of indent levels
// NOTE(review): the returned otext& is presumably str itself, to allow chaining - confirm.
181: otext& print_indent(otext& str, unsigned indent);
182:
183: // print routines for integer types
184: // the arguments are as for the to_string
185:
// In-line print routines for the integer types (plus bool and const void*).
//   str     - the otext output device to write to
//   value   - the value to print, passed by const reference
//   radix   - base for the conversion; defaults to 10
//   display - radix_display_t selecting how the radix is shown; defaults to radix_c_style_or_hash
//   width   - defaults to 0; documented as having the same meaning as in to_string
// Each returns an otext& and is declared to throw only std::invalid_argument.
// NOTE(review): the returned reference is presumably str, to allow chaining - confirm.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
186: otext& print(otext& str, const bool& value,
187: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
188: unsigned width = 0)
189: throw(std::invalid_argument);
190:
191: otext& print(otext& str, const short& value,
192: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
193: unsigned width = 0)
194: throw(std::invalid_argument);
195:
196: otext& print(otext& str, const unsigned short& value,
197: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
198: unsigned width = 0)
199: throw(std::invalid_argument);
200:
201: otext& print(otext& str, const int& value,
202: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
203: unsigned width = 0)
204: throw(std::invalid_argument);
205:
206: otext& print(otext& str, const unsigned int& value,
207: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
208: unsigned width = 0)
209: throw(std::invalid_argument);
210:
211: otext& print(otext& str, const long& value,
212: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
213: unsigned width = 0)
214: throw(std::invalid_argument);
215:
216: otext& print(otext& str, const unsigned long& value,
217: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
218: unsigned width = 0)
219: throw(std::invalid_argument);
220:
// NOTE(review): `const void*&` is a non-const lvalue reference to a pointer-to-const, not a
// const reference like the other overloads. It therefore binds only to an lvalue of exactly
// type `const void*` - not to temporaries or to `void*` variables. `const void* const&` may
// have been intended; changing it requires updating the definition too.
// NOTE(review): the default radix here is 10, whereas to_string(const void*) defaults to 16.
221: otext& print(otext& str, const void*& value,
222: unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
223: unsigned width = 0)
224: throw(std::invalid_argument);
225:
226: // print routines for floating-point types
227:
// In-line print routines for float and double. The parameters mirror the real-number
// to_string overloads, with the output device added as the first argument.
//   str       - the otext output device to write to
//   f         - the value to print (passed by value, unlike the integer print overloads)
//   display   - real_display_t selecting the notation; defaults to display_mixed
//   width     - defaults to 0
//   precision - defaults to 6
// Each returns an otext& and is declared to throw only std::invalid_argument.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
228: otext& print(otext& str, float f,
229: real_display_t display = display_mixed,
230: unsigned width = 0, unsigned precision = 6)
231: throw(std::invalid_argument);
232:
233: otext& print(otext& str, double f,
234: real_display_t display = display_mixed,
235: unsigned width = 0, unsigned precision = 6)
236: throw(std::invalid_argument);
237:
238: // print routines for string
239: // this is needed for completeness, e.g. when calling print_vector on a vector of strings
240:
// In-line form: prints the string to str. No exception specification is declared.
241: otext& print(otext& str, const std::string& value);
// Whole-line form: per this header's print convention, the value is printed on its own
// line with the indent before it and a newline after it.
242: otext& print(otext& str, const std::string& value, unsigned indent);
243:
244: ////////////////////////////////////////////////////////////////////////////////
245: // template print routines for pointers and STL containers
246: ////////////////////////////////////////////////////////////////////////////////
247: // STLplus containers have these built-in
248:
// Print routines for pointers and standard containers. Each comes as a pair that follows
// this header's print convention: an in-line form, and a form taking `indent` that prints
// on a whole line with the indent before and a newline after.
// Every function takes the otext output device first and returns an otext&.
// NOTE(review): the definitions presumably live in string_utilities.tpp, which this header
// includes just before the closing include guard - confirm.
// NOTE(review): the meanings of `separator`, `pair_separator`, `null_string`, `prefix` and
// `suffix` are inferred from their names only - confirm against the definitions.
// The indent forms are not uniform: list/map/multimap/set/multiset/vector drop the
// `separator` argument in the indent form, while print_pair keeps it.
249: template <typename T>
250: otext& print_pointer(otext& str, const T* value,
251: const std::string& null_string, const std::string& prefix, const std::string& suffix);
// Indent form: note that `indent` comes before the three string arguments here.
252: template <typename T>
253: otext& print_pointer(otext& str, const T* value, unsigned indent,
254: const std::string& null_string, const std::string& prefix, const std::string& suffix);
255:
256: template<size_t N>
257: otext& print_bitset(otext& str, const std::bitset<N>& value);
258: template<size_t N>
259: otext& print_bitset(otext& str, const std::bitset<N>& value, unsigned indent);
260:
261: template<typename T>
262: otext& print_list(otext& str, const std::list<T>& values, const std::string& separator);
263: template<typename T>
264: otext& print_list(otext& str, const std::list<T>& values, unsigned indent);
265:
266: template<typename L, typename R>
267: otext& print_pair(otext& str, const std::pair<L,R>& values, const std::string& separator);
// Indent form of print_pair: keeps `separator` and adds `indent` after it.
268: template<typename L, typename R>
269: otext& print_pair(otext& str, const std::pair<L,R>& values, const std::string& separator, unsigned indent);
270:
// K = key type, T = mapped type, P = comparison predicate.
271: template<typename K, typename T, typename P>
272: otext& print_map(otext& str, const std::map<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
// Indent form: keeps `pair_separator` but replaces `separator` with `indent`.
273: template<typename K, typename T, typename P>
274: otext& print_map(otext& str, const std::map<K,T,P>& values, const std::string& pair_separator, unsigned indent);
275:
276: template<typename K, typename T, typename P>
277: otext& print_multimap(otext& str, const std::multimap<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
278: template<typename K, typename T, typename P>
279: otext& print_multimap(otext& str, const std::multimap<K,T,P>& values, const std::string& pair_separator, unsigned indent);
280:
// K = key type, P = comparison predicate.
281: template<typename K, typename P>
282: otext& print_set(otext& str, const std::set<K,P>& values, const std::string& separator);
283: template<typename K, typename P>
284: otext& print_set(otext& str, const std::set<K,P>& values, unsigned indent);
285:
286: template<typename K, typename P>
287: otext& print_multiset(otext& str, const std::multiset<K,P>& values, const std::string& separator);
288: template<typename K, typename P>
289: otext& print_multiset(otext& str, const std::multiset<K,P>& values, unsigned indent);
290:
291: template<typename T>
292: otext& print_vector(otext& str, const std::vector<T>& values, const std::string& separator);
293: template<typename T>
294: otext& print_vector(otext& str, const std::vector<T>& values, unsigned indent);
295:
// Non-template overload of print for std::vector<bool>; only the in-line form is declared.
296: otext& print(otext& str, const std::vector<bool>& values);
297:
298: ////////////////////////////////////////////////////////////////////////////////
299: // other string manipulations
300: ////////////////////////////////////////////////////////////////////////////////
301:
302: // Padding function allows a string to be printed in a fixed-width field
303:
304: // The definitions for the alignment are declared in format_types.hpp
305: // Any other value will cause std::invalid_argument to be thrown
306:
// Pads a string for printing in a fixed-width field.
//   str       - the text to pad, passed by const reference
//   alignment - alignment_t value selecting the alignment
//   width     - the field width
//   padch     - the padding character; defaults to a space
// Returns the result as a new std::string; declared to throw only std::invalid_argument.
// NOTE(review): behaviour when str is already longer than width is not visible here - confirm.
// NOTE(review): throw(...) dynamic exception specifications were removed in C++17.
307: std::string pad(const std::string& str, alignment_t alignment, unsigned width, char padch = ' ')
308: throw(std::invalid_argument);
309:
310: // whitespace trimming
// Whitespace-trimming helpers. Each takes the input by const reference and returns the
// result as a new std::string, so the argument itself is not modified.
// NOTE(review): by name, trim_left/trim_right presumably strip leading/trailing whitespace
// and trim strips both ends; which characters count as whitespace is not visible here.
311: std::string trim_left(const std::string& val);
312: std::string trim_right(const std::string& val);
313: std::string trim(const std::string& val);
314:
315: // case conversion for std::strings
// Case conversion. Each takes the input by const reference and returns the converted
// text as a new std::string, so the argument itself is not modified.
// NOTE(review): locale and non-ASCII handling are not visible in this header - confirm.
316: std::string lowercase(const std::string& val);
317: std::string uppercase(const std::string& val);
318:
319: // convert characters represented in from_set to the characters in the same position in to_set
320: // for example:
321: // filename = translate(filename, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
322: // converts the filename to uppercase and returns the result (Note that the uppercase function does this more easily).
323: // if the from_set is longer than the to_set, then the extra characters at the end of from_set are deleted from the input (i.e. they map to nothing)
// Character-by-character translation.
//   input    - the text to translate, passed by const reference
//   from_set - the characters to look for
//   to_set   - the replacement characters, matched to from_set by position; defaults to
//              an empty string
// Returns the translated text as a new std::string.
324: std::string translate(const std::string& input, const std::string& from_set, const std::string& to_set = std::string());
325:
326: ////////////////////////////////////////////////////////////////////////////////
327: // wildcard matching
328: ////////////////////////////////////////////////////////////////////////////////
329:
330: // this function does wildcard matching of the wildcard expression against the candidate std::string
331: // wildcards are NOT regular expressions
332: // the wildcard characters are * and ? where * matches 1 or more characters and ? matches only one
333: // there are also character sets [a-z] [qwertyuiop] etc. which match 1 character
334: // TODO: character sets like [:alpha:]
335: // TODO eventually: regular expression matching and substitution (3rd party library?)
336:
// Wildcard match.
//   wild  - the wildcard expression
//   match - the candidate string tested against the expression
// Returns a bool.
// NOTE(review): presumably true means the candidate matches - confirm against the definition.
337: bool match_wildcard(const std::string& wild, const std::string& match);
338:
339: ////////////////////////////////////////////////////////////////////////////////
340: // Perl-inspired split/join functions
341: ////////////////////////////////////////////////////////////////////////////////
342:
343: // splits the string at every occurrence of splitter and adds each piece as a separate string to the return value
344: // the splitter is removed
345: // a string with no splitter in it will give a single-element vector
346: // an empty string gives an empty vector
// Splits a string into pieces.
//   str      - the text to split, passed by const reference
//   splitter - the delimiter string; defaults to "\n"
// Returns the pieces as a std::vector<std::string> by value.
// NOTE(review): behaviour for an empty splitter is not visible in this header - confirm.
347: std::vector<std::string> split (const std::string& str, const std::string& splitter = "\n");
348:
349: // the reverse of the above
350: // joins the string vector to create a single string with the joiner inserted between the joins
351: // Note: the joiner will not be added at the beginning or the end
352: // However, there are optional fields to add such prefix and suffix strings
// Joins a vector of strings into one string.
//   (unnamed first parameter) - the strings to join, passed by const reference
//   joiner - text inserted between adjacent strings; defaults to "\n"
//   prefix - optional prefix string; defaults to ""
//   suffix - optional suffix string; defaults to ""
// Returns the joined text as a new std::string.
353: std::string join (const std::vector<std::string>&,
354: const std::string& joiner = "\n", const std::string& prefix = "", const std::string& suffix = "");
355:
356: ////////////////////////////////////////////////////////////////////////////////
357: // special displays
358:
359: // display the parameter as a number in bytes, kbytes, Mbytes, Gbytes depending on range
//   bytes - the byte count, as a signed long
// Returns the formatted text as a std::string.
// NOTE(review): handling of negative values and the exact unit thresholds are not
// visible in this header - confirm against the definition.
360: std::string display_bytes(long bytes);
361:
362: // display the parameter in seconds as a string representation in weeks, days, hours, minutes, seconds
363: // e.g. "1d 1:01:01" means 1 day, 1 hour, 1 minute and 1 second
//   seconds - the duration in seconds, as a time_t (from <time.h>, included by this header)
// Returns the formatted text as a std::string.
// NOTE(review): how the weeks component is rendered is not shown by the example in the
// comment - confirm against the definition.
364: std::string display_time(time_t seconds);
365:
366: ////////////////////////////////////////////////////////////////////////////////
367: #include "string_utilities.tpp"
368: #endif