string_utilities.hpp

    1: #ifndef STRING_UTILITIES_HPP
    2: #define STRING_UTILITIES_HPP
    3: /*------------------------------------------------------------------------------
    4: 
    5:   Author:    Andy Rushton
    6:   Copyright: (c) Andy Rushton, 2004
    7:   License:   BSD License, see ../docs/license.html
    8: 
    9:   Utilities for manipulating std::strings, missing from the STL or C++ libraries
   10: 
   11:   ------------------------------------------------------------------------------*/
   12: #include "os_fixes.hpp"
   13: #include "format_types.hpp"
   14: #include "textio.hpp"
   15: #include <string>
   16: #include <vector>
   17: #include <bitset>
   18: #include <list>
   19: #include <map>
   20: #include <set>
   21: #include <vector>
   22: #include <stdexcept>
   23: #include <time.h>
   24: 
   25: ////////////////////////////////////////////////////////////////////////////////
   26: // Conversions of Integer types to string
   27: ////////////////////////////////////////////////////////////////////////////////
   28: 
   29: // Convert an integer type (bool, short, int, long and their unsigned forms) or a const void* to a std::string
   30: // The radix (i.e. base) can be any value from base 2 to base 36; any other radix causes std::invalid_argument to be thrown
   31: 
   32: // The display argument (type radix_display_t) selects the way in which the radix is shown in the result
   33: // If any other value is used, std::invalid_argument is thrown (NOTE(review): radix_display_t is presumably declared in format_types.hpp, included above - confirm)
   34: // Defaults: radix 10 (16 for the const void* overload), display radix_c_style_or_hash, width 0
   35: // The width argument specifies the number of numerical digits to use in the result
   36: // This is a minimum - if the value requires more digits then the result will be wider than the width argument
   37: // However, if the value needs fewer digits, it will be extended to the specified width
   38: // The radix display prefix is then added on top of this width
   39: // For example, using the hash representation of 0 in hex with width=4 gives: 16#0000 - so there are 4 digits in the number part
   40: // NOTE(review): the throw(...) dynamic exception specifications used below are ill-formed from C++17 onwards - build with an earlier standard
   41: std::string to_string(bool i,           unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   42:   throw(std::invalid_argument);
   43: 
   44: std::string to_string(short i,          unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   45:   throw(std::invalid_argument);
   46: 
   47: std::string to_string(unsigned short i, unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   48:   throw(std::invalid_argument);
   49: 
   50: std::string to_string(int i,            unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   51:   throw(std::invalid_argument);
   52: 
   53: std::string to_string(unsigned int i,   unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   54:   throw(std::invalid_argument);
   55: 
   56: std::string to_string(long i,           unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   57:   throw(std::invalid_argument);
   58: 
   59: std::string to_string(unsigned long i,  unsigned radix = 10, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   60:   throw(std::invalid_argument);
   61: // pointer overload: note that the radix defaults to 16 here rather than 10
   62: std::string to_string(const void*,      unsigned radix = 16, radix_display_t display = radix_c_style_or_hash, unsigned width = 0)
   63:   throw(std::invalid_argument);
   64: 
   65: ////////////////////////////////////////////////////////////////////////////////
   66: // convert a real type to string
   67: ////////////////////////////////////////////////////////////////////////////////
   68: 
   69: // Convert a float or double to a std::string; only decimal radix is supported
   70: 
   71: // The display argument (type real_display_t) selects the way in which the number is displayed
   72: // Using any other value for the display type causes std::invalid_argument to be thrown
   73: // Defaults: display_mixed, width 0, precision 6 (NOTE(review): real_display_t is presumably declared in format_types.hpp, included above - confirm)
   74: std::string to_string(float f,  real_display_t display = display_mixed, unsigned width = 0, unsigned precision = 6)
   75:   throw(std::invalid_argument);
   76: std::string to_string(double f, real_display_t display = display_mixed, unsigned width = 0, unsigned precision = 6)
   77:   throw(std::invalid_argument);
   78: 
   79: ////////////////////////////////////////////////////////////////////////////////
   80: // Convert a string to string
   81: ////////////////////////////////////////////////////////////////////////////////
   82: 
   83: // Overload for std::string; necessary for completeness, e.g. so that vector_to_string can be used on a vector<string>
   84: std::string to_string(const std::string& value);
   85: 
   86: // Likewise for a C string (const char*)
   87: std::string to_string(const char* value);
   88: 
   89: ////////////////////////////////////////////////////////////////////////////////
   90: // convert a string to a simple type
   91: ////////////////////////////////////////////////////////////////////////////////
   92: 
   93: // Convert a string to an integer type
   94: // Supports all the formats described above for the reverse (to_string) conversion
   95: // If the radix is set to zero (the default), the conversions deduce the radix from the string representation
   96: // So, a 0b prefix is binary, a 0 prefix is octal, 0x is hex and a <base># prefix is the hash format
   97: // A non-zero radix should be used when the string value has no radix information and is non-decimal
   98: // e.g. the hex value FEDCBA has no indication that it is hex, so specify radix 16
   99: // The radix must be either zero as explained above, or in the range 2 to 16
  100: // Any other value will cause std::invalid_argument to be thrown
  101: // NOTE(review): the 2 to 16 limit stated here differs from the 2 to 36 range documented for to_string - confirm against the implementation
  102: bool to_bool(const std::string& value, unsigned radix = 0)
  103:   throw(std::invalid_argument);
  104: 
  105: short to_short(const std::string& value, unsigned radix = 0)
  106:   throw(std::invalid_argument);
  107: 
  108: unsigned short to_ushort(const std::string& value, unsigned radix = 0)
  109:   throw(std::invalid_argument);
  110: 
  111: int to_int(const std::string& value, unsigned radix = 0)
  112:   throw(std::invalid_argument);
  113: 
  114: unsigned int to_uint(const std::string& value, unsigned radix = 0)
  115:   throw(std::invalid_argument);
  116: 
  117: long to_long(const std::string& value, unsigned radix = 0)
  118:   throw(std::invalid_argument);
  119: 
  120: unsigned long to_ulong(const std::string& value, unsigned radix = 0)
  121:   throw(std::invalid_argument);
  122: 
  123: void* to_void_star(const std::string& value, unsigned radix = 0)
  124:   throw(std::invalid_argument);
  125: 
  126: // Convert a string to a floating-point type (these take no radix argument)
  127: 
  128: float to_float(const std::string& value)
  129:   throw(std::invalid_argument);
  130: 
  131: double to_double(const std::string& value)
  132:   throw(std::invalid_argument);
  133: 
  134: ////////////////////////////////////////////////////////////////////////////////
  135: // template string conversions for pointers and STL containers
  136: ////////////////////////////////////////////////////////////////////////////////
  137: // Note: STLplus containers tend to have built-in string conversion functions consistent with these
  138: // Declarations only - NOTE(review): the definitions are presumably in string_utilities.tpp, included at the end of this header - confirm
  139: template <typename T>
  140: std::string pointer_to_string(const T* value, const std::string& null_string, const std::string& prefix, const std::string& suffix);
  141: 
  142: template<size_t N>
  143: std::string bitset_to_string(const std::bitset<N>& data);
  144: 
  145: template<typename T>
  146: std::string list_to_string(const std::list<T>& values, const std::string& separator);
  147: 
  148: template<typename L, typename R>
  149: std::string pair_to_string(const std::pair<L,R>& values, const std::string& separator);
  150: // the map forms take two separators: pair_separator and separator
  151: template<typename K, typename T, typename P>
  152: std::string map_to_string(const std::map<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
  153: 
  154: template<typename K, typename T, typename P>
  155: std::string multimap_to_string(const std::multimap<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
  156: 
  157: template<typename K, typename P>
  158: std::string set_to_string(const std::set<K,P>& values, const std::string& separator);
  159: 
  160: template<typename K, typename P>
  161: std::string multiset_to_string(const std::multiset<K,P>& values, const std::string& separator);
  162: 
  163: template<typename T>
  164: std::string vector_to_string(const std::vector<T>& values, const std::string& separator);
  165: // non-template overload for std::vector<bool>; it takes no separator argument
  166: std::string to_string(const std::vector<bool>& values);
  167: 
  168: ////////////////////////////////////////////////////////////////////////////////
  169: // Print routines for basic types
  170: ////////////////////////////////////////////////////////////////////////////////
  171: 
  172: // The convention is to provide two forms of each print routine: print(str,val) prints
  173: // in-line (i.e. the value is on one line), while print(str,val,indent) prints on a
  174: // whole line, with the indent before and a newline after.
  175: 
  176: // set the number of spaces to indent per indent step (i.e. the number of spaces printed = indent*indent_step)
  177: // the default is built-in and set to 2
  178: void set_indent_step(unsigned step);
  179: unsigned indent_step(void);  // presumably returns the current indent step - TODO confirm
  180: // utility for printing the indent to str, called from within the second (indented) form of print
  181: otext& print_indent(otext& str, unsigned indent);
  182: 
  183: // print routines for integer types: each takes the otext to print to and the value, and returns an otext&
  184: // the radix, display and width arguments are as for the integer to_string overloads
  185: 
  186: otext& print(otext& str, const bool& value,
  187:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  188:              unsigned width = 0)
  189:   throw(std::invalid_argument);
  190: 
  191: otext& print(otext& str, const short& value,
  192:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  193:              unsigned width = 0)
  194:   throw(std::invalid_argument);
  195: 
  196: otext& print(otext& str, const unsigned short& value,
  197:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  198:              unsigned width = 0)
  199:   throw(std::invalid_argument);
  200: 
  201: otext& print(otext& str, const int& value,
  202:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  203:              unsigned width = 0)
  204:   throw(std::invalid_argument);
  205: 
  206: otext& print(otext& str, const unsigned int& value,
  207:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  208:              unsigned width = 0)
  209:   throw(std::invalid_argument);
  210: 
  211: otext& print(otext& str, const long& value,
  212:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  213:              unsigned width = 0)
  214:   throw(std::invalid_argument);
  215: 
  216: otext& print(otext& str, const unsigned long& value,
  217:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  218:              unsigned width = 0)
  219:   throw(std::invalid_argument);
  220: // NOTE(review): this overload takes const void*& (a non-const reference to a pointer) and defaults radix to 10, whereas to_string(const void*) defaults to 16 - confirm both are intended
  221: otext& print(otext& str, const void*& value,
  222:              unsigned radix = 10, radix_display_t display = radix_c_style_or_hash,
  223:              unsigned width = 0)
  224:   throw(std::invalid_argument);
  225: 
  226: // print routines for floating-point types
  227: // the display, width and precision parameters and their defaults mirror the floating-point to_string overloads
  228: otext& print(otext& str, float f,
  229:              real_display_t display = display_mixed,
  230:              unsigned width = 0, unsigned precision = 6)
  231:   throw(std::invalid_argument);
  232: 
  233: otext& print(otext& str, double f,
  234:              real_display_t display = display_mixed,
  235:              unsigned width = 0, unsigned precision = 6)
  236:   throw(std::invalid_argument);
  237: 
  238: // print routines for std::string, in both the in-line form and the form taking an indent
  239: // these are needed for completeness, e.g. when calling print_vector on a vector of strings
  240: 
  241: otext& print(otext& str, const std::string& value);
  242: otext& print(otext& str, const std::string& value, unsigned indent);
  243: 
  244: ////////////////////////////////////////////////////////////////////////////////
  245: // template print routines for pointers and STL containers
  246: ////////////////////////////////////////////////////////////////////////////////
  247: // STLplus containers have these built-in
  248: // most routines below are declared in two forms: one without and one with an unsigned indent parameter
  249: template <typename T>
  250: otext& print_pointer(otext& str, const T* value,
  251:                      const std::string& null_string, const std::string& prefix, const std::string& suffix);
  252: template <typename T>
  253: otext& print_pointer(otext& str, const T* value, unsigned indent,
  254:                      const std::string& null_string, const std::string& prefix, const std::string& suffix);
  255: 
  256: template<size_t N>
  257: otext& print_bitset(otext& str, const std::bitset<N>& value);
  258: template<size_t N>
  259: otext& print_bitset(otext& str, const std::bitset<N>& value, unsigned indent);
  260: // for the list, map, multimap, set, multiset and vector routines the indent form takes indent in place of the separator argument
  261: template<typename T>
  262: otext& print_list(otext& str, const std::list<T>& values, const std::string& separator);
  263: template<typename T>
  264: otext& print_list(otext& str, const std::list<T>& values, unsigned indent);
  265: // unlike the container routines, the indent form of print_pair keeps its separator argument
  266: template<typename L, typename R>
  267: otext& print_pair(otext& str, const std::pair<L,R>& values, const std::string& separator);
  268: template<typename L, typename R>
  269: otext& print_pair(otext& str, const std::pair<L,R>& values, const std::string& separator, unsigned indent);
  270: 
  271: template<typename K, typename T, typename P>
  272: otext& print_map(otext& str, const std::map<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
  273: template<typename K, typename T, typename P>
  274: otext& print_map(otext& str, const std::map<K,T,P>& values, const std::string& pair_separator, unsigned indent);
  275: 
  276: template<typename K, typename T, typename P>
  277: otext& print_multimap(otext& str, const std::multimap<K,T,P>& values, const std::string& pair_separator, const std::string& separator);
  278: template<typename K, typename T, typename P>
  279: otext& print_multimap(otext& str, const std::multimap<K,T,P>& values, const std::string& pair_separator, unsigned indent);
  280: 
  281: template<typename K, typename P>
  282: otext& print_set(otext& str, const std::set<K,P>& values, const std::string& separator);
  283: template<typename K, typename P>
  284: otext& print_set(otext& str, const std::set<K,P>& values, unsigned indent);
  285: 
  286: template<typename K, typename P>
  287: otext& print_multiset(otext& str, const std::multiset<K,P>& values, const std::string& separator);
  288: template<typename K, typename P>
  289: otext& print_multiset(otext& str, const std::multiset<K,P>& values, unsigned indent);
  290: 
  291: template<typename T>
  292: otext& print_vector(otext& str, const std::vector<T>& values, const std::string& separator);
  293: template<typename T>
  294: otext& print_vector(otext& str, const std::vector<T>& values, unsigned indent);
  295: // non-template overload for std::vector<bool>; only the in-line form is declared
  296: otext& print(otext& str, const std::vector<bool>& values);
  297: 
  298: ////////////////////////////////////////////////////////////////////////////////
  299: // other string manipulations
  300: ////////////////////////////////////////////////////////////////////////////////
  301: 
  302: // Padding function allows a string to be printed in a fixed-width field
  303: // padch is the padding character and defaults to a space
  304: // The definitions for the alignment (type alignment_t) are declared in format_types.hpp
  305: // Any other alignment value will cause std::invalid_argument to be thrown
  306: 
  307: std::string pad(const std::string& str, alignment_t alignment, unsigned width, char padch = ' ')
  308:   throw(std::invalid_argument);
  309: 
  310: // whitespace trimming: each function takes the string by const reference and returns a std::string
  311: std::string trim_left(const std::string& val);
  312: std::string trim_right(const std::string& val);
  313: std::string trim(const std::string& val);
  314: 
  315: // case conversion for std::strings
  316: std::string lowercase(const std::string& val);
  317: std::string uppercase(const std::string& val);
  318: 
  319: // convert characters found in from_set to the characters in the same position in to_set
  320: // for example:
  321: //   filename = translate(filename, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
  322: // converts the filename to uppercase and returns the result (Note that the uppercase function does this more easily).
  323: // if the from_set is longer than the to_set, then the extra from_set characters (those with no counterpart in to_set) are deleted (i.e. they map to nothing); to_set defaults to an empty string
  324: std::string translate(const std::string& input, const std::string& from_set, const std::string& to_set = std::string());
  325: 
  326: ////////////////////////////////////////////////////////////////////////////////
  327: // wildcard matching
  328: ////////////////////////////////////////////////////////////////////////////////
  329: 
  330: // this function matches the wildcard expression against the candidate std::string
  331: // wildcards are NOT regular expressions
  332: // the wildcard characters are * and ? where * matches 1 or more characters and ? matches only one
  333: // there are also character sets [a-z] [qwertyuiop] etc. which match 1 character
  334: // TODO: character sets like [:alpha:]
  335: // TODO eventually: regular expression matching and substitution (3rd party library?)
  336: // wild is the wildcard expression and match is the candidate string (presumably returns true on a match - TODO confirm)
  337: bool match_wildcard(const std::string& wild, const std::string& match);
  338: 
  339: ////////////////////////////////////////////////////////////////////////////////
  340: // Perl-inspired split/join functions
  341: ////////////////////////////////////////////////////////////////////////////////
  342: 
  343: // splits the string at every occurrence of splitter and adds each piece as a separate string to the return value
  344: // the splitter itself is removed; it defaults to a newline
  345: // a string with no splitter in it will give a single-element vector
  346: // an empty string gives an empty vector
  347: std::vector<std::string> split (const std::string& str, const std::string& splitter = "\n");
  348: 
  349: // the reverse of the above
  350: // joins the string vector to create a single string with the joiner (default: newline) inserted between the elements
  351: // Note: the joiner will not be added at the beginning or the end
  352: // However, there are optional prefix and suffix arguments (both empty by default) for adding such strings
  353: std::string join (const std::vector<std::string>&,
  354:                   const std::string& joiner = "\n", const std::string& prefix = "", const std::string& suffix = "");
  355: 
  356: ////////////////////////////////////////////////////////////////////////////////
  357: // special displays
  358: 
  359: // display the parameter (a byte count) as a string in bytes, kbytes, Mbytes or Gbytes depending on range
  360: std::string display_bytes(long bytes);
  361: 
  362: // display the parameter (a time in seconds) as a string representation in weeks, days, hours, minutes, seconds
  363: // e.g. "1d 1:01:01" means 1 day, 1 hour, 1 minute and 1 second
  364: std::string display_time(time_t seconds);
  365: 
  366: ////////////////////////////////////////////////////////////////////////////////
  367: #include "string_utilities.tpp"
  368: #endif