1 | // (C) Copyright Jeremy Siek 2004
|
---|
2 | // Distributed under the Boost Software License, Version 1.0. (See
|
---|
3 | // accompanying file LICENSE_1_0.txt or copy at
|
---|
4 | // http://www.boost.org/LICENSE_1_0.txt)
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * stringtok.hpp -- Breaks a string into tokens. This is an example for lib3.
|
---|
8 | *
|
---|
9 | * Template function looks like this:
|
---|
10 | *
|
---|
11 | * template <typename Container>
|
---|
12 | * void stringtok (Container &l,
|
---|
13 | * string const &s,
|
---|
14 | * char const * const ws = " \t\n");
|
---|
15 | *
|
---|
16 | * A nondestructive version of strtok() that handles its own memory and can
|
---|
17 | * be broken up by any character(s). Does all the work at once rather than
|
---|
18 | * in an invocation loop like strtok() requires.
|
---|
19 | *
|
---|
20 | * Container is any type that supports push_back(a_string), although using
|
---|
21 | * list<string> and deque<string> are indicated due to their O(1) push_back.
|
---|
22 | * (I prefer deque<> because op[]/at() is available as well.) The first
|
---|
23 | * parameter references an existing Container.
|
---|
24 | *
|
---|
25 | * s is the string to be tokenized. From the parameter declaration, it can
|
---|
26 | * be seen that s is not affected. Since references-to-const may refer to
|
---|
27 | * temporaries, you could use stringtok(some_container, readline("")) when
|
---|
28 | * using the GNU readline library.
|
---|
29 | *
|
---|
30 | * The final parameter is an array of characters that serve as whitespace.
|
---|
31 | * Whitespace characters default to one or more of tab, space, and newline,
|
---|
32 | * in any combination.
|
---|
33 | *
|
---|
34 | * 'l' need not be empty on entry. On return, 'l' will have the token
|
---|
35 | * strings appended.
|
---|
36 | *
|
---|
37 | *
|
---|
38 | * [Example:
|
---|
39 | * list<string> ls;
|
---|
40 | * stringtok (ls, " this \t is\t\n a test ");
|
---|
41 | * for (list<string>::const_iterator i = ls.begin();
|
---|
42 | * i != ls.end(); ++i)
|
---|
43 | * {
|
---|
44 | * cerr << ':' << (*i) << ":\n";
|
---|
45 | * }
|
---|
46 | *
|
---|
47 | * would print
|
---|
48 | * :this:
|
---|
49 | * :is:
|
---|
50 | * :a:
|
---|
51 | * :test:
|
---|
52 | * -end example]
|
---|
53 | *
|
---|
54 | * pedwards@jaj.com May 1999
|
---|
55 | */
|
---|
56 |
|
---|
57 |
|
---|
58 | #include <string>
|
---|
59 | #include <cstring> // for strchr
|
---|
60 |
|
---|
61 |
|
---|
62 | /*****************************************************************
|
---|
63 | * This is the only part of the implementation that I don't like.
|
---|
64 | * It can probably be improved upon by the reader...
|
---|
65 | */
|
---|
namespace {
  /*
   * Tells whether character 'c' is one of the delimiter characters listed
   * in the NUL-terminated string 'wstr'.
   *
   *   c     the character to classify
   *   wstr  NUL-terminated set of delimiter characters; must not be a
   *         null pointer (it is handed straight to strchr)
   *
   * Returns true if 'c' occurs in 'wstr', false otherwise.
   */
  inline bool
  isws (char c, char const * const wstr)
  {
    // strchr() treats the terminating NUL as part of the string it
    // searches, so looking up '\0' would always "succeed".  A NUL embedded
    // in a std::string is not one of the caller's delimiters, so it is
    // rejected before the lookup.
    if (c == '\0')
      return false;
    return (std::strchr(wstr, c) != NULL);
  }
}
|
---|
74 |
|
---|
namespace boost {

  /*****************************************************************
   * Splits 's' into tokens and appends each token to 'l'.
   *
   *   l   container receiving the tokens; only l.push_back(std::string)
   *       is required.  Existing elements are left alone, tokens are
   *       appended in the order they occur in 's'.
   *   s   the string to tokenize; it is not modified.
   *   ws  NUL-terminated set of delimiter characters.  Any run of one or
   *       more of them separates two tokens; leading and trailing runs
   *       are skipped, so no empty token is ever produced.  Must not be
   *       a null pointer.
   *
   * Only the characters listed in 'ws' act as delimiters.  In particular
   * a NUL character embedded in 's' is an ordinary token character.
   *
   * Simplistic and quite Standard.  This should be templatized on
   * basic_string instead, or on a more generic StringT that just happens
   * to support ::size_type, .substr(), and so on.  "Whitespace" is not a
   * trait, so the user must supply it; this lets them break up strings
   * on different things easier than traits would anyhow.
   */
  template <typename Container>
  void
  stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
  {
    typedef std::string::size_type size_type;

    // Start of the first token, or npos if 's' holds only delimiters.
    size_type first = s.find_first_not_of(ws);

    while (first != std::string::npos) {
      // One past the end of the current token.
      const size_type last = s.find_first_of(ws, first);

      if (last == std::string::npos) {
        // The token runs to the end of the string: nothing can follow it.
        l.push_back(s.substr(first));
        return;
      }

      l.push_back(s.substr(first, last - first));

      // s[last] is a delimiter; resume the search right after it.
      first = s.find_first_not_of(ws, last + 1);
    }
  }

} // namespace boost
|
---|