source: OGRE/trunk/ogre_dependencies/Dependencies/include/CEGUI/CEGUIString.h @ 692

Revision 692, 148.4 KB checked in by mattausch, 19 years ago (diff)

adding ogre 1.2 and dependencies

Line 
1/************************************************************************
2        filename:       CEGUIString.h
3        created:        26/2/2004
4        author:         Paul D Turner
5       
6        purpose:        Defines string class used within the GUI system.
7*************************************************************************/
8/*************************************************************************
9    Crazy Eddie's GUI System (http://www.cegui.org.uk)
10    Copyright (C)2004 - 2005 Paul D Turner (paul@cegui.org.uk)
11
12    This library is free software; you can redistribute it and/or
13    modify it under the terms of the GNU Lesser General Public
14    License as published by the Free Software Foundation; either
15    version 2.1 of the License, or (at your option) any later version.
16
17    This library is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    Lesser General Public License for more details.
21
22    You should have received a copy of the GNU Lesser General Public
23    License along with this library; if not, write to the Free Software
24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25*************************************************************************/
26#ifndef _CEGUIString_h_
27#define _CEGUIString_h_
28
29#include "CEGUIBase.h"
30#include <string>
31#include <string.h>
32#include <stdexcept>
33
34// Start of CEGUI namespace section
35namespace CEGUI
36{
37#define STR_QUICKBUFF_SIZE      32      // extent (in utf32 elements) of the d_quickbuff array embedded in every String
38        /*************************************************************************
39                Basic Types
40        *************************************************************************/
41        typedef         uint8   utf8;   //!< element type of the utf8 encoded buffer kept by String
42        //typedef               uint16  utf16;  // removed typedef to prevent usage, as utf16 is not supported (yet)
43        typedef         uint32  utf32;  //!< element type of String's internal buffers; String::value_type is an alias of this
44
45/*!
46\brief
47        String class used within the GUI system.
48
49        For the most part, this class can replace std::string in basic usage.  However, currently String does not use the
50        current locale, and also comparisons do not take into account the Unicode data tables, so are not 'correct'
51        as such.
52*/
53class CEGUIEXPORT String
54{
55public:
56        /*************************************************************************
57                Integral Types
58        *************************************************************************/
59        typedef         utf32                   value_type;                                     //!< Basic 'code point' type used for String (utf32)
60        typedef         size_t                  size_type;                                      //!< Unsigned type used for sizes, counts and indices
61        typedef         ptrdiff_t               difference_type;                        //!< Signed type used for iterator offsets and for the difference between two iterators
62        typedef         utf32&                  reference;                                      //!< Type used for utf32 code point references
63        typedef         const utf32&    const_reference;                        //!< Type used for constant utf32 code point references
64        typedef         utf32*                  pointer;                                        //!< Type used for utf32 code point pointers
65        typedef         const utf32*    const_pointer;                          //!< Type used for constant utf32 code point pointers
66
67        static const size_type          npos;                                           //!< Value used to represent 'not found' conditions and 'all code points' etc.  Only declared here; the value is supplied by a definition outside the class body.
68
69private:
70        /*************************************************************************
71                Implementation data
72        *************************************************************************/
73        size_type       d_cplength;                     //!< holds length of string in code points (not including null termination)
74        size_type       d_reserve;                      //!< code point reserve size (currently allocated buffer size in code points).
75
76        mutable utf8*           d_encodedbuff;          //!< holds string data encoded as utf8 (generated only by calls to c_str() and data())
77        mutable size_type       d_encodeddatlen;        //!< holds length of encoded data (in case it's smaller than buffer).
78        mutable size_type       d_encodedbufflen;       //!< length of above buffer (since the buffer can be bigger than the data it holds, to save re-allocations).
79
80        utf32           d_quickbuff[STR_QUICKBUFF_SIZE];        //!< This is an integrated 'quick' buffer to save allocations for smallish strings
81        utf32*          d_buffer;                                                       //!< Pointer to the main buffer memory.  This is only valid when quick-buffer is not being used
82
83public:
84        /*************************************************************************
85                Iterator Classes
86        *************************************************************************/
87        /*!
88        \brief
89                Constant random-access iterator class for String objects
90        */
91#if defined(_MSC_VER) && (_MSC_VER <= 1200)
92        class const_iterator : public std::iterator<std::random_access_iterator_tag, utf32>
93#else
94        class const_iterator : public std::iterator<std::random_access_iterator_tag, utf32, std::ptrdiff_t, const utf32*, const utf32&>
95#endif
96        {
97
98        public:
99                //////////////////////////////////////////////////////////////////////////
100                // data
101                //////////////////////////////////////////////////////////////////////////
102                const utf32*    d_ptr;
103
104
105                //////////////////////////////////////////////////////////////////////////
106                // Methods
107                //////////////////////////////////////////////////////////////////////////
108                const_iterator(void)
109                {
110                        d_ptr = 0;
111                }
112                const_iterator(const_pointer ptr)
113                {
114                        d_ptr = ptr;
115                }
116
117                const_reference operator*() const
118                {
119                        return *d_ptr;
120                }
121
122#if defined(_MSC_VER) && (_MSC_VER <= 1200)
123#       pragma warning (push)
124#       pragma warning (disable : 4284)
125#endif
126                const_pointer   operator->() const             
127                {
128                        return &**this;
129                }
130
131#if defined(_MSC_VER) && (_MSC_VER <= 1200)
132#       pragma warning (pop)
133#endif
134
135                const_iterator& operator++()
136                {
137                        ++d_ptr;
138                        return *this;
139                }
140
141                const_iterator  operator++(int)         
142                {
143                        const_iterator temp = *this;
144                        ++*this;
145                        return temp;
146                }
147
148                const_iterator& operator--()
149                {
150                        --d_ptr;
151                        return *this;
152                }
153
154                const_iterator  operator--(int)         
155                {
156                        const_iterator temp = *this;
157                        --*this;
158                        return temp;
159                }
160
161                const_iterator& operator+=(difference_type offset)
162                {
163                        d_ptr += offset;
164                        return *this;
165                }
166
167                const_iterator operator+(difference_type offset) const
168                {
169                        const_iterator temp = *this;
170                        return temp += offset;
171                }
172
173                const_iterator& operator-=(difference_type offset)
174                {
175                        return *this += -offset;
176                }
177
178                const_iterator operator-(difference_type offset) const
179                {
180                        const_iterator temp = *this;
181                        return temp -= offset;
182                }
183
184                difference_type operator-(const const_iterator& iter) const
185                {
186                        return d_ptr - iter.d_ptr;
187                }
188
189                const_reference operator[](difference_type offset) const
190                {
191                        return *(*this + offset);
192                }
193
194                bool operator==(const const_iterator& iter) const
195                {
196                        return d_ptr == iter.d_ptr;
197                }
198
199                bool operator!=(const const_iterator& iter) const
200                {
201                        return !(*this == iter);
202                }
203
204                bool operator<(const const_iterator& iter) const
205                {
206                        return d_ptr < iter.d_ptr;
207                }
208
209                bool operator>(const const_iterator& iter) const
210                {
211                        return (!(iter < *this));
212                }
213
214                bool operator<=(const const_iterator& iter) const
215                {
216                        return (!(iter < *this));
217                }
218
219                bool operator>=(const const_iterator& iter) const
220                {
221                        return (!(*this < iter));
222                }
223
224                friend const_iterator operator+(difference_type offset, const const_iterator& iter)
225                {
226                        return iter + offset;
227                }
228
229        };
230
231        /*!
232        \brief
233                Random-access iterator class for String objects
234        */
235        class iterator : public const_iterator
236        {
237        public:
238                iterator(void) {}
239                iterator(pointer ptr) : const_iterator(ptr) {}
240
241
242                reference operator*() const
243                {
244                        return ((reference)**(const_iterator *)this);
245                }
246
247#if defined(_MSC_VER) && (_MSC_VER <= 1200)
248#       pragma warning (push)
249#       pragma warning (disable : 4284)
250#endif
251
252                pointer operator->() const
253                {
254                        return &**this;
255                }
256
257#if defined(_MSC_VER) && (_MSC_VER <= 1200)
258#       pragma warning (pop)
259#endif
260
261                iterator& operator++()
262                {
263                        ++this->d_ptr;
264                        return *this;
265                }
266
267                iterator operator++(int)
268                {
269                        iterator temp = *this;
270                        ++*this;
271                        return temp;
272                }
273
274                iterator& operator--()
275                {
276                        --this->d_ptr;
277                        return *this;
278                }
279
280                iterator operator--(int)
281                {
282                        iterator temp = *this;
283                        --*this;
284                        return temp;
285                }
286
287                iterator& operator+=(difference_type offset)
288                {
289                        this->d_ptr += offset;
290                        return *this;
291                }
292
293                iterator operator+(difference_type offset) const
294                {
295                        iterator temp = *this;
296                        return temp + offset;
297                }
298
299                iterator& operator-=(difference_type offset)
300                {
301                        return *this += -offset;
302                }
303
304                iterator operator-(difference_type offset) const
305                {
306                        iterator temp = *this;
307                        return temp -= offset;
308                }
309
310                difference_type operator-(const const_iterator& iter) const
311                {
312                        return ((const_iterator)*this - iter);
313                }
314
315                reference operator[](difference_type offset) const
316                {
317                        return *(*this + offset);
318                }
319
320                friend iterator operator+(difference_type offset, const iterator& iter)
321                {
322                        return iter + offset;
323                }
324
325        };
326
327        /*!
328        \brief
329                Constant reverse iterator class for String objects
330        */
331#if defined(_MSC_VER) && ((_MSC_VER <= 1200) || ((_MSC_VER <= 1300) && defined(_STLPORT_VERSION)))      // _MSC_VER <= 1200, or _MSC_VER <= 1300 when STLport is in use
332        typedef std::reverse_iterator<const_iterator, const_pointer, const_reference, difference_type>  const_reverse_iterator; // pointer / reference / difference types are spelled out explicitly for these toolchains
333#else
334        typedef std::reverse_iterator<const_iterator>   const_reverse_iterator; // every other toolchain: single-argument form
335#endif
336
337        /*!
338        \brief
339                Reverse iterator class for String objects
340        */
341#if defined(_MSC_VER) && ((_MSC_VER <= 1200) || ((_MSC_VER <= 1300) && defined(_STLPORT_VERSION)))      // same toolchain test as used for const_reverse_iterator
342        typedef std::reverse_iterator<iterator, pointer, reference, difference_type>                    reverse_iterator;       // pointer / reference / difference types are spelled out explicitly for these toolchains
343#else
344        typedef std::reverse_iterator<iterator>                 reverse_iterator;       // every other toolchain: single-argument form
345#endif
346
347public:
348        //////////////////////////////////////////////////////////////////////////
349        // Default Construction and Destructor
350        //////////////////////////////////////////////////////////////////////////
351        /*!
352        \brief
353                Constructs an empty string
354        */
355        String(void)
356        {
357                init();
358        }
359
360        /*!
361        \brief
362                Destructor for String objects
363        */
364        ~String(void);
365
366        //////////////////////////////////////////////////////////////////////////
367        // Construction via CEGUI::String
368        //////////////////////////////////////////////////////////////////////////
369        /*!
370        \brief
371                Copy constructor - Creates a new string with the same value as \a str
372
373        \param str
374                String object used to initialise the newly created string
375
376        \return
377                Nothing
378        */
379        String(const String& str)
380        {
381                init();
382                assign(str);
383        }
384       
385
386        /*!
387        \brief
388                Constructs a new string initialised with code points from another String object.
389
390        \param str
391                String object used to initialise the newly created string
392
393        \param str_idx
394                Starting code-point of \a str to be used when initialising the new String
395
396        \param str_num
397                Maximum number of code points from \a str that are to be assigned to the new String
398
399        \return
400                Nothing
401        */
402        String(const String& str, size_type str_idx, size_type str_num = npos)
403        {
404                init();
405                assign(str, str_idx, str_num);
406        }
407
408        //////////////////////////////////////////////////////////////////////////
409        // Construction via std::string
410        //////////////////////////////////////////////////////////////////////////
411        /*!
412        \brief
413                Constructs a new string and initialises it using the std::string std_str
414
415        \param std_str
416                The std::string object that is to be used to initialise the new String object.
417
418        \note
419                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
420                the provided data will occur.
421
422        \return
423                Nothing
424
425        \exception std::length_error    Thrown if resulting String object would be too big.
426        */
427        String(const std::string& std_str)
428        {
429                init();
430                assign(std_str);
431        }
432               
433        /*!
434        \brief
435                Constructs a new string initialised with characters from the given std::string object.
436
437        \param std_str
438                std::string object used to initialise the newly created string
439
440        \param str_idx
441                Starting character of \a std_str to be used when initialising the new String
442
443        \note
444                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
445                the provided data will occur.
446
447        \param str_num
448                Maximum number of characters from \a std_str that are to be assigned to the new String
449
450        \return
451                Nothing
452
453        \exception std::length_error    Thrown if resulting String object would be too big.
454        */
455        String(const std::string& std_str, size_type str_idx, size_type str_num = npos)
456        {
457                init();
458                assign(std_str, str_idx, str_num);
459        }
460
461
462        //////////////////////////////////////////////////////////////////////////
463        // Construction via UTF-8 stream (for straight ASCII use, only codes 0x00 - 0x7f are valid)
464        //////////////////////////////////////////////////////////////////////////
465        /*!
466        \brief
467                Constructs a new String object and initialise it using the provided utf8 encoded string buffer.
468
469        \param utf8_str
470                Pointer to a buffer containing a null-terminated Unicode string encoded as utf8 data.
471
472        \note
473                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
474                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
475                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
476                results.
477
478        \return
479                Nothing
480
481        \exception std::length_error    Thrown if resulting String object would be too big.
482        */
483        String(const utf8* utf8_str)
484        {
485                init();
486                assign(utf8_str);
487        }
488
489        /*!
490        \brief
491                Constructs a new String object and initialise it using the provided utf8 encoded string buffer.
492
493                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
494                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
495                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
496                results.
497
498        \param utf8_str
499                Pointer to a buffer containing Unicode string data encoded as utf8.
500
501        \note
502                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
503                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
504                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
505                results.
506
507        \param chars_len
508                Length of the provided utf8 string in code units (not code-points).
509
510        \return
511                Nothing
512
513        \exception std::length_error    Thrown if resulting String object would be too big.
514        */
515        String(const utf8* utf8_str, size_type chars_len)
516        {
517                init();
518                assign(utf8_str, chars_len);
519        }
520
521        //////////////////////////////////////////////////////////////////////////
522        // Construction via code-point (using a UTF-32 code unit)
523        //////////////////////////////////////////////////////////////////////////
524        /*!
525        \brief
526                Constructs a new String that is initialised with the specified code point
527
528        \param num
529                The number of times \a code_point is to be put into new String object
530
531        \param code_point
532                The Unicode code point to be used when initialising the String object
533
534        \return
535                Nothing
536
537        \exception std::length_error    Thrown if resulting String object would be too big.
538        */
539        String(size_type num, utf32 code_point)
540        {
541                init();
542                assign(num, code_point);
543        }
544
545        //////////////////////////////////////////////////////////////////////////
546        // Construction via iterator
547        //////////////////////////////////////////////////////////////////////////
548        // Create string with characters in the range [beg, end)
549        /*!
550        \brief
551                Construct a new string object and initialise it with code-points from the range [beg, end).
552
553        \param iter_beg
554                Iterator describing the start of the data to be used when initialising the String object
555
556        \param iter_end
557                Iterator describing the (exclusive) end of the data to be used when initialising the String object
558
559        \return
560                Nothing
561        */
562        String(const_iterator iter_beg, const_iterator iter_end)
563        {
564                init();
565                append(iter_beg, iter_end);
566        }
567
568
569        //////////////////////////////////////////////////////////////////////////
570        // Construction via c-string
571        //////////////////////////////////////////////////////////////////////////
572        /*!
573        \brief
574                Constructs a new String object and initialise it using the provided c-string.
575
576        \param cstr
577                Pointer to a c-string.
578
579        \return
580                Nothing
581
582        \exception std::length_error    Thrown if resulting String object would be too big.
583        */
584        String(const char* cstr)
585        {
586                init();
587                assign(cstr);
588        }
589
590        /*!
591        \brief
592                Constructs a new String object and initialise it using characters from the provided char array.
593
594        \param chars
595                char array.
596
597        \param chars_len
598                Number of chars from the array to be used.
599
600        \return
601                Nothing
602
603        \exception std::length_error    Thrown if resulting String object would be too big.
604        */
605        String(const char* chars, size_type chars_len)
606        {
607                init();
608                assign(chars, chars_len);
609        }
610
611
612        //////////////////////////////////////////////////////////////////////////
613        // Size operations
614        //////////////////////////////////////////////////////////////////////////
615        /*!
616        \brief
617                Returns the size of the String in code points
618
619        \return
620                Number of code points currently in the String
621        */
622        size_type       size(void) const
623        {
624                return d_cplength;
625        }
626
627        /*!
628        \brief
629                Returns the size of the String in code points
630
631        \return
632                Number of code points currently in the String
633        */
634        size_type       length(void) const
635        {
636                return d_cplength;
637        }
638
639        /*!
640        \brief
641                Returns true if the String is empty
642
643        \return
644                true if the String is empty, else false.
645        */
646        bool    empty(void) const
647        {
648                return  (d_cplength == 0);
649        }
650
651        /*!
652        \brief
653                Returns the maximum size of a String.
654
655                Any operation that would result in a String that is larger than this value will throw the std::length_error exception.
656
657        \return
658                The maximum number of code points that a string can contain
659        */
660        static size_type        max_size(void)
661        {
662                return (((size_type)-1) / sizeof(utf32));
663        }
664
665        //////////////////////////////////////////////////////////////////////////
666        // Capacity Operations
667        //////////////////////////////////////////////////////////////////////////
668        // return the number of code points the string could hold without re-allocation
669        // (due to internal encoding this will always report the figure for worst-case encoding, and could even be < size()!)
670        /*!
671        \brief
672                Return the number of code points that the String could hold before a re-allocation would be required.
673
674        \return
675                Size of the current reserve buffer.  This is the maximum number of code points the String could hold before a buffer
676                re-allocation would be required
677        */
678        size_type capacity(void) const
679        {
680                return d_reserve - 1;
681        }
682
683        // reserve internal memory for at-least 'num' code-points (characters).  if num is 0, request is shrink-to-fit.
684        /*!
685        \brief
686                Specifies the amount of reserve capacity to allocate.
687
688        \param num
689                The number of code points to allocate space for.  If \a num is larger than the current reserve, then a re-allocation will occur.  If
690                \a num is smaller than the current reserve (but not 0) the buffer may be shrunk to the larger of the specified number, or the current
691                String size (operation is currently not implemented).  If \a num is 0, then the buffer is re-allocated to fit the current String size.
692
693        \return
694                Nothing
695
696        \exception std::length_error    Thrown if resulting String object would be too big.
697        */
698        void    reserve(size_type num = 0)
699        {
700                if (num == 0)
701                        trim();
702                else
703                        grow(num);
704        }
705
706        //////////////////////////////////////////////////////////////////////////
707        // Comparisons
708        //////////////////////////////////////////////////////////////////////////
709        /*!
710        \brief
711                Compares this String with the String 'str'.
712
713        \note
714                This does currently not properly consider Unicode and / or the system locale.
715
716        \param str
717                The String object that is to be compared with this String.
718
719        \return
720                -  0 if the String objects are equal
721                - <0 if this String is lexicographically smaller than \a str
722                - >0 if this String is lexicographically greater than \a str
723        */
724        int             compare(const String& str) const
725        {
726                return compare(0, d_cplength, str);
727        }
728
729        /*!
730        \brief
731                Compares code points from this String with code points from the String 'str'.
732
733        \note
734                This does currently not properly consider Unicode and / or the system locale.
735
736        \param idx
737                Index of the first code point from this String to consider.
738
739        \param len
740                Maximum number of code points from this String to consider.
741
742        \param str
743                The String object that is to be compared with this String.
744
745        \param str_idx
746                Index of the first code point from String \a str to consider.
747
748        \param str_len
749                Maximum number of code points from String \a str to consider
750
751        \return
752                -  0 if the specified sub-strings are equal
753                - <0 if specified sub-strings are lexicographically smaller than \a str
754                - >0 if specified sub-strings are lexicographically greater than \a str
755
756        \exception std::out_of_range    Thrown if either \a idx or \a str_idx are invalid.
757        */
758        int             compare(size_type idx, size_type len, const String& str, size_type str_idx = 0, size_type str_len = npos) const
759        {
760                if ((d_cplength < idx) || (str.d_cplength < str_idx))
761                        throw std::out_of_range("Index is out of range for CEGUI::String");
762
763                if ((len == npos) || (idx + len > d_cplength))
764                        len = d_cplength - idx;
765
766                if ((str_len == npos) || (str_idx + str_len > str.d_cplength))
767                        str_len = str.d_cplength - str_idx;
768
769                int val = (len == 0) ? 0 : utf32_comp_utf32(&ptr()[idx], &str.ptr()[str_idx], (len < str_len) ? len : str_len);
770
771                return (val != 0) ? ((val < 0) ? -1 : 1) : (len < str_len) ? -1 : (len == str_len) ? 0 : 1;
772        }
773
774
775        /*!
776        \brief
777                Compares this String with the std::string 'std_str'.
778
779        \note
780                This does currently not properly consider Unicode and / or the system locale.
781
782        \param std_str
783                The std::string object that is to be compared with this String.
784
785        \note
786                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
787                the encountered data is performed.
788
789        \return
790                -  0 if the string objects are equal
791                - <0 if this string is lexicographically smaller than \a std_str
792                - >0 if this string is lexicographically greater than \a std_str
793        */
794        int             compare(const std::string& std_str) const
795        {
796                return compare(0, d_cplength, std_str);
797        }
798
799
800        /*!
801        \brief
802                Compares code points from this String with code points from the std::string 'std_str'.
803
804        \note
805                This does currently not properly consider Unicode and / or the system locale.
806
807        \param idx
808                Index of the first code point from this String to consider.
809
810        \param len
811                Maximum number of code points from this String to consider.
812
813        \param std_str
814                The std::string object that is to be compared with this String.
815
816        \note
817                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
818                the encountered data is performed.
819
820        \param str_idx
821                Index of the first character from std::string \a std_str to consider.
822
823        \param str_len
824                Maximum number of characters from std::string \a std_str to consider
825
826        \return
827                -  0 if the specified sub-strings are equal
828                - <0 if specified sub-strings are lexicographically smaller than \a std_str
829                - >0 if specified sub-strings are lexicographically greater than \a std_str
830
831        \exception std::out_of_range    Thrown if either \a idx or \a str_idx are invalid.
832        */
833        int             compare(size_type idx, size_type len, const std::string& std_str, size_type str_idx = 0, size_type str_len = npos) const
834        {
835                if (d_cplength < idx)
836                        throw std::out_of_range("Index is out of range for CEGUI::String");
837
838                if (std_str.size() < str_idx)
839                        throw std::out_of_range("Index is out of range for std::string");
840
841                if ((len == npos) || (idx + len > d_cplength))
842                        len = d_cplength - idx;
843
844                if ((str_len == npos) || (str_idx + str_len > std_str.size()))
845                        str_len = (size_type)std_str.size() - str_idx;
846
847                int val = (len == 0) ? 0 : utf32_comp_char(&ptr()[idx], &std_str.c_str()[str_idx], (len < str_len) ? len : str_len);
848
849                return (val != 0) ? ((val < 0) ? -1 : 1) : (len < str_len) ? -1 : (len == str_len) ? 0 : 1;
850        }
851
852
853        /*!
854        \brief
855                Compares this String with the null-terminated utf8 encoded 'utf8_str'.
856
857        \note
858                This does currently not properly consider Unicode and / or the system locale.
859
860        \param utf8_str
861                The buffer containing valid Unicode data encoded as utf8 that is to be compared with this String.
862
863        \note
864                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
865                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
866                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
867                results.
868
869        \return
870                -  0 if the strings are equal
871                - <0 if this string is lexicographically smaller than \a utf8_str
872                - >0 if this string is lexicographically greater than \a utf8_str
873        */
874        int             compare(const utf8* utf8_str) const
875        {
876                return compare(0, d_cplength, utf8_str, encoded_size(utf8_str));
877        }
878
879
880        /*!
881        \brief
882                Compares code points from this String with the null-terminated utf8 encoded 'utf8_str'.
883
884        \note
885                This does currently not properly consider Unicode and / or the system locale.
886
887        \param idx
888                Index of the first code point from this String to consider.
889
890        \param len
891                Maximum number of code points from this String to consider.
892
893        \param utf8_str
894                The buffer containing valid Unicode data encoded as utf8 that is to be compared with this String.
895
896        \note
897                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
898                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
899                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
900                results.
901
902        \return
903                -  0 if the specified sub-strings are equal
904                - <0 if specified sub-strings are lexicographically smaller than \a utf8_str
905                - >0 if specified sub-strings are lexicographically greater than \a utf8_str
906
907        \exception std::out_of_range    Thrown if \a idx is invalid.
908        */
909        int             compare(size_type idx, size_type len, const utf8* utf8_str) const
910        {
911                return compare(idx, len, utf8_str, encoded_size(utf8_str));
912        }
913
914        /*!
915        \brief
916                Compares code points from this String with the utf8 encoded data in buffer 'utf8_str'.
917
918        \note
919                This does currently not properly consider Unicode and / or the system locale.
920
921        \param idx
922                Index of the first code point from this String to consider.
923
924        \param len
925                Maximum number of code points from this String to consider.
926
927        \param utf8_str
928                The buffer containing valid Unicode data encoded as utf8 that is to be compared with this String.
929
930        \note
931                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
932                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
933                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
934                results.
935
936        \param str_cplen
937                The number of encoded code points in the buffer \a utf8_str (this is not the same as the number of code units).
938
939        \return
940                -  0 if the specified sub-strings are equal
941                - <0 if specified sub-strings are lexicographically smaller than \a utf8_str
942                - >0 if specified sub-strings are lexicographically greater than \a utf8_str
943
944        \exception std::out_of_range    Thrown if \a idx is invalid.
945        \exception std::length_error    Thrown if \a str_cplen is set to npos.
946        */
947        int             compare(size_type idx, size_type len, const utf8* utf8_str, size_type str_cplen) const
948        {
949                if (d_cplength < idx)
950                        throw std::out_of_range("Index is out of range for CEGUI::String");
951
952                if (str_cplen == npos)
953                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
954
955                if ((len == npos) || (idx + len > d_cplength))
956                        len = d_cplength - idx;
957
958                int val = (len == 0) ? 0 : utf32_comp_utf8(&ptr()[idx], utf8_str, (len < str_cplen) ? len : str_cplen);
959
960                return (val != 0) ? ((val < 0) ? -1 : 1) : (len < str_cplen) ? -1 : (len == str_cplen) ? 0 : 1;
961        }
962
963
964        /*!
965        \brief
966                Compares this String with the given c-string.
967
968        \note
969                This does currently not properly consider Unicode and / or the system locale.
970
971        \param cstr
972                The c-string that is to be compared with this String.
973
974        \return
975                -  0 if the strings are equal
976                - <0 if this string is lexicographically smaller than \a cstr
977                - >0 if this string is lexicographically greater than \a cstr
978        */
979        int             compare(const char* cstr) const
980        {
981                return compare(0, d_cplength, cstr, strlen(cstr));
982        }
983
984
985        /*!
986        \brief
987                Compares code points from this String with the given c-string.
988
989        \note
990                This does currently not properly consider Unicode and / or the system locale.
991
992        \param idx
993                Index of the first code point from this String to consider.
994
995        \param len
996                Maximum number of code points from this String to consider.
997
998        \param cstr
999                The c-string that is to be compared with this String.
1000
1001        \return
1002                -  0 if the specified sub-strings are equal
1003                - <0 if specified sub-strings are lexicographically smaller than \a cstr
1004                - >0 if specified sub-strings are lexicographically greater than \a cstr
1005
1006        \exception std::out_of_range    Thrown if \a idx is invalid.
1007        */
1008        int             compare(size_type idx, size_type len, const char* cstr) const
1009        {
1010                return compare(idx, len, cstr, strlen(cstr));
1011        }
1012
1013
1014        /*!
1015        \brief
1016                Compares code points from this String with chars in the given char array.
1017
1018        \note
1019                This does currently not properly consider Unicode and / or the system locale.
1020
1021        \param idx
1022                Index of the first code point from this String to consider.
1023
1024        \param len
1025                Maximum number of code points from this String to consider.
1026
1027        \param chars
1028                The array containing the chars that are to be compared with this String.
1029
1030        \param chars_len
1031                The number of chars in the array.
1032
1033        \return
1034                -  0 if the specified sub-strings are equal
1035                - <0 if specified sub-strings are lexicographically smaller than \a chars
1036                - >0 if specified sub-strings are lexicographically greater than \a chars
1037
1038        \exception std::out_of_range    Thrown if \a idx is invalid.
1039        \exception std::length_error    Thrown if \a chars_len is set to npos.
1040        */
1041        int             compare(size_type idx, size_type len, const char* chars, size_type chars_len) const
1042        {
1043                if (d_cplength < idx)
1044                        throw std::out_of_range("Index is out of range for CEGUI::String");
1045
1046                if (chars_len == npos)
1047                        throw std::length_error("Length for char array can not be 'npos'");
1048
1049                if ((len == npos) || (idx + len > d_cplength))
1050                        len = d_cplength - idx;
1051
1052                int val = (len == 0) ? 0 : utf32_comp_char(&ptr()[idx], chars, (len < chars_len) ? len : chars_len);
1053
1054                return (val != 0) ? ((val < 0) ? -1 : 1) : (len < chars_len) ? -1 : (len == chars_len) ? 0 : 1;
1055        }
1056
1057
1058        //////////////////////////////////////////////////////////////////////////
1059        // Character access
1060        //////////////////////////////////////////////////////////////////////////
1061        /*!
1062        \brief
1063                Returns the code point at the given index.
1064
1065        \param idx
1066                Zero based index of the code point to be returned.
1067
1068        \note
1069                - For constant strings length()/size() provide a valid index and will access the default utf32 value.
1070                - For non-constant strings length()/size() is an invalid index, and accessing (especially writing) this index could cause string corruption.
1071
1072        \return
1073                The utf32 code point at the given index within the String.
1074        */
1075        reference       operator[](size_type idx)
1076        {
1077                return (ptr()[idx]);
1078        }
1079
1080        /*!
1081        \brief
1082                Returns the code point at the given index.
1083
1084        \param idx
1085                Zero based index of the code point to be returned.
1086
1087        \note
1088                - For constant strings length()/size() provide a valid index and will access the default utf32 value.
1089                - For non-constant strings length()/size() is an invalid index, and accessing (especially writing) this index could cause string corruption.
1090
1091        \return
1092                The utf32 code point at the given index within the String.
1093        */
1094        value_type      operator[](size_type idx) const
1095        {
1096                return ptr()[idx];
1097        }
1098
1099        /*!
1100        \brief
1101                Returns the code point at the given index.
1102
1103        \param idx
1104                Zero based index of the code point to be returned.
1105
1106        \return
1107                The utf32 code point at the given index within the String.
1108
1109        \exception std::out_of_range    Thrown if \a idx is >= length().
1110        */
1111        reference       at(size_type idx)
1112        {
1113                if (d_cplength <= idx)
1114                        throw std::out_of_range("Index is out of range for CEGUI::String");
1115
1116                return ptr()[idx];
1117        }
1118
1119        /*!
1120        \brief
1121                Returns the code point at the given index.
1122
1123        \param idx
1124                Zero based index of the code point to be returned.
1125
1126        \return
1127                The utf32 code point at the given index within the String.
1128
1129        \exception std::out_of_range    Thrown if \a idx is >= length().
1130        */
1131        const_reference at(size_type idx) const
1132        {
1133                if (d_cplength <= idx)
1134                        throw std::out_of_range("Index is out of range for CEGUI::String");
1135
1136                return ptr()[idx];
1137        }
1138
1139
1140        //////////////////////////////////////////////////////////////////////////
1141        // C-Strings and arrays
1142        //////////////////////////////////////////////////////////////////////////
1143        /*!
1144        \brief
1145                Returns contents of the String as a null terminated string of utf8 encoded data.
1146
1147        \return
1148                Pointer to a char buffer containing the contents of the String encoded as null-terminated utf8 data.
1149
1150        \note
1151                The buffer returned from this function is owned by the String object.
1152
1153        \note
1154                Any function that modifies the String data will invalidate the buffer returned by this call.
1155        */
1156        const char* c_str(void) const
1157        {
1158                return (const char*)build_utf8_buff();
1159        }
1160
1161        /*!
1162        \brief
1163                Returns contents of the String as utf8 encoded data.
1164
1165        \return
1166                Pointer to a buffer containing the contents of the String encoded utf8 data.
1167
1168        \note
1169                The buffer returned from this function is owned by the String object.
1170
1171        \note
1172                Any function that modifies the String data will invalidate the buffer returned by this call.
1173        */
1174        const utf8* data(void) const
1175        {
1176                return build_utf8_buff();
1177        }
1178
1179        // copy, at most, 'len' code-points of the string, beginning with code-point 'idx', into the array 'buf' as valid utf8 encoded data
1180        // return number of utf8 code units placed into the buffer
1181        /*!
1182        \brief
1183                Copies an area of the String into the provided buffer as encoded utf8 data.
1184
1185        \param buf
1186                Pointer to a buffer that is to receive the encoded data (this must be big enough to hold the encoded data)
1187
1188        \param len
1189                Maximum number of code points from the String that should be encoded into the buffer
1190       
1191        \param idx
1192                Index of the first code point to be encoded into the buffer
1193
1194        \return
1195                The number of utf8 encoded code units transferred to the buffer.
1196
1197                \note A code unit does not equal a code point.  A utf32 code point, when encoded as utf8, can occupy between 1 and 4 code units.
1198
1199        \exception std::out_of_range    Thrown if \a idx was invalid for this String.
1200        */
1201        size_type       copy(utf8* buf, size_type len = npos, size_type idx = 0) const
1202        {
1203                if (d_cplength < idx)
1204                        throw std::out_of_range("Index is out of range for CEGUI::String");
1205
1206                if (len == npos)
1207                        len = d_cplength;
1208
1209                return encode(&ptr()[idx], buf, npos, len);
1210        }
1211
1212        //////////////////////////////////////////////////////////////////////////
1213        // UTF8 Encoding length information
1214        //////////////////////////////////////////////////////////////////////////
1215        // return the number of bytes required to hold 'num' code-points, starting at code-point 'idx', of the string when encoded as utf8 data.
1216        /*!
1217        \brief
1218                Return the number of utf8 code units required to hold an area of the String when encoded as utf8 data
1219
1220        \param num
1221                Maximum number of code points to consider when calculating utf8 encoded size.
1222
1223        \param idx
1224                Index of the first code point to consider when calculating the utf8 encoded size
1225
1226        \return
1227                The number of utf8 code units (bytes) required to hold the specified sub-string when encoded as utf8 data.
1228
1229        \exception std::out_of_range    Thrown if \a idx was invalid for this String.
1230        */
1231        size_type       utf8_stream_len(size_type num = npos, size_type idx = 0) const
1232        {
1233                using namespace std;
1234
1235                if (d_cplength < idx)
1236                        throw out_of_range("Index was out of range for CEGUI::String object");
1237
1238                size_type       maxlen = d_cplength - idx;
1239
1240                return encoded_size(&ptr()[idx], ceguimin(num, maxlen));
1241        }
1242
1243        //////////////////////////////////////////////////////////////////////////
1244        // Assignment Functions
1245        //////////////////////////////////////////////////////////////////////////
1246        /*!
1247        \brief
1248                Assign the value of String \a str to this String
1249
1250        \param str
1251                String object containing the string value to be assigned.
1252
1253        \return
1254                This String after the assignment has happened
1255        */
1256        String& operator=(const String& str)
1257        {
1258                return assign(str);
1259        }
1260
1261        /*!
1262        \brief
1263                Assign a sub-string of String \a str to this String
1264
1265        \param str
1266                String object containing the string data to be assigned.
1267
1268        \param str_idx
1269                Index of the first code point in \a str that is to be assigned
1270
1271        \param str_num
1272                Maximum number of code points from \a str that are to be assigned
1273
1274        \return
1275                This String after the assignment has happened
1276
1277        \exception std::out_of_range    Thrown if str_idx is invalid for \a str
1278        */
1279        String& assign(const String& str, size_type str_idx = 0, size_type str_num = npos)
1280        {
1281                if (str.d_cplength < str_idx)
1282                        throw std::out_of_range("Index was out of range for CEGUI::String object");
1283
1284                if (str_num == npos)
1285                        str_num = str.d_cplength - str_idx;
1286
1287                grow(str_num);
1288                setlen(str_num);
1289                memcpy(ptr(), &str.ptr()[str_idx], str_num * sizeof(utf32));
1290               
1291                return *this;
1292        }
1293
1294        /*!
1295        \brief
1296                Assign the value of std::string \a std_str to this String
1297
1298        \note
1299                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
1300                the provided data will occur.
1301
1302        \param std_str
1303                std::string object containing the string value to be assigned.
1304
1305        \return
1306                This String after the assignment has happened
1307
1308        \exception std::length_error    Thrown if the resulting String would have been too large.
1309        */
1310        String& operator=(const std::string& std_str)
1311        {
1312                return assign(std_str);
1313        }
1314
1315        /*!
1316        \brief
1317                Assign a sub-string of std::string \a std_str to this String
1318
1319        \note
1320                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
1321                the provided data will occur.
1322
1323        \param std_str
1324                std::string object containing the string value to be assigned.
1325
1326        \param str_idx
1327                Index of the first character of \a std_str to be assigned
1328
1329        \param str_num
1330                Maximum number of characters from \a std_str to be assigned
1331
1332        \return
1333                This String after the assignment has happened
1334
1335        \exception std::out_of_range    Thrown if \a str_idx is invalid for \a std_str
1336        \exception std::length_error    Thrown if the resulting String would have been too large.
1337        */
1338        String& assign(const std::string& std_str, size_type str_idx = 0, size_type str_num = npos)
1339        {
1340                if (std_str.size() < str_idx)
1341                        throw std::out_of_range("Index was out of range for std::string object");
1342
1343                if (str_num == npos)
1344                        str_num = (size_type)std_str.size() - str_idx;
1345
1346                grow(str_num);
1347                setlen(str_num);
1348
1349                while(str_num--)
1350                {
1351                        ((*this)[str_num]) = static_cast<utf32>(static_cast<unsigned char>(std_str[str_num + str_idx]));
1352                }
1353
1354                return *this;
1355        }
1356
1357        /*!
1358        \brief
1359                Assign to this String the string value represented by the given null-terminated utf8 encoded data
1360
1361        \note
1362                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1363                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1364                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1365                results.
1366
1367        \param utf8_str
1368                Buffer containing valid null-terminated utf8 encoded data
1369
1370        \return
1371                This String after the assignment has happened
1372
1373        \exception std::length_error    Thrown if the resulting String would have been too large.
1374        */
1375        String& operator=(const utf8* utf8_str)
1376        {
1377                return assign(utf8_str, utf_length(utf8_str));
1378        }
1379
1380        /*!
1381        \brief
1382                Assign to this String the string value represented by the given null-terminated utf8 encoded data
1383
1384        \note
1385                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1386                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1387                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1388                results.
1389
1390        \param utf8_str
1391                Buffer containing valid null-terminated utf8 encoded data
1392
1393        \return
1394                This String after the assignment has happened
1395
1396        \exception std::length_error    Thrown if the resulting String would have been too large.
1397        */
1398        String& assign(const utf8* utf8_str)
1399        {
1400                return assign(utf8_str, utf_length(utf8_str));
1401        }
1402
1403        /*!
1404        \brief
1405                Assign to this String the string value represented by the given utf8 encoded data
1406
1407        \note
1408                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1409                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1410                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1411                results.
1412
1413        \param utf8_str
1414                Buffer containing valid utf8 encoded data
1415
1416        \param str_num
1417                Number of code units (not code points) in the buffer pointed to by \a utf8_str
1418
1419        \return
1420                This String after the assignment has happened
1421
1422        \exception std::length_error    Thrown if the resulting String would have been too large, or if str_num is 'npos'.
1423        */
1424        String& assign(const utf8* utf8_str, size_type str_num)
1425        {
1426                if (str_num == npos)
1427                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
1428
1429                size_type enc_sze = encoded_size(utf8_str, str_num);
1430
1431                grow(enc_sze);
1432                encode(utf8_str, ptr(), d_reserve, str_num);
1433                setlen(enc_sze);
1434                return *this;
1435        }
1436
1437        /*!
1438        \brief
1439                Assigns the specified utf32 code point to this String.  Result is always a String 1 code point in length.
1440
1441        \param code_point
1442                Valid utf32 Unicode code point to be assigned to the string
1443
1444        \return
1445                This String after assignment
1446        */
1447        String& operator=(utf32 code_point)
1448        {
1449                return assign(1, code_point);
1450        }
1451
1452        /*!
1453        \brief
1454                Assigns the specified code point repeatedly to the String
1455
1456        \param num
1457                The number of times to assign the code point
1458
1459        \param code_point
1460                Valid utf32 Unicode code point to be assigned to the string
1461
1462        \return
1463                This String after assignment.
1464
1465        \exception std::length_error    Thrown if \a num was 'npos'
1466        */
1467        String& assign(size_type num, utf32 code_point)
1468        {
1469                if (num == npos)
1470                        throw std::length_error("Code point count can not be 'npos'");
1471
1472                grow(num);
1473                setlen(num);
1474                utf32* p = ptr();
1475
1476                while(num--)
1477                        *p++ = code_point;
1478
1479                return *this;
1480        }
1481
1482
1483        /*!
1484        \brief
1485                Assign to this String the given C-string.
1486
1487        \param cstr
1488                Pointer to a valid C style string.
1489
1490        \return
1491                This String after the assignment has happened
1492
1493        \exception std::length_error    Thrown if the resulting String would have been too large.
1494        */
1495        String& operator=(const char* cstr)
1496        {
1497                return assign(cstr, strlen(cstr));
1498        }
1499
1500
1501        /*!
1502        \brief
1503                Assign to this String the given C-string.
1504
1505        \param cstr
1506                Pointer to a valid C style string.
1507
1508        \return
1509                This String after the assignment has happened
1510
1511        \exception std::length_error    Thrown if the resulting String would have been too large.
1512        */
1513        String& assign(const char* cstr)
1514        {
1515                return assign(cstr, strlen(cstr));
1516        }
1517
1518
1519        /*!
1520        \brief
1521                Assign to this String a number of chars from a char array.
1522
1523        \param chars
1524                char array.
1525
1526        \param chars_len
1527                Number of chars to be assigned.
1528
1529        \return
1530                This String after the assignment has happened
1531
1532        \exception std::length_error    Thrown if the resulting String would have been too large.
1533        */
1534        String& assign(const char* chars, size_type chars_len)
1535        {
1536                grow(chars_len);
1537                utf32* pt = ptr();
1538
1539                for (size_type i = 0; i < chars_len; ++i)
1540                {
1541                        *pt++ = static_cast<utf32>(static_cast<unsigned char>(*chars++));
1542                }
1543
1544                setlen(chars_len);
1545                return *this;
1546        }
1547
1548
1549        /*!
1550        \brief
1551                Swaps the value of this String with the given String \a str
1552
1553        \param str
1554                String object whose value is to be swapped with this String.
1555
1556        \return
1557                Nothing
1558        */
1559        void    swap(String& str)
1560        {
1561                size_type       temp_len        = d_cplength;
1562                d_cplength = str.d_cplength;
1563                str.d_cplength = temp_len;
1564
1565                size_type       temp_res        = d_reserve;
1566                d_reserve = str.d_reserve;
1567                str.d_reserve = temp_res;
1568
1569                utf32*          temp_buf        = d_buffer;
1570                d_buffer = str.d_buffer;
1571                str.d_buffer = temp_buf;
1572
1573                // see if we need to swap 'quick buffer' data
1574                if (temp_res <= STR_QUICKBUFF_SIZE)
1575                {
1576                        utf32           temp_qbf[STR_QUICKBUFF_SIZE];
1577
1578                        memcpy(temp_qbf, d_quickbuff, STR_QUICKBUFF_SIZE * sizeof(utf32));
1579                        memcpy(d_quickbuff, str.d_quickbuff, STR_QUICKBUFF_SIZE * sizeof(utf32));
1580                        memcpy(str.d_quickbuff, temp_qbf, STR_QUICKBUFF_SIZE * sizeof(utf32));
1581                }
1582
1583        }
1584
1585        //////////////////////////////////////////////////////////////////////////
1586        // Appending Functions
1587        //////////////////////////////////////////////////////////////////////////
1588        /*!
1589        \brief
1590                Appends the String \a str
1591
1592        \param str
1593                String object that is to be appended
1594
1595        \return
1596                This String after the append operation
1597
1598        \exception std::length_error    Thrown if resulting String would be too large.
1599        */
1600        String& operator+=(const String& str)
1601        {
1602                return append(str);
1603        }
1604
1605        /*!
1606        \brief
1607                Appends a sub-string of the String \a str
1608
1609        \param str
1610                String object containing data to be appended
1611
1612        \param str_idx
1613                Index of the first code point to be appended
1614
1615        \param str_num
1616                Maximum number of code points to be appended
1617
1618        \return
1619                This String after the append operation
1620
1621        \exception std::out_of_range    Thrown if \a str_idx is invalid for \a str.
1622        \exception std::length_error    Thrown if resulting String would be too large.
1623        */
1624        String& append(const String& str, size_type str_idx = 0, size_type str_num = npos)
1625        {
1626                if (str.d_cplength < str_idx)
1627                        throw std::out_of_range("Index is out of range for CEGUI::String");
1628
1629                if (str_num == npos)
1630                        str_num = str.d_cplength - str_idx;
1631
1632                grow(d_cplength + str_num);
1633                memcpy(&ptr()[d_cplength], &str.ptr()[str_idx], str_num * sizeof(utf32));
1634                setlen(d_cplength + str_num);
1635                return *this;
1636        }
1637
1638
1639        /*!
1640        \brief
1641                Appends the std::string \a std_str
1642
1643        \param std_str
1644                std::string object that is to be appended
1645
1646        \note
1647                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
1648                the provided data will occur.
1649
1650        \return
1651                This String after the append operation
1652
1653        \exception std::length_error    Thrown if resulting String would be too large.
1654        */
1655        String& operator+=(const std::string& std_str)
1656        {
1657                return append(std_str);
1658        }
1659
1660        /*!
1661        \brief
1662                Appends a sub-string of the std::string \a std_str
1663
1664        \param std_str
1665                std::string object containing data to be appended
1666
1667        \note
1668                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
1669                the provided data will occur.
1670
1671        \param str_idx
1672                Index of the first character to be appended
1673
1674        \param str_num
1675                Maximum number of characters to be appended
1676
1677        \return
1678                This String after the append operation
1679
1680        \exception std::out_of_range    Thrown if \a str_idx is invalid for \a std_str.
1681        \exception std::length_error    Thrown if resulting String would be too large.
1682        */
1683        String& append(const std::string& std_str, size_type str_idx = 0, size_type str_num = npos)
1684        {
1685                if (std_str.size() < str_idx)
1686                        throw std::out_of_range("Index is out of range for std::string");
1687
1688                if (str_num == npos)
1689                        str_num = (size_type)std_str.size() - str_idx;
1690
1691                size_type newsze = d_cplength + str_num;
1692
1693                grow(newsze);
1694                utf32* pt = &ptr()[newsze-1];
1695
1696                while(str_num--)
1697                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(std_str[str_num]));
1698
1699                setlen(newsze);
1700                return *this;
1701        }
1702
1703
1704        /*!
1705        \brief
1706                Appends to the String the null-terminated utf8 encoded data in the buffer utf8_str.
1707
1708        \param utf8_str
1709                buffer holding the null-terminated utf8 encoded data that is to be appended
1710
1711        \note
1712                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1713                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1714                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1715                results.
1716
1717        \return
1718                This String after the append operation
1719
1720        \exception std::length_error    Thrown if resulting String would be too large.
1721        */
1722        String& operator+=(const utf8* utf8_str)
1723        {
1724                return append(utf8_str, utf_length(utf8_str));
1725        }
1726
1727        /*!
1728        \brief
1729                Appends to the String the null-terminated utf8 encoded data in the buffer utf8_str.
1730
1731        \param utf8_str
1732                Buffer holding the null-terminated utf8 encoded data that is to be appended
1733
1734        \note
1735                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1736                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1737                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1738                results.
1739
1740        \return
1741                This String after the append operation
1742
1743        \exception std::length_error    Thrown if resulting String would be too large.
1744        */
1745        String& append(const utf8* utf8_str)
1746        {
1747                return append(utf8_str, utf_length(utf8_str));
1748        }
1749
1750
1751        /*!
1752        \brief
1753                Appends to the String the utf8 encoded data in the buffer utf8_str.
1754
1755        \param utf8_str
1756                Buffer holding the utf8 encoded data that is to be appended
1757
1758        \note
1759                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
1760                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
1761                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
1762                results.
1763
1764        \param len
1765                Number of code units (not code points) in the buffer to be appended
1766
1767        \return
1768                This String after the append operation
1769
1770        \exception std::length_error    Thrown if resulting String would be too large, or if \a len was 'npos'
1771        */
1772        String& append(const utf8* utf8_str, size_type len)
1773        {
1774                if (len == npos)
1775                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
1776
1777                size_type encsz = encoded_size(utf8_str, len);
1778                size_type newsz = d_cplength + encsz;
1779
1780                grow(newsz);
1781                encode(utf8_str, &ptr()[d_cplength], encsz, len);
1782                setlen(newsz);
1783
1784                return *this;
1785        }
1786
1787
1788        /*!
1789        \brief
1790                Appends a single code point to the string
1791
1792        \param code_point
1793                utf32 Unicode code point that is to be appended
1794
1795        \return
1796                This String after the append operation
1797
1798        \exception std::length_error    Thrown if resulting String would be too long.
1799        */
1800        String& operator+=(utf32 code_point)
1801        {
1802                return append(1, code_point);
1803        }
1804
1805        /*!
1806        \brief
1807                Appends a single code point multiple times to the string
1808
1809        \param num
1810                Number of copies of the code point to be appended
1811
1812        \param code_point
1813                utf32 Unicode code point that is to be appended
1814
1815        \return
1816                This String after the append operation
1817
1818        \exception std::length_error    Thrown if resulting String would be too long, or if \a num was 'npos'.
1819        */
1820        String& append(size_type num, utf32 code_point)
1821        {
1822                if (num == npos)
1823                        throw std::length_error("Code point count can not be 'npos'");
1824
1825                size_type newsz = d_cplength + num;
1826                grow(newsz);
1827
1828                utf32* p = &ptr()[d_cplength];
1829
1830                while(num--)
1831                        *p++ = code_point;
1832
1833                setlen(newsz);
1834
1835                return *this;
1836        }
1837
1838        /*!
1839        \brief
1840                Appends a single code point to the string
1841
1842        \param code_point
1843                utf32 Unicode code point that is to be appended
1844
1845        \return
1846                Nothing
1847
1848        \exception std::length_error    Thrown if resulting String would be too long.
1849        */
1850        void    push_back(utf32 code_point)
1851        {
1852                append(1, code_point);
1853        }
1854
1855        /*!
1856        \brief
1857                Appends the code points in the reange [beg, end)
1858
1859        \param beg
1860                Iterator describing the start of the range to be appended
1861
1862        \param end
1863                Iterator describing the (exclusive) end of the range to be appended.
1864
1865        \return
1866                This String after the append operation
1867
1868        \exception std::length_error    Thrown if the resulting string would be too large.
1869        */
1870        String& append(const_iterator iter_beg, const_iterator iter_end)
1871        {
1872                return replace(end(), end(), iter_beg, iter_end);
1873        }
1874
1875
1876        /*!
1877        \brief
1878                Appends to the String the given c-string.
1879
1880        \param c_str
1881                c-string that is to be appended.
1882
1883        \return
1884                This String after the append operation
1885
1886        \exception std::length_error    Thrown if resulting String would be too large.
1887        */
1888        String& operator+=(const char* cstr)
1889        {
1890                return append(cstr, strlen(cstr));
1891        }
1892
1893
1894        /*!
1895        \brief
1896                Appends to the String the given c-string.
1897
1898        \param c_str
1899                c-string that is to be appended.
1900
1901        \return
1902                This String after the append operation
1903
1904        \exception std::length_error    Thrown if resulting String would be too large.
1905        */
1906        String& append(const char* cstr)
1907        {
1908                return append(cstr, strlen(cstr));
1909        }
1910
1911
1912        /*!
1913        \brief
1914                Appends to the String chars from the given char array.
1915
1916        \param chars
1917                char array holding the chars that are to be appended
1918
1919        \param chars_len
1920                Number of chars to be appended
1921
1922        \return
1923                This String after the append operation
1924
1925        \exception std::length_error    Thrown if resulting String would be too large, or if \a chars_len was 'npos'
1926        */
1927        String& append(const char* chars, size_type chars_len)
1928        {
1929                if (chars_len == npos)
1930                        throw std::length_error("Length for char array can not be 'npos'");
1931
1932                size_type newsz = d_cplength + chars_len;
1933
1934                grow(newsz);
1935
1936                utf32* pt = &ptr()[newsz-1];
1937
1938                while(chars_len--)
1939                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(chars[chars_len]));
1940
1941                setlen(newsz);
1942
1943                return *this;
1944        }
1945
1946
1947        //////////////////////////////////////////////////////////////////////////
1948        // Insertion Functions
1949        //////////////////////////////////////////////////////////////////////////
1950        /*!
1951        \brief
1952                Inserts the given String object at the specified position.
1953
1954        \param idx
1955                Index where the string is to be inserted.
1956
1957        \param str
1958                String object that is to be inserted.
1959
1960        \return
1961                This String after the insert.
1962
1963        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
1964        \exception std::length_error    Thrown if resulting String would be too large.
1965        */
1966        String& insert(size_type idx, const String& str)
1967        {
1968                return insert(idx, str, 0, npos);
1969        }
1970
1971        /*!
1972        \brief
1973                Inserts a sub-string of the given String object at the specified position.
1974
1975        \param idx
1976                Index where the string is to be inserted.
1977
1978        \param str
1979                String object containing data to be inserted.
1980
1981        \param str_idx
1982                Index of the first code point from \a str to be inserted.
1983
1984        \param str_num
1985                Maximum number of code points from \a str to be inserted.
1986
1987        \return
1988                This String after the insert.
1989
1990        \exception std::out_of_range    Thrown if \a idx or \a str_idx are out of range.
1991        \exception std::length_error    Thrown if resulting String would be too large.
1992        */
1993        String& insert(size_type idx, const String& str, size_type str_idx, size_type str_num)
1994        {
1995                if ((d_cplength < idx) || (str.d_cplength < str_idx))
1996                        throw std::out_of_range("Index is out of range for CEGUI::String");
1997
1998                if (str_num == npos)
1999                        str_num = str.d_cplength - str_idx;
2000
2001                size_type newsz = d_cplength + str_num;
2002                grow(newsz);
2003                memmove(&ptr()[idx + str_num], &ptr()[idx], (d_cplength - idx) * sizeof(utf32));
2004                memcpy(&ptr()[idx], &str.ptr()[str_idx], str_num * sizeof(utf32));
2005                setlen(newsz);
2006
2007                return *this;
2008        }
2009
2010        /*!
2011        \brief
2012                Inserts the given std::string object at the specified position.
2013
2014        \param idx
2015                Index where the std::string is to be inserted.
2016
2017        \param std_str
2018                std::string object that is to be inserted.
2019
2020        \note
2021                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
2022                the provided data will occur.
2023
2024        \return
2025                This String after the insert.
2026
2027        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2028        \exception std::length_error    Thrown if resulting String would be too large.
2029        */
2030        String& insert(size_type idx, const std::string& std_str)
2031        {
2032                return insert(idx, std_str, 0, npos);
2033        }
2034
2035        /*!
2036        \brief
2037                Inserts a sub-string of the given std::string object at the specified position.
2038
2039        \param idx
2040                Index where the string is to be inserted.
2041
2042        \param std_str
2043                std::string object containing data to be inserted.
2044
2045        \note
2046                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
2047                the provided data will occur.
2048
2049        \param str_idx
2050                Index of the first character from \a std_str to be inserted.
2051
2052        \param str_num
2053                Maximum number of characters from \a str to be inserted.
2054
2055        \return
2056                This String after the insert.
2057
2058        \exception std::out_of_range    Thrown if \a idx or \a str_idx are out of range.
2059        \exception std::length_error    Thrown if resulting String would be too large.
2060        */
2061        String& insert(size_type idx, const std::string& std_str, size_type str_idx, size_type str_num)
2062        {
2063                if (d_cplength < idx)
2064                        throw std::out_of_range("Index is out of range for CEGUI::String");
2065
2066                if (std_str.size() < str_idx)
2067                        throw std::out_of_range("Index is out of range for std::string");
2068
2069                if (str_num == npos)
2070                        str_num = (size_type)std_str.size() - str_idx;
2071
2072                size_type newsz = d_cplength + str_num;
2073                grow(newsz);
2074
2075                memmove(&ptr()[idx + str_num], &ptr()[idx], (d_cplength - idx) * sizeof(utf32));
2076
2077                utf32* pt = &ptr()[idx + str_num - 1];
2078               
2079                while(str_num--)
2080                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(std_str[str_idx + str_num]));
2081
2082                setlen(newsz);
2083
2084                return *this;
2085        }
2086
2087        /*!
2088        \brief
2089                Inserts the given null-terminated utf8 encoded data at the specified position.
2090
2091        \param idx
2092                Index where the data is to be inserted.
2093
2094        \param utf8_str
2095                Buffer containing the null-terminated utf8 encoded data that is to be inserted.
2096
2097        \note
2098                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2099                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2100                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2101                results.
2102
2103        \return
2104                This String after the insert.
2105
2106        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2107        \exception std::length_error    Thrown if resulting String would be too large.
2108        */
2109        String& insert(size_type idx, const utf8* utf8_str)
2110        {
2111                return insert(idx, utf8_str, utf_length(utf8_str));
2112        }
2113
2114        /*!
2115        \brief
2116                Inserts the given utf8 encoded data at the specified position.
2117
2118        \param idx
2119                Index where the data is to be inserted.
2120
2121        \param utf8_str
2122                Buffer containing the utf8 encoded data that is to be inserted.
2123
2124        \note
2125                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2126                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2127                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2128                results.
2129
2130        \param len
2131                Length of the data to be inserted in uf8 code units (not code points)
2132
2133        \return
2134                This String after the insert.
2135
2136        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2137        \exception std::length_error    Thrown if resulting String would be too large, or if \a len is 'npos'
2138        */
2139        String& insert(size_type idx, const utf8* utf8_str, size_type len)
2140        {
2141                if (d_cplength < idx)
2142                        throw std::out_of_range("Index is out of range for CEGUI::String");
2143
2144                if (len == npos)
2145                        throw std::length_error("Length of utf8 encoded string can not be 'npos'");
2146
2147                size_type encsz = encoded_size(utf8_str, len);
2148                size_type newsz = d_cplength + encsz;
2149
2150                grow(newsz);
2151                memmove(&ptr()[idx + encsz], &ptr()[idx], (d_cplength - idx) * sizeof(utf32));
2152                encode(utf8_str, &ptr()[idx], encsz, len);
2153                setlen(newsz);
2154
2155                return *this;
2156        }
2157
2158        /*!
2159        \brief
2160                Inserts a code point multiple times into the String
2161
2162        \param idx
2163                Index where the code point(s) are to be inserted
2164
2165        \param num
2166                The number of times to insert the code point
2167
2168        \param code_point
2169                The utf32 code point that is to be inserted
2170
2171        \return
2172                This String after the insertion.
2173
2174        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2175        \exception std::length_error    Thrown if resulting String would be too large, or if \a num is 'npos'
2176        */
2177        String& insert(size_type idx, size_type num, utf32 code_point)
2178        {
2179                if (d_cplength < idx)
2180                        throw std::out_of_range("Index is out of range for CEGUI::String");
2181
2182                if (num == npos)
2183                        throw std::length_error("Code point count can not be 'npos'");
2184
2185                size_type newsz = d_cplength + num;
2186                grow(newsz);
2187
2188                memmove(&ptr()[idx + num], &ptr()[idx], (d_cplength - idx) * sizeof(utf32));
2189
2190                utf32* pt = &ptr()[idx + num - 1];
2191
2192                while(num--)
2193                        *pt-- = code_point;
2194
2195                setlen(newsz);
2196
2197                return *this;
2198        }
2199
2200        /*!
2201        \brief
2202                Inserts a code point multiple times into the String
2203
2204        \param pos
2205                Iterator describing the position where the code point(s) are to be inserted
2206
2207        \param num
2208                The number of times to insert the code point
2209
2210        \param code_point
2211                The utf32 code point that is to be inserted
2212
2213        \return
2214                This String after the insertion.
2215
2216        \exception std::length_error    Thrown if resulting String would be too large, or if \a num is 'npos'
2217        */
2218        void insert(iterator pos, size_type num, utf32 code_point)
2219        {
2220                insert(safe_iter_dif(pos, begin()), num, code_point);
2221        }
2222
2223        /*!
2224        \brief
2225                Inserts a single code point into the String
2226
2227        \param pos
2228                Iterator describing the position where the code point is to be inserted
2229
2230        \param code_point
2231                The utf32 code point that is to be inserted
2232
2233        \return
2234                This String after the insertion.
2235
2236        \exception std::length_error    Thrown if resulting String would be too large.
2237        */
2238        iterator insert(iterator pos, utf32 code_point)
2239        {
2240                insert(pos, 1, code_point);
2241                return pos;
2242        }
2243
2244        /*!
2245        \brief
2246                Inserts code points specified by the range [beg, end).
2247
2248        \param pos
2249                Iterator describing the position where the data is to be inserted
2250
2251        \param beg
2252                Iterator describing the begining of the range to be inserted
2253
2254        \param end
2255                Iterator describing the (exclusive) end of the range to be inserted.
2256
2257        \return
2258                Nothing.
2259
2260        \exception std::length_error    Thrown if resulting String would be too large.
2261        */
2262        void    insert(iterator iter_pos, const_iterator iter_beg, const_iterator iter_end)
2263        {
2264                replace(iter_pos, iter_pos, iter_beg, iter_end);
2265        }
2266
2267
2268        /*!
2269        \brief
2270                Inserts the given c-string at the specified position.
2271
2272        \param idx
2273                Index where the c-string is to be inserted.
2274
2275        \param c_str
2276                c-string that is to be inserted.
2277
2278        \return
2279                This String after the insert.
2280
2281        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2282        \exception std::length_error    Thrown if resulting String would be too large.
2283        */
2284        String& insert(size_type idx, const char* cstr)
2285        {
2286                return insert(idx, cstr, strlen(cstr));
2287        }
2288
2289
2290        /*!
2291        \brief
2292                Inserts chars from the given char array at the specified position.
2293
2294        \param idx
2295                Index where the data is to be inserted.
2296
2297        \param chars
2298                char array containing the chars that are to be inserted.
2299
2300        \param chars_len
2301                Length of the char array to be inserted.
2302
2303        \return
2304                This String after the insert.
2305
2306        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2307        \exception std::length_error    Thrown if resulting String would be too large, or if \a chars_len is 'npos'
2308        */
2309        String& insert(size_type idx, const char* chars, size_type chars_len)
2310        {
2311                if (d_cplength < idx)
2312                        throw std::out_of_range("Index is out of range for CEGUI::String");
2313
2314                if (chars_len == npos)
2315                        throw std::length_error("Length of char array can not be 'npos'");
2316
2317                size_type newsz = d_cplength + chars_len;
2318
2319                grow(newsz);
2320                memmove(&ptr()[idx + chars_len], &ptr()[idx], (d_cplength - idx) * sizeof(utf32));
2321
2322                utf32* pt = &ptr()[idx + chars_len - 1];
2323
2324                while(chars_len--)
2325                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(chars[chars_len]));
2326
2327                setlen(newsz);
2328
2329                return *this;
2330        }
2331
2332
2333        //////////////////////////////////////////////////////////////////////////
2334        // Erasing characters
2335        //////////////////////////////////////////////////////////////////////////
2336        /*!
2337        \brief
2338                Removes all data from the String
2339
2340        \return
2341                Nothing
2342        */
2343        void    clear(void)
2344        {
2345                setlen(0);
2346                trim();
2347        }
2348
2349        /*!
2350        \brief
2351                Removes all data from the String
2352
2353        \return
2354                The empty String (*this)
2355        */
2356        String& erase(void)
2357        {
2358                clear();
2359                return *this;
2360        }
2361
2362        /*!
2363        \brief
2364                Erase a single code point from the string
2365
2366        \param idx
2367                The index of the code point to be removed.
2368
2369        \return
2370                This String after the erase operation
2371
2372        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2373        */
2374        String& erase(size_type idx)
2375        {
2376                return erase(idx, 1);
2377        }
2378
2379        /*!
2380        \brief
2381                Erase a range of code points
2382
2383        \param idx
2384                Index of the first code point to be removed.
2385
2386        \param len
2387                Maximum number of code points to be removed.
2388
2389        \return
2390                This String after the erase operation.
2391
2392        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
2393        */
2394        String& erase(size_type idx, size_type len = npos)
2395        {
2396                if (d_cplength < idx)
2397                        throw std::out_of_range("Index is out of range foe CEGUI::String");
2398
2399                if (len == npos)
2400                        len = d_cplength - idx;
2401
2402                size_type newsz = d_cplength - len;
2403
2404                memmove(&ptr()[idx], &ptr()[idx + len], (d_cplength - idx - len) * sizeof(utf32));
2405                setlen(newsz);
2406                return  *this;
2407        }
2408
2409        /*!
2410        \brief
2411                Erase the code point described by the given iterator
2412
2413        \param pos
2414                Iterator describing the code point to be erased
2415
2416        \return
2417                This String after the erase operation.
2418        */
2419        String& erase(iterator pos)
2420        {
2421                return erase(safe_iter_dif(pos, begin()), 1);
2422        }
2423
2424        /*!
2425        \brief
2426                Erase a range of code points described by the iterators [beg, end).
2427
2428        \param beg
2429                Iterator describing the postion of the beginning of the range to erase
2430
2431        \param end
2432                Iterator describing the postion of the (exclusive) end of the range to erase
2433
2434        \return
2435                This String after the erase operation.
2436        */
2437        String& erase(iterator iter_beg, iterator iter_end)
2438        {
2439                return erase(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg));
2440        }
2441
2442        //////////////////////////////////////////////////////////////////////////
2443        // Resizing
2444        //////////////////////////////////////////////////////////////////////////
2445        /*!
2446        \brief
2447                Resizes the String either by inserting default utf32 code points to make it larger, or by truncating to make it smaller
2448
2449        \param num
2450                The length, in code points, that the String is to be made.
2451
2452        \return
2453                Nothing.
2454
2455        \exception std::length_error    Thrown if the String would be too large.
2456        */
2457        void    resize(size_type num)
2458        {
2459                resize(num, utf32());
2460        }
2461
2462        /*!
2463        \brief
2464                Resizes the String either by inserting the given utf32 code point to make it larger, or by truncating to make it smaller
2465
2466        \param num
2467                The length, in code points, that the String is to be made.
2468
2469        \param code_point
2470                The utf32 code point that should be used when majing the String larger
2471
2472        \return
2473                Nothing.
2474
2475        \exception std::length_error    Thrown if the String would be too large.
2476        */
2477        void    resize(size_type num, utf32 code_point)
2478        {
2479                if (num < d_cplength)
2480                {
2481                        setlen(num);
2482                }
2483                else
2484                {
2485                        append(num - d_cplength, code_point);
2486                }
2487
2488        }
2489
2490        //////////////////////////////////////////////////////////////////////////
2491        // Replacing Characters
2492        //////////////////////////////////////////////////////////////////////////
2493        /*!
2494        \brief
2495                Replace code points in the String with the specified String object
2496
2497        \param idx
2498                Index of the first code point to be replaced
2499
2500        \param len
2501                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
2502
2503        \param str
2504                The String object that is to replace the specified code points
2505
2506        \return
2507                This String after the replace operation
2508
2509        \exception std::out_of_range    Thrown if \a idx is invalid for this String
2510        \exception std::length_error    Thrown if the resulting String would be too large.
2511        */
2512        String& replace(size_type idx, size_type len, const String& str)
2513        {       // the whole of 'str' is used: sub-string index 0 with a count of npos
2514                return replace(idx, len, str, 0, npos);
2515        }
2516
2517        /*!
2518        \brief
2519                Replace the code points in the range [beg, end) with the specified String object
2520
2521        \note
2522                If \a beg == \a end, the operation is an insert at iterator position \a beg
2523
2524        \param beg
2525                Iterator describing the start of the range to be replaced
2526
2527        \param end
2528                Iterator describing the (exclusive) end of the range to be replaced.
2529
2530        \param str
2531                The String object that is to replace the specified range of code points
2532
2533        \return
2534                This String after the replace operation
2535
2536        \exception std::length_error    Thrown if the resulting String would be too large.
2537        */
2538        String& replace(iterator iter_beg, iterator iter_end, const String& str)
2539        {       // iterators are turned into (index, length) with safe_iter_dif; all of 'str' is used (index 0, count npos)
2540                return replace(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg), str, 0, npos);
2541        }
2542
2543        /*!
2544        \brief
2545                Replace code points in the String with a specified sub-string of a given String object.
2546
2547        \param idx
2548                Index of the first code point to be replaced
2549
2550        \param len
2551                Maximum number of code points to be replaced.  If this is 0, the operation is an insert at position \a idx.
2552
2553        \param str
2554                String object containing the data that will replace the specified range of code points
2555
2556        \param str_idx
2557                Index of the first code point of \a str that is to replace the specified code point range
2558
2559        \param str_num
2560                Maximum number of code points of \a str that are to replace the specified code point range
2561
2562        \return
2563                This String after the replace operation
2564
2565        \exception std::out_of_range    Thrown if either \a idx, or \a str_idx are invalid
2566        \exception std::length_error    Thrown if the resulting String would have been too large.
2567        */
2568        String& replace(size_type idx, size_type len, const String& str, size_type str_idx, size_type str_num)
2569        {
2570                if ((d_cplength < idx) || (str.d_cplength < str_idx))
2571                        throw std::out_of_range("Index is out of range for CEGUI::String");
2572
2573                if (((str_idx + str_num) > str.d_cplength) || (str_num == npos))
2574                        str_num = str.d_cplength - str_idx;
2575
2576                if (((len + idx) > d_cplength) || (len == npos))
2577                        len = d_cplength - idx;
2578
2579                size_type newsz = d_cplength + str_num - len;
2580
2581                grow(newsz);
2582
2583                if ((idx + len) < d_cplength)
2584                        memmove(&ptr()[idx + str_num], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
2585
2586                memcpy(&ptr()[idx], &str.ptr()[str_idx], str_num * sizeof(utf32));
2587                setlen(newsz);
2588
2589                return *this;
2590        }
2591
2592
2593        /*!
2594        \brief
2595                Replace code points in the String with the specified std::string object
2596
2597        \param idx
2598                Index of the first code point to be replaced
2599
2600        \param len
2601                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
2602
2603        \param std_str
2604                The std::string object that is to replace the specified code points
2605
2606        \note
2607                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
2608                the encountered data is performed.
2609
2610        \return
2611                This String after the replace operation
2612
2613        \exception std::out_of_range    Thrown if \a idx is invalid for this String
2614        \exception std::length_error    Thrown if the resulting String would be too large.
2615        */
2616        String& replace(size_type idx, size_type len, const std::string& std_str)
2617        {       // the whole of 'std_str' is used: sub-string index 0 with a count of npos
2618                return replace(idx, len, std_str, 0, npos);
2619        }
2620
2621        /*!
2622        \brief
2623                Replace the code points in the range [beg, end) with the specified std::string object
2624
2625        \note
2626                If \a beg == \a end, the operation is an insert at iterator position \a beg
2627
2628        \param beg
2629                Iterator describing the start of the range to be replaced
2630
2631        \param end
2632                Iterator describing the (exclusive) end of the range to be replaced.
2633
2634        \param std_str
2635                The std::string object that is to replace the specified range of code points
2636
2637        \note
2638                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
2639                the encountered data is performed.
2640
2641        \return
2642                This String after the replace operation
2643
2644        \exception std::length_error    Thrown if the resulting String would be too large.
2645        */
2646        String& replace(iterator iter_beg, iterator iter_end, const std::string& std_str)
2647        {       // iterators are turned into (index, length) with safe_iter_dif; all of 'std_str' is used (index 0, count npos)
2648                return replace(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg), std_str, 0, npos);
2649        }
2650
2651        /*!
2652        \brief
2653                Replace code points in the String with a specified sub-string of a given std::string object.
2654
2655        \param idx
2656                Index of the first code point to be replaced
2657
2658        \param len
2659                Maximum number of code points to be replaced.  If this is 0, the operation is an insert at position \a idx.
2660
2661        \param std_str
2662                std::string object containing the data that will replace the specified range of code points
2663
2664        \note
2665                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
2666                the encountered data is performed.
2667
2668        \param str_idx
2669                Index of the first code point of \a std_str that is to replace the specified code point range
2670
2671        \param str_num
2672                Maximum number of code points of \a std_str that are to replace the specified code point range
2673
2674        \return
2675                This String after the replace operation
2676
2677        \exception std::out_of_range    Thrown if either \a idx, or \a str_idx are invalid
2678        \exception std::length_error    Thrown if the resulting String would have been too large.
2679        */
2680        String& replace(size_type idx, size_type len, const std::string& std_str, size_type str_idx, size_type str_num)
2681        {
2682                if (d_cplength < idx)
2683                        throw std::out_of_range("Index is out of range for CEGUI::String");
2684
2685                if (std_str.size() < str_idx)
2686                        throw std::out_of_range("Index is out of range for std::string");
2687
2688                if (((str_idx + str_num) > std_str.size()) || (str_num == npos))
2689                        str_num = (size_type)std_str.size() - str_idx;
2690
2691                if (((len + idx) > d_cplength) || (len == npos))
2692                        len = d_cplength - idx;
2693
2694                size_type newsz = d_cplength + str_num - len;
2695
2696                grow(newsz);
2697
2698                if ((idx + len) < d_cplength)
2699                        memmove(&ptr()[idx + str_num], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
2700
2701                utf32* pt = &ptr()[idx + str_num - 1];
2702
2703                while (str_num--)
2704                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(std_str[str_idx + str_num]));
2705
2706                setlen(newsz);
2707
2708                return *this;
2709        }
2710
2711
2712        /*!
2713        \brief
2714                Replace code points in the String with the specified null-terminated utf8 encoded data.
2715
2716        \param idx
2717                Index of the first code point to be replaced
2718
2719        \param len
2720                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
2721
2722        \param utf8_str
2723                Buffer containing the null-terminated utf8 encoded data that is to replace the specified code points
2724
2725        \note
2726                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2727                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2728                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2729                results.
2730
2731        \return
2732                This String after the replace operation
2733
2734        \exception std::out_of_range    Thrown if \a idx is invalid for this String
2735        \exception std::length_error    Thrown if the resulting String would be too large.
2736        */
2737        String& replace(size_type idx, size_type len, const utf8* utf8_str)
2738        {       // the str_len argument of the explicit-length overload is taken from utf_length(utf8_str)
2739                return replace(idx, len, utf8_str, utf_length(utf8_str));
2740        }
2741
2742        /*!
2743        \brief
2744                Replace the code points in the range [beg, end) with the specified null-terminated utf8 encoded data.
2745
2746        \note
2747                If \a beg == \a end, the operation is an insert at iterator position \a beg
2748
2749        \param beg
2750                Iterator describing the start of the range to be replaced
2751
2752        \param end
2753                Iterator describing the (exclusive) end of the range to be replaced.
2754
2755        \param utf8_str
2756                Buffer containing the null-terminated utf8 encoded data that is to replace the specified range of code points
2757
2758        \note
2759                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2760                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2761                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2762                results.
2763
2764        \return
2765                This String after the replace operation
2766
2767        \exception std::length_error    Thrown if the resulting String would be too large.
2768        */
2769        String& replace(iterator iter_beg, iterator iter_end, const utf8* utf8_str)
2770        {       // forwards to the iterator overload that takes an explicit length, supplied by utf_length(utf8_str)
2771                return replace(iter_beg, iter_end, utf8_str, utf_length(utf8_str));
2772        }
2773
2774        /*!
2775        \brief
2776                Replace code points in the String with the specified utf8 encoded data.
2777
2778        \param idx
2779                Index of the first code point to be replaced
2780
2781        \param len
2782                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
2783
2784        \param utf8_str
2785                Buffer containing the utf8 encoded data that is to replace the specified code points (its length is given by \a str_len)
2786
2787        \note
2788                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2789                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2790                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2791                results.
2792
2793        \param str_len
2794                Length of the utf8 encoded data in utf8 code units (not code points).
2795
2796        \return
2797                This String after the replace operation
2798
2799        \exception std::out_of_range    Thrown if \a idx is invalid for this String
2800        \exception std::length_error    Thrown if the resulting String would be too large, or if \a str_len was 'npos'.
2801        */
2802        String& replace(size_type idx, size_type len, const utf8* utf8_str, size_type str_len)
2803        {
2804                if (d_cplength < idx)
2805                        throw std::out_of_range("Index is out of range for CEGUI::String");
2806
2807                if (str_len == npos)
2808                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
2809
2810                if (((len + idx) > d_cplength) || (len == npos))
2811                        len = d_cplength - idx;
2812
2813                size_type encsz = encoded_size(utf8_str, str_len);
2814                size_type newsz = d_cplength + encsz - len;
2815
2816                grow(newsz);
2817
2818                if ((idx + len) < d_cplength)
2819                        memmove(&ptr()[idx + encsz], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
2820
2821                encode(utf8_str, &ptr()[idx], encsz, str_len);
2822
2823                setlen(newsz);
2824                return *this;
2825        }
2826
2827        /*!
2828        \brief
2829                Replace the code points in the range [beg, end) with the specified utf8 encoded data.
2830
2831        \note
2832                If \a beg == \a end, the operation is an insert at iterator position \a beg
2833
2834        \param beg
2835                Iterator describing the start of the range to be replaced
2836
2837        \param end
2838                Iterator describing the (exclusive) end of the range to be replaced.
2839
2840        \param utf8_str
2841                Buffer containing the utf8 encoded data that is to replace the specified range of code points (its length is given by \a str_len)
2842
2843        \note
2844                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
2845                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
2846                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
2847                results.
2848
2849        \param str_len
2850                Length of the utf8 encoded data in utf8 code units (not code points).
2851
2852        \return
2853                This String after the replace operation
2854
2855                \exception std::length_error    Thrown if the resulting String would be too large, or if \a str_len was 'npos'.
2856        */
2857        String& replace(iterator iter_beg, iterator iter_end, const utf8* utf8_str, size_type str_len)
2858        {       // iterators are turned into (index, length) with safe_iter_dif, then the index-based overload does the work
2859                return replace(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg), utf8_str, str_len);
2860        }
2861
2862        /*!
2863        \brief
2864                Replaces a specified range of code points with occurrences of a given code point
2865
2866        \param idx
2867                Index of the first code point to be replaced
2868
2869        \param len
2870                Maximum number of code points to replace.  If this is 0 the operation is an insert
2871
2872        \param num
2873                Number of occurrences of \a code_point that are to replace the specified range of code points
2874
2875        \param code_point
2876                Code point that is to be used when replacing the specified range of code points
2877
2878        \return
2879                This String after the replace operation.
2880
2881        \exception std::out_of_range    Thrown if \a idx is invalid for this String
2882        \exception std::length_error    Thrown if resulting String would have been too long, or if \a num was 'npos'.
2883        */
2884        String& replace(size_type idx, size_type len, size_type num, utf32 code_point)
2885        {
2886                if (d_cplength < idx)
2887                        throw std::out_of_range("Index is out of range for CEGUI::String");
2888
2889                if (num == npos)
2890                        throw std::length_error("Code point count can not be 'npos'");
2891
2892                if (((len + idx) > d_cplength) || (len == npos))
2893                        len = d_cplength - idx;
2894
2895                size_type newsz = d_cplength + num - len;
2896
2897                grow(newsz);
2898
2899                if ((idx + len) < d_cplength)
2900                        memmove(&ptr()[idx + num], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
2901
2902                utf32* pt = &ptr()[idx + num - 1];
2903
2904                while (num--)
2905                        *pt-- = code_point;
2906
2907                setlen(newsz);
2908
2909                return *this;
2910        }
2911
2912        /*!
2913        \brief
2914                Replace the code points in the range [beg, end) with occurrences of a given code point
2915
2916        \note
2917                If \a beg == \a end, the operation is an insert at iterator position \a beg
2918
2919        \param beg
2920                Iterator describing the start of the range to be replaced
2921
2922        \param end
2923                Iterator describing the (exclusive) end of the range to be replaced.
2924
2925        \param num
2926                Number of occurrences of \a code_point that are to replace the specified range of code points
2927
2928        \param code_point
2929                Code point that is to be used when replacing the specified range of code points
2930
2931        \return
2932                This String after the replace operation
2933
2934        \exception std::length_error    Thrown if resulting String would have been too long, or if \a num was 'npos'.
2935        */
2936        String& replace(iterator iter_beg, iterator iter_end, size_type num, utf32 code_point)
2937        {       // iterators are turned into (index, length) with safe_iter_dif, then the index-based overload does the work
2938                return replace(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg), num, code_point);
2939        }
2940
2941
2942        /*!
2943        \brief
2944                Replace the code points in the range [beg, end) with code points from the range [newBeg, newEnd).
2945
2946        \note
2947                If \a beg == \a end, the operation is an insert at iterator position \a beg
2948
2949        \param beg
2950                Iterator describing the start of the range to be replaced
2951
2952        \param end
2953                Iterator describing the (exclusive) end of the range to be replaced.
2954
2955        \param newBeg
2956                Iterator describing the beginning of the range to insert.
2957
2958        \param newEnd   
2959                Iterator describing the (exclusive) end of the range to insert.
2960
2961        \return
2962                This String after the replace operation.
2963
2964        \exception std::length_error    Thrown if the resulting string would be too long.
2965        */
2966        String& replace(iterator iter_beg, iterator iter_end, const_iterator iter_newBeg, const_iterator iter_newEnd)
2967        {
2968                if (iter_beg == iter_end)
2969                {
2970                        erase(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg));
2971                }
2972                else
2973                {
2974                        size_type str_len = safe_iter_dif(iter_newEnd, iter_newBeg);
2975                        size_type idx = safe_iter_dif(iter_beg, begin());
2976                        size_type len = safe_iter_dif(iter_end, iter_beg);
2977
2978                        if ((len + idx) > d_cplength)
2979                                len = d_cplength - idx;
2980
2981                        size_type newsz = d_cplength + str_len - len;
2982
2983                        grow(newsz);
2984
2985                        if ((idx + len) < d_cplength)
2986                                memmove(&ptr()[idx + str_len], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
2987
2988                        memcpy(&ptr()[idx], iter_newBeg.d_ptr, str_len * sizeof(utf32));
2989                        setlen(newsz);
2990                }
2991
2992                return *this;
2993        }
2994
2995
2996        /*!
2997        \brief
2998                Replace code points in the String with the specified c-string.
2999
3000        \param idx
3001                Index of the first code point to be replaced
3002
3003        \param len
3004                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
3005
3006        \param c_str
3007                c-string that is to replace the specified code points
3008
3009        \return
3010                This String after the replace operation
3011
3012        \exception std::out_of_range    Thrown if \a idx is invalid for this String
3013        \exception std::length_error    Thrown if the resulting String would be too large.
3014        */
3015        String& replace(size_type idx, size_type len, const char* cstr)
3016        {       // the char count is measured with strlen(), so cstr has to be a null-terminated string
3017                return replace(idx, len, cstr, strlen(cstr));
3018        }
3019
3020
3021        /*!
3022        \brief
3023                Replace the code points in the range [beg, end) with the specified c-string.
3024
3025        \note
3026                If \a beg == \a end, the operation is an insert at iterator position \a beg
3027
3028        \param beg
3029                Iterator describing the start of the range to be replaced
3030
3031        \param end
3032                Iterator describing the (exclusive) end of the range to be replaced.
3033
3034        \param c_str
3035                c-string that is to replace the specified range of code points
3036
3037        \return
3038                This String after the replace operation
3039
3040        \exception std::length_error    Thrown if the resulting String would be too large.
3041        */
3042        String& replace(iterator iter_beg, iterator iter_end, const char* cstr)
3043        {       // forwards to the iterator overload taking a char count; the count is measured with strlen()
3044                return replace(iter_beg, iter_end, cstr, strlen(cstr));
3045        }
3046
3047
3048        /*!
3049        \brief
3050                Replace code points in the String with chars from the given char array.
3051
3052        \param idx
3053                Index of the first code point to be replaced
3054
3055        \param len
3056                Maximum number of code points to be replaced (if this is 0, operation is an insert at position \a idx)
3057
3058        \param chars
3059                char array containing the chars that are to replace the specified code points
3060
3061        \param chars_len
3062                Number of chars in the char array.
3063
3064        \return
3065                This String after the replace operation
3066
3067        \exception std::out_of_range    Thrown if \a idx is invalid for this String
3068        \exception std::length_error    Thrown if the resulting String would be too large, or if \a chars_len was 'npos'.
3069        */
3070        String& replace(size_type idx, size_type len, const char* chars, size_type chars_len)
3071        {
3072                if (d_cplength < idx)
3073                        throw std::out_of_range("Index is out of range for CEGUI::String");
3074
3075                if (chars_len == npos)
3076                        throw std::length_error("Length for the char array can not be 'npos'");
3077
3078                if (((len + idx) > d_cplength) || (len == npos))
3079                        len = d_cplength - idx;
3080
3081                size_type newsz = d_cplength + chars_len - len;
3082
3083                grow(newsz);
3084
3085                if ((idx + len) < d_cplength)
3086                        memmove(&ptr()[idx + chars_len], &ptr()[len + idx], (d_cplength - idx - len) * sizeof(utf32));
3087
3088                utf32* pt = &ptr()[idx + chars_len - 1];
3089
3090                while (chars_len--)
3091                        *pt-- = static_cast<utf32>(static_cast<unsigned char>(chars[chars_len]));
3092
3093                setlen(newsz);
3094                return *this;
3095        }
3096
3097
3098        /*!
3099        \brief
3100                Replace the code points in the range [beg, end) with chars from the given char array.
3101
3102        \note
3103                If \a beg == \a end, the operation is an insert at iterator position \a beg
3104
3105        \param beg
3106                Iterator describing the start of the range to be replaced
3107
3108        \param end
3109                Iterator describing the (exclusive) end of the range to be replaced.
3110
3111        \param chars
3112                char array containing the chars that are to replace the specified range of code points
3113
3114        \param chars_len
3115                Number of chars in the char array.
3116
3117        \return
3118                This String after the replace operation
3119
3120        \exception std::length_error    Thrown if the resulting String would be too large, or if \a chars_len was 'npos'.
3121        */
3122        String& replace(iterator iter_beg, iterator iter_end, const char* chars, size_type chars_len)
3123        {       // iterators are turned into (index, length) with safe_iter_dif, then the index-based overload does the work
3124                return replace(safe_iter_dif(iter_beg, begin()), safe_iter_dif(iter_end, iter_beg), chars, chars_len);
3125        }
3126
3127
3128        //////////////////////////////////////////////////////////////////////////
3129        // Find a code point
3130        //////////////////////////////////////////////////////////////////////////
3131        /*!
3132        \brief
3133                Search forwards for a given code point
3134
3135        \param code_point
3136                The utf32 code point to search for
3137
3138        \param idx
3139                Index of the code point where the search is to start.
3140
3141        \return
3142                - Index of the first occurrence of \a code_point travelling forwards from \a idx.
3143                - npos if the code point could not be found
3144        */
3145        size_type       find(utf32 code_point, size_type idx = 0) const
3146        {
3147                if (idx < d_cplength)
3148                {
3149                        const utf32* pt = &ptr()[idx];
3150
3151                        while (idx < d_cplength)
3152                        {
3153                                if (*pt++ == code_point)
3154                                        return idx;
3155
3156                                ++idx;
3157                        }
3158
3159                }
3160
3161                return npos;
3162        }
3163
3164        /*!
3165        \brief
3166                Search backwards for a given code point
3167
3168        \param code_point
3169                The utf32 code point to search for
3170
3171        \param idx
3172                Index of the code point where the search is to start.
3173
3174        \return
3175                - Index of the first occurrence of \a code_point travelling backwards from \a idx.
3176                - npos if the code point could not be found
3177        */
3178        size_type       rfind(utf32 code_point, size_type idx = npos) const
3179        {
3180                if (idx >= d_cplength)
3181                        idx = d_cplength - 1;
3182
3183                if (d_cplength > 0)
3184                {
3185                        const utf32* pt = &ptr()[idx];
3186
3187                        do
3188                        {
3189                                if (*pt-- == code_point)
3190                                        return idx;
3191
3192                        } while (idx-- != 0);
3193
3194                }
3195
3196                return npos;
3197        }
3198
3199        //////////////////////////////////////////////////////////////////////////
3200        // Find a substring
3201        //////////////////////////////////////////////////////////////////////////
3202        /*!
3203        \brief
3204                Search forwards for a sub-string
3205
3206        \param str
3207                String object describing the sub-string to search for
3208
3209        \param idx
3210                Index of the code point where the search is to start
3211
3212        \return
3213                - Index of the first occurrence of sub-string \a str travelling forwards from \a idx.
3214                - npos if the sub-string could not be found
3215        */
3216        size_type       find(const String& str, size_type idx = 0) const
3217        {
3218                if ((str.d_cplength == 0) && (idx < d_cplength))
3219                        return idx;
3220
3221                if (idx < d_cplength)
3222                {
3223                        // loop while search string could fit in to search area
3224                        while (d_cplength - idx >= str.d_cplength)
3225                        {
3226                                if (0 == compare(idx, str.d_cplength, str))
3227                                        return idx;
3228
3229                                ++idx;
3230                        }
3231
3232                }
3233
3234                return npos;
3235        }
3236
3237        /*!
3238        \brief
3239                Search backwards for a sub-string
3240
3241        \param str
3242                String object describing the sub-string to search for
3243
3244        \param idx
3245                Index of the code point where the search is to start
3246
3247        \return
3248                - Index of the first occurrence of sub-string \a str travelling backwards from \a idx.
3249                - npos if the sub-string could not be found
3250        */
3251        size_type       rfind(const String& str, size_type idx = npos) const
3252        {
3253                if (str.d_cplength == 0)
3254                        return (idx < d_cplength) ? idx : d_cplength;
3255
3256                if (str.d_cplength <= d_cplength)
3257                {
3258                        if (idx > (d_cplength - str.d_cplength))
3259                                idx = d_cplength - str.d_cplength;
3260
3261                        do
3262                        {
3263                                if (0 == compare(idx, str.d_cplength, str))
3264                                        return idx;
3265
3266                        } while (idx-- != 0);
3267
3268                }
3269
3270                return npos;
3271        }
3272
3273        /*!
3274        \brief
3275                Search forwards for a sub-string
3276
3277        \param std_str
3278                std::string object describing the sub-string to search for
3279
3280        \note
3281                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
3282                the encountered data is performed.
3283
3284        \param idx
3285                Index of the code point where the search is to start
3286
3287        \return
3288                - Index of the first occurrence of sub-string \a std_str travelling forwards from \a idx.
3289                - npos if the sub-string could not be found
3290        */
3291        size_type       find(const std::string& std_str, size_type idx = 0) const
3292        {
3293                std::string::size_type sze = std_str.size();
3294
3295                if ((sze == 0) && (idx < d_cplength))
3296                        return idx;
3297
3298                if (idx < d_cplength)
3299                {
3300                        // loop while search string could fit in to search area
3301                        while (d_cplength - idx >= sze)
3302                        {
3303                                if (0 == compare(idx, (size_type)sze, std_str))
3304                                        return idx;
3305
3306                                ++idx;
3307                        }
3308
3309                }
3310
3311                return npos;
3312        }
3313
3314        /*!
3315        \brief
3316                Search backwards for a sub-string
3317
3318        \param std_str
3319                std::string object describing the sub-string to search for
3320
3321        \note
3322                Characters from \a std_str are considered to represent Unicode code points in the range 0x00..0xFF.  No translation of
3323                the encountered data is performed.
3324
3325        \param idx
3326                Index of the code point where the search is to start
3327
3328        \return
3329                - Index of the first occurrence of sub-string \a std_str travelling backwards from \a idx.
3330                - npos if the sub-string could not be found
3331        */
3332        size_type       rfind(const std::string& std_str, size_type idx = npos) const
3333        {
3334                std::string::size_type sze = std_str.size();
3335
3336                if (sze == 0)
3337                        return (idx < d_cplength) ? idx : d_cplength;
3338
3339                if (sze <= d_cplength)
3340                {
3341                        if (idx > (d_cplength - sze))
3342                                idx = d_cplength - sze;
3343
3344                        do
3345                        {
3346                                if (0 == compare(idx, (size_type)sze, std_str))
3347                                        return idx;
3348
3349                        } while (idx-- != 0);
3350
3351                }
3352
3353                return npos;
3354        }
3355
3356        /*!
3357        \brief
3358                Search forwards for a sub-string
3359
3360        \param utf8_str
3361                Buffer containing null-terminated utf8 encoded data describing the sub-string to search for
3362
3363        \note
3364                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3365                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3366                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3367                results.
3368
3369        \param idx
3370                Index of the code point where the search is to start
3371
3372        \return
3373                - Index of the first occurrence of sub-string \a utf8_str travelling forwards from \a idx.
3374                - npos if the sub-string could not be found
3375
3376        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3377        */
3378        size_type       find(const utf8* utf8_str, size_type idx = 0) const
3379        {
3380                return find(utf8_str, idx, utf_length(utf8_str));
3381        }
3382
3383        /*!
3384        \brief
3385                Search backwards for a sub-string
3386
3387        \param utf8_str
3388                Buffer containing null-terminated utf8 encoded data describing the sub-string to search for
3389
3390        \note
3391                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3392                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3393                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3394                results.
3395
3396        \param idx
3397                Index of the code point where the search is to start
3398
3399        \return
3400                - Index of the first occurrence of sub-string \a utf8_str travelling backwards from \a idx.
3401                - npos if the sub-string could not be found
3402
3403        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3404        */
3405        size_type       rfind(const utf8* utf8_str, size_type idx = npos) const
3406        {
3407                return rfind(utf8_str, idx, utf_length(utf8_str));
3408        }
3409
3410        /*!
3411        \brief
3412                Search forwards for a sub-string
3413
3414        \param utf8_str
3415                Buffer containing utf8 encoded data describing the sub-string to search for
3416
3417        \note
3418                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3419                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3420                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3421                results.
3422
3423        \param idx
3424                Index of the code point where the search is to start
3425
3426        \param str_len
3427                Length of the utf8 encoded sub-string in utf8 code units (not code points)
3428
3429        \return
3430                - Index of the first occurrence of sub-string \a utf8_str travelling forwards from \a idx.
3431                - npos if the sub-string could not be found
3432
3433        \exception std::length_error    Thrown if \a str_len is 'npos'
3434        */
3435        size_type       find(const utf8* utf8_str, size_type idx, size_type str_len) const
3436        {
3437                if (str_len == npos)
3438                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
3439
3440                size_type sze = encoded_size(utf8_str, str_len);
3441
3442                if ((sze == 0) && (idx < d_cplength))
3443                        return idx;
3444
3445                if (idx < d_cplength)
3446                {
3447                        // loop while search string could fit in to search area
3448                        while (d_cplength - idx >= sze)
3449                        {
3450                                if (0 == compare(idx, sze, utf8_str, sze))
3451                                        return idx;
3452
3453                                ++idx;
3454                        }
3455
3456                }
3457
3458                return npos;
3459        }
3460
3461        /*!
3462        \brief
3463                Search backwards for a sub-string
3464
3465        \param utf8_str
3466                Buffer containing utf8 encoded data describing the sub-string to search for
3467
3468        \note
3469                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3470                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3471                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3472                results.
3473
3474        \param idx
3475                Index of the code point where the search is to start
3476
3477        \param str_len
3478                Length of the utf8 encoded sub-string in utf8 code units (not code points)
3479
3480        \return
3481                - Index of the first occurrence of sub-string \a utf8_str travelling backwards from \a idx.
3482                - npos if the sub-string could not be found
3483
3484        \exception std::length_error    Thrown if \a str_len is 'npos'
3485        */
3486        size_type       rfind(const utf8* utf8_str, size_type idx, size_type str_len) const
3487        {
3488                if (str_len == npos)
3489                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
3490
3491                size_type sze = encoded_size(utf8_str, str_len);
3492
3493                if (sze == 0)
3494                        return (idx < d_cplength) ? idx : d_cplength;
3495
3496                if (sze <= d_cplength)
3497                {
3498                        if (idx > (d_cplength - sze))
3499                                idx = d_cplength - sze;
3500
3501                        do
3502                        {
3503                                if (0 == compare(idx, sze, utf8_str, sze))
3504                                        return idx;
3505
3506                        } while (idx-- != 0);
3507
3508                }
3509
3510                return npos;
3511        }
3512
3513
3514        /*!
3515        \brief
3516                Search forwards for a sub-string
3517
3518        \param c_str
3519                c-string describing the sub-string to search for
3520
3521        \param idx
3522                Index of the code point where the search is to start
3523
3524        \return
3525                - Index of the first occurrence of sub-string \a c_str travelling forwards from \a idx.
3526                - npos if the sub-string could not be found
3527
3528        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3529        */
3530        size_type       find(const char* cstr, size_type idx = 0) const
3531        {
3532                return find(cstr, idx, strlen(cstr));
3533        }
3534
3535
3536        /*!
3537        \brief
3538                Search backwards for a sub-string
3539
3540        \param c_str
3541                c-string describing the sub-string to search for
3542
3543        \param idx
3544                Index of the code point where the search is to start
3545
3546        \return
3547                - Index of the first occurrence of sub-string \a c_str travelling backwards from \a idx.
3548                - npos if the sub-string could not be found
3549
3550        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3551        */
3552        size_type       rfind(const char* cstr, size_type idx = npos) const
3553        {
3554                return rfind(cstr, idx, strlen(cstr));
3555        }
3556
3557
3558        /*!
3559        \brief
3560                Search forwards for a sub-string
3561
3562        \param chars
3563                char array describing the sub-string to search for
3564
3565        \param idx
3566                Index of the code point where the search is to start
3567
3568        \param chars_len
3569                Number of chars in the char array.
3570
3571        \return
3572                - Index of the first occurrence of sub-string \a chars travelling forwards from \a idx.
3573                - npos if the sub-string could not be found
3574
3575        \exception std::length_error    Thrown if \a chars_len is 'npos'
3576        */
3577        size_type       find(const char* chars, size_type idx, size_type chars_len) const
3578        {
3579                if (chars_len == npos)
3580                        throw std::length_error("Length for char array can not be 'npos'");
3581
3582                if ((chars_len == 0) && (idx < d_cplength))
3583                        return idx;
3584
3585                if (idx < d_cplength)
3586                {
3587                        // loop while search string could fit in to search area
3588                        while (d_cplength - idx >= chars_len)
3589                        {
3590                                if (0 == compare(idx, chars_len, chars, chars_len))
3591                                        return idx;
3592
3593                                ++idx;
3594                        }
3595
3596                }
3597
3598                return npos;
3599        }
3600
3601
3602        /*!
3603        \brief
3604                Search backwards for a sub-string
3605
3606        \param chars
3607                char array describing the sub-string to search for
3608
3609        \param idx
3610                Index of the code point where the search is to start
3611
3612        \param chars_len
3613                Number of chars in the char array.
3614
3615        \return
3616                - Index of the first occurrence of sub-string \a chars travelling backwards from \a idx.
3617                - npos if the sub-string could not be found
3618
3619        \exception std::length_error    Thrown if \a chars_len is 'npos'
3620        */
3621        size_type       rfind(const char* chars, size_type idx, size_type chars_len) const
3622        {
3623                if (chars_len == npos)
3624                        throw std::length_error("Length for char array can not be 'npos'");
3625
3626                if (chars_len == 0)
3627                        return (idx < d_cplength) ? idx : d_cplength;
3628
3629                if (chars_len <= d_cplength)
3630                {
3631                        if (idx > (d_cplength - chars_len))
3632                                idx = d_cplength - chars_len;
3633
3634                        do
3635                        {
3636                                if (0 == compare(idx, chars_len, chars, chars_len))
3637                                        return idx;
3638
3639                        } while (idx-- != 0);
3640
3641                }
3642
3643                return npos;
3644        }
3645
3646
3647        //////////////////////////////////////////////////////////////////////////
3648        // Find first of different code-points
3649        //////////////////////////////////////////////////////////////////////////
3650        /*!
3651        \brief
3652                Find the first occurrence of one of a set of code points.
3653
3654        \param str
3655                String object describing the set of code points.
3656
3657        \param idx
3658                Index of the start point for the search
3659
3660        \return
3661                - Index of the first occurrence of any one of the code points in \a str starting from from \a idx.
3662                - npos if none of the code points in \a str were found.
3663        */
3664        size_type       find_first_of(const String& str, size_type idx = 0) const
3665        {
3666                if (idx < d_cplength)
3667                {
3668                        const utf32* pt = &ptr()[idx];
3669
3670                        do
3671                        {
3672                                if (npos != str.find(*pt++))
3673                                        return idx;
3674
3675                        } while (++idx != d_cplength);
3676
3677                }
3678
3679                return npos;
3680        }
3681
3682        /*!
3683        \brief
3684                Find the first code point that is not one of a set of code points.
3685
3686        \param str
3687                String object describing the set of code points.
3688
3689        \param idx
3690                Index of the start point for the search
3691
3692        \return
3693                - Index of the first code point that does not match any one of the code points in \a str starting from from \a idx.
3694                - npos if all code points matched one of the code points in \a str.
3695        */
3696        size_type       find_first_not_of(const String& str, size_type idx = 0) const
3697        {
3698                if (idx < d_cplength)
3699                {
3700                        const utf32* pt = &ptr()[idx];
3701
3702                        do
3703                        {
3704                                if (npos == str.find(*pt++))
3705                                        return idx;
3706
3707                        } while (++idx != d_cplength);
3708
3709                }
3710
3711                return npos;
3712        }
3713
3714
3715        /*!
3716        \brief
3717                Find the first occurrence of one of a set of code points.
3718
3719        \param std_str
3720                std::string object describing the set of code points.
3721
3722        \note
3723                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
3724                the provided data will occur.
3725
3726        \param idx
3727                Index of the start point for the search
3728
3729        \return
3730                - Index of the first occurrence of any one of the code points in \a std_str starting from from \a idx.
3731                - npos if none of the code points in \a std_str were found.
3732        */
3733        size_type       find_first_of(const std::string& std_str, size_type idx = 0) const
3734        {
3735                if (idx < d_cplength)
3736                {
3737                        const utf32* pt = &ptr()[idx];
3738
3739                        do
3740                        {
3741                                if (npos != find_codepoint(std_str, *pt++))
3742                                        return idx;
3743
3744                        } while (++idx != d_cplength);
3745
3746                }
3747
3748                return npos;
3749        }
3750
3751        /*!
3752        \brief
3753                Find the first code point that is not one of a set of code points.
3754
3755        \param std_str
3756                std::string object describing the set of code points.
3757
3758        \note
3759                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
3760                the provided data will occur.
3761
3762        \param idx
3763                Index of the start point for the search
3764
3765        \return
3766                - Index of the first code point that does not match any one of the code points in \a std_str starting from from \a idx.
3767                - npos if all code points matched one of the code points in \a std_str.
3768        */
3769        size_type       find_first_not_of(const std::string& std_str, size_type idx = 0) const
3770        {
3771                if (idx < d_cplength)
3772                {
3773                        const utf32* pt = &ptr()[idx];
3774
3775                        do
3776                        {
3777                                if (npos == find_codepoint(std_str, *pt++))
3778                                        return idx;
3779
3780                        } while (++idx != d_cplength);
3781
3782                }
3783
3784                return npos;
3785        }
3786
3787
3788        /*!
3789        \brief
3790                Find the first occurrence of one of a set of code points.
3791
3792        \param utf8_str
3793                Buffer containing null-terminated utf8 encoded data describing the set of code points.
3794
3795        \note
3796                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3797                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3798                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3799                results.
3800
3801        \param idx
3802                Index of the start point for the search
3803
3804        \return
3805                - Index of the first occurrence of any one of the code points in \a utf8_str starting from from \a idx.
3806                - npos if none of the code points in \a utf8_str were found.
3807
3808        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3809        */
3810        size_type       find_first_of(const utf8* utf8_str, size_type idx = 0) const
3811        {
3812                return find_first_of(utf8_str, idx, utf_length(utf8_str));
3813        }
3814
3815        /*!
3816        \brief
3817                Find the first code point that is not one of a set of code points.
3818
3819        \param utf8_str
3820                Buffer containing null-terminated utf8 encoded data describing the set of code points.
3821
3822        \note
3823                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3824                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3825                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3826                results.
3827
3828        \param idx
3829                Index of the start point for the search
3830
3831        \return
3832                - Index of the first code point that does not match any one of the code points in \a utf8_str starting from from \a idx.
3833                - npos if all code points matched one of the code points in \a utf8_str.
3834
3835        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3836        */
3837        size_type       find_first_not_of(const utf8* utf8_str, size_type idx = 0) const
3838        {
3839                return find_first_not_of(utf8_str, idx, utf_length(utf8_str));
3840        }
3841
3842        /*!
3843        \brief
3844                Find the first occurrence of one of a set of code points.
3845
3846        \param utf8_str
3847                Buffer containing utf8 encoded data describing the set of code points.
3848
3849        \note
3850                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3851                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3852                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3853                results.
3854
3855        \param idx
3856                Index of the start point for the search
3857
3858        \param str_len
3859                Length of the utf8 encoded data in utf8 code units (not code points).
3860
3861        \return
3862                - Index of the first occurrence of any one of the code points in \a utf8_str starting from from \a idx.
3863                - npos if none of the code points in \a utf8_str were found.
3864
3865        \exception std::length_error    Thrown if \a str_len was 'npos'.
3866        */
3867        size_type       find_first_of(const utf8* utf8_str, size_type idx, size_type str_len) const
3868        {
3869                if (str_len == npos)
3870                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
3871
3872                if (idx < d_cplength)
3873                {
3874                        size_type encsze = encoded_size(utf8_str, str_len);
3875
3876                        const utf32* pt = &ptr()[idx];
3877
3878                        do
3879                        {
3880                                if (npos != find_codepoint(utf8_str, encsze, *pt++))
3881                                        return idx;
3882
3883                        } while (++idx != d_cplength);
3884
3885                }
3886
3887                return npos;
3888        }
3889
3890        /*!
3891        \brief
3892                Find the first code point that is not one of a set of code points.
3893
3894        \param utf8_str
3895                Buffer containing utf8 encoded data describing the set of code points.
3896
3897        \note
3898                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
3899                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
3900                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
3901                results.
3902
3903        \param idx
3904                Index of the start point for the search
3905
3906        \param str_len
3907                Length of the utf8 encoded data in utf8 code units (not code points).
3908
3909        \return
3910                - Index of the first code point that does not match any one of the code points in \a utf8_str starting from from \a idx.
3911                - npos if all code points matched one of the code points in \a utf8_str.
3912
3913        \exception std::length_error    Thrown if \a str_len was 'npos'.
3914        */
3915        size_type       find_first_not_of(const utf8* utf8_str, size_type idx, size_type str_len) const
3916        {
3917                if (str_len == npos)
3918                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
3919
3920                if (idx < d_cplength)
3921                {
3922                        size_type encsze = encoded_size(utf8_str, str_len);
3923
3924                        const utf32* pt = &ptr()[idx];
3925
3926                        do
3927                        {
3928                                if (npos == find_codepoint(utf8_str, encsze, *pt++))
3929                                        return idx;
3930
3931                        } while (++idx != d_cplength);
3932
3933                }
3934
3935                return npos;
3936        }
3937
3938
3939        /*!
3940        \brief
3941                Search forwards for a given code point
3942
3943        \param code_point
3944                The utf32 code point to search for
3945
3946        \param idx
3947                Index of the code point where the search is to start.
3948
3949        \return
3950                - Index of the first occurrence of \a code_point starting from from \a idx.
3951                - npos if the code point could not be found
3952        */
3953        size_type       find_first_of(utf32 code_point, size_type idx = 0) const
3954        {
3955                return find(code_point, idx);
3956        }
3957
3958        /*!
3959        \brief
3960                Search forwards for the first code point that does not match a given code point
3961
3962        \param code_point
3963                The utf32 code point to search for
3964
3965        \param idx
3966                Index of the code point where the search is to start.
3967
3968        \return
3969                - Index of the first code point that does not match \a code_point starting from from \a idx.
3970                - npos if all code points matched \a code_point
3971
3972        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
3973        */
3974        size_type       find_first_not_of(utf32 code_point, size_type idx = 0) const
3975        {
3976                if (idx < d_cplength)
3977                {
3978                        do
3979                        {
3980                                if ((*this)[idx] != code_point)
3981                                        return idx;
3982
3983                        } while(idx++ < d_cplength);
3984
3985                }
3986
3987                return npos;
3988        }
3989
3990
3991        /*!
3992        \brief
3993                Find the first occurrence of one of a set of chars.
3994
3995        \param c_str
3996                c-string describing the set of chars.
3997
3998        \param idx
3999                Index of the start point for the search
4000
4001        \return
4002                - Index of the first occurrence of any one of the chars in \a c_str starting from from \a idx.
4003                - npos if none of the chars in \a c_str were found.
4004
4005        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4006        */
4007        size_type       find_first_of(const char* cstr, size_type idx = 0) const
4008        {
4009                return find_first_of(cstr, idx, strlen(cstr));
4010        }
4011
4012
4013        /*!
4014        \brief
4015                Find the first code point that is not one of a set of chars.
4016
4017        \param c_str
4018                c-string describing the set of chars.
4019
4020        \param idx
4021                Index of the start point for the search
4022
4023        \return
4024                - Index of the first code point that does not match any one of the chars in \a c_str starting from from \a idx.
4025                - npos if all code points matched any of the chars in \a c_str.
4026
4027        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4028        */
4029        size_type       find_first_not_of(const char* cstr, size_type idx = 0) const
4030        {
4031                return find_first_not_of(cstr, idx, strlen(cstr));
4032        }
4033
4034
4035        /*!
4036        \brief
4037                Find the first occurrence of one of a set of chars.
4038
4039        \param chars
4040                char array containing the set of chars.
4041
4042        \param idx
4043                Index of the start point for the search
4044
4045        \param chars_len
4046                Number of chars in the char array.
4047
4048        \return
4049                - Index of the first occurrence of any one of the chars in \a chars starting from from \a idx.
4050                - npos if none of the chars in \a chars were found.
4051
4052        \exception std::length_error    Thrown if \a chars_len was 'npos'.
4053        */
4054        size_type       find_first_of(const char* chars, size_type idx, size_type chars_len) const
4055        {
4056                if (chars_len == npos)
4057                        throw std::length_error("Length for char array can not be 'npos'");
4058
4059                if (idx < d_cplength)
4060                {
4061                        const utf32* pt = &ptr()[idx];
4062
4063                        do
4064                        {
4065                                if (npos != find_codepoint(chars, chars_len, *pt++))
4066                                        return idx;
4067
4068                        } while (++idx != d_cplength);
4069
4070                }
4071
4072                return npos;
4073        }
4074
4075
4076        /*!
4077        \brief
4078                Find the first code point that is not one of a set of chars.
4079
4080        \param chars
4081                char array containing the set of chars.
4082
4083        \param idx
4084                Index of the start point for the search
4085
4086        \param chars_len
4087                Number of chars in the car array.
4088
4089        \return
4090                - Index of the first code point that does not match any one of the chars in \a chars starting from from \a idx.
4091                - npos if all code points matched any of the chars in \a chars.
4092
4093        \exception std::length_error    Thrown if \a chars_len was 'npos'.
4094        */
4095        size_type       find_first_not_of(const char* chars, size_type idx, size_type chars_len) const
4096        {
4097                if (chars_len == npos)
4098                        throw std::length_error("Length for char array can not be 'npos'");
4099
4100                if (idx < d_cplength)
4101                {
4102                        const utf32* pt = &ptr()[idx];
4103
4104                        do
4105                        {
4106                                if (npos == find_codepoint(chars, chars_len, *pt++))
4107                                        return idx;
4108
4109                        } while (++idx != d_cplength);
4110
4111                }
4112
4113                return npos;
4114        }
4115
4116
4117        //////////////////////////////////////////////////////////////////////////
4118        // Find last of different code-points
4119        //////////////////////////////////////////////////////////////////////////
4120        /*!
4121        \brief
4122                Find the last occurrence of one of a set of code points.
4123
4124        \param str
4125                String object describing the set of code points.
4126
4127        \param idx
4128                Index of the start point for the search
4129
4130        \return
4131                - Index of the last occurrence of any one of the code points in \a str starting from \a idx.
4132                - npos if none of the code points in \a str were found.
4133        */
4134        size_type       find_last_of(const String& str, size_type idx = npos) const
4135        {
4136                if (d_cplength > 0)
4137                {
4138                        if (idx >= d_cplength)
4139                                idx = d_cplength - 1;
4140
4141                        const utf32* pt = &ptr()[idx];
4142
4143                        do
4144                        {
4145                                if (npos != str.find(*pt--))
4146                                        return idx;
4147
4148                        } while (idx-- != 0);
4149
4150                }
4151
4152                return npos;
4153        }
4154
4155        /*!
4156        \brief
4157                Find the last code point that is not one of a set of code points.
4158
4159        \param str
4160                String object describing the set of code points.
4161
4162        \param idx
4163                Index of the start point for the search
4164
4165        \return
4166                - Index of the last code point that does not match any one of the code points in \a str starting from \a idx.
4167                - npos if all code points matched one of the code points in \a str.
4168        */
4169        size_type       find_last_not_of(const String& str, size_type idx = npos) const
4170        {
4171                if (d_cplength > 0)
4172                {
4173                        if (idx >= d_cplength)
4174                                idx = d_cplength - 1;
4175
4176                        const utf32* pt = &ptr()[idx];
4177
4178                        do
4179                        {
4180                                if (npos == str.find(*pt--))
4181                                        return idx;
4182
4183                        } while (idx-- != 0);
4184
4185                }
4186
4187                return npos;
4188        }
4189
4190
4191        /*!
4192        \brief
4193                Find the last occurrence of one of a set of code points.
4194
4195        \param std_str
4196                std::string object describing the set of code points.
4197
4198        \note
4199                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
4200                the provided data will occur.
4201
4202        \param idx
4203                Index of the start point for the search
4204
4205        \return
4206                - Index of the last occurrence of any one of the code points in \a std_str starting from \a idx.
4207                - npos if none of the code points in \a std_str were found.
4208        */
4209        size_type       find_last_of(const std::string& std_str, size_type idx = npos) const
4210        {
4211                if (d_cplength > 0)
4212                {
4213                        if (idx >= d_cplength)
4214                                idx = d_cplength - 1;
4215
4216                        const utf32* pt = &ptr()[idx];
4217
4218                        do
4219                        {
4220                                if (npos != find_codepoint(std_str, *pt--))
4221                                        return idx;
4222
4223                        } while (idx-- != 0);
4224
4225                }
4226
4227                return npos;
4228        }
4229
4230        /*!
4231        \brief
4232                Find the last code point that is not one of a set of code points.
4233
4234        \param std_str
4235                std::string object describing the set of code points.
4236
4237        \note
4238                The characters of \a std_str are taken to be unencoded data which represent Unicode code points 0x00..0xFF.  No translation of
4239                the provided data will occur.
4240
4241        \param idx
4242                Index of the start point for the search
4243
4244        \return
4245                - Index of the last code point that does not match any one of the code points in \a std_str starting from \a idx.
4246                - npos if all code points matched one of the code points in \a std_str.
4247        */
4248        size_type       find_last_not_of(const std::string& std_str, size_type idx = npos) const
4249        {
4250                if (d_cplength > 0)
4251                {
4252                        if (idx >= d_cplength)
4253                                idx = d_cplength - 1;
4254
4255                        const utf32* pt = &ptr()[idx];
4256
4257                        do
4258                        {
4259                                if (npos == find_codepoint(std_str, *pt--))
4260                                        return idx;
4261
4262                        } while (idx-- != 0);
4263
4264                }
4265
4266                return npos;
4267        }
4268
4269
4270        /*!
4271        \brief
4272                Find the last occurrence of one of a set of code points.
4273
4274        \param utf8_str
4275                Buffer containing null-terminated utf8 encoded data describing the set of code points.
4276
4277        \note
4278                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
4279                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
4280                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
4281                results.
4282
4283        \param idx
4284                Index of the start point for the search
4285
4286        \return
4287                - Index of the last occurrence of any one of the code points in \a utf8_str starting from \a idx.
4288                - npos if none of the code points in \a utf8_str were found.
4289
4290        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4291        */
4292        size_type       find_last_of(const utf8* utf8_str, size_type idx = npos) const
4293        {
4294                return find_last_of(utf8_str, idx, utf_length(utf8_str));
4295        }
4296
4297        /*!
4298        \brief
4299                Find the last code point that is not one of a set of code points.
4300
4301        \param utf8_str
4302                Buffer containing null-terminated utf8 encoded data describing the set of code points.
4303
4304        \note
4305                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
4306                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
4307                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
4308                results.
4309
4310        \param idx
4311                Index of the start point for the search
4312
4313        \return
4314                - Index of the last code point that does not match any one of the code points in \a utf8_str starting from \a idx.
4315                - npos if all code points matched one of the code points in \a utf8_str.
4316
4317        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4318        */
4319        size_type       find_last_not_of(const utf8* utf8_str, size_type idx = npos) const
4320        {
4321                return find_last_not_of(utf8_str, idx, utf_length(utf8_str));
4322        }
4323
4324        /*!
4325        \brief
4326                Find the last occurrence of one of a set of code points.
4327
4328        \param utf8_str
4329                Buffer containing utf8 encoded data describing the set of code points.
4330
4331        \note
4332                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
4333                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
4334                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
4335                results.
4336
4337        \param idx
4338                Index of the start point for the search
4339
4340        \param str_len
4341                Length of the utf8 encoded data in utf8 code units (not code points).
4342
4343        \return
4344                - Index of the last occurrence of any one of the code points in \a utf8_str starting from from \a idx.
4345                - npos if none of the code points in \a utf8_str were found.
4346
4347        \exception std::length_error    Thrown if \a str_len was 'npos'.
4348        */
4349        size_type       find_last_of(const utf8* utf8_str, size_type idx, size_type str_len) const
4350        {
4351                if (str_len == npos)
4352                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
4353
4354                if (d_cplength > 0)
4355                {
4356                        if (idx >= d_cplength)
4357                                idx = d_cplength - 1;
4358
4359                        size_type encsze = encoded_size(utf8_str, str_len);
4360
4361                        const utf32* pt = &ptr()[idx];
4362
4363                        do
4364                        {
4365                                if (npos != find_codepoint(utf8_str, encsze, *pt--))
4366                                        return idx;
4367
4368                        } while (idx-- != 0);
4369
4370                }
4371
4372                return npos;
4373        }
4374
4375        /*!
4376        \brief
4377                Find the last code point that is not one of a set of code points.
4378
4379        \param utf8_str
4380                Buffer containing utf8 encoded data describing the set of code points.
4381
4382        \note
4383                A basic string literal (cast to utf8*) can be passed to this function, provided that the string is
4384                comprised only of code points 0x00..0x7f.  The use of extended ASCII characters (with values >0x7f)
4385                would result in incorrect behaviour as the String will attempt to 'decode' the data, with unpredictable
4386                results.
4387
4388        \param idx
4389                Index of the start point for the search
4390
4391        \param str_len
4392                Length of the utf8 encoded data in utf8 code units (not code points).
4393
4394        \return
4395                - Index of the last code point that does not match any one of the code points in \a utf8_str starting from from \a idx.
4396                - npos if all code points matched one of the code points in \a utf8_str.
4397
4398        \exception std::length_error    Thrown if \a str_len was 'npos'.
4399        */
4400        size_type       find_last_not_of(const utf8* utf8_str, size_type idx, size_type str_len) const
4401        {
4402                if (str_len == npos)
4403                        throw std::length_error("Length for utf8 encoded string can not be 'npos'");
4404
4405                if (d_cplength > 0)
4406                {
4407                        if (idx >= d_cplength)
4408                                idx = d_cplength - 1;
4409
4410                        size_type encsze = encoded_size(utf8_str, str_len);
4411
4412                        const utf32* pt = &ptr()[idx];
4413
4414                        do
4415                        {
4416                                if (npos == find_codepoint(utf8_str, encsze, *pt--))
4417                                        return idx;
4418
4419                        } while (idx-- != 0);
4420
4421                }
4422
4423                return npos;
4424        }
4425
4426
4427        /*!
4428        \brief
4429                Search for last occurrence of a given code point
4430
4431        \param code_point
4432                The utf32 code point to search for
4433
4434        \param idx
4435                Index of the code point where the search is to start.
4436
4437        \return
4438                - Index of the last occurrence of \a code_point starting from \a idx.
4439                - npos if the code point could not be found
4440        */
4441        size_type       find_last_of(utf32 code_point, size_type idx = npos) const
4442        {
4443                return rfind(code_point, idx);
4444        }
4445
4446        /*!
4447        \brief
4448                Search for the last code point that does not match a given code point
4449
4450        \param code_point
4451                The utf32 code point to search for
4452
4453        \param idx
4454                Index of the code point where the search is to start.
4455
4456        \return
4457                - Index of the last code point that does not match \a code_point starting from from \a idx.
4458                - npos if all code points matched \a code_point
4459        */
4460        size_type       find_last_not_of(utf32 code_point, size_type idx = npos) const
4461        {
4462                if (d_cplength > 0)
4463                {
4464                        if (idx >= d_cplength)
4465                                idx = d_cplength - 1;
4466
4467                        do
4468                        {
4469                                if ((*this)[idx] != code_point)
4470                                        return idx;
4471
4472                        } while(idx-- != 0);
4473
4474                }
4475
4476                return npos;
4477        }
4478
4479
4480        /*!
4481        \brief
4482                Find the last occurrence of one of a set of chars.
4483
4484        \param c_str
4485                c-string describing the set of chars.
4486
4487        \param idx
4488                Index of the start point for the search
4489
4490        \return
4491                - Index of the last occurrence of any one of the chars in \a c_str starting from \a idx.
4492                - npos if none of the chars in \a c_str were found.
4493
4494        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4495        */
4496        size_type       find_last_of(const char* cstr, size_type idx = npos) const
4497        {
4498                return find_last_of(cstr, idx, strlen(cstr));
4499        }
4500
4501
4502        /*!
4503        \brief
4504                Find the last code point that is not one of a set of chars.
4505
4506        \param c_str
4507                c-string describing the set of chars.
4508
4509        \param idx
4510                Index of the start point for the search
4511
4512        \return
4513                - Index of the last code point that does not match any one of the chars in \a c_str starting from \a idx.
4514                - npos if all code points matched any of the chars in \a c_str.
4515
4516        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4517        */
4518        size_type       find_last_not_of(const char* cstr, size_type idx = npos) const
4519        {
4520                return find_last_not_of(cstr, idx, strlen(cstr));
4521        }
4522
4523
4524        /*!
4525        \brief
4526                Find the last occurrence of one of a set of chars.
4527
4528        \param chars
4529                char array containing the set of chars.
4530
4531        \param idx
4532                Index of the start point for the search
4533
4534        \param chars_len
4535                Number of chars in the char array.
4536
4537        \return
4538                - Index of the last occurrence of any one of the chars in \a chars, starting from from \a idx.
4539                - npos if none of the chars in \a chars were found.
4540
4541        \exception std::length_error    Thrown if \a chars_len was 'npos'.
4542        */
4543        size_type       find_last_of(const char* chars, size_type idx, size_type chars_len) const
4544        {
4545                if (chars_len == npos)
4546                        throw std::length_error("Length for char array can not be 'npos'");
4547
4548                if (d_cplength > 0)
4549                {
4550                        if (idx >= d_cplength)
4551                                idx = d_cplength - 1;
4552
4553                        const utf32* pt = &ptr()[idx];
4554
4555                        do
4556                        {
4557                                if (npos != find_codepoint(chars, chars_len, *pt--))
4558                                        return idx;
4559
4560                        } while (idx-- != 0);
4561
4562                }
4563
4564                return npos;
4565        }
4566
4567
4568        /*!
4569        \brief
4570                Find the last code point that is not one of a set of chars.
4571
4572        \param chars
4573                char array containing the set of chars.
4574
4575        \param idx
4576                Index of the start point for the search
4577
4578        \param chars_len
4579                Number of chars in the char array.
4580
4581        \return
4582                - Index of the last code point that does not match any one of the chars in \a chars, starting from from \a idx.
4583                - npos if all code points matched any of the chars in \a chars.
4584
4585        \exception std::length_error    Thrown if \a chars_len was 'npos'.
4586        */
4587        size_type       find_last_not_of(const char* chars, size_type idx, size_type chars_len) const
4588        {
4589                if (chars_len == npos)
4590                        throw std::length_error("Length for char array can not be 'npos'");
4591
4592                if (d_cplength > 0)
4593                {
4594                        if (idx >= d_cplength)
4595                                idx = d_cplength - 1;
4596
4597                        const utf32* pt = &ptr()[idx];
4598
4599                        do
4600                        {
4601                                if (npos == find_codepoint(chars, chars_len, *pt--))
4602                                        return idx;
4603
4604                        } while (idx-- != 0);
4605
4606                }
4607
4608                return npos;
4609        }
4610
4611
4612        //////////////////////////////////////////////////////////////////////////
4613        // Substring
4614        //////////////////////////////////////////////////////////////////////////
4615        /*!
4616        \brief
4617                Returns a substring of this String.
4618
4619        \param idx
4620                Index of the first code point to use for the sub-string.
4621
4622        \param len
4623                Maximum number of code points to use for the sub-string
4624
4625        \return
4626                A String object containing the specified sub-string.
4627
4628        \exception std::out_of_range    Thrown if \a idx is invalid for this String.
4629        */
4630        String  substr(size_type idx = 0, size_type len = npos) const
4631        {
4632                if (d_cplength < idx)
4633                        throw std::out_of_range("Index is out of range for this CEGUI::String");
4634
4635                return String(*this, idx, len);
4636        }
4637
4638        //////////////////////////////////////////////////////////////////////////
4639        // Iterator creation
4640        //////////////////////////////////////////////////////////////////////////
4641        /*!
4642        \brief
4643                Return a forwards iterator that describes the beginning of the String
4644
4645        \return
4646                iterator object that describes the beginning of the String.
4647        */
4648        iterator                begin(void)
4649        {
4650                return iterator(ptr());
4651        }
4652
4653        /*!
4654        \brief
4655                Return a constant forwards iterator that describes the beginning of the String
4656
4657        \return
4658                const_iterator object that describes the beginning of the String.
4659        */
4660        const_iterator  begin(void) const
4661        {
4662                return const_iterator(ptr());
4663        }
4664
4665        /*!
4666        \brief
4667                Return a forwards iterator that describes the end of the String
4668
4669        \return
4670                iterator object that describes the end of the String.
4671        */
4672        iterator                end(void)
4673        {
4674                return iterator(&ptr()[d_cplength]);
4675        }
4676
4677        /*!
4678        \brief
4679                Return a constant forwards iterator that describes the end of the String
4680
4681        \return
4682                const_iterator object that describes the end of the String.
4683        */
4684        const_iterator  end(void) const
4685        {
4686                return const_iterator(&ptr()[d_cplength]);
4687        }
4688
4689        /*!
4690        \brief
4691                Return a reverse iterator that describes the beginning of the String
4692
4693        \return
4694                reverse_iterator object that describes the beginning of the String (so is actually at the end)
4695        */
4696        reverse_iterator                rbegin(void)
4697        {
4698                return reverse_iterator(end());
4699        }
4700
4701        /*!
4702        \brief
4703                Return a constant reverse iterator that describes the beginning of the String
4704
4705        \return
4706                const_reverse_iterator object that describes the beginning of the String (so is actually at the end)
4707        */
4708        const_reverse_iterator  rbegin(void) const
4709        {
4710                return const_reverse_iterator(end());
4711        }
4712
4713        /*!
4714        \brief
4715                Return a reverse iterator that describes the end of the String
4716
4717        \return
4718                reverse_iterator object that describes the end of the String (so is actually at the beginning)
4719        */
4720        reverse_iterator                rend(void)
4721        {
4722                return reverse_iterator(begin());
4723        }
4724
4725        /*!
4726        \brief
4727                Return a constant reverse iterator that describes the end of the String
4728
4729        \return
4730                const_reverse_iterator object that describes the end of the String (so is actually at the beginning)
4731        */
4732        const_reverse_iterator  rend(void) const
4733        {
4734                return const_reverse_iterator(begin());
4735        }
4736
4737private:
4738        /*************************************************************************
4739                Implementation Functions
4740        *************************************************************************/
4741        // string management
4742
4743        // change size of allocated buffer so it is at least 'new_size'.
4744        // May or may not cause re-allocation and copy of buffer if size is larger
4745        // will never re-allocate to make size smaller.  (see trim())
4746    bool        grow(size_type new_size);
4747
4748        // perform re-allocation to remove wasted space.
4749    void        trim(void);
4750
4751        // set the length of the string, and terminate it, according to the given value (will not re-allocate, use grow() first).
4752        void    setlen(size_type len)
4753        {
4754                d_cplength = len;
4755                ptr()[len] = (utf32)(0);
4756        }
4757
4758        // return a ptr to the buffer in use.
4759        utf32*  ptr(void)
4760        {
4761                return (d_reserve > STR_QUICKBUFF_SIZE) ? d_buffer : d_quickbuff;
4762        }
4763
4764        // return a ptr tot he buffer in use (const version)
4765        const utf32*    ptr(void) const
4766        {
4767                return (d_reserve > STR_QUICKBUFF_SIZE) ? d_buffer : d_quickbuff;
4768        }
4769
4770        // initialise string object
4771        void    init(void)
4772        {
4773                d_reserve                       = STR_QUICKBUFF_SIZE;
4774                d_encodedbuff           = NULL;
4775                d_encodedbufflen        = 0;
4776                d_encodeddatlen         = 0;
4777                setlen(0);
4778        }
4779
4780        // return true if the given pointer is inside the string data
4781        bool    inside(utf32* inptr)
4782        {
4783                if (inptr < ptr() || ptr() + d_cplength <= inptr)
4784                        return false;
4785                else
4786                        return true;
4787        }
4788
4789        // compute distance between two iterators, returning a 'safe' value
4790        size_type safe_iter_dif(const const_iterator& iter1, const const_iterator& iter2) const
4791        {
4792                return (iter1.d_ptr == 0) ? 0 : (iter1 - iter2);
4793        }
4794       
4795        // encoding functions
4796        // for all:
4797        //      src_len is in code units, or 0 for null terminated string.
4798        //      dest_len is in code units.
4799        //      returns number of code units put into dest buffer.
4800        size_type encode(const utf32* src, utf8* dest, size_type dest_len, size_type src_len = 0) const
4801        {
4802                // count length for null terminated source...
4803                if (src_len == 0)
4804                {
4805                        src_len = utf_length(src);
4806                }
4807
4808                size_type destCapacity = dest_len;
4809
4810                // while there is data in the source buffer,
4811                for (uint idx = 0; idx < src_len; ++idx)
4812                {
4813                        utf32   cp = src[idx];
4814
4815                        // check there is enough destination buffer to receive this encoded unit (exit loop & return if not)
4816                        if (destCapacity < encoded_size(cp))
4817                        {
4818                                break;
4819                        }
4820
4821                        if (cp < 0x80)
4822                        {
4823                                *dest++ = (utf8)cp;
4824                                --destCapacity;
4825                        }
4826                        else if (cp < 0x0800)
4827                        {
4828                                *dest++ = (utf8)((cp >> 6) | 0xC0);
4829                                *dest++ = (utf8)((cp & 0x3F) | 0x80);
4830                                destCapacity -= 2;
4831                        }
4832                        else if (cp < 0x10000)
4833                        {
4834                                *dest++ = (utf8)((cp >> 12) | 0xE0);
4835                                *dest++ = (utf8)(((cp >> 6) & 0x3F) | 0x80);
4836                                *dest++ = (utf8)((cp & 0x3F) | 0x80);
4837                                destCapacity -= 3;
4838                        }
4839                        else
4840                        {
4841                                *dest++ = (utf8)((cp >> 18) | 0xF0);
4842                                *dest++ = (utf8)(((cp >> 12) & 0x3F) | 0x80);
4843                                *dest++ = (utf8)(((cp >> 6) & 0x3F) | 0x80);
4844                                *dest++ = (utf8)((cp & 0x3F) | 0x80);
4845                                destCapacity -= 4;
4846                        }
4847
4848                }
4849
4850                return dest_len - destCapacity;
4851        }
4852
4853        size_type encode(const utf8* src, utf32* dest, size_type dest_len, size_type src_len = 0) const
4854        {
4855                // count length for null terminated source...
4856                if (src_len == 0)
4857                {
4858                        src_len = utf_length(src);
4859                }
4860
4861                size_type destCapacity = dest_len;
4862
4863                // while there is data in the source buffer, and space in the dest buffer
4864                for (uint idx = 0; ((idx < src_len) && (destCapacity > 0));)
4865                {
4866                        utf32   cp;
4867                        utf8    cu = src[idx++];
4868
4869                        if (cu < 0x80)
4870                        {
4871                                cp = (utf32)(cu);
4872                        }
4873                        else if (cu < 0xE0)
4874                        {
4875                                cp = ((cu & 0x1F) << 6);
4876                                cp |= (src[idx++] & 0x3F);
4877                        }
4878                        else if (cu < 0xF0)
4879                        {
4880                                cp = ((cu & 0x0F) << 12);
4881                                cp |= ((src[idx++] & 0x3F) << 6);
4882                                cp |= (src[idx++] & 0x3F);
4883                        }
4884                        else
4885                        {
4886                                cp = ((cu & 0x07) << 18);
4887                                cp |= ((src[idx++] & 0x3F) << 12);
4888                                cp |= ((src[idx++] & 0x3F) << 6);
4889                                cp |= (src[idx++] & 0x3F);
4890                        }
4891
4892                        *dest++ = cp;
4893                        --destCapacity;
4894                }
4895
4896                return dest_len - destCapacity;
4897        }
4898
4899        // return the number of utf8 code units required to encode the given utf32 code point
4900        size_type encoded_size(utf32 code_point) const
4901        {
4902                if (code_point < 0x80)
4903                        return 1;
4904                else if (code_point < 0x0800)
4905                        return 2;
4906                else if (code_point < 0x10000)
4907                        return 3;
4908                else
4909                        return 4;
4910        }
4911
4912        // return number of code units required to re-encode given null-terminated utf32 data as utf8.  return does not include terminating null.
4913        size_type encoded_size(const utf32* buf) const
4914        {
4915                return encoded_size(buf, utf_length(buf));
4916        }
4917
4918        // return number of code units required to re-encode given utf32 data as utf8.   len is number of code units in 'buf'.
4919        size_type encoded_size(const utf32* buf, size_type len) const
4920        {
4921                size_type count = 0;
4922
4923                while (len--)
4924                {
4925                        count += encoded_size(*buf++);
4926                }
4927
4928                return count;
4929        }
4930
4931        // return number of utf32 code units required to re-encode given utf8 data as utf32.  return does not include terminating null.
4932        size_type encoded_size(const utf8* buf) const
4933        {
4934                return encoded_size(buf, utf_length(buf));
4935        }
4936
4937        // return number of utf32 code units required to re-encode given utf8 data as utf32.  len is number of code units in 'buf'.
4938        size_type encoded_size(const utf8* buf, size_type len) const
4939        {
4940                utf8 tcp;
4941                size_type count = 0;
4942
4943                while (len--)
4944                {
4945                        tcp = *buf++;
4946                        ++count;
4947
4948                        if (tcp < 0x80)
4949                        {
4950                        }
4951                        else if (tcp < 0xE0)
4952                        {
4953                                --len;
4954                                ++buf;
4955                        }
4956                        else if (tcp < 0xF0)
4957                        {
4958                                len -= 2;
4959                                buf += 2;
4960                        }
4961                        else
4962                        {
4963                                len -= 2;
4964                                buf += 3;
4965                        }
4966
4967                }
4968
4969                return count;
4970        }
4971
4972        // return number of code units in a null terminated string
4973        size_type utf_length(const utf8* utf8_str) const
4974        {
4975                size_type cnt = 0;
4976                while (*utf8_str++)
4977                        cnt++;
4978
4979                return cnt;
4980        }
4981
4982        // return number of code units in a null terminated string
4983        size_type utf_length(const utf32* utf32_str) const
4984        {
4985                size_type cnt = 0;
4986                while (*utf32_str++)
4987                        cnt++;
4988
4989                return cnt;
4990        }
4991
4992        // build an internal buffer with the string encoded as utf8 (remains valid until string is modified).
4993    utf8* build_utf8_buff(void) const;
4994
4995        // compare two utf32 buffers
4996        int     utf32_comp_utf32(const utf32* buf1, const utf32* buf2, size_type cp_count) const
4997        {
4998                if (!cp_count)
4999                        return 0;
5000
5001                while ((--cp_count) && (*buf1 == *buf2))
5002                        buf1++, buf2++;
5003               
5004                return *buf1 - *buf2;
5005        }
5006
5007        // compare utf32 buffer with char buffer (chars are taken to be code-points in the range 0x00-0xFF)
5008        int utf32_comp_char(const utf32* buf1, const char* buf2, size_type cp_count) const
5009        {
5010                if (!cp_count)
5011                        return 0;
5012
5013                while ((--cp_count) && (*buf1 == static_cast<utf32>(static_cast<unsigned char>(*buf2))))
5014                        buf1++, buf2++;
5015
5016                return *buf1 - static_cast<utf32>(static_cast<unsigned char>(*buf2));
5017        }
5018
5019        // compare utf32 buffer with encoded utf8 data
5020        int utf32_comp_utf8(const utf32* buf1, const utf8* buf2, size_type cp_count) const
5021        {
5022                if (!cp_count)
5023                        return 0;
5024
5025                utf32   cp;
5026                utf8    cu;
5027
5028                do
5029                {
5030                        cu = *buf2++;
5031
5032                        if (cu < 0x80)
5033                        {
5034                                cp = (utf32)(cu);
5035                        }
5036                        else if (cu < 0xE0)
5037                        {
5038                                cp = ((cu & 0x1F) << 6);
5039                                cp |= (*buf2++ & 0x3F);
5040                        }
5041                        else if (cu < 0xF0)
5042                        {
5043                                cp = ((cu & 0x0F) << 12);
5044                                cp |= ((*buf2++ & 0x3F) << 6);
5045                                cp |= (*buf2++ & 0x3F);
5046                        }
5047                        else
5048                        {
5049                                cp = ((cu & 0x07) << 18);
5050                                cp |= ((*buf2++ & 0x3F) << 12);
5051                                cp |= ((*buf2++ & 0x3F) << 6);
5052                                cp |= (*buf2++ & 0x3F);
5053                        }
5054
5055                } while ((*buf1++ == cp) && (--cp_count));
5056
5057                return (*--buf1) - cp;
5058        }
5059
5060        // return index of first occurrence of 'code_point' in std::string 'str', or npos if none
5061        size_type find_codepoint(const std::string& str, utf32 code_point) const
5062        {
5063                size_type idx = 0, sze = (size_type)str.size();
5064
5065                while (idx != sze)
5066                {
5067                        if (code_point == static_cast<utf32>(static_cast<unsigned char>(str[idx])))
5068                                return idx;
5069
5070                        ++idx;
5071                }
5072
5073                return npos;
5074        }
5075
5076        // return index of first occurrence of 'code_point' in utf8 encoded string 'str', or npos if none.  len is in code points.
5077        size_type find_codepoint(const utf8* str, size_type len, utf32 code_point) const
5078        {
5079                size_type idx = 0;
5080
5081                utf32   cp;
5082                utf8    cu;
5083
5084                while (idx != len) {
5085                        cu = *str++;
5086
5087                        if (cu < 0x80)
5088                        {
5089                                cp = (utf32)(cu);
5090                        }
5091                        else if (cu < 0xE0)
5092                        {
5093                                cp = ((cu & 0x1F) << 6);
5094                                cp |= (*str++ & 0x3F);
5095                        }
5096                        else if (cu < 0xF0)
5097                        {
5098                                cp = ((cu & 0x0F) << 12);
5099                                cp |= ((*str++ & 0x3F) << 6);
5100                                cp |= (*str++ & 0x3F);
5101                        }
5102                        else
5103                        {
5104                                cp = ((cu & 0x07) << 18);
5105                                cp |= ((*str++ & 0x3F) << 12);
5106                                cp |= ((*str++ & 0x3F) << 6);
5107                                cp |= (*str++ & 0x3F);
5108                        }
5109
5110                        if (code_point == cp)
5111                                return idx;
5112
5113                        ++idx;
5114                }
5115
5116                return npos;
5117        }
5118
5119
5120        // return index of first occurrence of 'code_point' in char array 'chars', or npos if none
5121        size_type find_codepoint(const char* chars, size_type chars_len, utf32 code_point) const
5122        {
5123                for (size_type idx = 0; idx != chars_len; ++idx)
5124                {
5125                        if (code_point == static_cast<utf32>(static_cast<unsigned char>(chars[idx])))
5126                                return idx;
5127                }
5128
5129                return npos;
5130        }
5131
5132};
5133
5134
5135//////////////////////////////////////////////////////////////////////////
5136// Comparison operators
5137//////////////////////////////////////////////////////////////////////////
5138/*!
5139\brief
5140        Return true if String \a str1 is equal to String \a str2
5141*/
5142bool CEGUIEXPORT        operator==(const String& str1, const String& str2);
5143
5144/*!
5145\brief
5146        Return true if String \a str is equal to std::string \a std_str
5147*/
5148bool CEGUIEXPORT        operator==(const String& str, const std::string& std_str);
5149
5150/*!
5151\brief
5152        Return true if String \a str is equal to std::string \a std_str
5153*/
5154bool CEGUIEXPORT        operator==(const std::string& std_str, const String& str);
5155
5156/*!
5157\brief
5158        Return true if String \a str is equal to null-terminated utf8 data \a utf8_str
5159*/
5160bool CEGUIEXPORT        operator==(const String& str, const utf8* utf8_str);
5161
5162/*!
5163\brief
5164        Return true if String \a str is equal to null-terminated utf8 data \a utf8_str
5165*/
5166bool CEGUIEXPORT        operator==(const utf8* utf8_str, const String& str);
5167
5168/*!
5169\brief
5170        Return true if String \a str1 is not equal to String \a str2
5171*/
5172bool CEGUIEXPORT        operator!=(const String& str1, const String& str2);
5173
5174/*!
5175\brief
5176        Return true if String \a str is not equal to std::string \a std_str
5177*/
5178bool CEGUIEXPORT        operator!=(const String& str, const std::string& std_str);
5179
5180/*!
5181\brief
5182        Return true if String \a str is not equal to std::string \a std_str
5183*/
5184bool CEGUIEXPORT        operator!=(const std::string& std_str, const String& str);
5185
5186/*!
5187\brief
5188        Return true if String \a str is not equal to null-terminated utf8 data \a utf8_str
5189*/
5190bool CEGUIEXPORT        operator!=(const String& str, const utf8* utf8_str);
5191
5192/*!
5193\brief
5194        Return true if String \a str is not equal to null-terminated utf8 data \a utf8_str
5195*/
5196bool CEGUIEXPORT        operator!=(const utf8* utf8_str, const String& str);
5197
5198/*!
5199\brief
5200        Return true if String \a str1 is lexicographically less than String \a str2
5201*/
5202bool CEGUIEXPORT        operator<(const String& str1, const String& str2);
5203
5204/*!
5205\brief
5206        Return true if String \a str is lexicographically less than std::string \a std_str
5207*/
5208bool CEGUIEXPORT        operator<(const String& str, const std::string& std_str);
5209
5210/*!
5211\brief
5212        Return true if std::string \a std_str is lexicographically less than String \a str
5213*/
5214bool CEGUIEXPORT        operator<(const std::string& std_str, const String& str);
5215
5216/*!
5217\brief
5218        Return true if String \a str is lexicographically less than null-terminated utf8 data \a utf8_str
5219*/
5220bool CEGUIEXPORT        operator<(const String& str, const utf8* utf8_str);
5221
5222/*!
5223\brief
5224        Return true if null-terminated utf8 data \a utf8_str is lexicographically less than String \a str
5225*/
5226bool CEGUIEXPORT        operator<(const utf8* utf8_str, const String& str);
5227
5228/*!
5229\brief
5230        Return true if String \a str1 is lexicographically greater than String \a str2
5231*/
5232bool CEGUIEXPORT        operator>(const String& str1, const String& str2);
5233
5234/*!
5235\brief
5236        Return true if String \a str is lexicographically greater than std::string \a std_str
5237*/
5238bool CEGUIEXPORT        operator>(const String& str, const std::string& std_str);
5239
5240/*!
5241\brief
5242        Return true if std::string \a std_str is lexicographically greater than String \a str
5243*/
5244bool CEGUIEXPORT        operator>(const std::string& std_str, const String& str);
5245
5246/*!
5247\brief
5248        Return true if String \a str is lexicographically greater than null-terminated utf8 data \a utf8_str
5249*/
5250bool CEGUIEXPORT        operator>(const String& str, const utf8* utf8_str);
5251
5252/*!
5253\brief
5254        Return true if null-terminated utf8 data \a utf8_str is lexicographically greater than String \a str
5255*/
5256bool CEGUIEXPORT        operator>(const utf8* utf8_str, const String& str);
5257
5258/*!
5259\brief
5260        Return true if String \a str1 is lexicographically less than or equal to String \a str2
5261*/
5262bool CEGUIEXPORT        operator<=(const String& str1, const String& str2);
5263
5264/*!
5265\brief
5266        Return true if String \a str is lexicographically less than or equal to std::string \a std_str
5267*/
5268bool CEGUIEXPORT        operator<=(const String& str, const std::string& std_str);
5269
5270/*!
5271\brief
5272        Return true if std::string \a std_str is lexicographically less than or equal to String \a str
5273*/
5274bool CEGUIEXPORT        operator<=(const std::string& std_str, const String& str);
5275
5276/*!
5277\brief
5278        Return true if String \a str is lexicographically less than or equal to null-terminated utf8 data \a utf8_str
5279*/
5280bool CEGUIEXPORT        operator<=(const String& str, const utf8* utf8_str);
5281
5282/*!
5283\brief
5284        Return true if null-terminated utf8 data \a utf8_str is lexicographically less than or equal to String \a str
5285*/
5286bool CEGUIEXPORT        operator<=(const utf8* utf8_str, const String& str);
5287
5288/*!
5289\brief
5290        Return true if String \a str1 is lexicographically greater than or equal to String \a str2
5291*/
5292bool CEGUIEXPORT        operator>=(const String& str1, const String& str2);
5293
5294/*!
5295\brief
5296        Return true if String \a str is lexicographically greater than or equal to std::string \a std_str
5297*/
5298bool CEGUIEXPORT        operator>=(const String& str, const std::string& std_str);
5299
5300/*!
5301\brief
5302        Return true if std::string \a std_str is lexicographically greater than or equal to String \a str
5303*/
5304bool CEGUIEXPORT        operator>=(const std::string& std_str, const String& str);
5305
5306/*!
5307\brief
5308        Return true if String \a str is lexicographically greater than or equal to null-terminated utf8 data \a utf8_str
5309*/
5310bool CEGUIEXPORT        operator>=(const String& str, const utf8* utf8_str);
5311
5312/*!
5313\brief
5314        Return true if null-terminated utf8 data \a utf8_str is lexicographically greater than or equal to String \a str
5315*/
5316bool CEGUIEXPORT        operator>=(const utf8* utf8_str, const String& str);
5317
5318/*!
5319\brief
5320        Return true if String \a str is equal to c-string \a c_str
5321*/
5322bool CEGUIEXPORT        operator==(const String& str, const char* c_str);
5323
5324/*!
5325\brief
5326        Return true if c-string \a c_str is equal to String \a str
5327*/
5328bool CEGUIEXPORT        operator==(const char* c_str, const String& str);
5329
5330/*!
5331\brief
5332        Return true if String \a str is not equal to c-string \a c_str
5333*/
5334bool CEGUIEXPORT        operator!=(const String& str, const char* c_str);
5335
5336/*!
5337\brief
5338        Return true if c-string \a c_str is not equal to String \a str
5339*/
5340bool CEGUIEXPORT        operator!=(const char* c_str, const String& str);
5341
5342/*!
5343\brief
5344        Return true if String \a str is lexicographically less than c-string \a c_str
5345*/
5346bool CEGUIEXPORT        operator<(const String& str, const char* c_str);
5347
5348/*!
5349\brief
5350        Return true if c-string \a c_str is lexicographically less than String \a str
5351*/
5352bool CEGUIEXPORT        operator<(const char* c_str, const String& str);
5353
5354/*!
5355\brief
5356Return true if String \a str is lexicographically greater than c-string \a c_str
5357*/
5358bool CEGUIEXPORT        operator>(const String& str, const char* c_str);
5359
5360/*!
5361\brief
5362Return true if c-string \a c_str is lexicographically greater than String \a str
5363*/
5364bool CEGUIEXPORT        operator>(const char* c_str, const String& str);
5365
5366/*!
5367\brief
5368        Return true if String \a str is lexicographically less than or equal to c-string \a c_str
5369*/
5370bool CEGUIEXPORT        operator<=(const String& str, const char* c_str);
5371
5372/*!
5373\brief
5374        Return true if c-string \a c_str is lexicographically less than or equal to String \a str
5375*/
5376bool CEGUIEXPORT        operator<=(const char* c_str, const String& str);
5377
5378/*!
5379\brief
5380        Return true if String \a str is lexicographically greater than or equal to c-string \a c_str
5381*/
5382bool CEGUIEXPORT        operator>=(const String& str, const char* c_str);
5383
5384/*!
5385\brief
5386        Return true if c-string \a c_str is lexicographically greater than or equal to String \a str
5387*/
5388bool CEGUIEXPORT        operator>=(const char* c_str, const String& str);
5389
5390//////////////////////////////////////////////////////////////////////////
5391// Concatenation operator functions
5392//////////////////////////////////////////////////////////////////////////
5393/*!
5394\brief
5395        Return String object that is the concatenation of the given inputs
5396
5397\param str1
5398        String object describing first part of the new string
5399
5400\param str2
5401        String object describing the second part of the new string
5402
5403\return
5404        A String object that is the concatenation of \a str1 and \a str2
5405
5406\exception std::length_error    Thrown if the resulting String would be too large.
5407*/
5408String CEGUIEXPORT      operator+(const String& str1, const String& str2);
5409
5410/*!
5411\brief
5412        Return String object that is the concatenation of the given inputs
5413
5414\param str
5415        String object describing first part of the new string
5416
5417\param std_str
5418        std::string object describing the second part of the new string
5419
5420\return
5421        A String object that is the concatenation of \a str and \a std_str
5422
5423\exception std::length_error    Thrown if the resulting String would be too large.
5424*/
5425String CEGUIEXPORT      operator+(const String& str, const std::string& std_str);
5426
5427/*!
5428\brief
5429        Return String object that is the concatenation of the given inputs
5430
5431\param std_str
5432        std::string object describing the first part of the new string
5433
5434\param str
5435        String object describing the second part of the new string
5436
5437\return
5438        A String object that is the concatenation of \a std_str and \a str
5439
5440\exception std::length_error    Thrown if the resulting String would be too large.
5441*/
5442String CEGUIEXPORT      operator+(const std::string& std_str, const String& str);
5443
5444/*!
5445\brief
5446        Return String object that is the concatenation of the given inputs
5447
5448\param str
5449        String object describing first part of the new string
5450
5451\param utf8_str
5452        Buffer containing null-terminated utf8 encoded data describing the second part of the new string
5453
5454\return
5455        A String object that is the concatenation of \a str and \a utf8_str
5456
5457\exception std::length_error    Thrown if the resulting String would be too large.
5458*/
5459String CEGUIEXPORT      operator+(const String& str, const utf8* utf8_str);
5460
5461/*!
5462\brief
5463        Return String object that is the concatenation of the given inputs
5464
5465\param utf8_str
5466        Buffer containing null-terminated utf8 encoded data describing the first part of the new string
5467
5468\param str
5469        String object describing the second part of the new string
5470
5471\return
5472        A String object that is the concatenation of \a utf8_str and \a str
5473
5474\exception std::length_error    Thrown if the resulting String would be too large.
5475*/
5476String CEGUIEXPORT      operator+(const utf8* utf8_str, const String& str);
5477
5478/*!
5479\brief
5480        Return String object that is the concatenation of the given inputs
5481
5482\param str
5483        String object describing the first part of the new string
5484
5485\param code_point
5486        utf32 code point describing the second part of the new string
5487
5488\return
5489        A String object that is the concatenation of \a str and \a code_point
5490
5491\exception std::length_error    Thrown if the resulting String would be too large.
5492*/
5493String CEGUIEXPORT      operator+(const String& str, utf32 code_point);
5494
5495/*!
5496\brief
5497        Return String object that is the concatenation of the given inputs
5498
5499\param code_point
5500        utf32 code point describing the first part of the new string
5501
5502\param str
5503        String object describing the second part of the new string
5504
5505\return
5506        A String object that is the concatenation of \a code_point and \a str
5507
5508\exception std::length_error    Thrown if the resulting String would be too large.
5509*/
5510String CEGUIEXPORT      operator+(utf32 code_point, const String& str);
5511
5512/*!
5513\brief
5514        Return String object that is the concatenation of the given inputs
5515
5516\param str
5517        String object describing first part of the new string
5518
5519\param c_str
5520        c-string describing the second part of the new string
5521
5522\return
5523        A String object that is the concatenation of \a str and \a c_str
5524
5525\exception std::length_error    Thrown if the resulting String would be too large.
5526*/
5527String CEGUIEXPORT      operator+(const String& str, const char* c_str);
5528
5529/*!
5530\brief
5531        Return String object that is the concatenation of the given inputs
5532
5533\param c_str
5534        c-string describing the first part of the new string
5535
5536\param str
5537        String object describing the second part of the new string
5538
5539\return
5540        A String object that is the concatenation of \a c_str and \a str
5541
5542\exception std::length_error    Thrown if the resulting String would be too large.
5543*/
5544String CEGUIEXPORT      operator+(const char* c_str, const String& str);
5545
5546
5547//////////////////////////////////////////////////////////////////////////
5548// Output (stream) functions
5549//////////////////////////////////////////////////////////////////////////
5550CEGUIEXPORT std::ostream& operator<<(std::ostream& s, const String& str);
5551
5552
5553//////////////////////////////////////////////////////////////////////////
5554// Modifying operations
5555//////////////////////////////////////////////////////////////////////////
5556/*!
5557\brief
5558        Swap the contents for two String objects
5559
5560\param str1
5561        String object whose contents are to be swapped with \a str2
5562
5563\param str2
5564        String object whose contents are to be swapped with \a str1
5565
5566\return
5567        Nothing
5568*/
5569void CEGUIEXPORT swap(String& str1, String& str2);
5570
5571
5572} // End of  CEGUI namespace section
5573
5574
5575#endif  // end of guard _CEGUIString_h_
Note: See TracBrowser for help on using the repository browser.