libUTL++
|
Regular expression. More...
#include <Regex.h>
Public Member Functions | |
Regex (const String &regex) | |
Constructor. More... | |
Regex (const char *str) | |
Constructor. More... | |
virtual int | compare (const Object &rhs) const |
Compare with another object. More... | |
virtual void | copy (const Object &rhs) |
Copy another instance. More... | |
virtual void | steal (Object &rhs) |
"Steal" the internal representation from another instance. More... | |
bool | compile () |
Compile the regex. More... | |
bool | match (const String &str, RegexMatch *m=nullptr, size_t idx=0) const |
Attempt to match the given string against the regex. More... | |
size_t | searchReplace (String &str, const String &rep) |
At each position within str, attempt to match the regex. More... | |
bool | ok () const |
Determine whether the regex is successfully compiled. More... | |
Public Member Functions inherited from utl::String | |
String (const char *s, bool owner=true, bool duplicate=true, bool caseSensitive=true) | |
Constructor. More... | |
String (size_t size) | |
Constructor. More... | |
String (char c) | |
Constructor. More... | |
virtual size_t | hash (size_t size) const |
Get the hash code for the object. More... | |
virtual void | serialize (Stream &stream, uint_t io, uint_t mode=ser_default) |
Serialize to or from a stream. More... | |
virtual size_t | innerAllocatedSize () const |
Get the "inner" allocated size. More... | |
virtual String | toString () const |
Return a string representation of self. More... | |
bool | isOwner () const |
Get the ownership flag. More... | |
void | setOwner (bool owner) |
Set the ownership flag (without doing anything else). More... | |
bool | isCaseSensitive () const |
Get the case-sensitive flag. More... | |
void | setCaseSensitive (bool caseSensitive) |
Set the case-sensitive flag. More... | |
bool | isMarked () const |
Get the marked flag. More... | |
void | setMarked (bool marked=true) |
Set the marked flag. More... | |
int | compareSubstring (size_t begin, const String &rhs, size_t n) |
Compare the given string against a substring of self. More... | |
int | compareSubstring (size_t begin, const char *rhs, size_t len) |
Compare the given string against a substring of self. More... | |
int | comparePrefix (const String &rhs) const |
Compare the given string against the beginning of self. More... | |
int | comparePrefix (const char *rhs) const |
Compare the given string against the beginning of self. More... | |
int | compareSuffix (const String &rhs) const |
Compare the given string against the end of self. More... | |
int | compareSuffix (const char *rhs) const |
Compare the given string against the end of self. More... | |
int | strcmp (const char *lhs, const char *rhs) const |
Compare two strings (case sensitive comparison iff isCaseSensitive()). More... | |
std::function< int(const char *, const char *)> | strcmp () const |
Get the string comparison function (strcmp or strcasecmp). More... | |
int | strncmp (const char *lhs, const char *rhs, size_t n) const |
Compare (up to) the first n bytes of two strings (case sensitive iff isCaseSensitive()). More... | |
std::function< int(const char *, const char *, size_t)> | strncmp () const |
Get the length-limited string comparison function (strncmp or strncasecmp). More... | |
String | backslashEscaped (const char *specials) const |
Backslash-escape a string. More... | |
String | backslashEscaped (const String &specials) const |
Backslash-escape a string. More... | |
String | backslashUnescaped () const |
Backslash-unescape a string. More... | |
bool | empty () const |
Is the string empty? | |
size_t | find (const String &str, size_t begin=0) const |
Find the first instance of the given string in self. More... | |
size_t | find (const char *str, size_t begin=0) const |
Find the first instance of the given string in self. More... | |
size_t | find (char c, size_t begin=0) const |
Find the first instance of the given character in self. More... | |
size_t | findBM (const String &str, size_t begin=0) const |
Use Boyer-Moore algorithm to find the first instance of the given string. More... | |
size_t | findBM (const char *str, size_t begin=0) const |
Use Boyer-Moore algorithm to find the first instance of the given string. More... | |
char | firstChar () const |
Get the first character in the string (nul if empty). More... | |
char | lastChar () const |
Get the last character in the string (nul if empty). More... | |
const char * | get () const |
Get const char*. More... | |
size_t | length () const |
Get the length of the string. More... | |
void | lengthInvalidate () const |
Invalidate the cached length of the string. More... | |
String | prefix (size_t n) const |
Get the first n characters of the string. More... | |
size_t | size () const |
Get the size of the allocated character array. More... | |
String | subString (size_t begin, size_t len=size_t_max) const |
Get a substring. More... | |
String | suffix (size_t n) const |
Get the last n characters of the string. More... | |
String | reversed () const |
Get a copy of self with the characters in reverse order. More... | |
String | nextToken (size_t &idx, char delim=' ', bool processQuotes=false) const |
Return the next token in a delimited string. More... | |
String & | clear () |
Reset to empty string. More... | |
String & | excise () |
Like clear(), but de-allocate the array instead of writing a nul character at position 0. | |
char * | get () |
Get char*. More... | |
char | get (size_t i) const |
Get the specified character. More... | |
String & | assertOwner () |
Make sure self has its own copy of the string. More... | |
void | assertOwner (size_t size, size_t increment=8) |
Make sure self has its own copy of the string. More... | |
void | economize () |
Make sure no extra space is allocated. More... | |
void | reserve (size_t size, size_t increment=8) |
Grow to the specified size. More... | |
String & | append (char c) |
Append the given character. More... | |
String & | append (const String &s) |
Append the given string. More... | |
String & | append (const char *s) |
Append the given string. More... | |
String & | append (const char *s, size_t len) |
Append the given string. More... | |
String | chop (size_t begin, size_t len=size_t_max) |
Chop out and return a sub-string. More... | |
void | remove (size_t begin, size_t len=size_t_max) |
Remove a sub-string (like chop(), but doesn't return the removed sub-string). More... | |
String & | replace (const String &lhs, const String &rhs) |
Replace all instances of lhs with rhs. More... | |
String & | replace (const char *lhs, const char *rhs) |
Replace all instances of lhs with rhs. More... | |
String & | replace (const char *lhs, const String &rhs) |
Replace all instances of lhs with rhs. More... | |
String & | replace (const String &lhs, const char *rhs) |
Replace all instances of lhs with rhs. More... | |
String & | replace (size_t begin, size_t len, const String &str) |
Replace a substring with a given string. More... | |
String & | reverse () |
Reverse the ordering of the characters. More... | |
void | set (const char *s, bool owner=true, bool duplicate=true, size_t length=size_t_max) |
Set a new string. More... | |
String & | toLower (size_t begin=0, size_t len=size_t_max) |
Convert characters to lowercase. More... | |
String & | toUpper (size_t begin=0, size_t len=size_t_max) |
Convert characters to uppercase. More... | |
String & | padBegin (size_t len, char c=' ') |
Pad with spaces (or another character) at the beginning. More... | |
String & | padEnd (size_t len, char c=' ') |
Pad with spaces (or another character) at the end. More... | |
String & | trim () |
Trim whitespace from the beginning and end. More... | |
String & | trimBegin () |
Trim whitespace from the beginning. More... | |
String & | trimEnd () |
Trim whitespace from the end. More... | |
String | operator+ (char c) const |
Get a copy of self with the given character appended. More... | |
String | operator+ (const char *str) const |
Get a copy of self with the given string appended. More... | |
String | operator+ (const String &rhs) const |
Get a copy of self with the given string appended. More... | |
String & | operator+= (char c) |
Append the given character to self. More... | |
String & | operator+= (const char *rhs) |
Append the given string to self. More... | |
String & | operator+= (const String &rhs) |
Append the given string to self. More... | |
String & | operator= (const char *str) |
Make self equal to a copy of the given character array. More... | |
String & | operator= (char c) |
Make self equal to the given character. More... | |
char & | operator[] (size_t i) |
Array access operator. More... | |
const char & | operator[] (size_t i) const |
Array access operator. More... | |
operator char * () | |
Conversion to char*. More... | |
operator const char * () | |
Conversion to const char*. More... | |
operator const char * () const | |
Conversion to const char*. More... | |
operator void * () | |
Conversion to void*. More... | |
operator const void * () | |
Conversion to const void*. More... | |
operator const void * () const | |
Conversion to const void*. More... | |
bool | operator< (const Object &rhs) const |
bool | operator<= (const Object &rhs) const |
bool | operator> (const Object &rhs) const |
bool | operator>= (const Object &rhs) const |
bool | operator== (const Object &rhs) const |
bool | operator!= (const Object &rhs) const |
bool | operator< (const String &rhs) const |
bool | operator<= (const String &rhs) const |
bool | operator> (const String &rhs) const |
bool | operator>= (const String &rhs) const |
bool | operator== (const String &rhs) const |
bool | operator!= (const String &rhs) const |
Public Member Functions inherited from utl::Object | |
void | clear () |
Revert to initial state. More... | |
virtual void | vclone (const Object &rhs) |
Make an exact copy of another instance. More... | |
virtual void | dump (Stream &os, uint_t level=uint_t_max) const |
Dump a human-readable representation of self to the given output stream. More... | |
void | dumpWithClassName (Stream &os, uint_t indent=4, uint_t level=uint_t_max) const |
Front-end for dump() that prints the object's class name. More... | |
virtual const Object & | getKey () const |
Get the key for this object. More... | |
bool | hasKey () const |
Determine whether or not the object has a key. More... | |
virtual const Object & | getProxiedObject () const |
Get the proxied object (= self if none). More... | |
virtual Object & | getProxiedObject () |
Get the proxied object (= self if none). More... | |
bool | _isA (const RunTimeClass *runTimeClass) const |
Determine whether self's class is a descendant of the given class. More... | |
operator String () const | |
Conversion to String. More... | |
size_t | allocatedSize () const |
Get the total allocated size of this object. More... | |
virtual void | addOwnedIt (const class FwdIt *it) const |
Notify self that it owns the given iterator. More... | |
virtual void | removeOwnedIt (const class FwdIt *it) const |
Notify self that the given owned iterator has been destroyed. More... | |
bool | operator< (const Object &rhs) const |
Less-than operator. More... | |
bool | operator<= (const Object &rhs) const |
Less-than-or-equal-to operator. More... | |
bool | operator> (const Object &rhs) const |
Greater-than operator. More... | |
bool | operator>= (const Object &rhs) const |
Greater-than-or-equal-to operator. More... | |
bool | operator== (const Object &rhs) const |
Equal-to operator. More... | |
bool | operator!= (const Object &rhs) const |
Unequal-to operator. More... | |
void | serializeIn (Stream &is, uint_t mode=ser_default) |
Serialize from an input stream. More... | |
void | serializeOut (Stream &os, uint_t mode=ser_default) const |
Serialize to an output stream. More... | |
void | serializeOutBoxed (Stream &os, uint_t mode=ser_default) const |
Serialize a boxed object to an output stream. More... | |
Public Member Functions inherited from utl::FlagsMI | |
FlagsMI () | |
Constructor. More... | |
virtual | ~FlagsMI () |
Destructor. More... | |
void | copyFlags (const FlagsMI &rhs) |
Copy the given flags. More... | |
void | copyFlags (const FlagsMI &rhs, uint_t lsb, uint_t msb) |
Copy (some of) the given flags. More... | |
void | copyFlags (uint64_t flags, uint_t lsb, uint_t msb) |
Copy (some of) the given flags. More... | |
bool | getFlag (uint_t flagNum) const |
Get a user-defined flag. More... | |
void | setFlag (uint_t flagNum, bool val) |
Set a user-defined flag. More... | |
uint64_t | getFlagsNumber (uint64_t mask, uint64_t shift=0) |
Get a multi-bit value in the flags data (which is stored as one 64-bit integer). More... | |
void | setFlagsNumber (uint64_t mask, uint64_t shift, uint64_t num) |
Set a multi-bit value in the flags data (which is stored as one 64-bit integer). More... | |
uint64_t | getFlags () const |
Get the flags. More... | |
void | setFlags (uint64_t flags) |
Set the flags. More... | |
Additional Inherited Members | |
Static Public Member Functions inherited from utl::String | |
static String | repeat (char c, size_t num) |
Return a string consisting of a repeated character. More... | |
static String | spaces (size_t num) |
Return a string consisting of the given number of spaces. More... | |
Static Public Member Functions inherited from utl::Object | |
static Object * | serializeInNullable (Stream &is, uint_t mode=ser_default) |
Serialize a nullptr-able object from an input stream. More... | |
static void | serializeOutNullable (const Object *object, Stream &os, uint_t mode=ser_default) |
Serialize a nullptr-able object to an output stream. More... | |
static void | serializeNullable (Object *&object, Stream &stream, uint_t io, uint_t mode=ser_default) |
Serialize a nullptr-able object to or from a stream. More... | |
static Object * | serializeInBoxed (Stream &is, uint_t mode=ser_default) |
Serialize a boxed object from an input stream. More... | |
static void | serializeBoxed (Object *&object, Stream &stream, uint_t io, uint_t mode=ser_default) |
Serialize a boxed object to or from a stream. More... | |
Regular expression.
A regular expression (RE) is a pattern that describes a set of matching strings. The grammar for a RE (as supported in this implementation) is as follows:
A REGEX consists of one or more BRANCHes separated by a '|'. The RE matches any string that matches any one of the branches.
A BRANCH consists of one or more PIECEs concatenated together. It matches a string formed by a match for the first piece, followed by a match for the second, and so on.
A PIECE consists of an ATOM optionally followed by one of:
* : match the atom zero or more times
+ : match the atom one or more times
? : match the atom zero or one times
{m} : match the atom exactly m times
{m,} : match the atom m or more times
{m,n} : match the atom between m and n times
An ATOM matches any of the following:
an expression enclosed in '()' (matching the expression)
^ : beginning of line
$ : end of line
. : any character
\a : match any alphabetic character
\d : match any digit
\s : match any whitespace character
\w : match any alpha-numeric or underscore character
\A : inverse of \a
\D : inverse of \d
\S : inverse of \s
\W : inverse of \w
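The special characters '^$[](){}|*+?\' can be matched literally by preceding them with a '\'. For example, to match '|', you'd have to escape it like this: '\|'.
A bracket expression is a list of characters enclosed in '[]', matching any character in the list. If the list begins with '^', match any character not in the list. If two characters in the list are separated by '-', match any character between the two (inclusive).
The following character classes may be used inside '[]':
[:alnum:] : alphanumeric characters: [a-zA-Z0-9]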
[:alpha:] : alphabetic characters: [a-zA-Z]
[:blank:] : space and tab: [ \t]
[:cntrl:] : control characters: [\x00-\x1f\x7f]
[:digit:] : decimal digits: [0-9]
[:graph:] : visible characters: [\x21-\x7e]
[:lower:] : lowercase characters: [a-z]
[:odigit:] : octal digits: [0-7] (actually not POSIX, I added this one)
[:print:] : visible characters and spaces: [\x20-\x7e]
[:punct:] : punctuation characters: [-!"#$%&'()*+,./:;<=>?@[\\\]_`{|}~]
[:space:] : whitespace characters: [ \t\r\n\v\f]
[:upper:] : uppercase characters: [A-Z]
[:word:] : alphanumeric characters plus '_': [a-zA-Z0-9_]
[:xdigit:] : hexadecimal digits: [0-9a-fA-F]
Examples:
[x-z[:punct:]] : lowercase x, y, or z, or any punctuation character
[[:upper:]yz] : any uppercase character or lowercase y or z
[[:graph:][^,.]] : any visible character except ',' or '.'
[[:graph:][^:lower:][^.]] : any visible character except lowercase or '.'
|
inline |
|
inline |
Constructor.
str | regex |
Definition at line 150 of file Regex.h.
References utl::compare(), utl::copy(), and utl::init().
|
virtual |
Compare with another object.
If no overridden version succeeds in doing the comparison, then an attempt will be made to re-start the comparison process using one or both of the objects' keys. Usually, an override of compare() should call the superclass's compare() if it doesn't know how to compare itself with the rhs object.
rhs | object to compare with |
Reimplemented from utl::String.
|
virtual |
Copy another instance.
When you override copy(), you should usually call the superclass's copy().
rhs | object to copy |
Reimplemented from utl::String.
|
virtual |
"Steal" the internal representation from another instance.
The default implementation just calls vclone(), so you have to override this if you want a "move" capability.
Reimplemented from utl::String.
bool utl::Regex::compile | ( | ) |
Compile the regex.
bool utl::Regex::match | ( | const String & | str, |
RegexMatch * | m = nullptr , |
||
size_t | idx = 0 |
||
) | const |
Attempt to match the given string against the regex.
str | string to match against |
m | pointer to RegexMatch structure (optional; defaults to nullptr) |
idx | starting search index |
At each position within str, attempt to match the regex.
If a match is found, the matching substring is replaced by the replacement string.
str | search/replace string |
rep | replacement string |
|
inline |
Determine whether the regex is successfully compiled.
Definition at line 188 of file Regex.h.
References ASSERTD, utl::deInit(), utl::init(), UTL_CLASS_DECL, UTL_CLASS_NO_COMPARE, and UTL_CLASS_NO_COPY.