libUTL++
Regex.h
1 #pragma once
2 
4 
5 #include <libutl/Array.h>
6 #include <libutl/BitArray.h>
7 #include <libutl/BoyerMooreSearch.h>
8 #include <libutl/Queue.h>
9 #include <libutl/Span.h>
10 #include <libutl/String.h>
11 
13 
14 UTL_NS_BEGIN;
15 
17 
18 class REnode;
19 class RegexMatch;
20 
22 
131 
133 class Regex : public String
134 {
136 
137 public:
139  Regex(const String& regex)
140  : String(regex)
141  {
142  init();
143  compile();
144  }
145 
150  Regex(const char* str)
151  : String(str)
152  {
153  init();
154  compile();
155  }
156 
157  virtual int compare(const Object& rhs) const;
158 
159  virtual void copy(const Object& rhs);
160 
161  virtual void steal(Object& rhs);
162 
163  void clear();
164 
166  bool compile();
167 
175  bool match(const String& str, RegexMatch* m = nullptr, size_t idx = 0) const;
176 
184  size_t searchReplace(String& str, const String& rep);
185 
187  bool
188  ok() const
189  {
190  return getFlag(flg_ok);
191  }
192 
193 private:
194  void init();
195  void deInit();
196 
197  static String getToken(const TDequeIt<String>& it);
198 
199  bool
200  isAnchored() const
201  {
202  return getFlag(flg_anchored);
203  }
204 
205  static bool isSymbol(const TDequeIt<String>& it);
206 
207  bool match(RegexMatch& m, size_t& idx, const REnode* node) const;
208 
209  REnode* parseExpression(TDequeIt<String>& it, bool paren = false);
210  REnode* parseBranch(TDequeIt<String>& it);
211  REnode* parsePiece(TDequeIt<String>& it);
212  REnode* parseAtom(TDequeIt<String>& it);
213  void parseBracketExpr(TDequeIt<String>& it, BitArray& charSet, bool tv);
214 
215  void scan(Queue<String>& queue);
216 
217  void
218  setAnchored(bool anchored)
219  {
220  setFlag(flg_anchored, anchored);
221  }
222 
223  bool setCharSet(const REnode* node, BitArray& charSet);
224 
225  void
226  setOK(bool ok)
227  {
228  setFlag(flg_ok, ok);
229  }
230 
231  void setTail(REnode* chain, REnode* tail);
232 
233 private:
234  REnode* _start;
235  TArray<REnode>* _regex;
236  uint_t _parenNo;
237  char _firstChar;
238  BoyerMooreSearch<>* _mustStr;
239  enum flg_t
240  {
241  flg_ok = 4,
242  flg_anchored = 5
243  };
244 };
245 
247 // RegexMatch //////////////////////////////////////////////////////////////////////////////////////
249 
/**
   Regular expression match information.

   RegexMatch stores information about the matches that were found when comparing a String
   against a Regex.

   \author Adam McKee
   \ingroup string
*/
259 
261 
262 class RegexMatch : public utl::Object
263 {
264  UTL_CLASS_DECL(RegexMatch, Object);
267 
268 public:
270  bool
271  ok() const
272  {
273  return _matches != nullptr;
274  }
275 
277  void
278  clear()
279  {
280  _matchStr.clear();
281  delete _matches;
282  _matches = nullptr;
283  }
284 
286  void
287  set(const String& matchStr, const TArray<Span<size_t>>* matches)
288  {
289  delete _matches;
290  _matchStr = matchStr;
291  _matches = matches;
292  }
293 
295  const String&
296  matchString() const
297  {
298  return _matchStr;
299  }
300 
302  size_t
303  numMatchSpans() const
304  {
305  ASSERTD(ok());
306  return _matches->size();
307  }
308 
310  Span<size_t>&
311  matchSpan(size_t idx)
312  {
313  ASSERTD(ok());
314  return (*_matches)(idx);
315  }
316 
318  const Span<size_t>&
319  matchSpan(size_t idx) const
320  {
321  ASSERTD(ok());
322  return (*_matches)(idx);
323  }
324 
329  String replaceString(const String& str) const;
330 
331 private:
332  void
333  init()
334  {
335  _matches = nullptr;
336  }
337  void
338  deInit()
339  {
340  delete _matches;
341  }
342 
343 private:
344  String _matchStr;
345  const TArray<Span<size_t>>* _matches;
346 };
347 
349 
350 UTL_NS_END;
bool ok() const
Determine whether the regex is successfully compiled.
Definition: Regex.h:188
void deInit()
De-initialize UTL++.
FIFO (first-in, first-out) data structure.
Definition: Queue.h:24
Template version of DequeIt.
Definition: TDequeIt.h:23
#define UTL_CLASS_DECL(DC, BC)
Declaration of standard UTL++ functionality for a non-template class.
Definition: macros.h:688
Character string.
Definition: String.h:31
Regex(const char *str)
Constructor.
Definition: Regex.h:150
Regex(const String &regex)
Constructor.
Definition: Regex.h:139
void copy(T *dest, const T *src, size_t len)
Copy one array of objects to another.
Definition: util_inl.h:690
Boyer-Moore string search.
unsigned int uint_t
Unsigned integer.
Definition: types.h:59
#define UTL_CLASS_NO_COMPARE
Declare that a class cannot do compare().
Definition: macros.h:344
#define UTL_CLASS_NO_COPY
Declare that a class cannot do copy().
Definition: macros.h:358
Regular expression.
Definition: Regex.h:133
Span of values.
Definition: Span.h:47
Root of UTL++ class hierarchy.
Definition: Object.h:52
int compare(bool lhs, bool rhs)
Compare two boolean values.
void init()
Initialize UTL++.
#define ASSERTD
Do an assertion in DEBUG mode only.
Array of n-bit values.
Definition: BitArray.h:36