blocxx
PosixRegEx.cpp
Go to the documentation of this file.
1/*******************************************************************************
2* Copyright (C) 2005 Novell, Inc. All rights reserved.
3*
4* Redistribution and use in source and binary forms, with or without
5* modification, are permitted provided that the following conditions are met:
6*
7* - Redistributions of source code must retain the above copyright notice,
8* this list of conditions and the following disclaimer.
9*
10* - Redistributions in binary form must reproduce the above copyright notice,
11* this list of conditions and the following disclaimer in the documentation
12* and/or other materials provided with the distribution.
13*
14* - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
15* contributors may be used to endorse or promote products derived from this
16* software without specific prior written permission.
17*
18* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
19* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21* ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE
22* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*******************************************************************************/
33
34#include "blocxx/PosixRegEx.hpp"
35#ifdef BLOCXX_HAVE_REGEX
36#ifdef BLOCXX_HAVE_REGEX_H
37
39#include "blocxx/Assertion.hpp"
40#include "blocxx/Format.hpp"
41
42
43namespace BLOCXX_NAMESPACE
44{
45
namespace
{
// Value regcomp() and regexec() return on success. Linux's regex.h offers a
// REG_NOERROR enumerator for this, but it is non-standard, so this file
// defines its own file-local constant instead of relying on it.
const int REG_NOERROR = 0;
} // anonymous namespace
51
52// -------------------------------------------------------------------
53static String
54substitute_caps(const PosixRegEx::MatchArray &sub,
55 const String &str, const String &rep)
56{
57 static const char *cap_refs[] = {
58 NULL, "\\1", "\\2", "\\3", "\\4",
59 "\\5", "\\6", "\\7", "\\8", "\\9", NULL
60 };
61
62 String res( rep);
63 size_t pos;
64
65 for(size_t i=1; cap_refs[i] != NULL; i++)
66 {
67 String cap;
68
69 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
70 {
71 cap = str.substring(sub[i].rm_so, sub[i].rm_eo
72 - sub[i].rm_so);
73 }
74
75 pos = res.indexOf(cap_refs[i]);
76 while( pos != String::npos)
77 {
78 size_t quotes = 0;
79 size_t at = pos;
80
81 while( at > 0 && res.charAt(--at) == '\\')
82 quotes++;
83
84 if( quotes % 2)
85 {
86 quotes = (quotes + 1) / 2;
87
88 res = res.erase(pos - quotes, quotes);
89
90 pos = res.indexOf(cap_refs[i],
91 pos + 2 - quotes);
92 }
93 else
94 {
95 quotes = quotes / 2;
96
97 res = res.substring(0, pos - quotes) +
98 cap +
99 res.substring(pos + 2);
100
101 pos = res.indexOf(cap_refs[i],
102 pos + cap.length() - quotes);
103 }
104 }
105 }
106 return res;
107}
108
109
110// -------------------------------------------------------------------
111static inline String
112getError(const regex_t *preg, const int code)
113{
114 char err[256] = { '\0'};
115 ::regerror(code, preg, err, sizeof(err));
116 return String(err);
117}
118
119
120// -------------------------------------------------------------------
121PosixRegEx::PosixRegEx()
122 : compiled(false)
123 , m_flags(0)
124 , m_ecode(REG_NOERROR)
125{
126}
127
128
129// -------------------------------------------------------------------
130PosixRegEx::PosixRegEx(const String &regex, int cflags)
131 : compiled(false)
132 , m_flags(0)
133 , m_ecode(REG_NOERROR)
134{
135 if( !compile(regex, cflags))
136 {
137 BLOCXX_THROW_ERR(RegExCompileException,
138 errorString().c_str(), m_ecode);
139 }
140}
141
142
143// -------------------------------------------------------------------
144PosixRegEx::PosixRegEx(const PosixRegEx &ref)
145 : compiled(false)
146 , m_flags(ref.m_flags)
147 , m_ecode(REG_NOERROR)
148 , m_rxstr(ref.m_rxstr)
149{
150 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
151 {
152 BLOCXX_THROW_ERR(RegExCompileException,
153 errorString().c_str(), m_ecode);
154 }
155}
156
157
158// -------------------------------------------------------------------
159PosixRegEx::~PosixRegEx()
160{
161 if( compiled)
162 {
163 regfree(&m_regex);
164 }
165}
166
167
168// -------------------------------------------------------------------
169PosixRegEx &
170PosixRegEx::operator = (const PosixRegEx &ref)
171{
172 if( !ref.compiled)
173 {
174 m_ecode = REG_NOERROR;
175 m_error.erase();
176 m_flags = ref.m_flags;
177 m_rxstr = ref.m_rxstr;
178 if( compiled)
179 {
180 regfree(&m_regex);
181 compiled = false;
182 }
183 }
184 else if( !compile(ref.m_rxstr, ref.m_flags))
185 {
186 BLOCXX_THROW_ERR(RegExCompileException,
187 errorString().c_str(), m_ecode);
188 }
189 return *this;
190}
191
192
193// -------------------------------------------------------------------
194bool
195PosixRegEx::compile(const String &regex, int cflags)
196{
197 if( compiled)
198 {
199 regfree(&m_regex);
200 compiled = false;
201 }
202
203 m_rxstr = regex;
204 m_flags = cflags;
205 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
206 if( m_ecode == REG_NOERROR)
207 {
208 compiled = true;
209 m_error.erase();
210 return true;
211 }
212 else
213 {
214 m_error = getError(&m_regex, m_ecode);
215 return false;
216 }
217}
218
219
220// -------------------------------------------------------------------
221int
222PosixRegEx::errorCode()
223{
224 return m_ecode;
225}
226
227
228// -------------------------------------------------------------------
229String
230PosixRegEx::errorString() const
231{
232 return m_error;
233}
234
235
236// -------------------------------------------------------------------
237String
238PosixRegEx::patternString() const
239{
240 return m_rxstr;
241}
242
243
244// -------------------------------------------------------------------
245int
246PosixRegEx::compileFlags() const
247{
248 return m_flags;
249}
250
251
252// -------------------------------------------------------------------
253bool
254PosixRegEx::isCompiled() const
255{
256 return compiled;
257}
258
259
260// -------------------------------------------------------------------
261bool
262PosixRegEx::execute(MatchArray &sub, const String &str,
263 size_t index, size_t count, int eflags)
264{
265 if( !compiled)
266 {
267 BLOCXX_THROW(RegExCompileException,
268 "Regular expression is not compiled");
269 }
270
271 if( index > str.length())
272 {
273 BLOCXX_THROW(OutOfBoundsException,
274 Format("String index out of bounds ("
275 "length = %1, index = %2).",
276 str.length(), index
277 ).c_str());
278 }
279
280 if( count == 0)
281 {
282 count = m_regex.re_nsub + 1;
283 }
284 AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]);
285 rsub[0].rm_so = -1;
286 rsub[0].rm_eo = -1;
287
288 sub.clear();
289 m_ecode = ::regexec(&m_regex, str.c_str() + index,
290 count, rsub.get(), eflags);
291 if( m_ecode == REG_NOERROR)
292 {
293 m_error.erase();
294 if( m_flags & REG_NOSUB)
295 {
296 return true;
297 }
298
299 sub.resize(count);
300 for(size_t n = 0; n < count; n++)
301 {
302 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
303 {
304 sub[n] = rsub[n];
305 }
306 else
307 {
308 rsub[n].rm_so += index;
309 rsub[n].rm_eo += index;
310 sub[n] = rsub[n];
311 }
312 }
313 return true;
314 }
315 else
316 {
317 m_error = getError(&m_regex, m_ecode);
318 return false;
319 }
320}
321
322
323// -------------------------------------------------------------------
325PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
326{
327 if( !compiled)
328 {
329 BLOCXX_THROW(RegExCompileException,
330 "Regular expression is not compiled");
331 }
332
333 MatchArray rsub;
334 StringArray ssub;
335
336 bool match = execute(rsub, str, index, count, eflags);
337 if( match)
338 {
339 if( rsub.empty())
340 {
341 BLOCXX_THROW(RegExCompileException,
342 "Non-capturing regular expression");
343 }
344
345 MatchArray::const_iterator i=rsub.begin();
346 for( ; i != rsub.end(); ++i)
347 {
348 if( i->rm_so >= 0 && i->rm_eo >= 0)
349 {
350 ssub.push_back(str.substring(i->rm_so,
351 i->rm_eo - i->rm_so));
352 }
353 else
354 {
355 ssub.push_back(String(""));
356 }
357 }
358 }
359 else if(m_ecode != REG_NOMATCH)
360 {
361 BLOCXX_THROW_ERR(RegExExecuteException,
362 errorString().c_str(), m_ecode);
363 }
364 return ssub;
365}
366
367
368// -------------------------------------------------------------------
369blocxx::String
370PosixRegEx::replace(const String &str, const String &rep,
371 bool global, int eflags)
372{
373 if( !compiled)
374 {
375 BLOCXX_THROW(RegExCompileException,
376 "Regular expression is not compiled");
377 }
378
379 MatchArray rsub;
380 bool match;
381 size_t off = 0;
382 String out = str;
383
384 do
385 {
386 match = execute(rsub, out, off, 0, eflags);
387 if( match)
388 {
389 if( rsub.empty() ||
390 rsub[0].rm_so < 0 ||
391 rsub[0].rm_eo < 0)
392 {
393 // only if empty (missused as guard).
394 BLOCXX_THROW(RegExCompileException,
395 "Non-capturing regular expression");
396 }
397
398 String res = substitute_caps(rsub, out, rep);
399
400 out = out.substring(0, rsub[0].rm_so) +
401 res + out.substring(rsub[0].rm_eo);
402
403 off = rsub[0].rm_so + res.length();
404 }
405 else if(m_ecode == REG_NOMATCH)
406 {
407 m_ecode = REG_NOERROR;
408 m_error.erase();
409 }
410 else
411 {
412 BLOCXX_THROW_ERR(RegExExecuteException,
413 errorString().c_str(), m_ecode);
414 }
415 } while(global && match && out.length() > off);
416
417 return out;
418}
419
420// -------------------------------------------------------------------
422PosixRegEx::split(const String &str, bool empty, int eflags)
423{
424 if( !compiled)
425 {
426 BLOCXX_THROW(RegExCompileException,
427 "Regular expression is not compiled");
428 }
429
430 MatchArray rsub;
431 StringArray ssub;
432 bool match;
433 size_t off = 0;
434 size_t len = str.length();
435
436 do
437 {
438 match = execute(rsub, str, off, 1, eflags);
439 if( match)
440 {
441 if( rsub.empty() ||
442 rsub[0].rm_so < 0 ||
443 rsub[0].rm_eo < 0)
444 {
445 BLOCXX_THROW(RegExCompileException,
446 "Non-capturing regular expression");
447 }
448
449 if( empty || ((size_t)rsub[0].rm_so > off))
450 {
451 ssub.push_back(str.substring(off,
452 rsub[0].rm_so - off));
453 }
454 off = rsub[0].rm_eo;
455 }
456 else if(m_ecode == REG_NOMATCH)
457 {
458 String tmp = str.substring(off);
459 if( empty || !tmp.empty())
460 {
461 ssub.push_back(tmp);
462 }
463 m_ecode = REG_NOERROR;
464 m_error.erase();
465 }
466 else
467 {
468 BLOCXX_THROW_ERR(RegExExecuteException,
469 errorString().c_str(), m_ecode);
470 }
471 } while(match && len > off);
472
473 return ssub;
474}
475
476
477// -------------------------------------------------------------------
479PosixRegEx::grep(const StringArray &src, int eflags)
480{
481 if( !compiled)
482 {
483 BLOCXX_THROW(RegExCompileException,
484 "Regular expression is not compiled");
485 }
486
487 m_ecode = REG_NOERROR;
488 m_error.erase();
489
490 StringArray out;
491 if( !src.empty())
492 {
493 StringArray::const_iterator i=src.begin();
494 for( ; i != src.end(); ++i)
495 {
496 int ret = ::regexec(&m_regex, i->c_str(),
497 0, NULL, eflags);
498 if( ret == REG_NOERROR)
499 {
500 out.push_back(*i);
501 }
502 else if(ret != REG_NOMATCH)
503 {
504 m_ecode = ret;
505 m_error = getError(&m_regex, m_ecode);
506 BLOCXX_THROW_ERR(RegExExecuteException,
507 errorString().c_str(), m_ecode);
508 }
509 }
510 }
511
512 return out;
513}
514
515
516// -------------------------------------------------------------------
517bool
518PosixRegEx::match(const String &str, size_t index, int eflags) const
519{
520 if( !compiled)
521 {
522 BLOCXX_THROW(RegExCompileException,
523 "Regular expression is not compiled");
524 }
525
526 if( index > str.length())
527 {
528 BLOCXX_THROW(OutOfBoundsException,
529 Format("String index out of bounds ("
530 "length = %1, index = %2).",
531 str.length(), index
532 ).c_str());
533 }
534
535 m_ecode = ::regexec(&m_regex, str.c_str() + index,
536 0, NULL, eflags);
537
538 if( m_ecode == REG_NOERROR)
539 {
540 m_error.erase();
541 return true;
542 }
543 else if(m_ecode == REG_NOMATCH)
544 {
545 m_error = getError(&m_regex, m_ecode);
546 return false;
547 }
548 else
549 {
550 m_error = getError(&m_regex, m_ecode);
551 BLOCXX_THROW_ERR(RegExExecuteException,
552 errorString().c_str(), m_ecode);
553 }
554}
555
556
557// -------------------------------------------------------------------
558} // namespace BLOCXX_NAMESPACE
559
560#endif // BLOCXX_HAVE_REGEX_H
561#endif // BLOCXX_HAVE_REGEX
562
563/* vim: set ts=8 sts=8 sw=8 ai noet: */
564
#define BLOCXX_THROW(exType, msg)
Throw an exception using FILE and LINE.
#define BLOCXX_THROW_ERR(exType, msg, err)
Throw an exception using FILE and LINE.
iterator erase(iterator position)
Remove an element of the Array specified with an iterator.
This String class is an abstract data type that represents a NUL-terminated string of characters.
Definition String.hpp:67
size_t indexOf(char ch, size_t fromIndex=0) const
Find the first occurrence of a given character in this String object.
Definition String.cpp:556
static const size_t npos
Definition String.hpp:742
Taken from RFC 1321.
Array< String > StringArray
Definition CommonFwd.hpp:73