blocxx
PosixRegEx.cpp
Go to the documentation of this file.
1/*******************************************************************************
2* Copyright (C) 2005 Novell, Inc. All rights reserved.
3*
4* Redistribution and use in source and binary forms, with or without
5* modification, are permitted provided that the following conditions are met:
6*
7* - Redistributions of source code must retain the above copyright notice,
8* this list of conditions and the following disclaimer.
9*
10* - Redistributions in binary form must reproduce the above copyright notice,
11* this list of conditions and the following disclaimer in the documentation
12* and/or other materials provided with the distribution.
13*
14* - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
15* contributors may be used to endorse or promote products derived from this
16* software without specific prior written permission.
17*
18* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
19* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21* ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE
22* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*******************************************************************************/
34#include "blocxx/PosixRegEx.hpp"
35#ifdef BLOCXX_HAVE_REGEX
36#ifdef BLOCXX_HAVE_REGEX_H
37
39#include "blocxx/Assertion.hpp"
40#include "blocxx/Format.hpp"
41
42
43namespace BLOCXX_NAMESPACE
44{
45
namespace
{
// REG_NOERROR is only a non-standard extension of linux's regex.h, so the
// file does not rely on it and defines its own file-local "success" code.
const int REG_NOERROR = 0;
}
51
52// -------------------------------------------------------------------
53static String
54substitute_caps(const PosixRegEx::MatchArray &sub,
55 const String &str, const String &rep)
56{
57 static const char *cap_refs[] = {
58 NULL, "\\1", "\\2", "\\3", "\\4",
59 "\\5", "\\6", "\\7", "\\8", "\\9", NULL
60 };
61
62 String res( rep);
63 size_t pos;
64
65 for(size_t i=1; cap_refs[i] != NULL; i++)
66 {
67 String cap;
68
69 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
70 {
71 cap = str.substring(sub[i].rm_so, sub[i].rm_eo
72 - sub[i].rm_so);
73 }
74
75 pos = res.indexOf(cap_refs[i]);
76 while( pos != String::npos)
77 {
78 size_t quotes = 0;
79 size_t at = pos;
80
81 while( at > 0 && res.charAt(--at) == '\\')
82 quotes++;
83
84 if( quotes % 2)
85 {
86 quotes = (quotes + 1) / 2;
87
88 res = res.erase(pos - quotes, quotes);
89
90 pos = res.indexOf(cap_refs[i],
91 pos + 2 - quotes);
92 }
93 else
94 {
95 quotes = quotes / 2;
96
97 res = res.substring(0, pos - quotes) +
98 cap +
99 res.substring(pos + 2);
100
101 pos = res.indexOf(cap_refs[i],
102 pos + cap.length() - quotes);
103 }
104 }
105 }
106 return res;
107}
108
109
110// -------------------------------------------------------------------
111static inline String
112getError(const regex_t *preg, const int code)
113{
114 char err[256] = { '\0'};
115 ::regerror(code, preg, err, sizeof(err));
116 return String(err);
117}
118
119
120// -------------------------------------------------------------------
121PosixRegEx::PosixRegEx()
122 : compiled(false)
123 , m_flags(0)
124 , m_ecode(REG_NOERROR)
125{
126}
127
128
129// -------------------------------------------------------------------
130PosixRegEx::PosixRegEx(const String &regex, int cflags)
131 : compiled(false)
132 , m_flags(0)
133 , m_ecode(REG_NOERROR)
134{
135 if( !compile(regex, cflags))
136 {
137 BLOCXX_THROW_ERR(RegExCompileException,
138 errorString().c_str(), m_ecode);
139 }
140}
141
142
143// -------------------------------------------------------------------
144PosixRegEx::PosixRegEx(const PosixRegEx &ref)
145 : compiled(false)
146 , m_flags(ref.m_flags)
147 , m_ecode(REG_NOERROR)
148 , m_rxstr(ref.m_rxstr)
149{
150 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
151 {
152 BLOCXX_THROW_ERR(RegExCompileException,
153 errorString().c_str(), m_ecode);
154 }
155}
156
157
158// -------------------------------------------------------------------
159PosixRegEx::~PosixRegEx()
160{
161 if( compiled)
162 {
163 regfree(&m_regex);
164 }
165}
166
167
168// -------------------------------------------------------------------
169PosixRegEx &
170PosixRegEx::operator = (const PosixRegEx &ref)
171{
172 if( !ref.compiled)
173 {
174 m_ecode = REG_NOERROR;
175 m_error.erase();
176 m_flags = ref.m_flags;
177 m_rxstr = ref.m_rxstr;
178 if( compiled)
179 {
180 regfree(&m_regex);
181 compiled = false;
182 }
183 }
184 else if( !compile(ref.m_rxstr, ref.m_flags))
185 {
186 BLOCXX_THROW_ERR(RegExCompileException,
187 errorString().c_str(), m_ecode);
188 }
189 return *this;
190}
191
192
193// -------------------------------------------------------------------
194bool
195PosixRegEx::compile(const String &regex, int cflags)
196{
197 if( compiled)
198 {
199 regfree(&m_regex);
200 compiled = false;
201 }
202
203 m_rxstr = regex;
204 m_flags = cflags;
205 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
206 if( m_ecode == REG_NOERROR)
207 {
208 compiled = true;
209 m_error.erase();
210 return true;
211 }
212 else
213 {
214 m_error = getError(&m_regex, m_ecode);
215 return false;
216 }
217}
218
219
220// -------------------------------------------------------------------
221int
222PosixRegEx::errorCode()
223{
224 return m_ecode;
225}
226
227
228// -------------------------------------------------------------------
229String
230PosixRegEx::errorString() const
231{
232 return m_error;
233}
234
235
236// -------------------------------------------------------------------
237String
238PosixRegEx::patternString() const
239{
240 return m_rxstr;
241}
242
243
244// -------------------------------------------------------------------
245int
246PosixRegEx::compileFlags() const
247{
248 return m_flags;
249}
250
251
252// -------------------------------------------------------------------
253bool
254PosixRegEx::isCompiled() const
255{
256 return compiled;
257}
258
259
260// -------------------------------------------------------------------
261bool
262PosixRegEx::execute(MatchArray &sub, const String &str,
263 size_t index, size_t count, int eflags)
264{
265 if( !compiled)
266 {
267 BLOCXX_THROW(RegExCompileException,
268 "Regular expression is not compiled");
269 }
270
271 if( index > str.length())
272 {
273 BLOCXX_THROW(OutOfBoundsException,
274 Format("String index out of bounds ("
275 "length = %1, index = %2).",
276 str.length(), index
277 ).c_str());
278 }
279
280 if( count == 0)
281 {
282 count = m_regex.re_nsub + 1;
283 }
284 AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]);
285 rsub[0].rm_so = -1;
286 rsub[0].rm_eo = -1;
287
288 sub.clear();
289 m_ecode = ::regexec(&m_regex, str.c_str() + index,
290 count, rsub.get(), eflags);
291 if( m_ecode == REG_NOERROR)
292 {
293 m_error.erase();
294 if( m_flags & REG_NOSUB)
295 {
296 return true;
297 }
298
299 sub.resize(count);
300 for(size_t n = 0; n < count; n++)
301 {
302 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
303 {
304 sub[n] = rsub[n];
305 }
306 else
307 {
308 rsub[n].rm_so += index;
309 rsub[n].rm_eo += index;
310 sub[n] = rsub[n];
311 }
312 }
313 return true;
314 }
315 else
316 {
317 m_error = getError(&m_regex, m_ecode);
318 return false;
319 }
320}
321
322
323// -------------------------------------------------------------------
325PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
326{
327 if( !compiled)
328 {
329 BLOCXX_THROW(RegExCompileException,
330 "Regular expression is not compiled");
331 }
332
333 MatchArray rsub;
334 StringArray ssub;
335
336 bool match = execute(rsub, str, index, count, eflags);
337 if( match)
338 {
339 if( rsub.empty())
340 {
341 BLOCXX_THROW(RegExCompileException,
342 "Non-capturing regular expression");
343 }
344
345 MatchArray::const_iterator i=rsub.begin();
346 for( ; i != rsub.end(); ++i)
347 {
348 if( i->rm_so >= 0 && i->rm_eo >= 0)
349 {
350 ssub.push_back(str.substring(i->rm_so,
351 i->rm_eo - i->rm_so));
352 }
353 else
354 {
355 ssub.push_back(String(""));
356 }
357 }
358 }
359 else if(m_ecode != REG_NOMATCH)
360 {
361 BLOCXX_THROW_ERR(RegExExecuteException,
362 errorString().c_str(), m_ecode);
363 }
364 return ssub;
365}
366
367
368// -------------------------------------------------------------------
369blocxx::String
370PosixRegEx::replace(const String &str, const String &rep,
371 bool global, int eflags)
372{
373 if( !compiled)
374 {
375 BLOCXX_THROW(RegExCompileException,
376 "Regular expression is not compiled");
377 }
378
379 MatchArray rsub;
380 bool match;
381 size_t off = 0;
382 String out = str;
383
384 do
385 {
386 match = execute(rsub, out, off, 0, eflags);
387 if( match)
388 {
389 if( rsub.empty() ||
390 rsub[0].rm_so < 0 ||
391 rsub[0].rm_eo < 0)
392 {
393 // only if empty (missused as guard).
394 BLOCXX_THROW(RegExCompileException,
395 "Non-capturing regular expression");
396 }
397
398 String res = substitute_caps(rsub, out, rep);
399
400 out = out.substring(0, rsub[0].rm_so) +
401 res + out.substring(rsub[0].rm_eo);
402
403 off = rsub[0].rm_so + res.length();
404 }
405 else if(m_ecode == REG_NOMATCH)
406 {
407 m_ecode = REG_NOERROR;
408 m_error.erase();
409 }
410 else
411 {
412 BLOCXX_THROW_ERR(RegExExecuteException,
413 errorString().c_str(), m_ecode);
414 }
415 } while(global && match && out.length() > off);
416
417 return out;
418}
419
420// -------------------------------------------------------------------
422PosixRegEx::split(const String &str, bool empty, int eflags)
423{
424 if( !compiled)
425 {
426 BLOCXX_THROW(RegExCompileException,
427 "Regular expression is not compiled");
428 }
429
430 MatchArray rsub;
431 StringArray ssub;
432 bool match;
433 size_t off = 0;
434 size_t len = str.length();
435
436 do
437 {
438 match = execute(rsub, str, off, 1, eflags);
439 if( match)
440 {
441 if( rsub.empty() ||
442 rsub[0].rm_so < 0 ||
443 rsub[0].rm_eo < 0)
444 {
445 BLOCXX_THROW(RegExCompileException,
446 "Non-capturing regular expression");
447 }
448
449 if( empty || ((size_t)rsub[0].rm_so > off))
450 {
451 ssub.push_back(str.substring(off,
452 rsub[0].rm_so - off));
453 }
454 off = rsub[0].rm_eo;
455 }
456 else if(m_ecode == REG_NOMATCH)
457 {
458 String tmp = str.substring(off);
459 if( empty || !tmp.empty())
460 {
461 ssub.push_back(tmp);
462 }
463 m_ecode = REG_NOERROR;
464 m_error.erase();
465 }
466 else
467 {
468 BLOCXX_THROW_ERR(RegExExecuteException,
469 errorString().c_str(), m_ecode);
470 }
471 } while(match && len > off);
472
473 return ssub;
474}
475
476
477// -------------------------------------------------------------------
479PosixRegEx::grep(const StringArray &src, int eflags)
480{
481 if( !compiled)
482 {
483 BLOCXX_THROW(RegExCompileException,
484 "Regular expression is not compiled");
485 }
486
487 m_ecode = REG_NOERROR;
488 m_error.erase();
489
490 StringArray out;
491 if( !src.empty())
492 {
493 StringArray::const_iterator i=src.begin();
494 for( ; i != src.end(); ++i)
495 {
496 int ret = ::regexec(&m_regex, i->c_str(),
497 0, NULL, eflags);
498 if( ret == REG_NOERROR)
499 {
500 out.push_back(*i);
501 }
502 else if(ret != REG_NOMATCH)
503 {
504 m_ecode = ret;
505 m_error = getError(&m_regex, m_ecode);
506 BLOCXX_THROW_ERR(RegExExecuteException,
507 errorString().c_str(), m_ecode);
508 }
509 }
510 }
511
512 return out;
513}
514
515
516// -------------------------------------------------------------------
517bool
518PosixRegEx::match(const String &str, size_t index, int eflags) const
519{
520 if( !compiled)
521 {
522 BLOCXX_THROW(RegExCompileException,
523 "Regular expression is not compiled");
524 }
525
526 if( index > str.length())
527 {
528 BLOCXX_THROW(OutOfBoundsException,
529 Format("String index out of bounds ("
530 "length = %1, index = %2).",
531 str.length(), index
532 ).c_str());
533 }
534
535 m_ecode = ::regexec(&m_regex, str.c_str() + index,
536 0, NULL, eflags);
537
538 if( m_ecode == REG_NOERROR)
539 {
540 m_error.erase();
541 return true;
542 }
543 else if(m_ecode == REG_NOMATCH)
544 {
545 m_error = getError(&m_regex, m_ecode);
546 return false;
547 }
548 else
549 {
550 m_error = getError(&m_regex, m_ecode);
551 BLOCXX_THROW_ERR(RegExExecuteException,
552 errorString().c_str(), m_ecode);
553 }
554}
555
556
557// -------------------------------------------------------------------
558} // namespace BLOCXX_NAMESPACE
559
560#endif // BLOCXX_HAVE_REGEX_H
561#endif // BLOCXX_HAVE_REGEX
562
563/* vim: set ts=8 sts=8 sw=8 ai noet: */
564
#define BLOCXX_THROW(exType, msg)
Throw an exception using FILE and LINE.
#define BLOCXX_THROW_ERR(exType, msg, err)
Throw an exception using FILE and LINE.
const int code
iterator erase(iterator position)
Remove an element of the Array specified with an iterator.
static const size_t npos
Definition String.hpp:742
Taken from RFC 1321.
Array< String > StringArray
Definition CommonFwd.hpp:73