Boost.Locale
index.hpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // https://www.boost.org/LICENSE_1_0.txt
6 
7 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
8 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
9 
10 #include <boost/locale/boundary/types.hpp>
11 #include <boost/locale/boundary/facets.hpp>
12 #include <boost/locale/boundary/segment.hpp>
13 #include <boost/locale/boundary/boundary_point.hpp>
14 #include <boost/assert.hpp>
15 #include <boost/cstdint.hpp>
16 #include <boost/iterator/iterator_facade.hpp>
17 #include <boost/shared_ptr.hpp>
18 #include <boost/type_traits/is_same.hpp>
19 #include <algorithm>
20 #include <iostream>
21 #include <iterator>
22 #include <locale>
23 #include <stdexcept>
24 #include <string>
25 #include <vector>
26 
27 #ifdef BOOST_MSVC
28 # pragma warning(push)
29 # pragma warning(disable : 4275 4251 4231 4660)
30 #endif
31 
32 namespace boost {
33 
34  namespace locale {
35 
36  namespace boundary {
44 
46 
47  namespace details {
48 
49  template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
50  struct mapping_traits {
51  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
52  static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
53  {
54  std::basic_string<char_type> str(b,e);
55  return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
56  }
57  };
58 
// Compile-time test: is SomeIteratorType one of the iterator types that the
// random-access mapping treats as pointing into one linear character buffer?
// Accepted are raw pointers to CharType and the (const_)iterator types of
// std::basic_string<CharType> and std::vector<CharType>.
template<typename CharType,typename SomeIteratorType>
struct linear_iterator_traits {
private:
    static const bool from_pointer =
        is_same<SomeIteratorType,CharType*>::value
        || is_same<SomeIteratorType,CharType const*>::value;

    static const bool from_string =
        is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
        || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value;

    static const bool from_vector =
        is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
        || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value;

public:
    static const bool is_linear = from_pointer || from_string || from_vector;
};
70 
71 
72 
73  template<typename IteratorType>
74  struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
75 
76  typedef typename std::iterator_traits<IteratorType>::value_type char_type;
77 
78 
79 
80  static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
81  {
82  index_type result;
83 
84  //
85  // Optimize for most common cases
86  //
87  // C++0x requires that string is continious in memory and all known
88  // string implementations
89  // do this because of c_str() support.
90  //
91 
92  if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
93  {
94  char_type const *begin = &*b;
95  char_type const *end = begin + (e-b);
96  index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
97  result.swap(tmp);
98  }
99  else {
100  std::basic_string<char_type> str(b,e);
101  index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
102  result.swap(tmp);
103  }
104  return result;
105  }
106  };
107 
108  template<typename BaseIterator>
109  class mapping {
110  public:
111  typedef BaseIterator base_iterator;
112  typedef typename std::iterator_traits<base_iterator>::value_type char_type;
113 
114 
115  mapping(boundary_type type,
116  base_iterator begin,
117  base_iterator end,
118  std::locale const &loc)
119  :
120  index_(new index_type()),
121  begin_(begin),
122  end_(end)
123  {
124  index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
125  index_->swap(idx);
126  }
127 
128  mapping()
129  {
130  }
131 
132  index_type const &index() const
133  {
134  return *index_;
135  }
136 
137  base_iterator begin() const
138  {
139  return begin_;
140  }
141 
142  base_iterator end() const
143  {
144  return end_;
145  }
146 
147  private:
148  boost::shared_ptr<index_type> index_;
149  base_iterator begin_,end_;
150  };
151 
152  template<typename BaseIterator>
153  class segment_index_iterator :
154  public boost::iterator_facade<
155  segment_index_iterator<BaseIterator>,
156  segment<BaseIterator>,
157  boost::bidirectional_traversal_tag,
158  segment<BaseIterator> const &
159  >
160  {
161  public:
162  typedef BaseIterator base_iterator;
163  typedef mapping<base_iterator> mapping_type;
164  typedef segment<base_iterator> segment_type;
165 
166  segment_index_iterator() : current_(0,0),map_(0)
167  {
168  }
169 
170  segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
171  map_(map),
172  mask_(mask),
173  full_select_(full_select)
174  {
175  set(p);
176  }
177  segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
178  map_(map),
179  mask_(mask),
180  full_select_(full_select)
181  {
182  if(is_begin)
183  set_begin();
184  else
185  set_end();
186  }
187 
188  segment_type const &dereference() const
189  {
190  return value_;
191  }
192 
193  bool equal(segment_index_iterator const &other) const
194  {
195  return map_ == other.map_ && current_.second == other.current_.second;
196  }
197 
198  void increment()
199  {
200  std::pair<size_t,size_t> next = current_;
201  if(full_select_) {
202  next.first = next.second;
203  while(next.second < size()) {
204  next.second++;
205  if(valid_offset(next.second))
206  break;
207  }
208  if(next.second == size())
209  next.first = next.second - 1;
210  }
211  else {
212  while(next.second < size()) {
213  next.first = next.second;
214  next.second++;
215  if(valid_offset(next.second))
216  break;
217  }
218  }
219  update_current(next);
220  }
221 
222  void decrement()
223  {
224  std::pair<size_t,size_t> next = current_;
225  if(full_select_) {
226  while(next.second >1) {
227  next.second--;
228  if(valid_offset(next.second))
229  break;
230  }
231  next.first = next.second;
232  while(next.first >0) {
233  next.first--;
234  if(valid_offset(next.first))
235  break;
236  }
237  }
238  else {
239  while(next.second >1) {
240  next.second--;
241  if(valid_offset(next.second))
242  break;
243  }
244  next.first = next.second - 1;
245  }
246  update_current(next);
247  }
248 
249  private:
250 
251  void set_end()
252  {
253  current_.first = size() - 1;
254  current_.second = size();
255  value_ = segment_type(map_->end(),map_->end(),0);
256  }
257  void set_begin()
258  {
259  current_.first = current_.second = 0;
260  value_ = segment_type(map_->begin(),map_->begin(),0);
261  increment();
262  }
263 
264  void set(base_iterator p)
265  {
266  size_t dist=std::distance(map_->begin(),p);
267  index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
268  index_type::const_iterator
269  boundary_point=std::upper_bound(b,e,break_info(dist));
270  while(boundary_point != e && (boundary_point->rule & mask_)==0)
271  boundary_point++;
272 
273  current_.first = current_.second = boundary_point - b;
274 
275  if(full_select_) {
276  while(current_.first > 0) {
277  current_.first --;
278  if(valid_offset(current_.first))
279  break;
280  }
281  }
282  else {
283  if(current_.first > 0)
284  current_.first --;
285  }
286  value_.first = map_->begin();
287  std::advance(value_.first,get_offset(current_.first));
288  value_.second = value_.first;
289  std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
290 
291  update_rule();
292  }
293 
294  void update_current(std::pair<size_t,size_t> pos)
295  {
296  std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
297  std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
298  std::advance(value_.first,first_diff);
299  std::advance(value_.second,second_diff);
300  current_ = pos;
301  update_rule();
302  }
303 
304  void update_rule()
305  {
306  if(current_.second != size()) {
307  value_.rule(index()[current_.second].rule);
308  }
309  }
310  size_t get_offset(size_t ind) const
311  {
312  if(ind == size())
313  return index().back().offset;
314  return index()[ind].offset;
315  }
316 
317  bool valid_offset(size_t offset) const
318  {
319  return offset == 0
320  || offset == size() // make sure we not acess index[size]
321  || (index()[offset].rule & mask_)!=0;
322  }
323 
324  size_t size() const
325  {
326  return index().size();
327  }
328 
329  index_type const &index() const
330  {
331  return map_->index();
332  }
333 
334 
335  segment_type value_;
336  std::pair<size_t,size_t> current_;
337  mapping_type const *map_;
338  rule_type mask_;
339  bool full_select_;
340  };
341 
342  template<typename BaseIterator>
343  class boundary_point_index_iterator :
344  public boost::iterator_facade<
345  boundary_point_index_iterator<BaseIterator>,
346  boundary_point<BaseIterator>,
347  boost::bidirectional_traversal_tag,
348  boundary_point<BaseIterator> const &
349  >
350  {
351  public:
352  typedef BaseIterator base_iterator;
353  typedef mapping<base_iterator> mapping_type;
354  typedef boundary_point<base_iterator> boundary_point_type;
355 
356  boundary_point_index_iterator() : current_(0),map_(0)
357  {
358  }
359 
360  boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
361  map_(map),
362  mask_(mask)
363  {
364  if(is_begin)
365  set_begin();
366  else
367  set_end();
368  }
369  boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
370  map_(map),
371  mask_(mask)
372  {
373  set(p);
374  }
375 
376  boundary_point_type const &dereference() const
377  {
378  return value_;
379  }
380 
381  bool equal(boundary_point_index_iterator const &other) const
382  {
383  return map_ == other.map_ && current_ == other.current_;
384  }
385 
386  void increment()
387  {
388  size_t next = current_;
389  while(next < size()) {
390  next++;
391  if(valid_offset(next))
392  break;
393  }
394  update_current(next);
395  }
396 
397  void decrement()
398  {
399  size_t next = current_;
400  while(next>0) {
401  next--;
402  if(valid_offset(next))
403  break;
404  }
405  update_current(next);
406  }
407 
408  private:
409  void set_end()
410  {
411  current_ = size();
412  value_ = boundary_point_type(map_->end(),0);
413  }
414  void set_begin()
415  {
416  current_ = 0;
417  value_ = boundary_point_type(map_->begin(),0);
418  }
419 
420  void set(base_iterator p)
421  {
422  size_t dist = std::distance(map_->begin(),p);
423 
424  index_type::const_iterator b=index().begin();
425  index_type::const_iterator e=index().end();
426  index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
427 
428  if(ptr==index().end())
429  current_=size()-1;
430  else
431  current_=ptr - index().begin();
432 
433  while(!valid_offset(current_))
434  current_ ++;
435 
436  std::ptrdiff_t diff = get_offset(current_) - dist;
437  std::advance(p,diff);
438  value_.iterator(p);
439  update_rule();
440  }
441 
442  void update_current(size_t pos)
443  {
444  std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
445  base_iterator i=value_.iterator();
446  std::advance(i,diff);
447  current_ = pos;
448  value_.iterator(i);
449  update_rule();
450  }
451 
452  void update_rule()
453  {
454  if(current_ != size()) {
455  value_.rule(index()[current_].rule);
456  }
457  }
458  size_t get_offset(size_t ind) const
459  {
460  if(ind == size())
461  return index().back().offset;
462  return index()[ind].offset;
463  }
464 
465  bool valid_offset(size_t offset) const
466  {
467  return offset == 0
468  || offset + 1 >= size() // last and first are always valid regardless of mark
469  || (index()[offset].rule & mask_)!=0;
470  }
471 
472  size_t size() const
473  {
474  return index().size();
475  }
476 
477  index_type const &index() const
478  {
479  return map_->index();
480  }
481 
482 
483  boundary_point_type value_;
484  size_t current_;
485  mapping_type const *map_;
486  rule_type mask_;
487  };
488 
489 
490  } // details
491 
493 
494  template<typename BaseIterator>
496 
497  template<typename BaseIterator>
499 
500 
552 
553  template<typename BaseIterator>
554  class segment_index {
555  public:
556 
560  typedef BaseIterator base_iterator;
561  #ifdef BOOST_LOCALE_DOXYGEN
562  typedef unspecified_iterator_type iterator;
580  typedef unspecified_iterator_type const_iterator;
581  #else
582  typedef details::segment_index_iterator<base_iterator> iterator;
583  typedef details::segment_index_iterator<base_iterator> const_iterator;
584  #endif
590 
600  segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
601  {
602  }
610  rule_type mask,
611  std::locale const &loc=std::locale())
612  :
613  map_(type,begin,end,loc),
614  mask_(mask),
615  full_select_(false)
616  {
617  }
625  std::locale const &loc=std::locale())
626  :
627  map_(type,begin,end,loc),
628  mask_(0xFFFFFFFFu),
629  full_select_(false)
630  {
631  }
632 
655 
656 
663  void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
664  {
665  map_ = mapping_type(type,begin,end,loc);
666  }
667 
677  iterator begin() const
678  {
679  return iterator(true,&map_,mask_,full_select_);
680  }
681 
689  iterator end() const
690  {
691  return iterator(false,&map_,mask_,full_select_);
692  }
693 
712  {
713  return iterator(p,&map_,mask_,full_select_);
714  }
715 
719  rule_type rule() const
720  {
721  return mask_;
722  }
726  void rule(rule_type v)
727  {
728  mask_ = v;
729  }
730 
743 
744  bool full_select() const
745  {
746  return full_select_;
747  }
748 
761 
762  void full_select(bool v)
763  {
764  full_select_ = v;
765  }
766 
767  private:
768  friend class boundary_point_index<base_iterator>;
769  typedef details::mapping<base_iterator> mapping_type;
770  mapping_type map_;
771  rule_type mask_;
772  bool full_select_;
773  };
774 
821 
822 
823  template<typename BaseIterator>
824  class boundary_point_index {
825  public:
829  typedef BaseIterator base_iterator;
830  #ifdef BOOST_LOCALE_DOXYGEN
831  typedef unspecified_iterator_type iterator;
849  typedef unspecified_iterator_type const_iterator;
850  #else
851  typedef details::boundary_point_index_iterator<base_iterator> iterator;
852  typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
853  #endif
859 
869  boundary_point_index() : mask_(0xFFFFFFFFu)
870  {
871  }
872 
880  rule_type mask,
881  std::locale const &loc=std::locale())
882  :
883  map_(type,begin,end,loc),
884  mask_(mask)
885  {
886  }
894  std::locale const &loc=std::locale())
895  :
896  map_(type,begin,end,loc),
897  mask_(0xFFFFFFFFu)
898  {
899  }
900 
923 
930  void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
931  {
932  map_ = mapping_type(type,begin,end,loc);
933  }
934 
944  iterator begin() const
945  {
946  return iterator(true,&map_,mask_);
947  }
948 
958  iterator end() const
959  {
960  return iterator(false,&map_,mask_);
961  }
962 
977  {
978  return iterator(p,&map_,mask_);
979  }
980 
984  rule_type rule() const
985  {
986  return mask_;
987  }
991  void rule(rule_type v)
992  {
993  mask_ = v;
994  }
995 
996  private:
997 
998  friend class segment_index<base_iterator>;
999  typedef details::mapping<base_iterator> mapping_type;
1000  mapping_type map_;
1001  rule_type mask_;
1002  };
1003 
1005  template<typename BaseIterator>
1006  segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
1007  map_(other.map_),
1008  mask_(0xFFFFFFFFu),
1009  full_select_(false)
1010  {
1011  }
1012 
1013  template<typename BaseIterator>
1014  boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
1015  map_(other.map_),
1016  mask_(0xFFFFFFFFu)
1017  {
1018  }
1019 
1020  template<typename BaseIterator>
1021  segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
1022  {
1023  map_ = other.map_;
1024  return *this;
1025  }
1026 
1027  template<typename BaseIterator>
1028  boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
1029  {
1030  map_ = other.map_;
1031  return *this;
1032  }
1034 
1037  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1039  #endif
1040  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1042  #endif
1043 
1046  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1048  #endif
1049  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1051  #endif
1052 
1055  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1057  #endif
1058  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1060  #endif
1061 
1064  #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
1066  #endif
1067  #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
1069  #endif
1070 
1071 
1072 
1073  } // boundary
1074 
1075  } // locale
1076 } // boost
1077 
1084 
1085 #ifdef BOOST_MSVC
1086 #pragma warning(pop)
1087 #endif
1088 
1089 #endif
void full_select(bool v)
Definition: index.hpp:762
iterator find(base_iterator p) const
Definition: index.hpp:976
a segment object that represents a pair of two iterators that define the range where this segment exi...
Definition: segment.hpp:102
boundary_type
Definition: types.hpp:38
bool full_select() const
Definition: index.hpp:744
rule_type rule() const
Definition: index.hpp:719
boundary_point_index const & operator=(segment_index< base_iterator > const &other)
This class holds an index of boundary points and allows iterating over them.
Definition: index.hpp:498
BaseIterator base_iterator
Definition: index.hpp:560
segment_index< std::u16string::const_iterator > u16ssegment_index
convenience typedef
Definition: index.hpp:1038
segment_index< char const * > csegment_index
convenience typedef
Definition: index.hpp:1044
void map(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:663
iterator begin() const
Definition: index.hpp:677
boundary_point_index< std::wstring::const_iterator > wsboundary_point_index
convenience typedef
Definition: index.hpp:1054
iterator end() const
Definition: index.hpp:958
boundary_point_index< wchar_t const * > wcboundary_point_index
convenience typedef
Definition: index.hpp:1063
boundary_point< base_iterator > value_type
Definition: index.hpp:858
segment< base_iterator > value_type
Definition: index.hpp:589
void rule(rule_type v)
Definition: index.hpp:991
segment_index const & operator=(boundary_point_index< base_iterator > const &)
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found.
Definition: types.hpp:50
boundary_point_index< char16_t const * > u16cboundary_point_index
convenience typedef
Definition: index.hpp:1065
void map(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:930
unspecified_iterator_type iterator
Definition: index.hpp:576
segment_index()
Definition: index.hpp:600
segment_index< char16_t const * > u16csegment_index
convenience typedef
Definition: index.hpp:1047
iterator end() const
Definition: index.hpp:689
iterator begin() const
Definition: index.hpp:944
boundary_point_index< std::string::const_iterator > sboundary_point_index
convenience typedef
Definition: index.hpp:1053
segment_index< std::string::const_iterator > ssegment_index
convenience typedef
Definition: index.hpp:1035
segment_index< char32_t const * > u32csegment_index
convenience typedef
Definition: index.hpp:1050
segment_index< std::wstring::const_iterator > wssegment_index
convenience typedef
Definition: index.hpp:1036
unspecified_iterator_type const_iterator
Definition: index.hpp:580
unspecified_iterator_type const_iterator
Definition: index.hpp:849
This class represents a boundary point in the text.
Definition: boundary_point.hpp:47
rule_type rule() const
Definition: index.hpp:984
void rule(rule_type v)
Definition: index.hpp:726
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:891
boundary_point_index< char32_t const * > u32cboundary_point_index
convenience typedef
Definition: index.hpp:1068
segment_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, std::locale const &loc=std::locale())
Definition: index.hpp:607
boundary_point_index< char const * > cboundary_point_index
convenience typedef
Definition: index.hpp:1062
boundary_point_index< std::u32string::const_iterator > u32sboundary_point_index
convenience typedef
Definition: index.hpp:1059
iterator find(base_iterator p) const
Definition: index.hpp:711
unspecified_iterator_type iterator
Definition: index.hpp:845
segment_index(boundary_type type, base_iterator begin, base_iterator end, std::locale const &loc=std::locale())
Definition: index.hpp:622
boundary_point_index< std::u16string::const_iterator > u16sboundary_point_index
convenience typedef
Definition: index.hpp:1056
BaseIterator base_iterator
Definition: index.hpp:829
segment_index< std::u32string::const_iterator > u32ssegment_index
convenience typedef
Definition: index.hpp:1041
std::vector< break_info > index_type
Definition: facets.hpp:82
boundary_point_index()
Definition: index.hpp:869
This class holds an index of segments in the text range and allows iterating over them.
Definition: index.hpp:495
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, std::locale const &loc=std::locale())
Definition: index.hpp:877
segment_index< wchar_t const * > wcsegment_index
convenience typedef
Definition: index.hpp:1045